-rw-r--r--Documentation/ABI/testing/ima_policy6
-rw-r--r--Documentation/ABI/testing/sysfs-bus-css23
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu23
-rw-r--r--Documentation/RCU/rcuref.txt21
-rw-r--r--Documentation/RCU/stallwarn.txt2
-rw-r--r--Documentation/RCU/whatisRCU.txt8
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst6
-rw-r--r--Documentation/admin-guide/hw-vuln/l1tf.rst2
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt25
-rw-r--r--Documentation/admin-guide/mm/numa_memory_policy.rst2
-rw-r--r--Documentation/arm64/elf_hwcaps.txt8
-rw-r--r--Documentation/atomic_t.txt26
-rw-r--r--Documentation/block/bfq-iosched.txt2
-rw-r--r--Documentation/cgroup-v1/blkio-controller.rst (renamed from Documentation/cgroup-v1/blkio-controller.txt)79
-rw-r--r--Documentation/cgroup-v1/cgroups.rst (renamed from Documentation/cgroup-v1/cgroups.txt)186
-rw-r--r--Documentation/cgroup-v1/cpuacct.rst (renamed from Documentation/cgroup-v1/cpuacct.txt)15
-rw-r--r--Documentation/cgroup-v1/cpusets.rst (renamed from Documentation/cgroup-v1/cpusets.txt)209
-rw-r--r--Documentation/cgroup-v1/devices.rst (renamed from Documentation/cgroup-v1/devices.txt)40
-rw-r--r--Documentation/cgroup-v1/freezer-subsystem.rst (renamed from Documentation/cgroup-v1/freezer-subsystem.txt)14
-rw-r--r--Documentation/cgroup-v1/hugetlb.rst (renamed from Documentation/cgroup-v1/hugetlb.txt)41
-rw-r--r--Documentation/cgroup-v1/index.rst30
-rw-r--r--Documentation/cgroup-v1/memcg_test.rst (renamed from Documentation/cgroup-v1/memcg_test.txt)265
-rw-r--r--Documentation/cgroup-v1/memory.rst (renamed from Documentation/cgroup-v1/memory.txt)463
-rw-r--r--Documentation/cgroup-v1/net_cls.rst (renamed from Documentation/cgroup-v1/net_cls.txt)37
-rw-r--r--Documentation/cgroup-v1/net_prio.rst (renamed from Documentation/cgroup-v1/net_prio.txt)24
-rw-r--r--Documentation/cgroup-v1/pids.rst (renamed from Documentation/cgroup-v1/pids.txt)82
-rw-r--r--Documentation/cgroup-v1/rdma.rst (renamed from Documentation/cgroup-v1/rdma.txt)66
-rw-r--r--Documentation/core-api/circular-buffers.rst2
-rw-r--r--Documentation/core-api/timekeeping.rst12
-rw-r--r--Documentation/cputopology.txt48
-rw-r--r--Documentation/crypto/api-samples.rst176
-rw-r--r--Documentation/crypto/api-skcipher.rst2
-rw-r--r--Documentation/crypto/architecture.rst4
-rw-r--r--Documentation/crypto/crypto_engine.rst111
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel-crypto.txt13
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-davinci.txt18
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.txt10
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.yaml69
-rw-r--r--Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt2
-rw-r--r--Documentation/devicetree/bindings/i3c/i3c.txt4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt29
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt20
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt43
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm36274.txt85
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lm3697.txt73
-rw-r--r--Documentation/devicetree/bindings/leds/leds-spi-byte.txt44
-rw-r--r--Documentation/devicetree/bindings/mfd/ti-lmu.txt88
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt21
-rw-r--r--Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt5
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-sifive.txt33
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt9
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-stm32.txt3
-rw-r--r--Documentation/devicetree/bindings/regulator/arizona-regulator.txt3
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.yaml5
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.txt57
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.yaml118
-rw-r--r--Documentation/devicetree/bindings/regulator/max8660.txt47
-rw-r--r--Documentation/devicetree/bindings/regulator/max8660.yaml77
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt22
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.txt140
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.yaml200
-rw-r--r--Documentation/devicetree/bindings/regulator/slg51000.txt88
-rw-r--r--Documentation/devicetree/bindings/regulator/st,stm32-booster.txt18
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt1
-rw-r--r--Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/trivial-devices.yaml4
-rw-r--r--Documentation/driver-api/gpio/consumer.rst4
-rw-r--r--Documentation/driver-api/gpio/driver.rst16
-rw-r--r--Documentation/driver-api/s390-drivers.rst4
-rw-r--r--Documentation/filesystems/proc.txt40
-rw-r--r--Documentation/filesystems/tmpfs.txt2
-rw-r--r--Documentation/fmc/API.txt47
-rw-r--r--Documentation/fmc/FMC-and-SDB.txt88
-rw-r--r--Documentation/fmc/carrier.txt311
-rw-r--r--Documentation/fmc/fmc-chardev.txt64
-rw-r--r--Documentation/fmc/fmc-fakedev.txt36
-rw-r--r--Documentation/fmc/fmc-trivial.txt17
-rw-r--r--Documentation/fmc/fmc-write-eeprom.txt98
-rw-r--r--Documentation/fmc/identifiers.txt168
-rw-r--r--Documentation/fmc/mezzanine.txt123
-rw-r--r--Documentation/fmc/parameters.txt56
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt4
-rw-r--r--Documentation/leds/index.rst25
-rw-r--r--Documentation/leds/leds-blinkm.rst (renamed from Documentation/leds/leds-blinkm.txt)64
-rw-r--r--Documentation/leds/leds-class-flash.rst (renamed from Documentation/leds/leds-class-flash.txt)49
-rw-r--r--Documentation/leds/leds-class.rst (renamed from Documentation/leds/leds-class.txt)15
-rw-r--r--Documentation/leds/leds-lm3556.rst (renamed from Documentation/leds/leds-lm3556.txt)100
-rw-r--r--Documentation/leds/leds-lp3944.rst (renamed from Documentation/leds/leds-lp3944.txt)23
-rw-r--r--Documentation/leds/leds-lp5521.rst115
-rw-r--r--Documentation/leds/leds-lp5521.txt101
-rw-r--r--Documentation/leds/leds-lp5523.rst147
-rw-r--r--Documentation/leds/leds-lp5523.txt130
-rw-r--r--Documentation/leds/leds-lp5562.rst137
-rw-r--r--Documentation/leds/leds-lp5562.txt120
-rw-r--r--Documentation/leds/leds-lp55xx.rst224
-rw-r--r--Documentation/leds/leds-lp55xx.txt194
-rw-r--r--Documentation/leds/leds-mlxcpld.rst118
-rw-r--r--Documentation/leds/leds-mlxcpld.txt110
-rw-r--r--Documentation/leds/ledtrig-oneshot.rst (renamed from Documentation/leds/ledtrig-oneshot.txt)11
-rw-r--r--Documentation/leds/ledtrig-transient.rst (renamed from Documentation/leds/ledtrig-transient.txt)65
-rw-r--r--Documentation/leds/ledtrig-usbport.rst (renamed from Documentation/leds/ledtrig-usbport.txt)11
-rw-r--r--Documentation/leds/uleds.rst (renamed from Documentation/leds/uleds.txt)5
-rw-r--r--Documentation/locking/lockdep-design.txt112
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--Documentation/process/changes.rst6
-rw-r--r--Documentation/pwm.txt7
-rw-r--r--Documentation/s390/3270.rst (renamed from Documentation/s390/3270.txt)85
-rw-r--r--Documentation/s390/Debugging390.txt2142
-rw-r--r--Documentation/s390/cds.rst (renamed from Documentation/s390/cds.txt)368
-rw-r--r--Documentation/s390/common_io.rst (renamed from Documentation/s390/CommonIO)49
-rw-r--r--Documentation/s390/dasd.rst (renamed from Documentation/s390/DASD)33
-rw-r--r--Documentation/s390/debugging390.rst2613
-rw-r--r--Documentation/s390/driver-model.rst (renamed from Documentation/s390/driver-model.txt)179
-rw-r--r--Documentation/s390/index.rst30
-rw-r--r--Documentation/s390/monreader.rst (renamed from Documentation/s390/monreader.txt)85
-rw-r--r--Documentation/s390/qeth.rst (renamed from Documentation/s390/qeth.txt)36
-rw-r--r--Documentation/s390/s390dbf.rst487
-rw-r--r--Documentation/s390/s390dbf.txt667
-rw-r--r--Documentation/s390/text_files.rst11
-rw-r--r--Documentation/s390/vfio-ap.rst (renamed from Documentation/s390/vfio-ap.txt)499
-rw-r--r--Documentation/s390/vfio-ccw.rst (renamed from Documentation/s390/vfio-ccw.txt)92
-rw-r--r--Documentation/s390/zfcpdump.rst (renamed from Documentation/s390/zfcpdump.txt)2
-rw-r--r--Documentation/scheduler/sched-deadline.txt2
-rw-r--r--Documentation/scheduler/sched-design-CFS.txt2
-rw-r--r--Documentation/scheduler/sched-pelt.c3
-rw-r--r--Documentation/scheduler/sched-rt-group.txt2
-rw-r--r--Documentation/security/IMA-templates.rst7
-rw-r--r--Documentation/security/keys/core.rst217
-rw-r--r--Documentation/security/keys/request-key.rst57
-rw-r--r--Documentation/sysctl/kernel.txt16
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt2
-rw-r--r--Documentation/vm/numa.rst4
-rw-r--r--Documentation/vm/page_migration.rst2
-rw-r--r--Documentation/vm/unevictable-lru.rst2
-rw-r--r--Documentation/x86/exception-tables.rst2
-rw-r--r--Documentation/x86/topology.rst4
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets.rst4
-rw-r--r--MAINTAINERS47
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/atomic.h20
-rw-r--r--arch/alpha/kernel/signal.c4
-rw-r--r--arch/alpha/kernel/smp.c19
-rw-r--r--arch/alpha/kernel/traps.c2
-rw-r--r--arch/alpha/mm/fault.c4
-rw-r--r--arch/alpha/oprofile/common.c6
-rw-r--r--arch/arc/include/asm/atomic.h41
-rw-r--r--arch/arc/kernel/process.c4
-rw-r--r--arch/arc/kernel/signal.c2
-rw-r--r--arch/arc/kernel/traps.c2
-rw-r--r--arch/arc/mm/fault.c4
-rw-r--r--arch/arm/Kconfig30
-rw-r--r--arch/arm/boot/dts/armada-xp-98dx3236.dtsi8
-rw-r--r--arch/arm/boot/dts/imx7ulp.dtsi23
-rw-r--r--arch/arm/common/bL_switcher.c6
-rw-r--r--arch/arm/configs/exynos_defconfig1
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c2
-rw-r--r--arch/arm/crypto/sha512-glue.c2
-rw-r--r--arch/arm/include/asm/arch_timer.h10
-rw-r--r--arch/arm/include/asm/atomic.h50
-rw-r--r--arch/arm/include/asm/bug.h2
-rw-r--r--arch/arm/include/asm/cacheflush.h7
-rw-r--r--arch/arm/include/asm/traps.h2
-rw-r--r--arch/arm/kernel/ptrace.c6
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/kernel/smp.c1
-rw-r--r--arch/arm/kernel/topology.c2
-rw-r--r--arch/arm/kernel/traps.c7
-rw-r--r--arch/arm/mach-davinci/board-da830-evm.c5
-rw-r--r--arch/arm/mach-davinci/board-omapl138-hawk.c3
-rw-r--r--arch/arm/mach-omap1/ams-delta-fiq.c4
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c5
-rw-r--r--arch/arm/mach-s3c64xx/mach-crag6410.c21
-rw-r--r--arch/arm/mach-stm32/Kconfig1
-rw-r--r--arch/arm/mm/Kconfig8
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/mm/cache-v7.S16
-rw-r--r--arch/arm/mm/fault.c33
-rw-r--r--arch/arm/mm/init.c22
-rw-r--r--arch/arm/mm/mm.h2
-rw-r--r--arch/arm/mm/proc-v7.S10
-rw-r--r--arch/arm/vdso/Makefile3
-rw-r--r--arch/arm64/Kconfig38
-rw-r--r--arch/arm64/Makefile23
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi17
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/aes-ce.S60
-rw-r--r--arch/arm64/crypto/aes-modes.S118
-rw-r--r--arch/arm64/crypto/aes-neon.S48
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c2
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c2
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c2
-rw-r--r--arch/arm64/include/asm/acpi.h3
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h4
-rw-r--r--arch/arm64/include/asm/arch_timer.h13
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h20
-rw-r--r--arch/arm64/include/asm/atomic_lse.h34
-rw-r--r--arch/arm64/include/asm/cache.h5
-rw-r--r--arch/arm64/include/asm/cacheflush.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/daifflags.h75
-rw-r--r--arch/arm64/include/asm/elf.h14
-rw-r--r--arch/arm64/include/asm/fpsimd.h5
-rw-r--r--arch/arm64/include/asm/hwcap.h2
-rw-r--r--arch/arm64/include/asm/irqflags.h79
-rw-r--r--arch/arm64/include/asm/kvm_host.h7
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h3
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h1
-rw-r--r--arch/arm64/include/asm/pgtable.h56
-rw-r--r--arch/arm64/include/asm/ptrace.h10
-rw-r--r--arch/arm64/include/asm/signal32.h46
-rw-r--r--arch/arm64/include/asm/simd.h10
-rw-r--r--arch/arm64/include/asm/sysreg.h1
-rw-r--r--arch/arm64/include/asm/thread_info.h5
-rw-r--r--arch/arm64/include/asm/unistd.h5
-rw-r--r--arch/arm64/include/asm/vdso.h3
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h44
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h126
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h103
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h53
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h2
-rw-r--r--arch/arm64/include/uapi/asm/ptrace.h3
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/acpi.c10
-rw-r--r--arch/arm64/kernel/asm-offsets.c34
-rw-r--r--arch/arm64/kernel/cacheinfo.c9
-rw-r--r--arch/arm64/kernel/cpufeature.c8
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/entry.S84
-rw-r--r--arch/arm64/kernel/fpsimd.c139
-rw-r--r--arch/arm64/kernel/image.h6
-rw-r--r--arch/arm64/kernel/irq.c26
-rw-r--r--arch/arm64/kernel/module.c10
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/process.c2
-rw-r--r--arch/arm64/kernel/ptrace.c6
-rw-r--r--arch/arm64/kernel/signal32.c72
-rw-r--r--arch/arm64/kernel/sleep.S2
-rw-r--r--arch/arm64/kernel/smp.c27
-rw-r--r--arch/arm64/kernel/traps.c29
-rw-r--r--arch/arm64/kernel/vdso.c356
-rw-r--r--arch/arm64/kernel/vdso/Makefile41
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S323
-rw-r--r--arch/arm64/kernel/vdso/vgettimeofday.c27
-rw-r--r--arch/arm64/kernel/vdso32/.gitignore2
-rw-r--r--arch/arm64/kernel/vdso32/Makefile186
-rw-r--r--arch/arm64/kernel/vdso32/note.c15
-rw-r--r--arch/arm64/kernel/vdso32/sigreturn.S62
-rw-r--r--arch/arm64/kernel/vdso32/vdso.S19
-rw-r--r--arch/arm64/kernel/vdso32/vdso.lds.S82
-rw-r--r--arch/arm64/kernel/vdso32/vgettimeofday.c59
-rw-r--r--arch/arm64/kvm/fpsimd.c4
-rw-r--r--arch/arm64/kvm/guest.c2
-rw-r--r--arch/arm64/kvm/hyp/switch.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c12
-rw-r--r--arch/arm64/mm/fault.c61
-rw-r--r--arch/arm64/mm/hugetlbpage.c12
-rw-r--r--arch/arm64/mm/init.c5
-rw-r--r--arch/arm64/mm/mmu.c14
-rw-r--r--arch/arm64/mm/pageattr.c48
-rw-r--r--arch/arm64/net/bpf_jit_comp.c2
-rw-r--r--arch/c6x/kernel/signal.c2
-rw-r--r--arch/c6x/kernel/traps.c2
-rw-r--r--arch/csky/abiv1/alignment.c2
-rw-r--r--arch/csky/abiv2/fpu.c2
-rw-r--r--arch/csky/kernel/signal.c4
-rw-r--r--arch/csky/kernel/traps.c2
-rw-r--r--arch/csky/mm/fault.c4
-rw-r--r--arch/h8300/kernel/ptrace_h.c4
-rw-r--r--arch/h8300/kernel/ptrace_s.c2
-rw-r--r--arch/h8300/kernel/signal.c2
-rw-r--r--arch/hexagon/kernel/signal.c2
-rw-r--r--arch/hexagon/kernel/traps.c12
-rw-r--r--arch/hexagon/mm/vm_fault.c4
-rw-r--r--arch/ia64/include/asm/atomic.h20
-rw-r--r--arch/ia64/kernel/brl_emu.c6
-rw-r--r--arch/ia64/kernel/mca.c2
-rw-r--r--arch/ia64/kernel/perfmon.c12
-rw-r--r--arch/ia64/kernel/signal.c8
-rw-r--r--arch/ia64/kernel/traps.c24
-rw-r--r--arch/ia64/kernel/unaligned.c2
-rw-r--r--arch/ia64/kernel/uncached.c8
-rw-r--r--arch/ia64/mm/fault.c2
-rw-r--r--arch/m68k/Kconfig3
-rw-r--r--arch/m68k/configs/amiga_defconfig17
-rw-r--r--arch/m68k/configs/apollo_defconfig17
-rw-r--r--arch/m68k/configs/atari_defconfig17
-rw-r--r--arch/m68k/configs/bvme6000_defconfig17
-rw-r--r--arch/m68k/configs/hp300_defconfig17
-rw-r--r--arch/m68k/configs/mac_defconfig17
-rw-r--r--arch/m68k/configs/multi_defconfig17
-rw-r--r--arch/m68k/configs/mvme147_defconfig17
-rw-r--r--arch/m68k/configs/mvme16x_defconfig17
-rw-r--r--arch/m68k/configs/q40_defconfig17
-rw-r--r--arch/m68k/configs/sun3_defconfig17
-rw-r--r--arch/m68k/configs/sun3x_defconfig17
-rw-r--r--arch/m68k/kernel/dma.c57
-rw-r--r--arch/m68k/kernel/signal.c4
-rw-r--r--arch/m68k/kernel/traps.c20
-rw-r--r--arch/m68k/mm/fault.c4
-rw-r--r--arch/microblaze/kernel/exceptions.c2
-rw-r--r--arch/microblaze/kernel/signal.c2
-rw-r--r--arch/microblaze/mm/fault.c2
-rw-r--r--arch/mips/Makefile3
-rw-r--r--arch/mips/boot/compressed/Makefile2
-rw-r--r--arch/mips/boot/compressed/calc_vmlinuz_load_addr.c2
-rw-r--r--arch/mips/include/asm/atomic.h22
-rw-r--r--arch/mips/include/asm/mach-ath79/ar933x_uart.h4
-rw-r--r--arch/mips/include/asm/switch_to.h4
-rw-r--r--arch/mips/kernel/branch.c18
-rw-r--r--arch/mips/kernel/kprobes.c2
-rw-r--r--arch/mips/kernel/mips-mt-fpaff.c2
-rw-r--r--arch/mips/kernel/signal.c8
-rw-r--r--arch/mips/kernel/signal_n32.c4
-rw-r--r--arch/mips/kernel/signal_o32.c8
-rw-r--r--arch/mips/kernel/traps.c56
-rw-r--r--arch/mips/kernel/unaligned.c20
-rw-r--r--arch/mips/mm/fault.c4
-rw-r--r--arch/mips/mm/mmap.c2
-rw-r--r--arch/mips/mm/tlbex.c29
-rw-r--r--arch/mips/sgi-ip22/ip22-berr.c2
-rw-r--r--arch/mips/sgi-ip22/ip28-berr.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-berr.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-berr.c2
-rw-r--r--arch/nds32/kernel/fpu.c2
-rw-r--r--arch/nds32/kernel/signal.c2
-rw-r--r--arch/nds32/kernel/traps.c17
-rw-r--r--arch/nds32/mm/fault.c4
-rw-r--r--arch/nios2/kernel/signal.c4
-rw-r--r--arch/nios2/kernel/traps.c2
-rw-r--r--arch/openrisc/kernel/signal.c2
-rw-r--r--arch/openrisc/kernel/traps.c12
-rw-r--r--arch/openrisc/mm/fault.c4
-rw-r--r--arch/parisc/kernel/ptrace.c6
-rw-r--r--arch/parisc/kernel/signal.c2
-rw-r--r--arch/parisc/kernel/traps.c14
-rw-r--r--arch/parisc/kernel/unaligned.c4
-rw-r--r--arch/parisc/math-emu/driver.c2
-rw-r--r--arch/parisc/mm/fault.c4
-rw-r--r--arch/powerpc/include/asm/atomic.h44
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/ptrace.c1
-rw-r--r--arch/powerpc/kernel/rtas.c3
-rw-r--r--arch/powerpc/kernel/signal_32.c6
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/kernel/traps.c4
-rw-r--r--arch/powerpc/mm/fault.c5
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c9
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/riscv/include/asm/atomic.h44
-rw-r--r--arch/riscv/include/asm/bug.h2
-rw-r--r--arch/riscv/kernel/signal.c2
-rw-r--r--arch/riscv/kernel/traps.c11
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/s390/Kconfig41
-rw-r--r--arch/s390/configs/debug_defconfig2
-rw-r--r--arch/s390/configs/defconfig600
-rw-r--r--arch/s390/configs/performance_defconfig678
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/ghash_s390.c2
-rw-r--r--arch/s390/crypto/prng.c4
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c2
-rw-r--r--arch/s390/include/asm/airq.h2
-rw-r--r--arch/s390/include/asm/atomic.h38
-rw-r--r--arch/s390/include/asm/ccwdev.h4
-rw-r--r--arch/s390/include/asm/cio.h41
-rw-r--r--arch/s390/include/asm/ctl_reg.h9
-rw-r--r--arch/s390/include/asm/debug.h153
-rw-r--r--arch/s390/include/asm/facility.h21
-rw-r--r--arch/s390/include/asm/idals.h3
-rw-r--r--arch/s390/include/asm/kvm_host.h7
-rw-r--r--arch/s390/include/asm/mem_encrypt.h17
-rw-r--r--arch/s390/include/asm/pci.h5
-rw-r--r--arch/s390/include/asm/percpu.h2
-rw-r--r--arch/s390/include/asm/processor.h7
-rw-r--r--arch/s390/include/asm/smp.h35
-rw-r--r--arch/s390/include/asm/spinlock.h4
-rw-r--r--arch/s390/include/asm/tlbflush.h17
-rw-r--r--arch/s390/include/asm/unwind.h19
-rw-r--r--arch/s390/include/uapi/asm/runtime_instr.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/compat_signal.c4
-rw-r--r--arch/s390/kernel/debug.c105
-rw-r--r--arch/s390/kernel/dis.c5
-rw-r--r--arch/s390/kernel/dumpstack.c2
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/jump_label.c23
-rw-r--r--arch/s390/kernel/machine_kexec.c3
-rw-r--r--arch/s390/kernel/processor.c19
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/signal.c4
-rw-r--r--arch/s390/kernel/smp.c21
-rw-r--r--arch/s390/kernel/swsusp.S2
-rw-r--r--arch/s390/kernel/traps.c16
-rw-r--r--arch/s390/kernel/unwind_bc.c16
-rw-r--r--arch/s390/kvm/kvm-s390.c3
-rw-r--r--arch/s390/kvm/priv.c86
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/mm/fault.c6
-rw-r--r--arch/s390/mm/init.c47
-rw-r--r--arch/s390/mm/maccess.c9
-rw-r--r--arch/s390/mm/mmap.c2
-rw-r--r--arch/s390/pci/pci.c15
-rw-r--r--arch/s390/pci/pci_clp.c2
-rw-r--r--arch/s390/pci/pci_debug.c2
-rw-r--r--arch/s390/purgatory/.gitignore3
-rw-r--r--arch/s390/tools/Makefile7
-rw-r--r--arch/s390/tools/opcodes.txt51
-rw-r--r--arch/sh/kernel/cpu/sh2a/fpu.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/fpu.c2
-rw-r--r--arch/sh/kernel/cpu/sh5/fpu.c4
-rw-r--r--arch/sh/kernel/hw_breakpoint.c2
-rw-r--r--arch/sh/kernel/ptrace_64.c4
-rw-r--r--arch/sh/kernel/signal_32.c4
-rw-r--r--arch/sh/kernel/signal_64.c4
-rw-r--r--arch/sh/kernel/traps.c4
-rw-r--r--arch/sh/kernel/traps_32.c12
-rw-r--r--arch/sh/kernel/traps_64.c2
-rw-r--r--arch/sh/math-emu/math.c2
-rw-r--r--arch/sh/mm/fault.c11
-rw-r--r--arch/sparc/include/asm/atomic_64.h8
-rw-r--r--arch/sparc/kernel/process_64.c4
-rw-r--r--arch/sparc/kernel/signal32.c8
-rw-r--r--arch/sparc/kernel/signal_32.c4
-rw-r--r--arch/sparc/kernel/signal_64.c8
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c2
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/kernel/traps_32.c4
-rw-r--r--arch/sparc/kernel/traps_64.c41
-rw-r--r--arch/sparc/mm/fault_32.c4
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/um/kernel/exec.c2
-rw-r--r--arch/um/kernel/ptrace.c7
-rw-r--r--arch/um/kernel/skas/mmu.c2
-rw-r--r--arch/um/kernel/tlb.c4
-rw-r--r--arch/um/kernel/trap.c16
-rw-r--r--arch/unicore32/kernel/signal.c4
-rw-r--r--arch/unicore32/kernel/traps.c2
-rw-r--r--arch/unicore32/mm/fault.c13
-rw-r--r--arch/x86/Kconfig53
-rw-r--r--arch/x86/Kconfig.cpu13
-rw-r--r--arch/x86/Kconfig.debug44
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c45
-rw-r--r--arch/x86/crypto/chacha_glue.c2
-rw-r--r--arch/x86/entry/calling.h15
-rw-r--r--arch/x86/entry/common.c17
-rw-r--r--arch/x86/entry/entry_32.S169
-rw-r--r--arch/x86/entry/entry_64.S41
-rw-r--r--arch/x86/entry/vdso/Makefile9
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c256
-rw-r--r--arch/x86/entry/vdso/vdso.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso32.lds.S2
-rw-r--r--arch/x86/entry/vdso/vdsox32.lds.S1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/entry/vsyscall/Makefile2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c41
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_gtod.c83
-rw-r--r--arch/x86/events/core.c2
-rw-r--r--arch/x86/events/intel/cstate.c14
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/events/intel/rapl.c20
-rw-r--r--arch/x86/events/intel/uncore.c81
-rw-r--r--arch/x86/events/intel/uncore.h4
-rw-r--r--arch/x86/events/intel/uncore_snbep.c4
-rw-r--r--arch/x86/hyperv/hv_init.c91
-rw-r--r--arch/x86/include/asm/acrn.h11
-rw-r--r--arch/x86/include/asm/apic.h5
-rw-r--r--arch/x86/include/asm/atomic.h8
-rw-r--r--arch/x86/include/asm/atomic64_32.h66
-rw-r--r--arch/x86/include/asm/atomic64_64.h46
-rw-r--r--arch/x86/include/asm/barrier.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h21
-rw-r--r--arch/x86/include/asm/fpu/xstate.h1
-rw-r--r--arch/x86/include/asm/frame.h49
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hpet.h7
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h6
-rw-r--r--arch/x86/include/asm/hypervisor.h1
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/irq_regs.h4
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/kexec.h17
-rw-r--r--arch/x86/include/asm/mshyperv.h81
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/mwait.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h21
-rw-r--r--arch/x86/include/asm/percpu.h236
-rw-r--r--arch/x86/include/asm/processor.h7
-rw-r--r--arch/x86/include/asm/ptrace.h20
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/smp.h4
-rw-r--r--arch/x86/include/asm/special_insns.h37
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/text-patching.h17
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/topology.h17
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h261
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h44
-rw-r--r--arch/x86/include/asm/vgtod.h75
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/vvar.h7
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/acpi/cstate.c15
-rw-r--r--arch/x86/kernel/alternative.c288
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c87
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c50
-rw-r--r--arch/x86/kernel/apic/msi.c4
-rw-r--r--arch/x86/kernel/apic/vector.c4
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile6
-rw-r--r--arch/x86/kernel/cpu/acrn.c69
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c12
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c3
-rw-r--r--arch/x86/kernel/cpu/common.c79
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c9
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c27
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c92
-rw-r--r--arch/x86/kernel/cpu/mce/core.c179
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c37
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h12
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c14
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c12
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/cpu/topology.c88
-rw-r--r--arch/x86/kernel/cpu/umwait.c200
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c167
-rw-r--r--arch/x86/kernel/crash.c12
-rw-r--r--arch/x86/kernel/fpu/core.c52
-rw-r--r--arch/x86/kernel/fpu/init.c19
-rw-r--r--arch/x86/kernel/fpu/xstate.c58
-rw-r--r--arch/x86/kernel/ftrace.c17
-rw-r--r--arch/x86/kernel/ftrace_32.S78
-rw-r--r--arch/x86/kernel/ftrace_64.S3
-rw-r--r--arch/x86/kernel/hpet.c935
-rw-r--r--arch/x86/kernel/i8253.c25
-rw-r--r--arch/x86/kernel/idt.c3
-rw-r--r--arch/x86/kernel/ima_arch.c12
-rw-r--r--arch/x86/kernel/io_delay.c38
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/jailhouse.c4
-rw-r--r--arch/x86/kernel/jump_label.c121
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kprobes/common.h28
-rw-r--r--arch/x86/kernel/kprobes/core.c29
-rw-r--r--arch/x86/kernel/kprobes/opt.c36
-rw-r--r--arch/x86/kernel/paravirt.c46
-rw-r--r--arch/x86/kernel/paravirt_patch.c126
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c67
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c75
-rw-r--r--arch/x86/kernel/process_32.c16
-rw-r--r--arch/x86/kernel/ptrace.c59
-rw-r--r--arch/x86/kernel/pvclock.c1
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/smp.c2
-rw-r--r--arch/x86/kernel/smpboot.c77
-rw-r--r--arch/x86/kernel/time.c10
-rw-r--r--arch/x86/kernel/tls.c9
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/kernel/tsc.c61
-rw-r--r--arch/x86/kernel/tsc_msr.c4
-rw-r--r--arch/x86/kernel/umip.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c32
-rw-r--r--arch/x86/kernel/unwind_orc.c2
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/vmx/nested.c30
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/lib/cache-smp.c3
-rw-r--r--arch/x86/mm/fault.c30
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/platform/geode/alix.c1
-rw-r--r--arch/x86/platform/geode/geos.c1
-rw-r--r--arch/x86/platform/geode/net5501.c1
-rw-r--r--arch/x86/ras/Kconfig10
-rw-r--r--arch/x86/tools/insn_decoder_test.c8
-rw-r--r--arch/x86/tools/insn_sanity.c28
-rw-r--r--arch/x86/um/signal.c4
-rw-r--r--arch/x86/xen/Kconfig1
-rw-r--r--arch/x86/xen/smp_pv.c1
-rw-r--r--arch/xtensa/kernel/signal.c2
-rw-r--r--arch/xtensa/kernel/traps.c8
-rw-r--r--arch/xtensa/mm/fault.c4
-rw-r--r--block/Kconfig2
-rw-r--r--block/blk-mq-debugfs.c7
-rw-r--r--certs/blacklist.c9
-rw-r--r--certs/system_keyring.c12
-rw-r--r--crypto/Kconfig39
-rw-r--r--crypto/Makefile3
-rw-r--r--crypto/aead.c36
-rw-r--r--crypto/algapi.c35
-rw-r--r--crypto/anubis.c1
-rw-r--r--crypto/arc4.c125
-rw-r--r--crypto/asymmetric_keys/Kconfig3
-rw-r--r--crypto/asymmetric_keys/asymmetric_type.c2
-rw-r--r--crypto/chacha20poly1305.c73
-rw-r--r--crypto/chacha_generic.c4
-rw-r--r--crypto/cryptd.c27
-rw-r--r--crypto/crypto_null.c3
-rw-r--r--crypto/crypto_user_base.c3
-rw-r--r--crypto/crypto_wq.c35
-rw-r--r--crypto/deflate.c1
-rw-r--r--crypto/drbg.c94
-rw-r--r--crypto/fcrypt.c1
-rw-r--r--crypto/ghash-generic.c8
-rw-r--r--crypto/jitterentropy-kcapi.c5
-rw-r--r--crypto/jitterentropy.c305
-rw-r--r--crypto/khazad.c1
-rw-r--r--crypto/lrw.c2
-rw-r--r--crypto/lz4.c1
-rw-r--r--crypto/lz4hc.c1
-rw-r--r--crypto/lzo-rle.c1
-rw-r--r--crypto/lzo.c1
-rw-r--r--crypto/md4.c7
-rw-r--r--crypto/md5.c7
-rw-r--r--crypto/michael_mic.c1
-rw-r--r--crypto/rmd128.c1
-rw-r--r--crypto/rmd160.c1
-rw-r--r--crypto/rmd256.c1
-rw-r--r--crypto/rmd320.c1
-rw-r--r--crypto/serpent_generic.c9
-rw-r--r--crypto/skcipher.c34
-rw-r--r--crypto/tea.c3
-rw-r--r--crypto/testmgr.c478
-rw-r--r--crypto/testmgr.h116
-rw-r--r--crypto/tgr192.c21
-rw-r--r--crypto/wp512.c21
-rw-r--r--crypto/xxhash_generic.c108
-rw-r--r--crypto/zstd.c1
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile1
-rw-r--r--drivers/acpi/acpi_pad.c1
-rw-r--r--drivers/acpi/irq.c26
-rw-r--r--drivers/acpi/pptt.c61
-rw-r--r--drivers/acpi/processor_idle.c1
-rw-r--r--drivers/base/arch_topology.c6
-rw-r--r--drivers/base/cacheinfo.c5
-rw-r--r--drivers/base/regmap/Kconfig6
-rw-r--r--drivers/base/regmap/Makefile1
-rw-r--r--drivers/base/regmap/regcache-lzo.c8
-rw-r--r--drivers/base/regmap/regmap-debugfs.c2
-rw-r--r--drivers/base/regmap/regmap-i3c.c60
-rw-r--r--drivers/base/regmap/regmap.c2
-rw-r--r--drivers/base/topology.c22
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c2
-rw-r--r--drivers/block/drbd/drbd_nl.c2
-rw-r--r--drivers/char/agp/generic.c3
-rw-r--r--drivers/char/hw_random/iproc-rng200.c1
-rw-r--r--drivers/char/hw_random/meson-rng.c52
-rw-r--r--drivers/char/tpm/eventlog/efi.c59
-rw-r--r--drivers/char/tpm/eventlog/tpm2.c47
-rw-r--r--drivers/char/tpm/tpm-chip.c6
-rw-r--r--drivers/char/tpm/tpm1-cmd.c7
-rw-r--r--drivers/char/tpm/tpm2-cmd.c7
-rw-r--r--drivers/clocksource/Kconfig14
-rw-r--r--drivers/clocksource/Makefile5
-rw-r--r--drivers/clocksource/arc_timer.c3
-rw-r--r--drivers/clocksource/arm_arch_timer.c15
-rw-r--r--drivers/clocksource/exynos_mct.c4
-rw-r--r--drivers/clocksource/hyperv_timer.c339
-rw-r--r--drivers/clocksource/timer-davinci.c369
-rw-r--r--drivers/clocksource/timer-imx-sysctr.c145
-rw-r--r--drivers/clocksource/timer-ixp4xx.c16
-rw-r--r--drivers/clocksource/timer-meson6.c5
-rw-r--r--drivers/clocksource/timer-tegra.c416
-rw-r--r--drivers/clocksource/timer-tegra20.c379
-rw-r--r--drivers/crypto/Kconfig20
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/amcc/crypto4xx_alg.c36
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.c25
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.h10
-rw-r--r--drivers/crypto/atmel-ecc.c403
-rw-r--r--drivers/crypto/atmel-ecc.h116
-rw-r--r--drivers/crypto/atmel-i2c.c364
-rw-r--r--drivers/crypto/atmel-i2c.h197
-rw-r--r--drivers/crypto/atmel-sha204a.c171
-rw-r--r--drivers/crypto/bcm/cipher.c8
-rw-r--r--drivers/crypto/bcm/spu2.c10
-rw-r--r--drivers/crypto/caam/Kconfig46
-rw-r--r--drivers/crypto/caam/Makefile18
-rw-r--r--drivers/crypto/caam/caamalg.c338
-rw-r--r--drivers/crypto/caam/caamalg_desc.c147
-rw-r--r--drivers/crypto/caam/caamalg_desc.h4
-rw-r--r--drivers/crypto/caam/caamalg_qi.c267
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c202
-rw-r--r--drivers/crypto/caam/caamhash.c329
-rw-r--r--drivers/crypto/caam/caampkc.c177
-rw-r--r--drivers/crypto/caam/caampkc.h9
-rw-r--r--drivers/crypto/caam/caamrng.c76
-rw-r--r--drivers/crypto/caam/ctrl.c56
-rw-r--r--drivers/crypto/caam/desc_constr.h11
-rw-r--r--drivers/crypto/caam/error.c8
-rw-r--r--drivers/crypto/caam/error.h2
-rw-r--r--drivers/crypto/caam/intern.h102
-rw-r--r--drivers/crypto/caam/jr.c43
-rw-r--r--drivers/crypto/caam/key_gen.c28
-rw-r--r--drivers/crypto/caam/qi.c52
-rw-r--r--drivers/crypto/caam/sg_sw_qm.h18
-rw-r--r--drivers/crypto/caam/sg_sw_qm2.h18
-rw-r--r--drivers/crypto/caam/sg_sw_sec4.h26
-rw-r--r--drivers/crypto/cavium/cpt/cptvf_algs.c1
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_debugfs.h2
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_mbx.h2
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes.c7
-rw-r--r--drivers/crypto/ccp/ccp-dev.c96
-rw-r--r--drivers/crypto/ccp/ccp-dev.h2
-rw-r--r--drivers/crypto/ccp/ccp-ops.c20
-rw-r--r--drivers/crypto/ccree/cc_driver.c70
-rw-r--r--drivers/crypto/ccree/cc_driver.h6
-rw-r--r--drivers/crypto/ccree/cc_host_regs.h20
-rw-r--r--drivers/crypto/ccree/cc_pm.c11
-rw-r--r--drivers/crypto/ccree/cc_pm.h7
-rw-r--r--drivers/crypto/hisilicon/sec/sec_drv.h2
-rw-r--r--drivers/crypto/inside-secure/safexcel.c13
-rw-r--r--drivers/crypto/inside-secure/safexcel.h17
-rw-r--r--drivers/crypto/inside-secure/safexcel_cipher.c116
-rw-r--r--drivers/crypto/inside-secure/safexcel_hash.c92
-rw-r--r--drivers/crypto/inside-secure/safexcel_ring.c3
-rw-r--r--drivers/crypto/ixp4xx_crypto.c15
-rw-r--r--drivers/crypto/mxs-dcp.c5
-rw-r--r--drivers/crypto/nx/nx-842-powernv.c8
-rw-r--r--drivers/crypto/nx/nx-842-pseries.c6
-rw-r--r--drivers/crypto/nx/nx.c4
-rw-r--r--drivers/crypto/nx/nx.h12
-rw-r--r--drivers/crypto/nx/nx_debugfs.c71
-rw-r--r--drivers/crypto/qat/qat_common/qat_algs.c294
-rw-r--r--drivers/crypto/qat/qat_common/qat_crypto.h2
-rw-r--r--drivers/crypto/sahara.c4
-rw-r--r--drivers/crypto/stm32/Makefile2
-rw-r--r--drivers/crypto/stm32/stm32-crc32.c (renamed from drivers/crypto/stm32/stm32_crc32.c)0
-rw-r--r--drivers/crypto/stm32/stm32-hash.c6
-rw-r--r--drivers/crypto/sunxi-ss/sun4i-ss-cipher.c47
-rw-r--r--drivers/crypto/talitos.c368
-rw-r--r--drivers/crypto/talitos.h73
-rw-r--r--drivers/crypto/vmx/aes_cbc.c183
-rw-r--r--drivers/crypto/vmx/aes_ctr.c165
-rw-r--r--drivers/crypto/vmx/aes_xts.c175
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h2
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl22
-rw-r--r--drivers/crypto/vmx/vmx.c72
-rw-r--r--drivers/dma/dma-jz4780.c5
-rw-r--r--drivers/dma/imx-sdma.c52
-rw-r--r--drivers/dma/qcom/bam_dma.c3
-rw-r--r--drivers/firmware/efi/efi.c2
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c15
-rw-r--r--drivers/firmware/efi/libstub/efistub.h2
-rw-r--r--drivers/firmware/efi/libstub/fdt.c27
-rw-r--r--drivers/firmware/efi/libstub/tpm.c80
-rw-r--r--drivers/firmware/efi/tpm.c63
-rw-r--r--drivers/fmc/Kconfig52
-rw-r--r--drivers/fmc/Makefile15
-rw-r--r--drivers/fmc/fmc-chardev.c199
-rw-r--r--drivers/fmc/fmc-core.c388
-rw-r--r--drivers/fmc/fmc-debug.c172
-rw-r--r--drivers/fmc/fmc-dump.c58
-rw-r--r--drivers/fmc/fmc-fakedev.c355
-rw-r--r--drivers/fmc/fmc-match.c113
-rw-r--r--drivers/fmc/fmc-private.h8
-rw-r--r--drivers/fmc/fmc-sdb.c219
-rw-r--r--drivers/fmc/fmc-trivial.c103
-rw-r--r--drivers/fmc/fmc-write-eeprom.c175
-rw-r--r--drivers/fmc/fru-parse.c80
-rw-r--r--drivers/gpio/Kconfig20
-rw-r--r--drivers/gpio/Makefile296
-rw-r--r--drivers/gpio/TODO40
-rw-r--r--drivers/gpio/gpio-altera.c65
-rw-r--r--drivers/gpio/gpio-amd-fch.c4
-rw-r--r--drivers/gpio/gpio-amdpt.c10
-rw-r--r--drivers/gpio/gpio-ath79.c66
-rw-r--r--drivers/gpio/gpio-davinci.c7
-rw-r--r--drivers/gpio/gpio-eic-sprd.c9
-rw-r--r--drivers/gpio/gpio-em.c34
-rw-r--r--drivers/gpio/gpio-ep93xx.c7
-rw-r--r--drivers/gpio/gpio-ftgpio010.c35
-rw-r--r--drivers/gpio/gpio-grgpio.c4
-rw-r--r--drivers/gpio/gpio-ixp4xx.c14
-rw-r--r--drivers/gpio/gpio-janz-ttl.c9
-rw-r--r--drivers/gpio/gpio-madera.c6
-rw-r--r--drivers/gpio/gpio-max732x.c45
-rw-r--r--drivers/gpio/gpio-mb86s7x.c51
-rw-r--r--drivers/gpio/gpio-mockup.c21
-rw-r--r--drivers/gpio/gpio-mvebu.c11
-rw-r--r--drivers/gpio/gpio-omap.c509
-rw-r--r--drivers/gpio/gpio-pca953x.c1
-rw-r--r--drivers/gpio/gpio-pl061.c30
-rw-r--r--drivers/gpio/gpio-rcar.c2
-rw-r--r--drivers/gpio/gpio-siox.c51
-rw-r--r--drivers/gpio/gpio-stp-xway.c33
-rw-r--r--drivers/gpio/gpio-tegra.c4
-rw-r--r--drivers/gpio/gpio-vf610.c14
-rw-r--r--drivers/gpio/gpio-vr41xx.c19
-rw-r--r--drivers/gpio/gpio-xilinx.c90
-rw-r--r--drivers/gpio/gpiolib-acpi.c6
-rw-r--r--drivers/gpio/gpiolib-of.c52
-rw-r--r--drivers/gpio/gpiolib.c94
-rw-r--r--drivers/gpio/gpiolib.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/hwmgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c7
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c6
-rw-r--r--drivers/gpu/drm/imx/ipuv3-crtc.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c2
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c2
-rw-r--r--drivers/hid/hid-cp2112.c7
-rw-r--r--drivers/hv/Kconfig4
-rw-r--r--drivers/hv/hv.c156
-rw-r--r--drivers/hv/hv_util.c1
-rw-r--r--drivers/hv/hyperv_vmbus.h3
-rw-r--r--drivers/hv/vmbus_drv.c42
-rw-r--r--drivers/hwmon/coretemp.c36
-rw-r--r--drivers/i2c/i2c-core-acpi.c6
-rw-r--r--drivers/i3c/master.c82
-rw-r--r--drivers/i3c/master/dw-i3c-master.c7
-rw-r--r--drivers/i3c/master/i3c-master-cdns.c10
-rw-r--r--drivers/iio/humidity/dht11.c8
-rw-r--r--drivers/iio/industrialio-core.c4
-rw-r--r--drivers/infiniband/core/device.c8
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c6
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c3
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c7
-rw-r--r--drivers/irqchip/Kconfig32
-rw-r--r--drivers/irqchip/Makefile2
-rw-r--r--drivers/irqchip/irq-al-fic.c278
-rw-r--r--drivers/irqchip/irq-csky-mpintc.c86
-rw-r--r--drivers/irqchip/irq-gic-v2m.c85
-rw-r--r--drivers/irqchip/irq-gic-v3.c10
-rw-r--r--drivers/irqchip/irq-mbigen.c3
-rw-r--r--drivers/irqchip/irq-meson-gpio.c1
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c3
-rw-r--r--drivers/irqchip/irq-renesas-irqc.c91
-rw-r--r--drivers/irqchip/irq-renesas-rza1.c283
-rw-r--r--drivers/irqchip/irq-sni-exiu.c142
-rw-r--r--drivers/irqchip/qcom-irq-combiner.c5
-rw-r--r--drivers/leds/Kconfig35
-rw-r--r--drivers/leds/Makefile4
-rw-r--r--drivers/leds/leds-lm36274.c172
-rw-r--r--drivers/leds/leds-lm3697.c395
-rw-r--r--drivers/leds/leds-max77650.c2
-rw-r--r--drivers/leds/leds-pca955x.c2
-rw-r--r--drivers/leds/leds-pwm.c45
-rw-r--r--drivers/leds/leds-spi-byte.c161
-rw-r--r--drivers/leds/leds-tca6507.c2
-rw-r--r--drivers/leds/leds-ti-lmu-common.c156
-rw-r--r--drivers/leds/trigger/Kconfig2
-rw-r--r--drivers/leds/trigger/ledtrig-activity.c2
-rw-r--r--drivers/leds/trigger/ledtrig-transient.c2
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/memory/omap-gpmc.c4
-rw-r--r--drivers/mfd/Kconfig5
-rw-r--r--drivers/mfd/ti-lmu.c23
-rw-r--r--drivers/misc/lkdtm/bugs.c2
-rw-r--r--drivers/mtd/nand/raw/ingenic/Kconfig2
-rw-r--r--drivers/mtd/nand/raw/ingenic/Makefile4
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_ecc.c9
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c (renamed from drivers/mtd/nand/raw/ingenic/ingenic_nand.c)0
-rw-r--r--drivers/mtd/nand/raw/sunxi_nand.c40
-rw-r--r--drivers/mtd/nand/spi/gigadevice.c2
-rw-r--r--drivers/mtd/nand/spi/macronix.c4
-rw-r--r--drivers/net/ppp/Kconfig3
-rw-r--r--drivers/net/ppp/ppp_mppe.c97
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c2
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/main.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/rx.c2
-rw-r--r--drivers/net/wireless/ti/wlcore/tx.c2
-rw-r--r--drivers/net/wireless/virt_wifi.c2
-rw-r--r--drivers/nvdimm/security.c2
-rw-r--r--drivers/perf/Kconfig8
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/arm_pmu_acpi.c72
-rw-r--r--drivers/perf/arm_spe_pmu.c12
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c554
-rw-r--r--drivers/powercap/intel_rapl.c75
-rw-r--r--drivers/pwm/Kconfig11
-rw-r--r--drivers/pwm/Makefile1
-rw-r--r--drivers/pwm/core.c172
-rw-r--r--drivers/pwm/pwm-atmel-hlcdc.c1
-rw-r--r--drivers/pwm/pwm-bcm2835.c8
-rw-r--r--drivers/pwm/pwm-fsl-ftm.c383
-rw-r--r--drivers/pwm/pwm-jz4740.c49
-rw-r--r--drivers/pwm/pwm-meson.c386
-rw-r--r--drivers/pwm/pwm-rcar.c39
-rw-r--r--drivers/pwm/pwm-sifive.c339
-rw-r--r--drivers/pwm/pwm-stm32-lp.c25
-rw-r--r--drivers/pwm/pwm-stm32.c2
-rw-r--r--drivers/pwm/sysfs.c102
-rw-r--r--drivers/ras/cec.c132
-rw-r--r--drivers/regulator/88pm800-regulator.c (renamed from drivers/regulator/88pm800.c)0
-rw-r--r--drivers/regulator/Kconfig39
-rw-r--r--drivers/regulator/Makefile4
-rw-r--r--drivers/regulator/arizona-ldo1.c83
-rw-r--r--drivers/regulator/arizona-micsupp.c72
-rw-r--r--drivers/regulator/bd70528-regulator.c1
-rw-r--r--drivers/regulator/bd718x7-regulator.c1
-rw-r--r--drivers/regulator/core.c278
-rw-r--r--drivers/regulator/cpcap-regulator.c2
-rw-r--r--drivers/regulator/da9062-regulator.c40
-rw-r--r--drivers/regulator/da9063-regulator.c61
-rw-r--r--drivers/regulator/da9211-regulator.c2
-rw-r--r--drivers/regulator/helpers.c11
-rw-r--r--drivers/regulator/lm363x-regulator.c78
-rw-r--r--drivers/regulator/max77620-regulator.c28
-rw-r--r--drivers/regulator/max77650-regulator.c170
-rw-r--r--drivers/regulator/max77802-regulator.c2
-rw-r--r--drivers/regulator/max8952.c64
-rw-r--r--drivers/regulator/of_regulator.c63
-rw-r--r--drivers/regulator/qcom_spmi-regulator.c252
-rw-r--r--drivers/regulator/s2mps11.c255
-rw-r--r--drivers/regulator/s5m8767.c4
-rw-r--r--drivers/regulator/slg51000-regulator.c523
-rw-r--r--drivers/regulator/slg51000-regulator.h505
-rw-r--r--drivers/regulator/stm32-booster.c132
-rw-r--r--drivers/regulator/tps65090-regulator.c7
-rw-r--r--drivers/regulator/wm831x-dcdc.c29
-rw-r--r--drivers/s390/block/Kconfig2
-rw-r--r--drivers/s390/block/dasd_devmap.c2
-rw-r--r--drivers/s390/char/Kconfig22
-rw-r--r--drivers/s390/char/Makefile1
-rw-r--r--drivers/s390/char/sclp_async.c189
-rw-r--r--drivers/s390/char/zcore.c2
-rw-r--r--drivers/s390/cio/airq.c37
-rw-r--r--drivers/s390/cio/ccwreq.c9
-rw-r--r--drivers/s390/cio/chsc.c30
-rw-r--r--drivers/s390/cio/cio.h3
-rw-r--r--drivers/s390/cio/css.c187
-rw-r--r--drivers/s390/cio/device.c68
-rw-r--r--drivers/s390/cio/device_fsm.c49
-rw-r--r--drivers/s390/cio/device_id.c20
-rw-r--r--drivers/s390/cio/device_ops.c21
-rw-r--r--drivers/s390/cio/device_pgid.c22
-rw-r--r--drivers/s390/cio/device_status.c24
-rw-r--r--drivers/s390/cio/io_sch.h20
-rw-r--r--drivers/s390/cio/qdio_main.c1
-rw-r--r--drivers/s390/cio/qdio_setup.c2
-rw-r--r--drivers/s390/cio/qdio_thinint.c6
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.c524
-rw-r--r--drivers/s390/cio/vfio_ccw_cp.h7
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c13
-rw-r--r--drivers/s390/crypto/pkey_api.c8
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c34
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c380
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h15
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c4
-rw-r--r--drivers/s390/net/Kconfig8
-rw-r--r--drivers/s390/virtio/virtio_ccw.c246
-rw-r--r--drivers/soc/Makefile2
-rw-r--r--drivers/soc/ti/Kconfig4
-rw-r--r--drivers/target/iscsi/iscsi_target_auth.c16
-rw-r--r--drivers/target/target_core_iblock.c2
-rw-r--r--drivers/thermal/intel/x86_pkg_temp_thermal.c142
-rw-r--r--drivers/tty/tty_ldisc.c8
-rw-r--r--drivers/usb/core/devio.c48
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/afs/addr_list.c4
-rw-r--r--fs/afs/dynroot.c8
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/cifs_spnego.c25
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsencrypt.c62
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/cifs/dns_resolve.c3
-rw-r--r--fs/cifs/smb2ops.c64
-rw-r--r--fs/cifs/smb2pdu.h14
-rw-r--r--fs/crypto/keyinfo.c2
-rw-r--r--fs/dax.c11
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/fscache/object-list.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dns_resolve.c3
-rw-r--r--fs/nfs/nfs4idmap.c30
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/ubifs/auth.c2
-rw-r--r--fs/userfaultfd.c42
-rw-r--r--include/asm-generic/atomic64.h20
-rw-r--r--include/asm-generic/vdso/vsyscall.h50
-rw-r--r--include/clocksource/hyperv_timer.h107
-rw-r--r--include/clocksource/timer-davinci.h44
-rw-r--r--include/crypto/aead.h34
-rw-r--r--include/crypto/algapi.h7
-rw-r--r--include/crypto/arc4.h10
-rw-r--r--include/crypto/chacha.h2
-rw-r--r--include/crypto/crypto_wq.h8
-rw-r--r--include/crypto/drbg.h2
-rw-r--r--include/crypto/internal/hash.h6
-rw-r--r--include/crypto/internal/skcipher.h60
-rw-r--r--include/crypto/skcipher.h92
-rw-r--r--include/keys/request_key_auth-type.h1
-rw-r--r--include/linux/acpi.h12
-rw-r--r--include/linux/arch_topology.h2
-rw-r--r--include/linux/audit.h9
-rw-r--r--include/linux/cacheinfo.h2
-rw-r--r--include/linux/cgroup-defs.h2
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/crypto.h12
-rw-r--r--include/linux/device.h3
-rw-r--r--include/linux/dns_resolver.h3
-rw-r--r--include/linux/efi.h10
-rw-r--r--include/linux/energy_model.h2
-rw-r--r--include/linux/fmc-sdb.h39
-rw-r--r--include/linux/fmc.h269
-rw-r--r--include/linux/gpio/driver.h29
-rw-r--r--include/linux/gpio/gpio-reg.h2
-rw-r--r--include/linux/gpio/machine.h4
-rw-r--r--include/linux/hrtimer.h16
-rw-r--r--include/linux/hrtimer_defs.h27
-rw-r--r--include/linux/i3c/master.h10
-rw-r--r--include/linux/ima.h2
-rw-r--r--include/linux/irqchip/arm-gic-common.h5
-rw-r--r--include/linux/irqchip/arm-gic.h3
-rw-r--r--include/linux/jump_label.h3
-rw-r--r--include/linux/key-type.h3
-rw-r--r--include/linux/key.h213
-rw-r--r--include/linux/leds-ti-lmu-common.h47
-rw-r--r--include/linux/lockdep.h43
-rw-r--r--include/linux/log2.h34
-rw-r--r--include/linux/mfd/da9062/registers.h3
-rw-r--r--include/linux/mfd/da9063/pdata.h49
-rw-r--r--include/linux/mfd/samsung/core.h1
-rw-r--r--include/linux/mfd/samsung/s2mps11.h9
-rw-r--r--include/linux/mfd/ti-lmu-register.h63
-rw-r--r--include/linux/mfd/ti-lmu.h5
-rw-r--r--include/linux/mfd/wm831x/pdata.h1
-rw-r--r--include/linux/module.h5
-rw-r--r--include/linux/pagemap.h13
-rw-r--r--include/linux/percpu-rwsem.h14
-rw-r--r--include/linux/perf/arm_pmu.h2
-rw-r--r--include/linux/platform_data/gpio-omap.h2
-rw-r--r--include/linux/proc_fs.h9
-rw-r--r--include/linux/processor.h9
-rw-r--r--include/linux/ptrace.h2
-rw-r--r--include/linux/pwm.h16
-rw-r--r--include/linux/rcu_sync.h40
-rw-r--r--include/linux/rcupdate.h21
-rw-r--r--include/linux/regmap.h20
-rw-r--r--include/linux/regulator/coupler.h97
-rw-r--r--include/linux/regulator/driver.h12
-rw-r--r--include/linux/regulator/machine.h2
-rw-r--r--include/linux/regulator/max8952.h3
-rw-r--r--include/linux/rwsem.h16
-rw-r--r--include/linux/sched.h90
-rw-r--r--include/linux/sched/nohz.h8
-rw-r--r--include/linux/sched/signal.h15
-rw-r--r--include/linux/sched/sysctl.h11
-rw-r--r--include/linux/sched/topology.h25
-rw-r--r--include/linux/sched/user.h14
-rw-r--r--include/linux/sched/wake_q.h5
-rw-r--r--include/linux/security.h12
-rw-r--r--include/linux/siox.h10
-rw-r--r--include/linux/smp.h52
-rw-r--r--include/linux/srcutree.h14
-rw-r--r--include/linux/stop_machine.h1
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/timekeeping.h32
-rw-r--r--include/linux/timer.h27
-rw-r--r--include/linux/topology.h6
-rw-r--r--include/linux/torture.h2
-rw-r--r--include/linux/tpm_eventlog.h152
-rw-r--r--include/linux/tracehook.h7
-rw-r--r--include/linux/types.h2
-rw-r--r--include/linux/user_namespace.h12
-rw-r--r--include/linux/workqueue.h4
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/net_namespace.h3
-rw-r--r--include/trace/events/sched.h31
-rw-r--r--include/uapi/linux/audit.h1
-rw-r--r--include/uapi/linux/bpf.h2
-rw-r--r--include/uapi/linux/keyctl.h84
-rw-r--r--include/uapi/linux/sched.h14
-rw-r--r--include/uapi/linux/sched/types.h66
-rw-r--r--include/vdso/datapage.h89
-rw-r--r--include/vdso/helpers.h56
-rw-r--r--include/vdso/vsyscall.h11
-rw-r--r--init/Kconfig55
-rw-r--r--init/init_task.c5
-rw-r--r--kernel/audit.c27
-rw-r--r--kernel/audit.h8
-rw-r--r--kernel/auditfilter.c62
-rw-r--r--kernel/auditsc.c42
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/cgroup/cgroup.c46
-rw-r--r--kernel/cgroup/cpuset.c4
-rw-r--r--kernel/cpu.c9
-rw-r--r--kernel/cred.c13
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/events/uprobes.c8
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/futex.c69
-rw-r--r--kernel/irq/Makefile3
-rw-r--r--kernel/irq/affinity.c12
-rw-r--r--kernel/irq/autoprobe.c6
-rw-r--r--kernel/irq/chip.c10
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/internals.h26
-rw-r--r--kernel/irq/irqdesc.c16
-rw-r--r--kernel/irq/irqdomain.c4
-rw-r--r--kernel/irq/manage.c90
-rw-r--r--kernel/irq/timings.c453
-rw-r--r--kernel/jump_label.c64
-rw-r--r--kernel/kexec_file.c9
-rw-r--r--kernel/locking/Makefile2
-rw-r--r--kernel/locking/lock_events.h45
-rw-r--r--kernel/locking/lock_events_list.h12
-rw-r--r--kernel/locking/lockdep.c742
-rw-r--r--kernel/locking/lockdep_internals.h36
-rw-r--r--kernel/locking/locktorture.c2
-rw-r--r--kernel/locking/percpu-rwsem.c2
-rw-r--r--kernel/locking/rwsem-xadd.c745
-rw-r--r--kernel/locking/rwsem.c1453
-rw-r--r--kernel/locking/rwsem.h306
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/energy_model.c2
-rw-r--r--kernel/ptrace.c7
-rw-r--r--kernel/rcu/rcu.h5
-rw-r--r--kernel/rcu/rcutorture.c96
-rw-r--r--kernel/rcu/srcutree.c69
-rw-r--r--kernel/rcu/sync.c214
-rw-r--r--kernel/rcu/tree.c164
-rw-r--r--kernel/rcu/tree.h6
-rw-r--r--kernel/rcu/tree_exp.h53
-rw-r--r--kernel/rcu/tree_plugin.h195
-rw-r--r--kernel/rcu/tree_stall.h4
-rw-r--r--kernel/rcu/update.c13
-rw-r--r--kernel/rseq.c4
-rw-r--r--kernel/sched/autogroup.c2
-rw-r--r--kernel/sched/core.c533
-rw-r--r--kernel/sched/cpudeadline.c4
-rw-r--r--kernel/sched/cpufreq_schedutil.c24
-rw-r--r--kernel/sched/cpupri.c4
-rw-r--r--kernel/sched/deadline.c8
-rw-r--r--kernel/sched/debug.c43
-rw-r--r--kernel/sched/fair.c628
-rw-r--r--kernel/sched/features.h1
-rw-r--r--kernel/sched/pelt.c13
-rw-r--r--kernel/sched/pelt.h2
-rw-r--r--kernel/sched/rt.c8
-rw-r--r--kernel/sched/sched-pelt.h2
-rw-r--r--kernel/sched/sched.h134
-rw-r--r--kernel/sched/topology.c18
-rw-r--r--kernel/sched/wait.c8
-rw-r--r--kernel/seccomp.c2
-rw-r--r--kernel/signal.c251
-rw-r--r--kernel/smp.c12
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c19
-rw-r--r--kernel/sysctl.c16
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/alarmtimer.c1
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/hrtimer.c8
-rw-r--r--kernel/time/ntp.c4
-rw-r--r--kernel/time/posix-timers.c13
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/time.c4
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/time/timer_list.c36
-rw-r--r--kernel/time/vsyscall.c133
-rw-r--r--kernel/torture.c23
-rw-r--r--kernel/trace/ftrace.c10
-rw-r--r--kernel/trace/trace.c24
-rw-r--r--kernel/trace/trace_hwlat.c2
-rw-r--r--kernel/up.c3
-rw-r--r--kernel/user.c8
-rw-r--r--kernel/user_namespace.c9
-rw-r--r--kernel/workqueue.c28
-rw-r--r--lib/Kconfig5
-rw-r--r--lib/Kconfig.debug28
-rw-r--r--lib/Makefile2
-rw-r--r--lib/atomic64.c32
-rw-r--r--lib/crypto/Makefile4
-rw-r--r--lib/crypto/arc4.c74
-rw-r--r--lib/debugobjects.c321
-rw-r--r--lib/devres.c3
-rw-r--r--lib/digsig.c4
-rw-r--r--lib/mpi/mpi-pow.c6
-rw-r--r--lib/raid6/s390vx.uc2
-rw-r--r--lib/reed_solomon/Makefile2
-rw-r--r--lib/reed_solomon/decode_rs.c115
-rw-r--r--lib/reed_solomon/reed_solomon.c12
-rw-r--r--lib/reed_solomon/test_rslib.c518
-rw-r--r--lib/scatterlist.c9
-rw-r--r--lib/smp_processor_id.c2
-rw-r--r--lib/vdso/Kconfig36
-rw-r--r--lib/vdso/Makefile22
-rw-r--r--lib/vdso/gettimeofday.c239
-rw-r--r--mm/filemap.c146
-rw-r--r--mm/huge_memory.c3
-rw-r--r--mm/khugepaged.c4
-rw-r--r--mm/memfd.c2
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/page_io.c13
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/vmalloc.c11
-rw-r--r--mm/vmscan.c27
-rw-r--r--net/bluetooth/l2cap_core.c2
-rw-r--r--net/bpfilter/bpfilter_kern.c2
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/ceph/messenger.c3
-rw-r--r--net/core/net_namespace.c20
-rw-r--r--net/dns_resolver/dns_key.c13
-rw-r--r--net/dns_resolver/dns_query.c20
-rw-r--r--net/mac80211/Kconfig2
-rw-r--r--net/mac80211/cfg.c4
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/key.h1
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mac80211/mlme.c3
-rw-r--r--net/mac80211/tkip.c8
-rw-r--r--net/mac80211/tkip.h4
-rw-r--r--net/mac80211/wep.c49
-rw-r--r--net/mac80211/wep.h5
-rw-r--r--net/mac80211/wpa.c4
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/rxrpc/key.c21
-rw-r--r--net/rxrpc/security.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c7
-rw-r--r--net/wireless/Kconfig2
-rw-r--r--net/wireless/lib80211_crypt_tkip.c48
-rw-r--r--net/wireless/lib80211_crypt_wep.c51
-rw-r--r--net/wireless/reg.c6
-rw-r--r--samples/trace_events/trace-events-sample.c2
-rwxr-xr-xscripts/atomic/check-atomics.sh2
-rw-r--r--security/apparmor/label.c8
-rw-r--r--security/device_cgroup.c2
-rw-r--r--security/integrity/digsig.c34
-rw-r--r--security/integrity/digsig_asymmetric.c6
-rw-r--r--security/integrity/evm/evm_crypto.c2
-rw-r--r--security/integrity/evm/evm_main.c8
-rw-r--r--security/integrity/ima/Kconfig3
-rw-r--r--security/integrity/ima/ima.h21
-rw-r--r--security/integrity/ima/ima_api.c38
-rw-r--r--security/integrity/ima/ima_appraise.c9
-rw-r--r--security/integrity/ima/ima_init.c6
-rw-r--r--security/integrity/ima/ima_main.c123
-rw-r--r--security/integrity/ima/ima_mok.c13
-rw-r--r--security/integrity/ima/ima_policy.c163
-rw-r--r--security/integrity/ima/ima_template.c23
-rw-r--r--security/integrity/ima/ima_template_lib.c21
-rw-r--r--security/integrity/ima/ima_template_lib.h4
-rw-r--r--security/integrity/integrity.h12
-rw-r--r--security/integrity/platform_certs/platform_keyring.c14
-rw-r--r--security/keys/Kconfig18
-rw-r--r--security/keys/compat.c8
-rw-r--r--security/keys/encrypted-keys/encrypted.c2
-rw-r--r--security/keys/encrypted-keys/masterkey_trusted.c2
-rw-r--r--security/keys/gc.c4
-rw-r--r--security/keys/internal.h37
-rw-r--r--security/keys/key.c65
-rw-r--r--security/keys/keyctl.c198
-rw-r--r--security/keys/keyring.c582
-rw-r--r--security/keys/permission.c361
-rw-r--r--security/keys/persistent.c37
-rw-r--r--security/keys/proc.c25
-rw-r--r--security/keys/process_keys.c399
-rw-r--r--security/keys/request_key.c238
-rw-r--r--security/keys/request_key_auth.c80
-rw-r--r--security/safesetid/lsm.c4
-rw-r--r--security/security.c23
-rw-r--r--security/selinux/hooks.c29
-rw-r--r--security/selinux/selinuxfs.c2
-rw-r--r--security/selinux/ss/ebitmap.c10
-rw-r--r--security/selinux/ss/services.c33
-rw-r--r--security/smack/smack_lsm.c3
-rw-r--r--sound/core/seq/oss/seq_oss_ioctl.c2
-rw-r--r--sound/core/seq/oss/seq_oss_rw.c2
-rw-r--r--sound/firewire/amdtp-am824.c2
-rw-r--r--sound/hda/hdac_device.c18
-rw-r--r--sound/pci/hda/patch_realtek.c8
-rw-r--r--sound/usb/line6/pcm.c5
-rw-r--r--sound/usb/mixer_quirks.c4
-rw-r--r--tools/gpio/.gitignore2
-rw-r--r--tools/include/linux/rcu.h4
-rw-r--r--tools/include/uapi/linux/bpf.h2
-rw-r--r--tools/memory-model/linux-kernel.bell6
-rw-r--r--tools/memory-model/linux-kernel.cat102
-rw-r--r--tools/memory-model/linux-kernel.def1
-rw-r--r--tools/memory-model/litmus-tests/MP+poonceonces.litmus2
-rw-r--r--tools/memory-model/litmus-tests/README2
-rw-r--r--tools/memory-model/lock.cat2
-rw-r--r--tools/memory-model/scripts/README4
-rwxr-xr-xtools/memory-model/scripts/checkalllitmus.sh2
-rwxr-xr-xtools/memory-model/scripts/checklitmus.sh2
-rw-r--r--tools/memory-model/scripts/parseargs.sh2
-rw-r--r--tools/memory-model/scripts/runlitmushist.sh2
-rw-r--r--tools/testing/radix-tree/linux/rcupdate.h2
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c1
-rw-r--r--tools/testing/selftests/rcutorture/Makefile3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh5
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot3
-rw-r--r--tools/testing/selftests/timers/freq-step.c6
-rw-r--r--tools/testing/selftests/x86/Makefile5
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c223
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c112
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c120
1346 files changed, 39534 insertions, 25480 deletions
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 74c6702de74e..fc376a323908 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -24,11 +24,11 @@ Description:
[euid=] [fowner=] [fsname=]]
lsm: [[subj_user=] [subj_role=] [subj_type=]
[obj_user=] [obj_role=] [obj_type=]]
- option: [[appraise_type=]] [permit_directio]
-
+ option: [[appraise_type=]] [template=] [permit_directio]
base: func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
[FIRMWARE_CHECK]
[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
+ [KEXEC_CMDLINE]
mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
[[^]MAY_EXEC]
fsmagic:= hex value
@@ -38,6 +38,8 @@ Description:
fowner:= decimal value
lsm: are LSM specific
option: appraise_type:= [imasig]
+ template:= name of a defined IMA template type
+		    (e.g., ima-ng). Only valid when action is "measure".
pcr:= decimal value
default policy:
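
A rough C sketch of feeding a rule that uses the new "template=" option into the running policy; it assumes securityfs is mounted at the usual /sys/kernel/security and that the active policy accepts updates::

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
      /* "measure" is the only action for which template= is valid. */
      const char *rule = "measure func=BPRM_CHECK template=ima-ng\n";
      int fd = open("/sys/kernel/security/ima/policy", O_WRONLY);

      if (fd < 0) {
          perror("open ima/policy");
          return 1;
      }
      if (write(fd, rule, strlen(rule)) < 0)
          perror("write rule");
      close(fd);
      return 0;
  }
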
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
index 2979c40c10e9..966f8504bd7b 100644
--- a/Documentation/ABI/testing/sysfs-bus-css
+++ b/Documentation/ABI/testing/sysfs-bus-css
@@ -33,3 +33,26 @@ Description: Contains the PIM/PAM/POM values, as reported by the
in sync with the values current in the channel subsystem).
Note: This is an I/O-subchannel specific attribute.
Users: s390-tools, HAL
+
+What: /sys/bus/css/devices/.../driver_override
+Date: June 2019
+Contact: Cornelia Huck <cohuck@redhat.com>
+ linux-s390@vger.kernel.org
+Description: This file allows the driver for a device to be specified. When
+ specified, only a driver with a name matching the value written
+ to driver_override will have an opportunity to bind to the
+ device. The override is specified by writing a string to the
+ driver_override file (echo vfio-ccw > driver_override) and
+ may be cleared with an empty string (echo > driver_override).
+		device. This returns the device to the standard matching
+		rules for binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device
+		will not bind to any driver. This also allows devices to
+		opt out of driver binding using a driver_override name such as
+		"none". Only a single driver may be specified in the override;
+		there is no support for parsing delimiters.
+ Note that unlike the mechanism of the same name for pci, this
+		file does not allow overriding the basic matching rules. I.e.,
+ the driver must still match the subchannel type of the device.
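
As a minimal C sketch of the flow described above (the subchannel ID 0.0.0313 is a made-up example; an empty write clears the override)::

  #include <stdio.h>

  static int set_override(const char *dev, const char *drv)
  {
      char path[256];
      FILE *f;

      snprintf(path, sizeof(path),
               "/sys/bus/css/devices/%s/driver_override", dev);
      f = fopen(path, "w");
      if (!f)
          return -1;
      fprintf(f, "%s\n", drv);    /* "vfio-ccw", "none", or "" to clear */
      return fclose(f);
  }

  int main(void)
  {
      set_override("0.0.0313", "vfio-ccw");    /* restrict matching */
      set_override("0.0.0313", "");            /* back to standard rules */
      return 0;
  }
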
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 1528239f69b2..923fe2001472 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -538,3 +538,26 @@ Description: Intel Energy and Performance Bias Hint (EPB)
This attribute is present for all online CPUs supporting the
Intel EPB feature.
+
+What: /sys/devices/system/cpu/umwait_control
+ /sys/devices/system/cpu/umwait_control/enable_c02
+ /sys/devices/system/cpu/umwait_control/max_time
+Date: May 2019
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Umwait control
+
+ enable_c02: Read/write interface to control umwait C0.2 state
+			Read returns the current C0.2 state:
+ 0: C0.2 is disabled
+ 1: C0.2 is enabled
+
+ Write 'y' or '1' or 'on' to enable C0.2 state.
+ Write 'n' or '0' or 'off' to disable C0.2 state.
+
+ The interface is case insensitive.
+
+ max_time: Read/write interface to control umwait maximum time
+ in TSC-quanta that the CPU can reside in either C0.1
+ or C0.2 state. The time is an unsigned 32-bit number.
+ Note that a value of zero means there is no limit.
+			The two low-order bits must be zero.
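
A short C sketch against the layout documented above; it enables C0.2 and writes a max_time value with the two low-order bits masked off, as required::

  #include <stdint.h>
  #include <stdio.h>

  static void write_file(const char *path, const char *val)
  {
      FILE *f = fopen(path, "w");

      if (f) {
          fputs(val, f);
          fclose(f);
      }
  }

  int main(void)
  {
      char buf[16];
      uint32_t max_tsc = 100000 & ~3u;    /* low two bits must be clear */

      write_file("/sys/devices/system/cpu/umwait_control/enable_c02", "1\n");
      snprintf(buf, sizeof(buf), "%u\n", max_tsc);
      write_file("/sys/devices/system/cpu/umwait_control/max_time", buf);
      return 0;
  }
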
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
index 613033ff2b9b..5e6429d66c24 100644
--- a/Documentation/RCU/rcuref.txt
+++ b/Documentation/RCU/rcuref.txt
@@ -12,6 +12,7 @@ please read on.
Reference counting on elements of lists which are protected by traditional
reader/writer spinlocks or semaphores are straightforward:
+CODE LISTING A:
1. 2.
add() search_and_reference()
{ {
@@ -28,7 +29,8 @@ add() search_and_reference()
release_referenced() delete()
{ {
... write_lock(&list_lock);
- atomic_dec(&el->rc, relfunc) ...
+	if (atomic_dec_and_test(&el->rc))       ...
+ kfree(el);
... remove_element
} write_unlock(&list_lock);
...
@@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which
has already been deleted from the list/array. Use atomic_inc_not_zero()
in this scenario as follows:
+CODE LISTING B:
1. 2.
add() search_and_reference()
{ {
@@ -79,6 +82,7 @@ search_and_reference() code path. In such cases, the
atomic_dec_and_test() may be moved from delete() to el_free()
as follows:
+CODE LISTING C:
1. 2.
add() search_and_reference()
{ {
@@ -114,6 +118,17 @@ element can therefore safely be freed. This in turn guarantees that if
any reader finds the element, that reader may safely acquire a reference
without checking the value of the reference counter.
+A clear advantage of the RCU-based pattern in listing C over the one
+in listing B is that any call to search_and_reference() that locates
+a given object will succeed in obtaining a reference to that object,
+even given a concurrent invocation of delete() for that same object.
+Similarly, a clear advantage of both listings B and C over listing A is
+that a call to delete() is not delayed even if there are an arbitrarily
+large number of calls to search_and_reference() searching for the same
+object that delete() was invoked on. Instead, all that is delayed is
+the eventual invocation of kfree(), which is usually not a problem on
+modern computer systems, even the small ones.
+
In cases where delete() can sleep, synchronize_rcu() can be called from
delete(), so that el_free() can be subsumed into delete as follows:
@@ -130,3 +145,7 @@ delete()
kfree(el);
...
}
+
+As additional examples in the kernel, the pattern in listing C is used by
+reference counting of struct pid, while the pattern in listing B is used by
+struct posix_acl.
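
To see the listing B reader-side check in compilable form, here is a condensed kernel-context sketch; the search() helper, the struct layout, and the call_rcu()-based freeing arrangement are illustrative rather than lifted from any one subsystem::

  #include <linux/atomic.h>
  #include <linux/kernel.h>
  #include <linux/list.h>
  #include <linux/rcupdate.h>
  #include <linux/slab.h>

  struct el {
      struct list_head list;
      struct rcu_head head;
      atomic_t rc;
      int key;
  };

  struct el *search(int key);    /* assumed RCU-protected lookup helper */

  static void el_free(struct rcu_head *h)
  {
      kfree(container_of(h, struct el, head));
  }

  /* Reader side: take a reference only if rc is still non-zero, so an
   * object already committed to deletion cannot be resurrected. */
  struct el *search_and_reference(int key)
  {
      struct el *p;

      rcu_read_lock();
      p = search(key);
      if (p && !atomic_inc_not_zero(&p->rc))
          p = NULL;    /* lost the race with delete() */
      rcu_read_unlock();
      return p;
  }

  /* The last reference dropper frees the element; call_rcu() makes the
   * kfree() wait out any readers still examining it. */
  void release_referenced(struct el *p)
  {
      if (atomic_dec_and_test(&p->rc))
          call_rcu(&p->head, el_free);
  }
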
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 1ab70c37921f..13e88fc00f01 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout
This boot/sysfs parameter controls the RCU-tasks stall warning
interval. A value of zero or less suppresses RCU-tasks stall
warnings. A positive value sets the stall-warning interval
- in jiffies. An RCU-tasks stall warning starts with the line:
+ in seconds. An RCU-tasks stall warning starts with the line:
INFO: rcu_tasks detected stalls on tasks:
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 981651a8b65d..7e1a8721637a 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -212,7 +212,7 @@ synchronize_rcu()
rcu_assign_pointer()
- typeof(p) rcu_assign_pointer(p, typeof(p) v);
+ void rcu_assign_pointer(p, typeof(p) v);
Yes, rcu_assign_pointer() -is- implemented as a macro, though it
would be cool to be able to declare a function in this manner.
@@ -220,9 +220,9 @@ rcu_assign_pointer()
The updater uses this function to assign a new value to an
RCU-protected pointer, in order to safely communicate the change
- in value from the updater to the reader. This function returns
- the new value, and also executes any memory-barrier instructions
- required for a given CPU architecture.
+ in value from the updater to the reader. This macro does not
+ evaluate to an rvalue, but it does execute any memory-barrier
+ instructions required for a given CPU architecture.
Perhaps just as important, it serves to document (1) which
pointers are protected by RCU and (2) the point at which a
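
A kernel-context sketch of the macro used as just described, purely as a statement; struct foo and gp are illustrative names, and reclamation of replaced values is omitted::

  #include <linux/errno.h>
  #include <linux/rcupdate.h>
  #include <linux/slab.h>

  struct foo {
      int a;
  };

  static struct foo __rcu *gp;

  int update_foo(int val)
  {
      struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

      if (!p)
          return -ENOMEM;
      p->a = val;
      rcu_assign_pointer(gp, p);    /* publish; used as a statement */
      return 0;
  }

  int read_foo(void)
  {
      struct foo *p;
      int ret = -1;

      rcu_read_lock();
      p = rcu_dereference(gp);    /* pick up the published pointer */
      if (p)
          ret = p->a;
      rcu_read_unlock();
      return ret;
  }
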
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index cf88c1f98270..a5c845338d6d 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -705,6 +705,12 @@ Conventions
informational files on the root cgroup which end up showing global
information available elsewhere shouldn't exist.
+- The default time unit is microseconds. If a different unit is ever
+ used, an explicit unit suffix must be present.
+
+- A parts-per quantity should use a decimal percentage with at least
+  a two-digit fractional part - e.g. 13.40.
+
- If a controller implements weight based resource distribution, its
interface file should be named "weight" and have the range [1,
10000] with 100 as the default. The values are chosen to allow
diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 31653a9f0e1b..656aee262e23 100644
--- a/Documentation/admin-guide/hw-vuln/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -241,7 +241,7 @@ Guest mitigation mechanisms
For further information about confining guests to a single or to a group
of cores consult the cpusets documentation:
- https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
+ https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.rst
.. _interrupt_isolation:
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..74d28efa1c40 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -478,7 +478,7 @@
others).
ccw_timeout_log [S390]
- See Documentation/s390/CommonIO for details.
+ See Documentation/s390/common_io.rst for details.
cgroup_disable= [KNL] Disable a particular controller
Format: {name of the controller(s) to disable}
@@ -516,7 +516,7 @@
/selinux/checkreqprot.
cio_ignore= [S390]
- See Documentation/s390/CommonIO for details.
+ See Documentation/s390/common_io.rst for details.
clk_ignore_unused
[CLK]
Prevents the clock framework from automatically gating
@@ -3752,6 +3752,12 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
rcutree.rcu_fanout_exact= [KNL]
Disable autobalancing of the rcu_node combining
tree. This is used by rcutorture, and might
@@ -4078,7 +4084,7 @@
relax_domain_level=
[KNL, SMP] Set scheduler's default relax_domain_level.
- See Documentation/cgroup-v1/cpusets.txt.
+ See Documentation/cgroup-v1/cpusets.rst.
reserve= [KNL,BUGS] Force kernel to ignore I/O ports or memory
Format: <base1>,<size1>[,<base2>,<size2>,...]
@@ -4588,7 +4594,7 @@
swapaccount=[0|1]
[KNL] Enable accounting of swap in memory resource
controller if no parameter or 1 is given or disable
- it if 0 is given (See Documentation/cgroup-v1/memory.txt)
+ it if 0 is given (See Documentation/cgroup-v1/memory.rst)
swiotlb= [ARM,IA-64,PPC,MIPS,X86]
Format: { <int> | force | noforce }
@@ -5100,13 +5106,12 @@
targets for exploits that can control RIP.
emulate [default] Vsyscalls turn into traps and are
- emulated reasonably safely.
+ emulated reasonably safely. The vsyscall
+ page is readable.
- native Vsyscalls are native syscall instructions.
- This is a little bit faster than trapping
- and makes a few dynamic recompilers work
- better than they would in emulation mode.
- It also makes exploits much easier to write.
+ xonly Vsyscalls turn into traps and are
+ emulated reasonably safely. The vsyscall
+ page is not readable.
none Vsyscalls don't work at all. This makes
them quite hard to use for exploits but
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
index d78c5b315f72..546f174e5d6a 100644
--- a/Documentation/admin-guide/mm/numa_memory_policy.rst
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -15,7 +15,7 @@ document attempts to describe the concepts and APIs of the 2.6 memory policy
support.
Memory policies should not be confused with cpusets
-(``Documentation/cgroup-v1/cpusets.txt``)
+(``Documentation/cgroup-v1/cpusets.rst``)
which is an administrative mechanism for restricting the nodes from which
memory may be allocated by a set of processes. Memory policies are a
programming interface that a NUMA-aware application can take advantage of. When
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index b73a2519ecf2..5ae2ef2c12f3 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -207,6 +207,10 @@ HWCAP_FLAGM
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+HWCAP2_FLAGM2
+
+ Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
HWCAP_SSBS
Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
@@ -223,6 +227,10 @@ HWCAP_PACG
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arm64/pointer-authentication.txt.
+HWCAP2_FRINT
+
+ Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+
4. Unused AT_HWCAP bits
-----------------------
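
A small userspace sketch for probing the two new bits; it assumes arm64 uapi headers recent enough to define HWCAP2_FLAGM2 and HWCAP2_FRINT::

  #include <stdio.h>
  #include <sys/auxv.h>
  #include <asm/hwcap.h>    /* arm64 uapi; defines the HWCAP2_* bits */

  int main(void)
  {
      unsigned long hwcap2 = getauxval(AT_HWCAP2);

      printf("HWCAP2_FLAGM2: %s\n", (hwcap2 & HWCAP2_FLAGM2) ? "yes" : "no");
      printf("HWCAP2_FRINT:  %s\n", (hwcap2 & HWCAP2_FRINT) ? "yes" : "no");
      return 0;
  }
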
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index dca3fb0554db..0ab747e0d5ac 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -81,9 +81,11 @@ Non-RMW ops:
The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
-smp_store_release() respectively.
+smp_store_release() respectively. Therefore, if you find yourself only using
+the non-RMW operations of atomic_t, you do not in fact need atomic_t at all
+and are doing it wrong.
-The one detail to this is that atomic_set{}() should be observable to the RMW
+A subtle detail of atomic_set{}() is that it should be observable to the RMW
ops. That is:
C atomic-set
@@ -187,13 +189,22 @@ The barriers:
smp_mb__{before,after}_atomic()
-only apply to the RMW ops and can be used to augment/upgrade the ordering
-inherent to the used atomic op. These barriers provide a full smp_mb().
+only apply to the RMW atomic ops and can be used to augment/upgrade the
+ordering inherent to the op. These barriers act almost like a full smp_mb():
+smp_mb__before_atomic() orders all earlier accesses against the RMW op
+itself and all accesses following it, and smp_mb__after_atomic() orders all
+later accesses against the RMW op and all accesses preceding it. However,
+accesses between the smp_mb__{before,after}_atomic() and the RMW op are not
+ordered, so it is advisable to place the barrier right next to the RMW atomic
+op whenever possible.
These helper barriers exist because architectures have varying implicit
ordering on their SMP atomic primitives. For example our TSO architectures
provide full ordered atomics and these barriers are no-ops.
+NOTE: when the atomic RMW ops are fully ordered, they should also imply a
+compiler barrier.
+
Thus:
atomic_fetch_add();
@@ -212,7 +223,9 @@ Further, while something like:
atomic_dec(&X);
is a 'typical' RELEASE pattern, the barrier is strictly stronger than
-a RELEASE. Similarly for something like:
+a RELEASE because it orders preceding instructions against both the read
+and write parts of the atomic_dec(), and against all following instructions
+as well. Similarly, something like:
atomic_inc(&X);
smp_mb__after_atomic();
@@ -244,7 +257,8 @@ strictly stronger than ACQUIRE. As illustrated:
This should not happen; but a hypothetical atomic_inc_acquire() --
(void)atomic_fetch_inc_acquire() for instance -- would allow the outcome,
-since then:
+because it would not order the W part of the RMW against the following
+WRITE_ONCE. Thus:
P1 P2
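
A kernel-context sketch of the placement advice above, with X and Y as illustrative variables; each barrier sits immediately next to its RMW op so no unrelated accesses slip between them::

  #include <linux/atomic.h>
  #include <linux/compiler.h>

  static atomic_t X = ATOMIC_INIT(1);
  static int Y;

  void release_side(void)
  {
      Y = 1;                      /* store to be ordered before the dec */
      smp_mb__before_atomic();    /* adjacent to the RMW op, as advised */
      atomic_dec(&X);
  }

  void acquire_side(void)
  {
      atomic_inc(&X);
      smp_mb__after_atomic();     /* adjacent to the RMW op, as advised */
      (void)READ_ONCE(Y);         /* load ordered after the inc */
  }
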
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 1a0f2ac02eb6..b2265cf6c9c3 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -539,7 +539,7 @@ As for cgroups-v1 (blkio controller), the exact set of stat files
created, and kept up-to-date by bfq, depends on whether
CONFIG_DEBUG_BLK_CGROUP is set. If it is set, then bfq creates all
the stat files documented in
-Documentation/cgroup-v1/blkio-controller.txt. If, instead,
+Documentation/cgroup-v1/blkio-controller.rst. If, instead,
CONFIG_DEBUG_BLK_CGROUP is not set, then bfq creates only the files
blkio.bfq.io_service_bytes
blkio.bfq.io_service_bytes_recursive
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.rst
index d1a1b7bdd03a..fd3184537d23 100644
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ b/Documentation/cgroup-v1/blkio-controller.rst
@@ -1,5 +1,7 @@
- Block IO Controller
- ===================
+===================
+Block IO Controller
+===================
+
Overview
========
cgroup subsys "blkio" implements the block io controller. There seems to be
@@ -17,24 +19,27 @@ HOWTO
=====
Throttling/Upper Limit policy
-----------------------------
-- Enable Block IO controller
+- Enable Block IO controller::
+
CONFIG_BLK_CGROUP=y
-- Enable throttling in block layer
+- Enable throttling in block layer::
+
CONFIG_BLK_DEV_THROTTLING=y
-- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
+- Mount blkio controller (see cgroups.rst, Why are cgroups needed?)::
+
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
- Specify a bandwidth rate on particular device for root group. The format
- for policy is "<major>:<minor> <bytes_per_second>".
+ for policy is "<major>:<minor> <bytes_per_second>"::
echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
Above will put a limit of 1MB/second on reads happening for root group
on device having major/minor number 8:16.
-- Run dd to read a file and see if rate is throttled to 1MB/s or not.
+- Run dd to read a file and see if rate is throttled to 1MB/s or not::
# dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
1024+0 records in
@@ -51,7 +56,7 @@ throttling's hierarchy support is enabled iff "sane_behavior" is
enabled from cgroup side, which currently is a development option and
not publicly available.
-If somebody created a hierarchy like as follows.
+If somebody created a hierarchy as follows::
root
/ \
@@ -66,7 +71,7 @@ directly generated by tasks in that cgroup.
Throttling without "sane_behavior" enabled from cgroup side will
practically treat all groups at same level as if it looks like the
-following.
+following::
pivot
/ / \ \
@@ -99,27 +104,31 @@ Proportional weight policy files
These rules override the default value of group weight as specified
by blkio.weight.
- Following is the format.
+ Following is the format::
+
+ # echo dev_maj:dev_minor weight > blkio.weight_device
+
+ Configure weight=300 on /dev/sdb (8:16) in this cgroup::
+
+ # echo 8:16 300 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:16 300
+
+ Configure weight=500 on /dev/sda (8:0) in this cgroup::
- # echo dev_maj:dev_minor weight > blkio.weight_device
- Configure weight=300 on /dev/sdb (8:16) in this cgroup
- # echo 8:16 300 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:16 300
+ # echo 8:0 500 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:0 500
+ 8:16 300
- Configure weight=500 on /dev/sda (8:0) in this cgroup
- # echo 8:0 500 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:0 500
- 8:16 300
+ Remove specific weight for /dev/sda in this cgroup::
- Remove specific weight for /dev/sda in this cgroup
- # echo 8:0 0 > blkio.weight_device
- # cat blkio.weight_device
- dev weight
- 8:16 300
+ # echo 8:0 0 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:16 300
- blkio.leaf_weight[_device]
- Equivalents of blkio.weight[_device] for the purpose of
@@ -244,30 +253,30 @@ Throttling/Upper limit policy files
- blkio.throttle.read_bps_device
- Specifies upper limit on READ rate from the device. IO rate is
specified in bytes per second. Rules are per device. Following is
- the format.
+ the format::
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
- blkio.throttle.write_bps_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in bytes per second. Rules are per device. Following is
- the format.
+ the format::
- echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
- blkio.throttle.read_iops_device
- Specifies upper limit on READ rate from the device. IO rate is
specified in IO per second. Rules are per device. Following is
- the format.
+ the format::
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
- blkio.throttle.write_iops_device
- Specifies upper limit on WRITE rate to the device. IO rate is
specified in io per second. Rules are per device. Following is
- the format.
+ the format::
- echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
Note: If both BW and IOPS rules are specified for a device, then IO is
subjected to both the constraints.
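
The same 8:16, 1 MB/s example written from C instead of the shell; the controller is assumed to be mounted at the conventional /sys/fs/cgroup/blkio::

  #include <stdio.h>

  int main(void)
  {
      FILE *f = fopen("/sys/fs/cgroup/blkio/blkio.throttle.read_bps_device",
                      "w");

      if (!f) {
          perror("fopen");
          return 1;
      }
      /* "<major>:<minor> <bytes_per_second>", per the format above */
      fprintf(f, "%d:%d %lu\n", 8, 16, 1048576UL);
      return fclose(f) ? 1 : 0;
  }
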
diff --git a/Documentation/cgroup-v1/cgroups.txt b/Documentation/cgroup-v1/cgroups.rst
index 059f7063eea6..46bbe7e022d4 100644
--- a/Documentation/cgroup-v1/cgroups.txt
+++ b/Documentation/cgroup-v1/cgroups.rst
@@ -1,35 +1,39 @@
- CGROUPS
- -------
+==============
+Control Groups
+==============
Written by Paul Menage <menage@google.com> based on
-Documentation/cgroup-v1/cpusets.txt
+Documentation/cgroup-v1/cpusets.rst
Original copyright statements from cpusets.txt:
+
Portions Copyright (C) 2004 BULL SA.
+
Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+
Modified by Paul Jackson <pj@sgi.com>
+
Modified by Christoph Lameter <cl@linux.com>
-CONTENTS:
-=========
-
-1. Control Groups
- 1.1 What are cgroups ?
- 1.2 Why are cgroups needed ?
- 1.3 How are cgroups implemented ?
- 1.4 What does notify_on_release do ?
- 1.5 What does clone_children do ?
- 1.6 How do I use cgroups ?
-2. Usage Examples and Syntax
- 2.1 Basic Usage
- 2.2 Attaching processes
- 2.3 Mounting hierarchies by name
-3. Kernel API
- 3.1 Overview
- 3.2 Synchronization
- 3.3 Subsystem API
-4. Extended attributes usage
-5. Questions
+.. CONTENTS:
+
+ 1. Control Groups
+ 1.1 What are cgroups ?
+ 1.2 Why are cgroups needed ?
+ 1.3 How are cgroups implemented ?
+ 1.4 What does notify_on_release do ?
+ 1.5 What does clone_children do ?
+ 1.6 How do I use cgroups ?
+ 2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Attaching processes
+ 2.3 Mounting hierarchies by name
+ 3. Kernel API
+ 3.1 Overview
+ 3.2 Synchronization
+ 3.3 Subsystem API
+ 4. Extended attributes usage
+ 5. Questions
1. Control Groups
=================
@@ -72,7 +76,7 @@ On their own, the only use for cgroups is for simple job
tracking. The intention is that other subsystems hook into the generic
cgroup support to provide new attributes for cgroups, such as
accounting/limiting the resources which processes in a cgroup can
-access. For example, cpusets (see Documentation/cgroup-v1/cpusets.txt) allow
+access. For example, cpusets (see Documentation/cgroup-v1/cpusets.rst) allow
you to associate a set of CPUs and a set of memory nodes with the
tasks in each cgroup.
@@ -108,7 +112,7 @@ As an example of a scenario (originally proposed by vatsa@in.ibm.com)
that can benefit from multiple hierarchies, consider a large
university server with various users - students, professors, system
tasks etc. The resource planning for this server could be along the
-following lines:
+following lines::
CPU : "Top cpuset"
/ \
@@ -136,7 +140,7 @@ depending on who launched it (prof/student).
With the ability to classify tasks differently for different resources
(by putting those resource subsystems in different hierarchies),
the admin can easily set up a script which receives exec notifications
-and depending on who is launching the browser he can
+and depending on who is launching the browser he can::
# echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
@@ -151,7 +155,7 @@ wants to do online gaming :)) OR give one of the student's simulation
apps enhanced CPU power.
With ability to write PIDs directly to resource classes, it's just a
-matter of:
+matter of::
# echo pid > /sys/fs/cgroup/network/<new_class>/tasks
(after some time)
@@ -306,7 +310,7 @@ configuration from the parent during initialization.
--------------------------
To start a new job that is to be contained within a cgroup, using
-the "cpuset" cgroup subsystem, the steps are something like:
+the "cpuset" cgroup subsystem, the steps are something like::
1) mount -t tmpfs cgroup_root /sys/fs/cgroup
2) mkdir /sys/fs/cgroup/cpuset
@@ -320,7 +324,7 @@ the "cpuset" cgroup subsystem, the steps are something like:
For example, the following sequence of commands will setup a cgroup
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cgroup:
+and then start a subshell 'sh' in that cgroup::
mount -t tmpfs cgroup_root /sys/fs/cgroup
mkdir /sys/fs/cgroup/cpuset
@@ -345,8 +349,9 @@ and then start a subshell 'sh' in that cgroup:
Creating, modifying, using cgroups can be done through the cgroup
virtual filesystem.
-To mount a cgroup hierarchy with all available subsystems, type:
-# mount -t cgroup xxx /sys/fs/cgroup
+To mount a cgroup hierarchy with all available subsystems, type::
+
+ # mount -t cgroup xxx /sys/fs/cgroup
The "xxx" is not interpreted by the cgroup code, but will appear in
/proc/mounts so may be any useful identifying string that you like.
@@ -355,18 +360,19 @@ Note: Some subsystems do not work without some user input first. For instance,
if cpusets are enabled the user will have to populate the cpus and mems files
for each new cgroup created before that group can be used.
-As explained in section `1.2 Why are cgroups needed?' you should create
+As explained in section `1.2 Why are cgroups needed?` you should create
different hierarchies of cgroups for each single resource or group of
resources you want to control. Therefore, you should mount a tmpfs on
/sys/fs/cgroup and create directories for each cgroup resource or resource
-group.
+group::
-# mount -t tmpfs cgroup_root /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/rg1
+ # mount -t tmpfs cgroup_root /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/rg1
To mount a cgroup hierarchy with just the cpuset and memory
-subsystems, type:
-# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
+subsystems, type::
+
+ # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
 While remounting cgroups is currently supported, it is not recommended
to use it. Remounting allows changing bound subsystems and
@@ -375,9 +381,10 @@ hierarchy is empty and release_agent itself should be replaced with
conventional fsnotify. The support for remounting will be removed in
the future.
-To Specify a hierarchy's release_agent:
-# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
- xxx /sys/fs/cgroup/rg1
+To specify a hierarchy's release_agent::
+
+ # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+ xxx /sys/fs/cgroup/rg1
Note that specifying 'release_agent' more than once will return failure.
@@ -390,32 +397,39 @@ Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
is the cgroup that holds the whole system.
-If you want to change the value of release_agent:
-# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
+If you want to change the value of release_agent::
+
+ # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
It can also be changed via remount.
-If you want to create a new cgroup under /sys/fs/cgroup/rg1:
-# cd /sys/fs/cgroup/rg1
-# mkdir my_cgroup
+If you want to create a new cgroup under /sys/fs/cgroup/rg1::
+
+ # cd /sys/fs/cgroup/rg1
+ # mkdir my_cgroup
+
+Now you want to do something with this cgroup::
+
+ # cd my_cgroup
-Now you want to do something with this cgroup.
-# cd my_cgroup
+In this directory you can find several files::
-In this directory you can find several files:
-# ls
-cgroup.procs notify_on_release tasks
-(plus whatever files added by the attached subsystems)
+ # ls
+ cgroup.procs notify_on_release tasks
+ (plus whatever files added by the attached subsystems)
-Now attach your shell to this cgroup:
-# /bin/echo $$ > tasks
+Now attach your shell to this cgroup::
+
+ # /bin/echo $$ > tasks
You can also create cgroups inside your cgroup by using mkdir in this
-directory.
-# mkdir my_sub_cs
+directory::
+
+ # mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir::
-To remove a cgroup, just use rmdir:
-# rmdir my_sub_cs
+ # rmdir my_sub_cs
This will fail if the cgroup is in use (has cgroups inside, or
has processes attached, or is held alive by other subsystem-specific
@@ -424,19 +438,21 @@ reference).
2.2 Attaching processes
-----------------------
-# /bin/echo PID > tasks
+::
+
+ # /bin/echo PID > tasks
Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
+If you have several tasks to attach, you have to do it one after another::
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
- ...
-# /bin/echo PIDn > tasks
+ # /bin/echo PID1 > tasks
+ # /bin/echo PID2 > tasks
+ ...
+ # /bin/echo PIDn > tasks
-You can attach the current shell task by echoing 0:
+You can attach the current shell task by echoing 0::
-# echo 0 > tasks
+ # echo 0 > tasks
You can use the cgroup.procs file instead of the tasks file to move all
threads in a threadgroup at once. Echoing the PID of any task in a
@@ -529,7 +545,7 @@ Each subsystem may export the following methods. The only mandatory
methods are css_alloc/free. Any others that are null are presumed to
be successful no-ops.
-struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)
+``struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)``
(cgroup_mutex held by caller)
Called to allocate a subsystem state object for a cgroup. The
@@ -544,7 +560,7 @@ identified by the passed cgroup object having a NULL parent (since
it's the root of the hierarchy) and may be an appropriate place for
initialization code.
-int css_online(struct cgroup *cgrp)
+``int css_online(struct cgroup *cgrp)``
(cgroup_mutex held by caller)
Called after @cgrp successfully completed all allocations and made
@@ -554,7 +570,7 @@ callback can be used to implement reliable state sharing and
propagation along the hierarchy. See the comment on
cgroup_for_each_descendant_pre() for details.
-void css_offline(struct cgroup *cgrp);
+``void css_offline(struct cgroup *cgrp);``
(cgroup_mutex held by caller)
This is the counterpart of css_online() and called iff css_online()
@@ -564,7 +580,7 @@ all references it's holding on @cgrp. When all references are dropped,
cgroup removal will proceed to the next step - css_free(). After this
callback, @cgrp should be considered dead to the subsystem.
-void css_free(struct cgroup *cgrp)
+``void css_free(struct cgroup *cgrp)``
(cgroup_mutex held by caller)
The cgroup system is about to free @cgrp; the subsystem should free
@@ -573,7 +589,7 @@ is completely unused; @cgrp->parent is still valid. (Note - can also
be called for a newly-created cgroup if an error occurs after this
subsystem's create() method has been called for the new cgroup).
-int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+``int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
(cgroup_mutex held by caller)
Called prior to moving one or more tasks into a cgroup; if the
@@ -594,7 +610,7 @@ fork. If this method returns 0 (success) then this should remain valid
while the caller holds cgroup_mutex and it is ensured that either
attach() or cancel_attach() will be called in future.
-void css_reset(struct cgroup_subsys_state *css)
+``void css_reset(struct cgroup_subsys_state *css)``
(cgroup_mutex held by caller)
An optional operation which should restore @css's configuration to the
@@ -608,7 +624,7 @@ This prevents unexpected resource control from a hidden css and
ensures that the configuration is in the initial state when it is made
visible again later.
-void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+``void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
(cgroup_mutex held by caller)
Called when a task attach operation has failed after can_attach() has succeeded.
@@ -617,26 +633,26 @@ function, so that the subsystem can implement a rollback. If not, not necessary.
 This will be called only for subsystems whose can_attach() operation has
succeeded. The parameters are identical to can_attach().
-void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+``void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)``
(cgroup_mutex held by caller)
Called after the task has been attached to the cgroup, to allow any
post-attachment activity that requires memory allocations or blocking.
The parameters are identical to can_attach().
-void fork(struct task_struct *task)
+``void fork(struct task_struct *task)``
Called when a task is forked into a cgroup.
-void exit(struct task_struct *task)
+``void exit(struct task_struct *task)``
Called during task exit.
-void free(struct task_struct *task)
+``void free(struct task_struct *task)``
Called when the task_struct is freed.
-void bind(struct cgroup *root)
+``void bind(struct cgroup *root)``
(cgroup_mutex held by caller)
Called when a cgroup subsystem is rebound to a different hierarchy
@@ -649,6 +665,7 @@ that is being created/destroyed (and hence has no sub-cgroups).
cgroup filesystem supports certain types of extended attributes in its
directories and files. The current supported types are:
+
- Trusted (XATTR_TRUSTED)
- Security (XATTR_SECURITY)
@@ -666,12 +683,13 @@ in containers and systemd for assorted meta data like main PID in a cgroup
5. Questions
============
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
- errors. If you use it in the cgroup file system, you won't be
- able to tell whether a command succeeded or failed.
+::
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
- put only ONE PID.
+ Q: what's up with this '/bin/echo' ?
+ A: bash's builtin 'echo' command does not check calls to write() against
+ errors. If you use it in the cgroup file system, you won't be
+ able to tell whether a command succeeded or failed.
+ Q: When I attach processes, only the first of the line gets really attached !
+ A: We can only return one error code per call to write(). So you should also
+ put only ONE PID.
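
A C sketch of the one-PID-per-write() rule the Q&A explains: each PID goes in its own write() call so every attach gets its own error code. The rg1/my_cgroup path follows the earlier examples::

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/types.h>
  #include <unistd.h>

  static int attach_pid(const char *cgrp, pid_t pid)
  {
      char path[256], buf[32];
      int fd, n, ret;

      snprintf(path, sizeof(path), "/sys/fs/cgroup/rg1/%s/tasks", cgrp);
      fd = open(path, O_WRONLY);
      if (fd < 0)
          return -1;
      n = snprintf(buf, sizeof(buf), "%d", pid);
      ret = (write(fd, buf, n) == n) ? 0 : -1;    /* one PID per write() */
      close(fd);
      return ret;
  }

  int main(void)
  {
      return attach_pid("my_cgroup", getpid());
  }
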
diff --git a/Documentation/cgroup-v1/cpuacct.txt b/Documentation/cgroup-v1/cpuacct.rst
index 9d73cc0cadb9..d30ed81d2ad7 100644
--- a/Documentation/cgroup-v1/cpuacct.txt
+++ b/Documentation/cgroup-v1/cpuacct.rst
@@ -1,5 +1,6 @@
+=========================
CPU Accounting Controller
--------------------------
+=========================
The CPU accounting controller is used to group tasks using cgroups and
account the CPU usage of these groups of tasks.
@@ -8,9 +9,9 @@ The CPU accounting controller supports multi-hierarchy groups. An accounting
group accumulates the CPU usage of all of its child groups and the tasks
directly present in its group.
-Accounting groups can be created by first mounting the cgroup filesystem.
+Accounting groups can be created by first mounting the cgroup filesystem::
-# mount -t cgroup -ocpuacct none /sys/fs/cgroup
+ # mount -t cgroup -ocpuacct none /sys/fs/cgroup
With the above step, the initial or the parent accounting group becomes
visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
@@ -19,11 +20,11 @@ the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
by this group which is essentially the CPU time obtained by all the tasks
in the system.
-New accounting groups can be created under the parent group /sys/fs/cgroup.
+New accounting groups can be created under the parent group /sys/fs/cgroup::
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
+ # cd /sys/fs/cgroup
+ # mkdir g1
+ # echo $$ > g1/tasks
The above steps create a new group g1 and move the current shell
process (bash) into it. CPU time consumed by this bash and its children
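
The same g1 walk-through from C; cpuacct.usage reports the group's cumulative CPU time in nanoseconds::

  #include <stdio.h>
  #include <sys/stat.h>
  #include <unistd.h>

  int main(void)
  {
      unsigned long long usage = 0;
      FILE *f;

      mkdir("/sys/fs/cgroup/g1", 0755);

      f = fopen("/sys/fs/cgroup/g1/tasks", "w");
      if (f) {
          fprintf(f, "%d\n", getpid());    /* move this shell/process */
          fclose(f);
      }

      f = fopen("/sys/fs/cgroup/g1/cpuacct.usage", "r");
      if (f && fscanf(f, "%llu", &usage) == 1)
          printf("g1 cpu usage: %llu ns\n", usage);
      if (f)
          fclose(f);
      return 0;
  }
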
diff --git a/Documentation/cgroup-v1/cpusets.txt b/Documentation/cgroup-v1/cpusets.rst
index 8402dd6de8df..b6a42cdea72b 100644
--- a/Documentation/cgroup-v1/cpusets.txt
+++ b/Documentation/cgroup-v1/cpusets.rst
@@ -1,35 +1,36 @@
- CPUSETS
- -------
+=======
+CPUSETS
+=======
Copyright (C) 2004 BULL SA.
-Written by Simon.Derr@bull.net
-
-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
-Modified by Paul Jackson <pj@sgi.com>
-Modified by Christoph Lameter <cl@linux.com>
-Modified by Paul Menage <menage@google.com>
-Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
-CONTENTS:
-=========
+Written by Simon.Derr@bull.net
-1. Cpusets
- 1.1 What are cpusets ?
- 1.2 Why are cpusets needed ?
- 1.3 How are cpusets implemented ?
- 1.4 What are exclusive cpusets ?
- 1.5 What is memory_pressure ?
- 1.6 What is memory spread ?
- 1.7 What is sched_load_balance ?
- 1.8 What is sched_relax_domain_level ?
- 1.9 How do I use cpusets ?
-2. Usage Examples and Syntax
- 2.1 Basic Usage
- 2.2 Adding/removing cpus
- 2.3 Setting flags
- 2.4 Attaching processes
-3. Questions
-4. Contact
+- Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+- Modified by Paul Jackson <pj@sgi.com>
+- Modified by Christoph Lameter <cl@linux.com>
+- Modified by Paul Menage <menage@google.com>
+- Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+
+.. CONTENTS:
+
+ 1. Cpusets
+ 1.1 What are cpusets ?
+ 1.2 Why are cpusets needed ?
+ 1.3 How are cpusets implemented ?
+ 1.4 What are exclusive cpusets ?
+ 1.5 What is memory_pressure ?
+ 1.6 What is memory spread ?
+ 1.7 What is sched_load_balance ?
+ 1.8 What is sched_relax_domain_level ?
+ 1.9 How do I use cpusets ?
+ 2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Adding/removing cpus
+ 2.3 Setting flags
+ 2.4 Attaching processes
+ 3. Questions
+ 4. Contact
1. Cpusets
==========
@@ -48,7 +49,7 @@ hooks, beyond what is already present, required to manage dynamic
job placement on large systems.
Cpusets use the generic cgroup subsystem described in
-Documentation/cgroup-v1/cgroups.txt.
+Documentation/cgroup-v1/cgroups.rst.
Requests by a task, using the sched_setaffinity(2) system call to
include CPUs in its CPU affinity mask, and using the mbind(2) and
@@ -157,7 +158,7 @@ modifying cpusets is via this cpuset file system.
The /proc/<pid>/status file for each task has four added lines,
displaying the task's cpus_allowed (on which CPUs it may be scheduled)
and mems_allowed (on which Memory Nodes it may obtain memory),
-in the two formats seen in the following example:
+in the two formats seen in the following example::
Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
Cpus_allowed_list: 0-127
@@ -181,6 +182,7 @@ files describing that cpuset:
- cpuset.sched_relax_domain_level: the searching range when migrating tasks
In addition, only the root cpuset has the following file:
+
- cpuset.memory_pressure_enabled flag: compute memory_pressure?
New cpusets are created using the mkdir system call or shell
@@ -266,7 +268,8 @@ to monitor a cpuset for signs of memory pressure. It's up to the
batch manager or other user code to decide what to do about it and
take action.
-==> Unless this feature is enabled by writing "1" to the special file
+==>
+ Unless this feature is enabled by writing "1" to the special file
/dev/cpuset/memory_pressure_enabled, the hook in the rebalance
code of __alloc_pages() for this metric reduces to simply noticing
that the cpuset_memory_pressure_enabled flag is zero. So only
@@ -399,6 +402,7 @@ have tasks running on them unless explicitly assigned.
This default load balancing across all CPUs is not well suited for
the following two situations:
+
1) On large systems, load balancing across many CPUs is expensive.
If the system is managed using cpusets to place independent jobs
on separate sets of CPUs, full load balancing is unnecessary.
@@ -501,6 +505,7 @@ all the CPUs that must be load balanced.
The cpuset code builds a new such partition and passes it to the
scheduler sched domain setup code, to have the sched domains rebuilt
as necessary, whenever:
+
- the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
- or CPUs come or go from a cpuset with this flag enabled,
- or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
@@ -553,13 +558,15 @@ this searching range as you like. This file takes int value which
indicates size of searching range in levels ideally as follows,
otherwise initial value -1 that indicates the cpuset has no request.
- -1 : no request. use system default or follow request of others.
- 0 : no search.
- 1 : search siblings (hyperthreads in a core).
- 2 : search cores in a package.
- 3 : search cpus in a node [= system wide on non-NUMA system]
- 4 : search nodes in a chunk of node [on NUMA system]
- 5 : search system wide [on NUMA system]
+====== ===========================================================
+ -1 no request. use system default or follow request of others.
+ 0 no search.
+ 1 search siblings (hyperthreads in a core).
+ 2 search cores in a package.
+ 3 search cpus in a node [= system wide on non-NUMA system]
+ 4 search nodes in a chunk of node [on NUMA system]
+ 5 search system wide [on NUMA system]
+====== ===========================================================
The system default is architecture dependent. The system default
can be changed using the relax_domain_level= boot parameter.
@@ -578,13 +585,14 @@ and whether it is acceptable or not depends on your situation.
Don't modify this file if you are not sure.
If your situation is:
+
- The migration costs between each cpu can be assumed considerably
   small (for you) due to your special application's behavior or
   special hardware support for CPU cache etc.
 - The searching cost doesn't have an impact (for you) or you can make
   the searching cost small enough by keeping cpusets compact etc.
 - The latency is required even if it sacrifices cache hit rate etc.
-then increasing 'sched_relax_domain_level' would benefit you.
+ then increasing 'sched_relax_domain_level' would benefit you.
1.9 How do I use cpusets ?
@@ -678,7 +686,7 @@ To start a new job that is to be contained within a cpuset, the steps are:
For example, the following sequence of commands will setup a cpuset
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
-and then start a subshell 'sh' in that cpuset:
+and then start a subshell 'sh' in that cpuset::
mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
cd /sys/fs/cgroup/cpuset
@@ -693,6 +701,7 @@ and then start a subshell 'sh' in that cpuset:
cat /proc/self/cpuset
There are ways to query or modify cpusets:
+
- via the cpuset file system directly, using the various cd, mkdir, echo,
cat, rmdir commands from the shell, or their equivalent from C.
- via the C library libcpuset.
@@ -722,115 +731,133 @@ Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
is the cpuset that holds the whole system.
-If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
-# cd /sys/fs/cgroup/cpuset
-# mkdir my_cpuset
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset::
+
+ # cd /sys/fs/cgroup/cpuset
+ # mkdir my_cpuset
-Now you want to do something with this cpuset.
-# cd my_cpuset
+Now you want to do something with this cpuset::
-In this directory you can find several files:
-# ls
-cgroup.clone_children cpuset.memory_pressure
-cgroup.event_control cpuset.memory_spread_page
-cgroup.procs cpuset.memory_spread_slab
-cpuset.cpu_exclusive cpuset.mems
-cpuset.cpus cpuset.sched_load_balance
-cpuset.mem_exclusive cpuset.sched_relax_domain_level
-cpuset.mem_hardwall notify_on_release
-cpuset.memory_migrate tasks
+ # cd my_cpuset
+
+In this directory you can find several files::
+
+ # ls
+ cgroup.clone_children cpuset.memory_pressure
+ cgroup.event_control cpuset.memory_spread_page
+ cgroup.procs cpuset.memory_spread_slab
+ cpuset.cpu_exclusive cpuset.mems
+ cpuset.cpus cpuset.sched_load_balance
+ cpuset.mem_exclusive cpuset.sched_relax_domain_level
+ cpuset.mem_hardwall notify_on_release
+ cpuset.memory_migrate tasks
Reading them will give you information about the state of this cpuset:
the CPUs and Memory Nodes it can use, the processes that are using
it, its properties. By writing to these files you can manipulate
the cpuset.
-Set some flags:
-# /bin/echo 1 > cpuset.cpu_exclusive
+Set some flags::
+
+ # /bin/echo 1 > cpuset.cpu_exclusive
+
+Add some cpus::
+
+ # /bin/echo 0-7 > cpuset.cpus
+
+Add some mems::
-Add some cpus:
-# /bin/echo 0-7 > cpuset.cpus
+ # /bin/echo 0-7 > cpuset.mems
-Add some mems:
-# /bin/echo 0-7 > cpuset.mems
+Now attach your shell to this cpuset::
-Now attach your shell to this cpuset:
-# /bin/echo $$ > tasks
+ # /bin/echo $$ > tasks
You can also create cpusets inside your cpuset by using mkdir in this
-directory.
-# mkdir my_sub_cs
+directory::
+
+ # mkdir my_sub_cs
+
+To remove a cpuset, just use rmdir::
+
+ # rmdir my_sub_cs
-To remove a cpuset, just use rmdir:
-# rmdir my_sub_cs
This will fail if the cpuset is in use (has cpusets inside, or has
processes attached).
Note that for legacy reasons, the "cpuset" filesystem exists as a
wrapper around the cgroup filesystem.
-The command
+The command::
-mount -t cpuset X /sys/fs/cgroup/cpuset
+ mount -t cpuset X /sys/fs/cgroup/cpuset
-is equivalent to
+is equivalent to::
-mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
-echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
+ mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+ echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
2.2 Adding/removing cpus
------------------------
This is the syntax to use when writing in the cpus or mems files
-in cpuset directories:
+in cpuset directories::
-# /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
-# /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+ # /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+ # /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
To add a CPU to a cpuset, write the new list of CPUs including the
-CPU to be added. To add 6 to the above cpuset:
+CPU to be added. To add 6 to the above cpuset::
-# /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6
+ # /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6
Similarly to remove a CPU from a cpuset, write the new list of CPUs
without the CPU to be removed.
-To remove all the CPUs:
+To remove all the CPUs::
-# /bin/echo "" > cpuset.cpus -> clear cpus list
+ # /bin/echo "" > cpuset.cpus -> clear cpus list
2.3 Setting flags
-----------------
-The syntax is very simple:
+The syntax is very simple::
-# /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive'
-# /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive'
+ # /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive'
+ # /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive'
2.4 Attaching processes
-----------------------
-# /bin/echo PID > tasks
+::
+
+ # /bin/echo PID > tasks
Note that it is PID, not PIDs. You can only attach ONE task at a time.
-If you have several tasks to attach, you have to do it one after another:
+If you have several tasks to attach, you have to do it one after another::
-# /bin/echo PID1 > tasks
-# /bin/echo PID2 > tasks
+ # /bin/echo PID1 > tasks
+ # /bin/echo PID2 > tasks
...
-# /bin/echo PIDn > tasks
+ # /bin/echo PIDn > tasks
3. Questions
============
-Q: what's up with this '/bin/echo' ?
-A: bash's builtin 'echo' command does not check calls to write() against
+Q:
+ what's up with this '/bin/echo' ?
+
+A:
+ bash's builtin 'echo' command does not check calls to write() against
errors. If you use it in the cpuset file system, you won't be
able to tell whether a command succeeded or failed.
-Q: When I attach processes, only the first of the line gets really attached !
-A: We can only return one error code per call to write(). So you should also
+Q:
+ When I attach processes, only the first of the line gets really attached !
+
+A:
+ We can only return one error code per call to write(). So you should also
put only ONE pid.
4. Contact
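
The "Charlie" sequence above, done from C instead of the shell; the paths assume the cpuset mount shown earlier::

  #include <stdio.h>
  #include <sys/stat.h>
  #include <unistd.h>

  static int write_str(const char *path, const char *val)
  {
      FILE *f = fopen(path, "w");

      if (!f)
          return -1;
      fputs(val, f);
      return fclose(f);
  }

  int main(void)
  {
      char pid[32];

      mkdir("/sys/fs/cgroup/cpuset/Charlie", 0755);
      write_str("/sys/fs/cgroup/cpuset/Charlie/cpuset.cpus", "2-3");
      write_str("/sys/fs/cgroup/cpuset/Charlie/cpuset.mems", "1");
      snprintf(pid, sizeof(pid), "%d", getpid());
      return write_str("/sys/fs/cgroup/cpuset/Charlie/tasks", pid);
  }
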
diff --git a/Documentation/cgroup-v1/devices.txt b/Documentation/cgroup-v1/devices.rst
index 3c1095ca02ea..e1886783961e 100644
--- a/Documentation/cgroup-v1/devices.txt
+++ b/Documentation/cgroup-v1/devices.rst
@@ -1,6 +1,9 @@
+===========================
Device Whitelist Controller
+===========================
-1. Description:
+1. Description
+==============
Implement a cgroup to track and enforce open and mknod restrictions
on device files. A device cgroup associates a device access
@@ -16,24 +19,26 @@ devices from the whitelist or add new entries. A child cgroup can
never receive a device access which is denied by its parent.
2. User Interface
+=================
An entry is added using devices.allow, and removed using
-devices.deny. For instance
+devices.deny. For instance::
echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
allows cgroup 1 to read and mknod the device usually known as
-/dev/null. Doing
+/dev/null. Doing::
echo a > /sys/fs/cgroup/1/devices.deny
-will remove the default 'a *:* rwm' entry. Doing
+will remove the default 'a *:* rwm' entry. Doing::
echo a > /sys/fs/cgroup/1/devices.allow
will add the 'a *:* rwm' entry to the whitelist.
3. Security
+===========
Any task can move itself between cgroups. This clearly won't
suffice, but we can decide the best way to adequately restrict
@@ -50,6 +55,7 @@ A cgroup may not be granted more permissions than the cgroup's
parent has.
4. Hierarchy
+============
device cgroups maintain hierarchy by making sure a cgroup never has more
access permissions than its parent. Every time an entry is written to
@@ -58,7 +64,8 @@ from their whitelist and all the locally set whitelist entries will be
re-evaluated. In case one of the locally set whitelist entries would provide
more access than the cgroup's parent, it'll be removed from the whitelist.
-Example:
+Example::
+
A
/ \
B
@@ -67,10 +74,12 @@ Example:
A allow "b 8:* rwm", "c 116:1 rw"
B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
-If a device is denied in group A:
+If a device is denied in group A::
+
# echo "c 116:* r" > A/devices.deny
+
it'll propagate down and after revalidating B's entries, the whitelist entry
-"c 116:2 rwm" will be removed:
+"c 116:2 rwm" will be removed::
group whitelist entries denied devices
A all "b 8:* rwm", "c 116:* rw"
@@ -79,7 +88,8 @@ it'll propagate down and after revalidating B's entries, the whitelist entry
In case parent's exceptions change and local exceptions are not allowed
anymore, they'll be deleted.
-Notice that new whitelist entries will not be propagated:
+Notice that new whitelist entries will not be propagated::
+
A
/ \
B
@@ -88,24 +98,30 @@ Notice that new whitelist entries will not be propagated:
A "c 1:3 rwm", "c 1:5 r" all the rest
B "c 1:3 rwm", "c 1:5 r" all the rest
-when adding "c *:3 rwm":
+when adding ``c *:3 rwm``::
+
# echo "c *:3 rwm" >A/devices.allow
-the result:
+the result::
+
group whitelist entries denied devices
A "c *:3 rwm", "c 1:5 r" all the rest
B "c 1:3 rwm", "c 1:5 r" all the rest
-but now it'll be possible to add new entries to B:
+but now it'll be possible to add new entries to B::
+
# echo "c 2:3 rwm" >B/devices.allow
# echo "c 50:3 r" >B/devices.allow
-or even
+
+or even::
+
# echo "c *:3 rwm" >B/devices.allow
Allowing or denying all by writing 'a' to devices.allow or devices.deny will
not be possible once the device cgroups has children.
4.1 Hierarchy (internal implementation)
+---------------------------------------
device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
list of exceptions. The internal state is controlled using the same user
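
A C sketch of the allow/deny writes shown above for cgroup "1"; the device cgroup hierarchy is assumed to be mounted at /sys/fs/cgroup::

  #include <stdio.h>

  static int write_rule(const char *file, const char *rule)
  {
      char path[256];
      FILE *f;

      snprintf(path, sizeof(path), "/sys/fs/cgroup/1/%s", file);
      f = fopen(path, "w");
      if (!f)
          return -1;
      fprintf(f, "%s\n", rule);
      return fclose(f);
  }

  int main(void)
  {
      write_rule("devices.allow", "c 1:3 mr");   /* /dev/null: read+mknod */
      write_rule("devices.deny", "a");           /* drop 'a *:* rwm' */
      return 0;
  }
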
diff --git a/Documentation/cgroup-v1/freezer-subsystem.txt b/Documentation/cgroup-v1/freezer-subsystem.rst
index e831cb2b8394..582d3427de3f 100644
--- a/Documentation/cgroup-v1/freezer-subsystem.txt
+++ b/Documentation/cgroup-v1/freezer-subsystem.rst
@@ -1,3 +1,7 @@
+==============
+Cgroup Freezer
+==============
+
 The cgroup freezer is useful to batch job management systems which start
and stop sets of tasks in order to schedule the resources of a machine
according to the desires of a system administrator. This sort of program
@@ -23,7 +27,7 @@ blocked, or ignored it can be seen by waiting or ptracing parent tasks.
SIGCONT is especially unsuitable since it can be caught by the task. Any
programs designed to watch for SIGSTOP and SIGCONT could be broken by
attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
-demonstrate this problem using nested bash shells:
+demonstrate this problem using nested bash shells::
$ echo $$
16644
@@ -93,19 +97,19 @@ The following cgroupfs files are created by cgroup freezer.
The root cgroup is non-freezable and the above interface files don't
exist.
-* Examples of usage :
+* Examples of usage::
# mkdir /sys/fs/cgroup/freezer
# mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
# mkdir /sys/fs/cgroup/freezer/0
# echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
-to get status of the freezer subsystem :
+to get status of the freezer subsystem::
# cat /sys/fs/cgroup/freezer/0/freezer.state
THAWED
-to freeze all tasks in the container :
+to freeze all tasks in the container::
# echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
# cat /sys/fs/cgroup/freezer/0/freezer.state
@@ -113,7 +117,7 @@ to freeze all tasks in the container :
# cat /sys/fs/cgroup/freezer/0/freezer.state
FROZEN
-to unfreeze all tasks in the container :
+to unfreeze all tasks in the container::
# echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
# cat /sys/fs/cgroup/freezer/0/freezer.state
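
A C sketch of the freeze/thaw cycle above; it polls freezer.state because FREEZING is a transient state::

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  #define STATE "/sys/fs/cgroup/freezer/0/freezer.state"

  static int write_state(const char *s)
  {
      FILE *f = fopen(STATE, "w");

      if (!f)
          return -1;
      fputs(s, f);
      return fclose(f);
  }

  int main(void)
  {
      char buf[32] = "";
      FILE *f;

      if (write_state("FROZEN\n"))
          return 1;
      do {                        /* wait out the FREEZING transition */
          usleep(10000);
          f = fopen(STATE, "r");
          if (!f)
              return 1;
          if (!fgets(buf, sizeof(buf), f))
              buf[0] = '\0';
          fclose(f);
      } while (strncmp(buf, "FROZEN", 6) != 0);

      return write_state("THAWED\n") ? 1 : 0;
  }
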
diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.rst
index 1260e5369b9b..a3902aa253a9 100644
--- a/Documentation/cgroup-v1/hugetlb.txt
+++ b/Documentation/cgroup-v1/hugetlb.rst
@@ -1,5 +1,6 @@
+==================
HugeTLB Controller
--------------------
+==================
 The HugeTLB controller allows limiting the HugeTLB usage per control group and
enforces the controller limit during page fault. Since HugeTLB doesn't
@@ -16,16 +17,16 @@ With the above step, the initial or the parent HugeTLB group becomes
visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
-New groups can be created under the parent group /sys/fs/cgroup.
+New groups can be created under the parent group /sys/fs/cgroup::
-# cd /sys/fs/cgroup
-# mkdir g1
-# echo $$ > g1/tasks
+ # cd /sys/fs/cgroup
+ # mkdir g1
+ # echo $$ > g1/tasks
The above steps create a new group g1 and move the current shell
process (bash) into it.
-Brief summary of control files
+Brief summary of control files::
hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
@@ -33,17 +34,17 @@ Brief summary of control files
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
For a system supporting three hugepage sizes (64k, 32M and 1G), the control
-files include:
-
-hugetlb.1GB.limit_in_bytes
-hugetlb.1GB.max_usage_in_bytes
-hugetlb.1GB.usage_in_bytes
-hugetlb.1GB.failcnt
-hugetlb.64KB.limit_in_bytes
-hugetlb.64KB.max_usage_in_bytes
-hugetlb.64KB.usage_in_bytes
-hugetlb.64KB.failcnt
-hugetlb.32MB.limit_in_bytes
-hugetlb.32MB.max_usage_in_bytes
-hugetlb.32MB.usage_in_bytes
-hugetlb.32MB.failcnt
+files include::
+
+ hugetlb.1GB.limit_in_bytes
+ hugetlb.1GB.max_usage_in_bytes
+ hugetlb.1GB.usage_in_bytes
+ hugetlb.1GB.failcnt
+ hugetlb.64KB.limit_in_bytes
+ hugetlb.64KB.max_usage_in_bytes
+ hugetlb.64KB.usage_in_bytes
+ hugetlb.64KB.failcnt
+ hugetlb.32MB.limit_in_bytes
+ hugetlb.32MB.max_usage_in_bytes
+ hugetlb.32MB.usage_in_bytes
+ hugetlb.32MB.failcnt
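+
+For example, on the hypothetical system above, usage of 1GB pages by the
+group g1 can be capped and the failure counter read back as follows (the
+2G value is arbitrary)::
+
+  # echo 2G > /sys/fs/cgroup/g1/hugetlb.1GB.limit_in_bytes
+  # cat /sys/fs/cgroup/g1/hugetlb.1GB.failcnt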
diff --git a/Documentation/cgroup-v1/index.rst b/Documentation/cgroup-v1/index.rst
new file mode 100644
index 000000000000..fe76d42edc11
--- /dev/null
+++ b/Documentation/cgroup-v1/index.rst
@@ -0,0 +1,30 @@
+:orphan:
+
+========================
+Control Groups version 1
+========================
+
+.. toctree::
+ :maxdepth: 1
+
+ cgroups
+
+ blkio-controller
+ cpuacct
+ cpusets
+ devices
+ freezer-subsystem
+ hugetlb
+ memcg_test
+ memory
+ net_cls
+ net_prio
+ pids
+ rdma
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/cgroup-v1/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.rst
index 621e29ffb358..91bd18c6a514 100644
--- a/Documentation/cgroup-v1/memcg_test.txt
+++ b/Documentation/cgroup-v1/memcg_test.rst
@@ -1,32 +1,43 @@
-Memory Resource Controller(Memcg) Implementation Memo.
+======================================================
+Memory Resource Controller (Memcg) Implementation Memo
+======================================================
+
Last Updated: 2010/2
+
Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
Because VM is getting complex (one of the reasons is memcg...), memcg's behavior
is complex. This is a document for memcg's internal behavior.
Please note that implementation details can be changed.
-(*) Topics on API should be in Documentation/cgroup-v1/memory.txt)
+(*) Topics on API should be in Documentation/cgroup-v1/memory.rst
0. How to record usage ?
+========================
+
2 objects are used.
page_cgroup ... an object per page.
+
Allocated at boot or memory hotplug. Freed at memory hot removal.
swap_cgroup ... an entry per swp_entry.
+
Allocated at swapon(). Freed at swapoff().
The page_cgroup has a USED bit, and a double count against a page_cgroup
never occurs. swap_cgroup is used only when a charged page is swapped-out.
1. Charge
+=========
a page/swp_entry may be charged (usage += PAGE_SIZE) at
mem_cgroup_try_charge()
2. Uncharge
+===========
+
a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
mem_cgroup_uncharge()
@@ -37,9 +48,12 @@ Please note that implementation details can be changed.
disappears.
3. charge-commit-cancel
+=======================
+
Memcg pages are charged in two steps:
- mem_cgroup_try_charge()
- mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
+
+ - mem_cgroup_try_charge()
+ - mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
At try_charge(), there are no flags to say "this page is charged".
At this point, usage += PAGE_SIZE.
@@ -51,6 +65,8 @@ Please note that implementation details can be changed.
In the explanation below, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
4. Anonymous
+============
+
Anonymous page is newly allocated at
- page fault into MAP_ANONYMOUS mapping.
- Copy-On-Write.
@@ -78,34 +94,45 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
(e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
5. Page Cache
- Page Cache is charged at
+=============
+
+ Page Cache is charged at
- add_to_page_cache_locked().
The logic is very clear. (About migration, see below)
- Note: __remove_from_page_cache() is called by remove_from_page_cache()
- and __remove_mapping().
+
+ Note:
+ __remove_from_page_cache() is called by remove_from_page_cache()
+ and __remove_mapping().
6. Shmem(tmpfs) Page Cache
+===========================
+
The best way to understand shmem's page state transition is to read
mm/shmem.c.
+
But a brief explanation of the behavior of memcg around shmem will be
helpful to understand the logic.
Shmem's page (just leaf page, not direct/indirect block) can be on
+
- radix-tree of shmem's inode.
- SwapCache.
- Both on radix-tree and SwapCache. This happens at swap-in
    and swap-out.
It's charged when...
+
- A new page is added to shmem's radix-tree.
- A swp page is read. (move a charge from swap_cgroup to page_cgroup)
7. Page Migration
+=================
mem_cgroup_migrate()
8. LRU
+======
Each memcg has its own private LRU. Now, its handling is under global
VM's control (meaning that it's handled under the global pgdat->lru_lock).
Almost all routines around memcg's LRU are called by global LRU's
@@ -114,163 +141,211 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
A special function is mem_cgroup_isolate_pages(). This scans
memcg's private LRU and calls __isolate_lru_page() to extract a page
from LRU.
+
(By __isolate_lru_page(), the page is removed from both of global and
- private LRU.)
+ private LRU.)
9. Typical Tests.
+=================
Tests for racy cases.
- 9.1 Small limit to memcg.
+9.1 Small limit to memcg.
+-------------------------
+
When you test racy cases, it is good to set memcg's limit
very small rather than in GB. Many races were found in tests under
xKB or xxMB limits.
+
(Memory behavior under GB and memory behavior under MB show very
- different situation.)
+ different situation.)
+
+9.2 Shmem
+---------
- 9.2 Shmem
Historically, memcg's shmem handling was poor and we saw some amount
of trouble here. This is because shmem is page-cache but can be
SwapCache. A test with shmem/tmpfs is always a good test.
- 9.3 Migration
+9.3 Migration
+-------------
+
For NUMA, migration is another special case. For an easy test, cpuset
- is useful. Following is a sample script to do migration.
+ is useful. Following is a sample script to do migration::
- mount -t cgroup -o cpuset none /opt/cpuset
+ mount -t cgroup -o cpuset none /opt/cpuset
- mkdir /opt/cpuset/01
- echo 1 > /opt/cpuset/01/cpuset.cpus
- echo 0 > /opt/cpuset/01/cpuset.mems
- echo 1 > /opt/cpuset/01/cpuset.memory_migrate
- mkdir /opt/cpuset/02
- echo 1 > /opt/cpuset/02/cpuset.cpus
- echo 1 > /opt/cpuset/02/cpuset.mems
- echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+ mkdir /opt/cpuset/01
+ echo 1 > /opt/cpuset/01/cpuset.cpus
+ echo 0 > /opt/cpuset/01/cpuset.mems
+ echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+ mkdir /opt/cpuset/02
+ echo 1 > /opt/cpuset/02/cpuset.cpus
+ echo 1 > /opt/cpuset/02/cpuset.mems
+ echo 1 > /opt/cpuset/02/cpuset.memory_migrate
In the above setup, when you move a task from 01 to 02, page migration
from node 0 to node 1 will occur. Following is a script to migrate all
- under cpuset.
- --
- move_task()
- {
- for pid in $1
- do
- /bin/echo $pid >$2/tasks 2>/dev/null
- echo -n $pid
- echo -n " "
- done
- echo END
- }
-
- G1_TASK=`cat ${G1}/tasks`
- G2_TASK=`cat ${G2}/tasks`
- move_task "${G1_TASK}" ${G2} &
- --
- 9.4 Memory hotplug.
+ under cpuset::
+
+ move_task()
+ {
+ for pid in $1
+ do
+ /bin/echo $pid >$2/tasks 2>/dev/null
+ echo -n $pid
+ echo -n " "
+ done
+ echo END
+ }
+
+ G1_TASK=`cat ${G1}/tasks`
+ G2_TASK=`cat ${G2}/tasks`
+ move_task "${G1_TASK}" ${G2} &
+
+9.4 Memory hotplug
+------------------
+
memory hotplug test is a good test.
- to offline memory, do following.
- # echo offline > /sys/devices/system/memory/memoryXXX/state
+
+ to offline memory, do the following::
+
+ # echo offline > /sys/devices/system/memory/memoryXXX/state
+
(XXX is the place of memory)
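+
+ To bring the memory back online (so the test can be repeated), write
+ "online" to the same file::
+
+  # echo online > /sys/devices/system/memory/memoryXXX/state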
+
This is an easy way to test page migration, too.
- 9.5 mkdir/rmdir
+9.5 mkdir/rmdir
+---------------
+
When using hierarchy, mkdir/rmdir test should be done.
- Use tests like the following.
+ Use tests like the following::
+
+ echo 1 >/opt/cgroup/01/memory/use_hierarchy
+ mkdir /opt/cgroup/01/child_a
+ mkdir /opt/cgroup/01/child_b
- echo 1 >/opt/cgroup/01/memory/use_hierarchy
- mkdir /opt/cgroup/01/child_a
- mkdir /opt/cgroup/01/child_b
+ set limit to 01.
+ add limit to 01/child_b
+ run jobs under child_a and child_b
- set limit to 01.
- add limit to 01/child_b
- run jobs under child_a and child_b
+ create/delete following groups at random while jobs are running::
- create/delete following groups at random while jobs are running.
- /opt/cgroup/01/child_a/child_aa
- /opt/cgroup/01/child_b/child_bb
- /opt/cgroup/01/child_c
+ /opt/cgroup/01/child_a/child_aa
+ /opt/cgroup/01/child_b/child_bb
+ /opt/cgroup/01/child_c
running new jobs in new group is also good.
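+
+ A minimal sketch of such a random create/delete loop (illustrative
+ only, reusing the group names above)::
+
+  while true; do
+   mkdir /opt/cgroup/01/child_c 2>/dev/null
+   rmdir /opt/cgroup/01/child_c 2>/dev/null
+  done
+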
- 9.6 Mount with other subsystems.
+9.6 Mount with other subsystems
+-------------------------------
+
Mounting with other subsystems is a good test because there is a
race and lock dependency with other cgroup subsystems.
- example)
- # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+ example::
+
+ # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
and do task move, mkdir, rmdir etc...under this.
- 9.7 swapoff.
+9.7 swapoff
+-----------
+
Besides management of swap being one of the complicated parts of memcg,
the call path of swap-in at swapoff is not the same as the usual
swap-in path. It is worth testing explicitly.
- For example, test like following is good.
- (Shell-A)
- # mount -t cgroup none /cgroup -o memory
- # mkdir /cgroup/test
- # echo 40M > /cgroup/test/memory.limit_in_bytes
- # echo 0 > /cgroup/test/tasks
+ For example, a test like the following is good:
+
+ (Shell-A)::
+
+ # mount -t cgroup none /cgroup -o memory
+ # mkdir /cgroup/test
+ # echo 40M > /cgroup/test/memory.limit_in_bytes
+ # echo 0 > /cgroup/test/tasks
+
Run a malloc(100M) program under this. You'll see 60M of swaps.
- (Shell-B)
- # move all tasks in /cgroup/test to /cgroup
- # /sbin/swapoff -a
- # rmdir /cgroup/test
- # kill malloc task.
+
+ (Shell-B)::
+
+ # move all tasks in /cgroup/test to /cgroup
+ # /sbin/swapoff -a
+ # rmdir /cgroup/test
+ # kill malloc task.
Of course, the tmpfs vs. swapoff case should be tested, too.
- 9.8 OOM-Killer
+9.8 OOM-Killer
+--------------
+
Out-of-memory caused by memcg's limit will kill tasks under
the memcg. When hierarchy is used, a task under hierarchy
will be killed by the kernel.
+
In this case, panic_on_oom shouldn't be invoked and tasks
in other groups shouldn't be killed.
It's not difficult to cause OOM under memcg as follows.
- Case A) when you can swapoff
- #swapoff -a
- #echo 50M > /memory.limit_in_bytes
+
+ Case A) when you can swapoff::
+
+ #swapoff -a
+ #echo 50M > /memory.limit_in_bytes
+
run 51M of malloc
- Case B) when you use mem+swap limitation.
- #echo 50M > memory.limit_in_bytes
- #echo 50M > memory.memsw.limit_in_bytes
+ Case B) when you use mem+swap limitation::
+
+ #echo 50M > memory.limit_in_bytes
+ #echo 50M > memory.memsw.limit_in_bytes
+
run 51M of malloc
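+
+ A quick way to allocate roughly that much from the shell is the dd
+ trick also used in 9.10 below::
+
+  # a="$(dd if=/dev/zero bs=1M count=51)"
+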
- 9.9 Move charges at task migration
+9.9 Move charges at task migration
+----------------------------------
+
Charges associated with a task can be moved along with task migration.
- (Shell-A)
- #mkdir /cgroup/A
- #echo $$ >/cgroup/A/tasks
+ (Shell-A)::
+
+ #mkdir /cgroup/A
+ #echo $$ >/cgroup/A/tasks
+
run some programs which use some amount of memory in /cgroup/A.
- (Shell-B)
- #mkdir /cgroup/B
- #echo 1 >/cgroup/B/memory.move_charge_at_immigrate
- #echo "pid of the program running in group A" >/cgroup/B/tasks
+ (Shell-B)::
+
+ #mkdir /cgroup/B
+ #echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+ #echo "pid of the program running in group A" >/cgroup/B/tasks
- You can see charges have been moved by reading *.usage_in_bytes or
+ You can see charges have been moved by reading ``*.usage_in_bytes`` or
memory.stat of both A and B.
- See 8.2 of Documentation/cgroup-v1/memory.txt to see what value should be
- written to move_charge_at_immigrate.
- 9.10 Memory thresholds
+ See 8.2 of Documentation/cgroup-v1/memory.rst to see what value should
+ be written to move_charge_at_immigrate.
+
+9.10 Memory thresholds
+----------------------
+
Memory controller implements memory thresholds using cgroups notification
API. You can use tools/cgroup/cgroup_event_listener.c to test it.
- (Shell-A) Create cgroup and run event listener
- # mkdir /cgroup/A
- # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+ (Shell-A) Create cgroup and run event listener::
+
+ # mkdir /cgroup/A
+ # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+ (Shell-B) Add task to cgroup and try to allocate and free memory::
- (Shell-B) Add task to cgroup and try to allocate and free memory
- # echo $$ >/cgroup/A/tasks
- # a="$(dd if=/dev/zero bs=1M count=10)"
- # a=
+ # echo $$ >/cgroup/A/tasks
+ # a="$(dd if=/dev/zero bs=1M count=10)"
+ # a=
You will see a message from cgroup_event_listener every time you cross
the thresholds.
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.rst
index a33cedf85427..41bdc038dad9 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.rst
@@ -1,22 +1,26 @@
+==========================
Memory Resource Controller
+==========================
-NOTE: This document is hopelessly outdated and it asks for a complete
+NOTE:
+ This document is hopelessly outdated and it asks for a complete
rewrite. It still contains useful information, so we are keeping it
here but make sure to check the current code if you need a deeper
understanding.
-NOTE: The Memory Resource Controller has generically been referred to as the
+NOTE:
+ The Memory Resource Controller has generically been referred to as the
memory controller in this document. Do not confuse memory controller
used here with the memory controller that is used in hardware.
-(For editors)
-In this document:
+(For editors) In this document:
When we mention a cgroup (cgroupfs's directory) with memory controller,
we call it "memory cgroup". When you see git-log and source code, you'll
see patch's title and function names tend to use "memcg".
In this document, we avoid using it.
Benefits and Purpose of the memory controller
+=============================================
The memory controller isolates the memory behaviour of a group of tasks
from the rest of the system. The article on LWN [12] mentions some probable
@@ -38,6 +42,7 @@ e. There are several other use cases; find one or use the controller just
Current Status: linux-2.6.34-mmotm(development version of 2010/April)
Features:
+
- accounting anonymous pages, file caches, swap caches usage and limiting them.
- pages are linked to per-memcg LRU exclusively, and there is no global LRU.
- optionally, memory+swap usage can be accounted and limited.
@@ -54,41 +59,48 @@ Features:
Brief summary of control files.
- tasks # attach a task(thread) and show list of threads
- cgroup.procs # show list of processes
- cgroup.event_control # an interface for event_fd()
- memory.usage_in_bytes # show current usage for memory
- (See 5.5 for details)
- memory.memsw.usage_in_bytes # show current usage for memory+Swap
- (See 5.5 for details)
- memory.limit_in_bytes # set/show limit of memory usage
- memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
- memory.failcnt # show the number of memory usage hits limits
- memory.memsw.failcnt # show the number of memory+Swap hits limits
- memory.max_usage_in_bytes # show max memory usage recorded
- memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
- memory.soft_limit_in_bytes # set/show soft limit of memory usage
- memory.stat # show various statistics
- memory.use_hierarchy # set/show hierarchical account enabled
- memory.force_empty # trigger forced page reclaim
- memory.pressure_level # set memory pressure notifications
- memory.swappiness # set/show swappiness parameter of vmscan
- (See sysctl's vm.swappiness)
- memory.move_charge_at_immigrate # set/show controls of moving charges
- memory.oom_control # set/show oom controls.
- memory.numa_stat # show the number of memory usage per numa node
-
- memory.kmem.limit_in_bytes # set/show hard limit for kernel memory
- memory.kmem.usage_in_bytes # show current kernel memory allocation
- memory.kmem.failcnt # show the number of kernel memory usage hits limits
- memory.kmem.max_usage_in_bytes # show max kernel memory usage recorded
-
- memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
- memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
- memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits
- memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded
+==================================== ==========================================
+ tasks attach a task(thread) and show list of
+ threads
+ cgroup.procs show list of processes
+ cgroup.event_control an interface for event_fd()
+ memory.usage_in_bytes show current usage for memory
+ (See 5.5 for details)
+ memory.memsw.usage_in_bytes show current usage for memory+Swap
+ (See 5.5 for details)
+ memory.limit_in_bytes set/show limit of memory usage
+ memory.memsw.limit_in_bytes set/show limit of memory+Swap usage
+ memory.failcnt show the number of memory usage hits limits
+ memory.memsw.failcnt show the number of memory+Swap hits limits
+ memory.max_usage_in_bytes show max memory usage recorded
+ memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes set/show soft limit of memory usage
+ memory.stat show various statistics
+ memory.use_hierarchy set/show hierarchical account enabled
+ memory.force_empty trigger forced page reclaim
+ memory.pressure_level set memory pressure notifications
+ memory.swappiness set/show swappiness parameter of vmscan
+ (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate set/show controls of moving charges
+ memory.oom_control set/show oom controls.
+ memory.numa_stat show the number of memory usage per numa
+ node
+
+ memory.kmem.limit_in_bytes set/show hard limit for kernel memory
+ memory.kmem.usage_in_bytes show current kernel memory allocation
+ memory.kmem.failcnt show the number of kernel memory usage
+ hits limits
+ memory.kmem.max_usage_in_bytes show max kernel memory usage recorded
+
+ memory.kmem.tcp.limit_in_bytes set/show hard limit for tcp buf memory
+ memory.kmem.tcp.usage_in_bytes show current tcp buf memory allocation
+ memory.kmem.tcp.failcnt show the number of tcp buf memory usage
+ hits limits
+ memory.kmem.tcp.max_usage_in_bytes show max tcp buf memory usage recorded
+==================================== ==========================================
1. History
+==========
The memory controller has a long history. A request for comments for the memory
controller was posted by Balbir Singh [1]. At the time the RFC was posted
@@ -103,6 +115,7 @@ at version 6; it combines both mapped (RSS) and unmapped Page
Cache Control [11].
2. Memory Control
+=================
Memory is a unique resource in the sense that it is present in a limited
amount. If a task requires a lot of CPU processing, the task can spread
@@ -120,6 +133,7 @@ are:
The memory controller is the first controller developed.
2.1. Design
+-----------
The core of the design is a counter called the page_counter. The
page_counter tracks the current memory usage and limit of the group of
@@ -127,6 +141,9 @@ processes associated with the controller. Each cgroup has a memory controller
specific data structure (mem_cgroup) associated with it.
2.2. Accounting
+---------------
+
+::
+--------------------+
| mem_cgroup |
@@ -165,6 +182,7 @@ updated. page_cgroup has its own LRU on cgroup.
(*) page_cgroup structure is allocated at boot/memory-hotplug time.
2.2.1 Accounting details
+------------------------
All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
Some pages which are never reclaimable and will not be on the LRU
@@ -191,6 +209,7 @@ Note: we just account pages-on-LRU because our purpose is to control amount
of used pages; not-on-LRU pages tend to be out-of-control from VM view.
2.3 Shared Page Accounting
+--------------------------
Shared pages are accounted on the basis of the first touch approach. The
cgroup that first touches a page is accounted for the page. The principle
@@ -207,11 +226,13 @@ be backed into memory in force, charges for pages are accounted against the
caller of swapoff rather than the users of shmem.
2.4 Swap Extension (CONFIG_MEMCG_SWAP)
+--------------------------------------
Swap Extension allows you to record charge for swap. A swapped-in page is
charged back to original page allocator if possible.
When swap is accounted, following files are added.
+
- memory.memsw.usage_in_bytes.
- memory.memsw.limit_in_bytes.
@@ -224,14 +245,16 @@ In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
By using the memsw limit, you can avoid system OOM which can be caused by swap
shortage.
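+
+For the example above, that is::
+
+  # echo 3G > memory.memsw.limit_in_bytes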
-* why 'memory+swap' rather than swap.
+**why 'memory+swap' rather than swap**
+
The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
to move account from memory to swap...there is no change in usage of
memory+swap. In other words, when we want to limit the usage of swap without
affecting global LRU, memory+swap limit is better than just limiting swap from
an OS point of view.
-* What happens when a cgroup hits memory.memsw.limit_in_bytes
+**What happens when a cgroup hits memory.memsw.limit_in_bytes**
+
When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
in this cgroup. Then, swap-out will not be done by cgroup routine and file
caches are dropped. But as mentioned above, global LRU can swap out memory
@@ -239,6 +262,7 @@ from it for sanity of the system's memory management state. You can't forbid
it by cgroup.
2.5 Reclaim
+-----------
Each cgroup maintains a per cgroup LRU which has the same structure as
global VM. When a cgroup goes over its limit, we first try
@@ -251,29 +275,36 @@ The reclaim algorithm has not been modified for cgroups, except that
pages that are selected for reclaiming come from the per-cgroup LRU
list.
-NOTE: Reclaim does not work for the root cgroup, since we cannot set any
-limits on the root cgroup.
+NOTE:
+ Reclaim does not work for the root cgroup, since we cannot set any
+ limits on the root cgroup.
-Note2: When panic_on_oom is set to "2", the whole system will panic.
+Note2:
+ When panic_on_oom is set to "2", the whole system will panic.
When the oom event notifier is registered, an event will be delivered.
(See oom_control section)
2.6 Locking
+-----------
lock_page_cgroup()/unlock_page_cgroup() should not be called under
the i_pages lock.
Other lock order is following:
+
PG_locked.
- mm->page_table_lock
- pgdat->lru_lock
- lock_page_cgroup.
+ mm->page_table_lock
+ pgdat->lru_lock
+ lock_page_cgroup.
+
In many cases, just lock_page_cgroup() is called.
+
per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
pgdat->lru_lock; it has no lock of its own.
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+-----------------------------------------------
With the Kernel memory extension, the Memory Controller is able to limit
the amount of kernel memory used by the system. Kernel memory is fundamentally
@@ -288,6 +319,7 @@ Kernel memory limits are not imposed for the root cgroup. Usage for the root
cgroup may or may not be accounted. The memory used is accumulated into
memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
(currently only for tcp).
+
The main "kmem" counter is fed into the main counter, so kmem charges will
also be visible from the user counter.
@@ -295,36 +327,42 @@ Currently no soft limit is implemented for kernel memory. It is future work
to trigger slab reclaim when those limits are reached.
2.7.1 Current Kernel Memory resources accounted
+-----------------------------------------------
-* stack pages: every process consumes some stack pages. By accounting into
-kernel memory, we prevent new processes from being created when the kernel
-memory usage is too high.
+stack pages:
+ every process consumes some stack pages. By accounting into
+ kernel memory, we prevent new processes from being created when the kernel
+ memory usage is too high.
-* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
-of each kmem_cache is created every time the cache is touched by the first time
-from inside the memcg. The creation is done lazily, so some objects can still be
-skipped while the cache is being created. All objects in a slab page should
-belong to the same memcg. This only fails to hold when a task is migrated to a
-different memcg during the page allocation by the cache.
+slab pages:
+ pages allocated by the SLAB or SLUB allocator are tracked. A copy
+ of each kmem_cache is created every time the cache is touched for the first time
+ from inside the memcg. The creation is done lazily, so some objects can still be
+ skipped while the cache is being created. All objects in a slab page should
+ belong to the same memcg. This only fails to hold when a task is migrated to a
+ different memcg during the page allocation by the cache.
-* sockets memory pressure: some sockets protocols have memory pressure
-thresholds. The Memory Controller allows them to be controlled individually
-per cgroup, instead of globally.
+sockets memory pressure:
+ some socket protocols have memory pressure
+ thresholds. The Memory Controller allows them to be controlled individually
+ per cgroup, instead of globally.
-* tcp memory pressure: sockets memory pressure for the tcp protocol.
+tcp memory pressure:
+ sockets memory pressure for the tcp protocol.
2.7.2 Common use cases
+----------------------
Because the "kmem" counter is fed to the main user counter, kernel memory can
never be limited completely independently of user memory. Say "U" is the user
limit, and "K" the kernel limit. There are three possible ways limits can be
set:
- U != 0, K = unlimited:
+U != 0, K = unlimited:
This is the standard memcg limitation mechanism already present before kmem
accounting. Kernel memory is completely ignored.
- U != 0, K < U:
+U != 0, K < U:
Kernel memory is a subset of the user memory. This setup is useful in
deployments where the total amount of memory per-cgroup is overcommitted.
Overcommitting kernel memory limits is definitely not recommended, since the
@@ -332,19 +370,23 @@ set:
In this case, the admin could set up K so that the sum of all groups is
never greater than the total memory, and freely set U at the cost of his
QoS.
- WARNING: In the current implementation, memory reclaim will NOT be
+
+WARNING:
+ In the current implementation, memory reclaim will NOT be
triggered for a cgroup when it hits K while staying below U, which makes
this setup impractical.
- U != 0, K >= U:
+U != 0, K >= U:
Since kmem charges will also be fed to the user counter and reclaim will be
triggered for the cgroup for both kinds of memory. This setup gives the
admin a unified view of memory, and it is also useful for people who just
want to track kernel memory usage.
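+
+As an illustration, a "U != 0, K < U" setup (the sizes here are
+arbitrary) could be configured with::
+
+  # echo 500M > memory.limit_in_bytes
+  # echo 400M > memory.kmem.limit_in_bytes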
3. User Interface
+=================
3.0. Configuration
+------------------
a. Enable CONFIG_CGROUPS
b. Enable CONFIG_MEMCG
@@ -352,39 +394,53 @@ c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
3.1. Prepare the cgroups (see cgroups.rst, Why are cgroups needed?)
-# mount -t tmpfs none /sys/fs/cgroup
-# mkdir /sys/fs/cgroup/memory
-# mount -t cgroup none /sys/fs/cgroup/memory -o memory
+-------------------------------------------------------------------
+
+::
+
+ # mount -t tmpfs none /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/memory
+ # mount -t cgroup none /sys/fs/cgroup/memory -o memory
+
+3.2. Make the new group and move bash into it::
+
+ # mkdir /sys/fs/cgroup/memory/0
+ # echo $$ > /sys/fs/cgroup/memory/0/tasks
-3.2. Make the new group and move bash into it
-# mkdir /sys/fs/cgroup/memory/0
-# echo $$ > /sys/fs/cgroup/memory/0/tasks
+Since we're now in the 0 cgroup, we can alter the memory limit::
-Since now we're in the 0 cgroup, we can alter the memory limit:
-# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+ # echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
-mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
+NOTE:
+ We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes,
+ Gibibytes.)
-NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
-NOTE: We cannot set limits on the root cgroup any more.
+NOTE:
+ We can write "-1" to reset the ``*.limit_in_bytes(unlimited)``.
-# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
-4194304
+NOTE:
+ We cannot set limits on the root cgroup any more.
-We can check the usage:
-# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
-1216512
+::
+
+ # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+ 4194304
+
+We can check the usage::
+
+ # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
+ 1216512
A successful write to this file does not guarantee a successful setting of
this limit to the value written into the file. This can be due to a
number of factors, such as rounding up to page boundaries or the total
availability of memory on the system. The user is required to re-read
-this file after a write to guarantee the value committed by the kernel.
+this file after a write to guarantee the value committed by the kernel::
-# echo 1 > memory.limit_in_bytes
-# cat memory.limit_in_bytes
-4096
+ # echo 1 > memory.limit_in_bytes
+ # cat memory.limit_in_bytes
+ 4096
The memory.failcnt field gives the number of times that the cgroup limit was
exceeded.
@@ -393,6 +449,7 @@ The memory.stat file gives accounting information. Now, the number of
caches, RSS and Active pages/Inactive pages are shown.
4. Testing
+==========
For testing features and implementation, see memcg_test.rst.
@@ -408,6 +465,7 @@ But the above two are testing extreme situations.
Trying usual test under memory controller is always helpful.
4.1 Troubleshooting
+-------------------
Sometimes a user might find that the application under a cgroup is
terminated by the OOM killer. There are several causes for this:
@@ -422,6 +480,7 @@ To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
seeing what happens will be helpful.
4.2 Task migration
+------------------
When a task migrates from one cgroup to another, its charge is not
carried forward by default. The pages allocated from the original cgroup still
@@ -432,6 +491,7 @@ You can move charges of a task along with task migration.
See 8. "Move charges at task migration"
4.3 Removing a cgroup
+---------------------
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
cgroup might have some charge associated with it, even though all
@@ -448,13 +508,15 @@ will be charged as a new owner of it.
About use_hierarchy, see Section 6.
-5. Misc. interfaces.
+5. Misc. interfaces
+===================
5.1 force_empty
+---------------
memory.force_empty interface is provided to make cgroup's memory usage empty.
- When writing anything to this
+ When writing anything to this::
- # echo 0 > memory.force_empty
+ # echo 0 > memory.force_empty
the cgroup will be reclaimed and as many pages reclaimed as possible.
@@ -471,50 +533,61 @@ About use_hierarchy, see Section 6.
About use_hierarchy, see Section 6.
5.2 stat file
+-------------
memory.stat file includes the following statistics
-# per-memory cgroup local status
-cache - # of bytes of page cache memory.
-rss - # of bytes of anonymous and swap cache memory (includes
+per-memory cgroup local status
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+=============== ===============================================================
+cache # of bytes of page cache memory.
+rss # of bytes of anonymous and swap cache memory (includes
transparent hugepages).
-rss_huge - # of bytes of anonymous transparent hugepages.
-mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
-pgpgin - # of charging events to the memory cgroup. The charging
+rss_huge # of bytes of anonymous transparent hugepages.
+mapped_file # of bytes of mapped file (includes tmpfs/shmem)
+pgpgin # of charging events to the memory cgroup. The charging
event happens each time a page is accounted as either mapped
anon page(RSS) or cache page(Page Cache) to the cgroup.
-pgpgout - # of uncharging events to the memory cgroup. The uncharging
+pgpgout # of uncharging events to the memory cgroup. The uncharging
event happens each time a page is unaccounted from the cgroup.
-swap - # of bytes of swap usage
-dirty - # of bytes that are waiting to get written back to the disk.
-writeback - # of bytes of file/anon cache that are queued for syncing to
+swap # of bytes of swap usage
+dirty # of bytes that are waiting to get written back to the disk.
+writeback # of bytes of file/anon cache that are queued for syncing to
disk.
-inactive_anon - # of bytes of anonymous and swap cache memory on inactive
+inactive_anon # of bytes of anonymous and swap cache memory on inactive
LRU list.
-active_anon - # of bytes of anonymous and swap cache memory on active
+active_anon # of bytes of anonymous and swap cache memory on active
LRU list.
-inactive_file - # of bytes of file-backed memory on inactive LRU list.
-active_file - # of bytes of file-backed memory on active LRU list.
-unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
-
-# status considering hierarchy (see memory.use_hierarchy settings)
-
-hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
- under which the memory cgroup is
-hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
- hierarchy under which memory cgroup is.
-
-total_<counter> - # hierarchical version of <counter>, which in
- addition to the cgroup's own value includes the
- sum of all hierarchical children's values of
- <counter>, i.e. total_cache
-
-# The following additional stats are dependent on CONFIG_DEBUG_VM.
-
-recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
-recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
-recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
+inactive_file # of bytes of file-backed memory on inactive LRU list.
+active_file # of bytes of file-backed memory on active LRU list.
+unevictable # of bytes of memory that cannot be reclaimed (mlocked etc).
+=============== ===============================================================
+
+status considering hierarchy (see memory.use_hierarchy settings)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ===================================================
+hierarchical_memory_limit # of bytes of memory limit with regard to hierarchy
+ under which the memory cgroup is
+hierarchical_memsw_limit # of bytes of memory+swap limit with regard to
+ hierarchy under which memory cgroup is.
+
+total_<counter> # hierarchical version of <counter>, which in
+ addition to the cgroup's own value includes the
+ sum of all hierarchical children's values of
+ <counter>, i.e. total_cache
+========================= ===================================================
+
+The following additional stats are dependent on CONFIG_DEBUG_VM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+========================= ========================================
+recent_rotated_anon VM internal parameter. (see mm/vmscan.c)
+recent_rotated_file VM internal parameter. (see mm/vmscan.c)
+recent_scanned_anon VM internal parameter. (see mm/vmscan.c)
+recent_scanned_file VM internal parameter. (see mm/vmscan.c)
+========================= ========================================
Memo:
recent_rotated means recent frequency of LRU rotation.
@@ -525,12 +598,15 @@ Note:
Only anonymous and swap cache memory is listed as part of 'rss' stat.
This should not be confused with the true 'resident set size' or the
amount of physical memory used by the cgroup.
+
'rss + mapped_file" will give you resident set size of cgroup.
+
(Note: file and shmem may be shared among other cgroups. In that case,
- mapped_file is accounted only when the memory cgroup is owner of page
- cache.)
+ mapped_file is accounted only when the memory cgroup is owner of page
+ cache.)
5.3 swappiness
+--------------
Overrides /proc/sys/vm/swappiness for the particular group. The tunable
in the root cgroup corresponds to the global swappiness setting.
@@ -541,16 +617,19 @@ there is a swap storage available. This might lead to memcg OOM killer
if there are no file pages to reclaim.
5.4 failcnt
+-----------
A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
This failcnt(== failure count) shows the number of times that a usage counter
hit its limit. When a memory cgroup hits a limit, failcnt increases and
memory under it will be reclaimed.
-You can reset failcnt by writing 0 to failcnt file.
-# echo 0 > .../memory.failcnt
+You can reset failcnt by writing 0 to failcnt file::
+
+ # echo 0 > .../memory.failcnt
5.5 usage_in_bytes
+------------------
For efficiency, like other kernel components, memory cgroup uses some optimization
to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
@@ -560,6 +639,7 @@ If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
value in memory.stat (see 5.2).
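+
+For example, that sum can be computed with a small (illustrative) awk
+one-liner::
+
+  # awk '$1=="rss" || $1=="cache" || $1=="swap" { sum += $2 }
+         END { print sum }' memory.stat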
5.6 numa_stat
+-------------
This is similar to numa_maps but operates on a per-memcg basis. This is
useful for providing visibility into the numa locality information within
@@ -571,22 +651,23 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
per-node page counts including "hierarchical_<counter>" which sums up all
hierarchical children's values in addition to the memcg's own value.
-The output format of memory.numa_stat is:
+The output format of memory.numa_stat is::
-total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
-file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
-anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
-hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+ hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
The "total" count is sum of file + anon + unevictable.
6. Hierarchy support
+====================
The memory controller supports a deep hierarchy and hierarchical accounting.
The hierarchy is created by creating the appropriate cgroups in the
cgroup filesystem. Consider, for example, the following cgroup filesystem
-hierarchy
+hierarchy::
root
/ | \
@@ -603,24 +684,28 @@ limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
children of the ancestor.
6.1 Enabling hierarchical accounting and reclaim
+------------------------------------------------
A memory cgroup by default disables the hierarchy feature. Support
-can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
+can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup::
-# echo 1 > memory.use_hierarchy
+ # echo 1 > memory.use_hierarchy
-The feature can be disabled by
+The feature can be disabled by::
-# echo 0 > memory.use_hierarchy
+ # echo 0 > memory.use_hierarchy
-NOTE1: Enabling/disabling will fail if either the cgroup already has other
+NOTE1:
+ Enabling/disabling will fail if either the cgroup already has other
cgroups created below it, or if the parent cgroup has use_hierarchy
enabled.
-NOTE2: When panic_on_oom is set to "2", the whole system will panic in
+NOTE2:
+ When panic_on_oom is set to "2", the whole system will panic in
case of an OOM event in any cgroup.
7. Soft limits
+==============
Soft limits allow for greater sharing of memory. The idea behind soft limits
is to allow control groups to use as much of the memory as needed, provided
@@ -640,22 +725,26 @@ hints/setup. Currently soft limit based reclaim is set up such that
it gets invoked from balance_pgdat (kswapd).
7.1 Interface
+-------------
Soft limits can be setup by using the following commands (in this example we
-assume a soft limit of 256 MiB)
+assume a soft limit of 256 MiB)::
-# echo 256M > memory.soft_limit_in_bytes
+ # echo 256M > memory.soft_limit_in_bytes
-If we want to change this to 1G, we can at any time use
+If we want to change this to 1G, we can at any time use::
-# echo 1G > memory.soft_limit_in_bytes
+ # echo 1G > memory.soft_limit_in_bytes
-NOTE1: Soft limits take effect over a long period of time, since they involve
+NOTE1:
+ Soft limits take effect over a long period of time, since they involve
reclaiming memory for balancing between memory cgroups.
-NOTE2: It is recommended to set the soft limit always below the hard limit,
+NOTE2:
+ It is recommended to set the soft limit always below the hard limit,
otherwise the hard limit will take precedence.
8. Move charges at task migration
+=================================
Users can move charges associated with a task along with task migration, that
is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
@@ -663,60 +752,71 @@ This feature is not supported in !CONFIG_MMU environments because of lack of
page tables.
8.1 Interface
+-------------
This feature is disabled by default. It can be enabled (and disabled again) by
writing to memory.move_charge_at_immigrate of the destination cgroup.
-If you want to enable it:
+If you want to enable it::
-# echo (some positive value) > memory.move_charge_at_immigrate
+ # echo (some positive value) > memory.move_charge_at_immigrate
-Note: Each bits of move_charge_at_immigrate has its own meaning about what type
+Note:
+ Each bit of move_charge_at_immigrate has its own meaning about what type
of charges should be moved. See 8.2 for details.
-Note: Charges are moved only when you move mm->owner, in other words,
+Note:
+ Charges are moved only when you move mm->owner, in other words,
a leader of a thread group.
-Note: If we cannot find enough space for the task in the destination cgroup, we
+Note:
+ If we cannot find enough space for the task in the destination cgroup, we
try to make space by reclaiming memory. Task migration may fail if we
cannot make enough space.
-Note: It can take several seconds if you move charges much.
+Note:
+ It can take several seconds if you move many charges.
-And if you want disable it again:
+And if you want to disable it again::
-# echo 0 > memory.move_charge_at_immigrate
+ # echo 0 > memory.move_charge_at_immigrate
8.2 Type of charges which can be moved
+--------------------------------------
Each bit in move_charge_at_immigrate has its own meaning about what type of
charges should be moved. But in any case, it must be noted that an account of
a page or a swap can be moved only when it is charged to the task's current
(old) memory cgroup.
- bit | what type of charges would be moved ?
- -----+------------------------------------------------------------------------
- 0 | A charge of an anonymous page (or swap of it) used by the target task.
- | You must enable Swap Extension (see 2.4) to enable move of swap charges.
- -----+------------------------------------------------------------------------
- 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory)
- | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
- | anonymous pages, file pages (and swaps) in the range mmapped by the task
- | will be moved even if the task hasn't done page fault, i.e. they might
- | not be the task's "RSS", but other task's "RSS" that maps the same file.
- | And mapcount of the page is ignored (the page can be moved even if
- | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to
- | enable move of swap charges.
++---+--------------------------------------------------------------------------+
+|bit| what type of charges would be moved ? |
++===+==========================================================================+
+| 0 | A charge of an anonymous page (or swap of it) used by the target task. |
+| | You must enable Swap Extension (see 2.4) to enable move of swap charges. |
++---+--------------------------------------------------------------------------+
+| 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory) |
+| | and swaps of tmpfs file) mmapped by the target task. Unlike the case of |
+| | anonymous pages, file pages (and swaps) in the range mmapped by the task |
+| | will be moved even if the task hasn't done page fault, i.e. they might |
+| | not be the task's "RSS", but other task's "RSS" that maps the same file. |
+| | And mapcount of the page is ignored (the page can be moved even if |
+| | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to |
+| | enable move of swap charges. |
++---+--------------------------------------------------------------------------+
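+
+For example, writing 3 (bits 0 and 1 set) enables moving both anonymous
+and file charges::
+
+  # echo 3 > memory.move_charge_at_immigrate
+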
8.3 TODO
+--------
- All of moving charge operations are done under cgroup_mutex. It's not good
behavior to hold the mutex too long, so we may need some trick.
9. Memory thresholds
+====================
Memory cgroup implements memory thresholds using the cgroups notification
API (see cgroups.rst). It allows one to register multiple memory and memsw
thresholds and to get notifications when a threshold is crossed.
To register a threshold, an application must:
+
- create an eventfd using eventfd(2);
- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
@@ -728,6 +828,7 @@ threshold in any direction.
It's applicable for root and non-root cgroup.
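+
+For example, the listener built from tools/cgroup/cgroup_event_listener.c
+can be used to watch a threshold (the cgroup path and the 5M value are
+arbitrary)::
+
+  # ./cgroup_event_listener /sys/fs/cgroup/memory/A/memory.usage_in_bytes 5M
+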
10. OOM Control
+===============
memory.oom_control file is for OOM notification and other controls.
API (see cgroups.rst). It allows one to register multiple OOM notification
deliveries and to get a notification when OOM happens.
To register a notifier, an application must:
+
- create an eventfd using eventfd(2)
- open memory.oom_control file
- write string like "<event_fd> <fd of memory.oom_control>" to
@@ -752,8 +854,11 @@ If OOM-killer is disabled, tasks under cgroup will hang/sleep
in memory cgroup's OOM-waitqueue when they request accountable memory.
For running them, you have to relax the memory cgroup's OOM status by
+
* enlarge limit or reduce usage.
+
To reduce usage,
+
* kill some tasks.
* move some tasks to other group with account migration.
* remove some files (on tmpfs?)
@@ -761,11 +866,14 @@ To reduce usage,
Then, stopped tasks will work again.
When read, the current OOM status is shown.
- oom_kill_disable 0 or 1 (if 1, oom-killer is disabled)
- under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may
- be stopped.)
+
+ - oom_kill_disable 0 or 1
+ (if 1, oom-killer is disabled)
+ - under_oom 0 or 1
+ (if 1, the memory cgroup is under OOM, tasks may be stopped.)
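+
+For example, the OOM-killer can be disabled for a cgroup by writing 1 to
+the file; reading it back shows the resulting state (output abridged)::
+
+  # echo 1 > memory.oom_control
+  # cat memory.oom_control
+  oom_kill_disable 1
+  under_oom 0
+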
11. Memory Pressure
+===================
The pressure level notifications can be used to monitor the memory
allocation cost; based on the pressure, applications can implement
@@ -840,21 +948,22 @@ Test:
Here is a small script example that makes a new cgroup, sets up a
memory limit, sets up a notification in the cgroup and then makes the child
- cgroup experience a critical pressure:
+ cgroup experience a critical pressure::
- # cd /sys/fs/cgroup/memory/
- # mkdir foo
- # cd foo
- # cgroup_event_listener memory.pressure_level low,hierarchy &
- # echo 8000000 > memory.limit_in_bytes
- # echo 8000000 > memory.memsw.limit_in_bytes
- # echo $$ > tasks
- # dd if=/dev/zero | read x
+ # cd /sys/fs/cgroup/memory/
+ # mkdir foo
+ # cd foo
+ # cgroup_event_listener memory.pressure_level low,hierarchy &
+ # echo 8000000 > memory.limit_in_bytes
+ # echo 8000000 > memory.memsw.limit_in_bytes
+ # echo $$ > tasks
+ # dd if=/dev/zero | read x
(Expect a bunch of notifications, and eventually, the oom-killer will
trigger.)
12. TODO
+========
1. Make per-cgroup scanner reclaim not-shared pages first
2. Teach controller to account for shared-pages
@@ -862,11 +971,13 @@ Test:
not yet hit but the usage is getting closer
Summary
+=======
Overall, the memory controller has been a stable controller and has been
commented and discussed quite extensively in the community.
References
+==========
1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
2. Singh, Balbir. Memory Controller (RSS Control),
diff --git a/Documentation/cgroup-v1/net_cls.txt b/Documentation/cgroup-v1/net_cls.rst
index ec182346dea2..a2cf272af7a0 100644
--- a/Documentation/cgroup-v1/net_cls.txt
+++ b/Documentation/cgroup-v1/net_cls.rst
@@ -1,5 +1,6 @@
+=========================
Network classifier cgroup
--------------------------
+=========================
The Network classifier cgroup provides an interface to
tag network packets with a class identifier (classid).
@@ -17,23 +18,27 @@ values is 0xAAAABBBB; AAAA is the major handle number and BBBB
is the minor handle number.
Reading net_cls.classid yields a decimal result.
-Example:
-mkdir /sys/fs/cgroup/net_cls
-mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
-mkdir /sys/fs/cgroup/net_cls/0
-echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid
- - setting a 10:1 handle.
+Example::
-cat /sys/fs/cgroup/net_cls/0/net_cls.classid
-1048577
+ mkdir /sys/fs/cgroup/net_cls
+ mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+ mkdir /sys/fs/cgroup/net_cls/0
+ echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid
-configuring tc:
-tc qdisc add dev eth0 root handle 10: htb
+- setting a 10:1 handle::
-tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
- - creating traffic class 10:1
+ cat /sys/fs/cgroup/net_cls/0/net_cls.classid
+ 1048577
-tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+- configuring tc::
-configuring iptables, basic example:
-iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
+ tc qdisc add dev eth0 root handle 10: htb
+ tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
+
+- creating traffic class 10:1::
+
+ tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example::
+
+ iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroup-v1/net_prio.txt b/Documentation/cgroup-v1/net_prio.rst
index a82cbd28ea8a..b40905871c64 100644
--- a/Documentation/cgroup-v1/net_prio.txt
+++ b/Documentation/cgroup-v1/net_prio.rst
@@ -1,5 +1,6 @@
+=======================
Network priority cgroup
--------------------------
+=======================
The Network priority cgroup provides an interface to allow an administrator to
dynamically set the priority of network traffic generated by various
SO_PRIORITY socket option. This, however, is not always possible because:
This cgroup allows an administrator to assign a process to a group which defines
the priority of egress traffic on a given interface. Network priority groups can
-be created by first mounting the cgroup filesystem.
+be created by first mounting the cgroup filesystem::
-# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
+ # mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
With the above step, the initial group acting as the parent accounting group
becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in
@@ -25,17 +26,18 @@ the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
Each net_prio cgroup contains two files that are subsystem specific
net_prio.prioidx
-This file is read-only, and is simply informative. It contains a unique integer
-value that the kernel uses as an internal representation of this cgroup.
+ This file is read-only, and is simply informative. It contains a unique
+ integer value that the kernel uses as an internal representation of this
+ cgroup.
net_prio.ifpriomap
-This file contains a map of the priorities assigned to traffic originating from
-processes in this group and egressing the system on various interfaces. It
-contains a list of tuples in the form <ifname priority>. Contents of this file
-can be modified by echoing a string into the file using the same tuple format.
-for example:
+ This file contains a map of the priorities assigned to traffic originating
+ from processes in this group and egressing the system on various interfaces.
+ It contains a list of tuples in the form <ifname priority>. Contents of this
+ file can be modified by echoing a string into the file using the same tuple
+ format. For example::
-echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
+ echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
This command would force any traffic originating from processes belonging to the
iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
diff --git a/Documentation/cgroup-v1/pids.txt b/Documentation/cgroup-v1/pids.rst
index e105d708ccde..6acebd9e72c8 100644
--- a/Documentation/cgroup-v1/pids.txt
+++ b/Documentation/cgroup-v1/pids.rst
@@ -1,5 +1,6 @@
- Process Number Controller
- =========================
+=========================
+Process Number Controller
+=========================
Abstract
--------
@@ -34,55 +35,58 @@ pids.current tracks all child cgroup hierarchies, so parent/pids.current is a
superset of parent/child/pids.current.
The pids.events file contains event counters:
+
- max: Number of times fork failed because limit was hit.
Example
-------
-First, we mount the pids controller:
-# mkdir -p /sys/fs/cgroup/pids
-# mount -t cgroup -o pids none /sys/fs/cgroup/pids
+First, we mount the pids controller::
+
+ # mkdir -p /sys/fs/cgroup/pids
+ # mount -t cgroup -o pids none /sys/fs/cgroup/pids
+
+Then we create a hierarchy, set limits and attach processes to it::
-Then we create a hierarchy, set limits and attach processes to it:
-# mkdir -p /sys/fs/cgroup/pids/parent/child
-# echo 2 > /sys/fs/cgroup/pids/parent/pids.max
-# echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-#
+ # mkdir -p /sys/fs/cgroup/pids/parent/child
+ # echo 2 > /sys/fs/cgroup/pids/parent/pids.max
+ # echo $$ > /sys/fs/cgroup/pids/parent/cgroup.procs
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ #
It should be noted that attempts to overcome the set limit (2 in this case) will
-fail:
+fail::
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ # ( /bin/echo "Here's some processes for you." | cat )
+ sh: fork: Resource temporarily unavailable
+ #
Even if we migrate to a child cgroup (which doesn't have a set limit), we will
not be able to overcome the most stringent limit in the hierarchy (in this case,
-parent's):
-
-# echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
-# cat /sys/fs/cgroup/pids/parent/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.current
-2
-# cat /sys/fs/cgroup/pids/parent/child/pids.max
-max
-# ( /bin/echo "Here's some processes for you." | cat )
-sh: fork: Resource temporary unavailable
-#
+parent's)::
+
+ # echo $$ > /sys/fs/cgroup/pids/parent/child/cgroup.procs
+ # cat /sys/fs/cgroup/pids/parent/pids.current
+ 2
+ # cat /sys/fs/cgroup/pids/parent/child/pids.current
+ 2
+ # cat /sys/fs/cgroup/pids/parent/child/pids.max
+ max
+ # ( /bin/echo "Here's some processes for you." | cat )
+ sh: fork: Resource temporarily unavailable
+ #
We can set a limit that is smaller than pids.current, which will stop any new
processes from being forked at all (note that the shell itself counts towards
-pids.current):
-
-# echo 1 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-# echo 0 > /sys/fs/cgroup/pids/parent/pids.max
-# /bin/echo "We can't even spawn a single process now."
-sh: fork: Resource temporary unavailable
-#
+pids.current)::
+
+ # echo 1 > /sys/fs/cgroup/pids/parent/pids.max
+ # /bin/echo "We can't even spawn a single process now."
+ sh: fork: Resource temporarily unavailable
+ # echo 0 > /sys/fs/cgroup/pids/parent/pids.max
+ # /bin/echo "We can't even spawn a single process now."
+ sh: fork: Resource temporarily unavailable
+ #
diff --git a/Documentation/cgroup-v1/rdma.txt b/Documentation/cgroup-v1/rdma.rst
index 9bdb7fd03f83..2fcb0a9bf790 100644
--- a/Documentation/cgroup-v1/rdma.txt
+++ b/Documentation/cgroup-v1/rdma.rst
@@ -1,16 +1,17 @@
- RDMA Controller
- ----------------
+===============
+RDMA Controller
+===============
-Contents
---------
+.. Contents
-1. Overview
- 1-1. What is RDMA controller?
- 1-2. Why RDMA controller needed?
- 1-3. How is RDMA controller implemented?
-2. Usage Examples
+ 1. Overview
+ 1-1. What is RDMA controller?
+ 1-2. Why RDMA controller needed?
+ 1-3. How is RDMA controller implemented?
+ 2. Usage Examples
1. Overview
+===========
1-1. What is RDMA controller?
-----------------------------
@@ -83,27 +84,34 @@ what is configured by user for a given cgroup and what is supported by
IB device.
Following resources can be accounted by rdma controller.
+
+ ========== =============================
hca_handle Maximum number of HCA Handles
hca_object Maximum number of HCA Objects
+ ========== =============================
2. Usage Examples
------------------
-
-(a) Configure resource limit:
-echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
-echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
-
-(b) Query resource limit:
-cat /sys/fs/cgroup/rdma/2/rdma.max
-#Output:
-mlx4_0 hca_handle=2 hca_object=2000
-ocrdma1 hca_handle=3 hca_object=max
-
-(c) Query current usage:
-cat /sys/fs/cgroup/rdma/2/rdma.current
-#Output:
-mlx4_0 hca_handle=1 hca_object=20
-ocrdma1 hca_handle=1 hca_object=23
-
-(d) Delete resource limit:
-echo echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
+=================
+
+(a) Configure resource limit::
+
+ echo mlx4_0 hca_handle=2 hca_object=2000 > /sys/fs/cgroup/rdma/1/rdma.max
+ echo ocrdma1 hca_handle=3 > /sys/fs/cgroup/rdma/2/rdma.max
+
+(b) Query resource limit::
+
+ cat /sys/fs/cgroup/rdma/2/rdma.max
+ #Output:
+ mlx4_0 hca_handle=2 hca_object=2000
+ ocrdma1 hca_handle=3 hca_object=max
+
+(c) Query current usage::
+
+ cat /sys/fs/cgroup/rdma/2/rdma.current
+ #Output:
+ mlx4_0 hca_handle=1 hca_object=20
+ ocrdma1 hca_handle=1 hca_object=23
+
+(d) Delete resource limit::
+
+ echo mlx4_0 hca_handle=max hca_object=max > /sys/fs/cgroup/rdma/1/rdma.max
diff --git a/Documentation/core-api/circular-buffers.rst b/Documentation/core-api/circular-buffers.rst
index 53e51caa3347..50966f66e398 100644
--- a/Documentation/core-api/circular-buffers.rst
+++ b/Documentation/core-api/circular-buffers.rst
@@ -3,7 +3,7 @@ Circular Buffers
================
:Author: David Howells <dhowells@redhat.com>
-:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+:Author: Paul E. McKenney <paulmck@linux.ibm.com>
Linux provides a number of features that can be used to implement circular
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
index 93cbeb9daec0..20ee447a50f3 100644
--- a/Documentation/core-api/timekeeping.rst
+++ b/Documentation/core-api/timekeeping.rst
@@ -65,7 +65,7 @@ different format depending on what is required by the user:
.. c:function:: u64 ktime_get_ns( void )
u64 ktime_get_boottime_ns( void )
u64 ktime_get_real_ns( void )
- u64 ktime_get_tai_ns( void )
+ u64 ktime_get_clocktai_ns( void )
u64 ktime_get_raw_ns( void )
Same as the plain ktime_get functions, but returning a u64 number
@@ -99,16 +99,20 @@ Coarse and fast_ns access
Some additional variants exist for more specialized cases:
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+ ktime_t ktime_get_coarse_boottime( void )
ktime_t ktime_get_coarse_real( void )
ktime_t ktime_get_coarse_clocktai( void )
- ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+ u64 ktime_get_coarse_boottime_ns( void )
+ u64 ktime_get_coarse_real_ns( void )
+ u64 ktime_get_coarse_clocktai_ns( void )
.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
void ktime_get_coarse_boottime_ts64( struct timespec64 * )
void ktime_get_coarse_real_ts64( struct timespec64 * )
void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
- void ktime_get_coarse_raw_ts64( struct timespec64 * )
These are quicker than the non-coarse versions, but less accurate,
corresponding to CLOCK_MONOTONIC_COARSE and CLOCK_REALTIME_COARSE
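+
+For instance, code that only needs tick-level resolution could use one of
+the coarse accessors. A minimal sketch (the log_event() helper and its
+message are made up for illustration)::
+
+    #include <linux/timekeeping.h>
+    #include <linux/printk.h>
+
+    /* Timestamp an event where ~1/HZ accuracy is acceptable. */
+    static void log_event(void)
+    {
+            struct timespec64 ts;
+
+            /* Coarse CLOCK_REALTIME: fast, no clock-hardware access. */
+            ktime_get_coarse_real_ts64(&ts);
+            pr_info("event at %lld.%09ld\n",
+                    (long long)ts.tv_sec, ts.tv_nsec);
+    }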
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index cb61277e2308..b90dafcc8237 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -12,6 +12,12 @@ physical_package_id:
socket number, but the actual value is architecture and platform
dependent.
+die_id:
+
+ the CPU die ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+
core_id:
the CPU core ID of cpuX. Typically it is the hardware platform's
@@ -30,25 +36,33 @@ drawer_id:
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
-thread_siblings:
+core_cpus:
- internal kernel map of cpuX's hardware threads within the same
- core as cpuX.
+ internal kernel map of CPUs within the same core.
+ (deprecated name: "thread_siblings")
-thread_siblings_list:
+core_cpus_list:
- human-readable list of cpuX's hardware threads within the same
- core as cpuX.
+ human-readable list of CPUs within the same core.
+ (deprecated name: "thread_siblings_list")
-core_siblings:
+package_cpus:
- internal kernel map of cpuX's hardware threads within the same
- physical_package_id.
+ internal kernel map of the CPUs sharing the same physical_package_id.
+ (deprecated name: "core_siblings")
-core_siblings_list:
+package_cpus_list:
- human-readable list of cpuX's hardware threads within the same
- physical_package_id.
+ human-readable list of CPUs sharing the same physical_package_id.
+ (deprecated name: "core_siblings_list")
+
+die_cpus:
+
+ internal kernel map of CPUs within the same die.
+
+die_cpus_list:
+
+ human-readable list of CPUs within the same die.
book_siblings:
@@ -81,11 +95,13 @@ For an architecture to support this feature, it must define some of
these macros in include/asm-XXX/topology.h::
#define topology_physical_package_id(cpu)
+ #define topology_die_id(cpu)
#define topology_core_id(cpu)
#define topology_book_id(cpu)
#define topology_drawer_id(cpu)
#define topology_sibling_cpumask(cpu)
#define topology_core_cpumask(cpu)
+ #define topology_die_cpumask(cpu)
#define topology_book_cpumask(cpu)
#define topology_drawer_cpumask(cpu)
@@ -99,9 +115,11 @@ provides default definitions for any of the above macros that are
not defined by include/asm-XXX/topology.h:
1) topology_physical_package_id: -1
-2) topology_core_id: 0
-3) topology_sibling_cpumask: just the given CPU
-4) topology_core_cpumask: just the given CPU
+2) topology_die_id: -1
+3) topology_core_id: 0
+4) topology_sibling_cpumask: just the given CPU
+5) topology_core_cpumask: just the given CPU
+6) topology_die_cpumask: just the given CPU
For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
default definitions for topology_book_id() and topology_book_cpumask().
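+
+As a rough illustration of consuming these macros from kernel code (the
+show_die_siblings() helper below is hypothetical)::
+
+    #include <linux/topology.h>
+    #include <linux/cpumask.h>
+    #include <linux/printk.h>
+
+    /* Print each CPU sharing cpu0's die, with its die and core ids. */
+    static void show_die_siblings(void)
+    {
+            unsigned int cpu;
+
+            for_each_cpu(cpu, topology_die_cpumask(0))
+                    pr_info("cpu%u: die %d core %d\n", cpu,
+                            topology_die_id(cpu), topology_core_id(cpu));
+    }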
diff --git a/Documentation/crypto/api-samples.rst b/Documentation/crypto/api-samples.rst
index f14afaaf2f32..e923f17bc2bd 100644
--- a/Documentation/crypto/api-samples.rst
+++ b/Documentation/crypto/api-samples.rst
@@ -4,111 +4,89 @@ Code Examples
Code Example For Symmetric Key Cipher Operation
-----------------------------------------------
-::
-
-
- /* tie all data structures together */
- struct skcipher_def {
- struct scatterlist sg;
- struct crypto_skcipher *tfm;
- struct skcipher_request *req;
- struct crypto_wait wait;
- };
-
- /* Perform cipher operation */
- static unsigned int test_skcipher_encdec(struct skcipher_def *sk,
- int enc)
- {
- int rc;
-
- if (enc)
- rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req), &sk->wait);
- else
- rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req), &sk->wait);
-
- if (rc)
- pr_info("skcipher encrypt returned with result %d\n", rc);
+This code encrypts some data with AES-256-XTS. For the sake of example,
+all inputs are random bytes, the encryption is done in-place, and it's
+assumed the code is running in a context where it can sleep.
- return rc;
- }
+::
- /* Initialize and trigger cipher operation */
static int test_skcipher(void)
{
- struct skcipher_def sk;
- struct crypto_skcipher *skcipher = NULL;
- struct skcipher_request *req = NULL;
- char *scratchpad = NULL;
- char *ivdata = NULL;
- unsigned char key[32];
- int ret = -EFAULT;
-
- skcipher = crypto_alloc_skcipher("cbc-aes-aesni", 0, 0);
- if (IS_ERR(skcipher)) {
- pr_info("could not allocate skcipher handle\n");
- return PTR_ERR(skcipher);
- }
-
- req = skcipher_request_alloc(skcipher, GFP_KERNEL);
- if (!req) {
- pr_info("could not allocate skcipher request\n");
- ret = -ENOMEM;
- goto out;
- }
-
- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done,
- &sk.wait);
-
- /* AES 256 with random key */
- get_random_bytes(&key, 32);
- if (crypto_skcipher_setkey(skcipher, key, 32)) {
- pr_info("key could not be set\n");
- ret = -EAGAIN;
- goto out;
- }
-
- /* IV will be random */
- ivdata = kmalloc(16, GFP_KERNEL);
- if (!ivdata) {
- pr_info("could not allocate ivdata\n");
- goto out;
- }
- get_random_bytes(ivdata, 16);
-
- /* Input data will be random */
- scratchpad = kmalloc(16, GFP_KERNEL);
- if (!scratchpad) {
- pr_info("could not allocate scratchpad\n");
- goto out;
- }
- get_random_bytes(scratchpad, 16);
-
- sk.tfm = skcipher;
- sk.req = req;
-
- /* We encrypt one block */
- sg_init_one(&sk.sg, scratchpad, 16);
- skcipher_request_set_crypt(req, &sk.sg, &sk.sg, 16, ivdata);
- crypto_init_wait(&sk.wait);
-
- /* encrypt data */
- ret = test_skcipher_encdec(&sk, 1);
- if (ret)
- goto out;
-
- pr_info("Encryption triggered successfully\n");
-
+ struct crypto_skcipher *tfm = NULL;
+ struct skcipher_request *req = NULL;
+ u8 *data = NULL;
+ const size_t datasize = 512; /* data size in bytes */
+ struct scatterlist sg;
+ DECLARE_CRYPTO_WAIT(wait);
+ u8 iv[16]; /* AES-256-XTS takes a 16-byte IV */
+ u8 key[64]; /* AES-256-XTS takes a 64-byte key */
+ int err;
+
+ /*
+ * Allocate a tfm (a transformation object) and set the key.
+ *
+ * In real-world use, a tfm and key are typically used for many
+ * encryption/decryption operations. But in this example, we'll just do a
+ * single encryption operation with it (which is not very efficient).
+ */
+
+ tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ pr_err("Error allocating xts(aes) handle: %ld\n", PTR_ERR(tfm));
+ return PTR_ERR(tfm);
+ }
+
+ get_random_bytes(key, sizeof(key));
+ err = crypto_skcipher_setkey(tfm, key, sizeof(key));
+ if (err) {
+ pr_err("Error setting key: %d\n", err);
+ goto out;
+ }
+
+ /* Allocate a request object */
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Prepare the input data */
+ data = kmalloc(datasize, GFP_KERNEL);
+ if (!data) {
+ err = -ENOMEM;
+ goto out;
+ }
+ get_random_bytes(data, datasize);
+
+ /* Initialize the IV */
+ get_random_bytes(iv, sizeof(iv));
+
+ /*
+ * Encrypt the data in-place.
+ *
+ * For simplicity, in this example we wait for the request to complete
+ * before proceeding, even if the underlying implementation is asynchronous.
+ *
+ * To decrypt instead of encrypt, just change crypto_skcipher_encrypt() to
+ * crypto_skcipher_decrypt().
+ */
+ sg_init_one(&sg, data, datasize);
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+ CRYPTO_TFM_REQ_MAY_SLEEP,
+ crypto_req_done, &wait);
+ skcipher_request_set_crypt(req, &sg, &sg, datasize, iv);
+ err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+ if (err) {
+ pr_err("Error encrypting data: %d\n", err);
+ goto out;
+ }
+
+ pr_debug("Encryption was successful\n");
out:
- if (skcipher)
- crypto_free_skcipher(skcipher);
- if (req)
+ crypto_free_skcipher(tfm);
skcipher_request_free(req);
- if (ivdata)
- kfree(ivdata);
- if (scratchpad)
- kfree(scratchpad);
- return ret;
+ kfree(data);
+ return err;
}
diff --git a/Documentation/crypto/api-skcipher.rst b/Documentation/crypto/api-skcipher.rst
index 4eec4a93f7e3..20ba08dddf2e 100644
--- a/Documentation/crypto/api-skcipher.rst
+++ b/Documentation/crypto/api-skcipher.rst
@@ -5,7 +5,7 @@ Block Cipher Algorithm Definitions
:doc: Block Cipher Algorithm Definitions
.. kernel-doc:: include/linux/crypto.h
- :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg
+ :functions: crypto_alg ablkcipher_alg blkcipher_alg cipher_alg compress_alg
Symmetric Key Cipher API
------------------------
diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst
index ee8ff0762d7f..3eae1ae7f798 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -208,9 +208,7 @@ the aforementioned cipher types:
- CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
an ECDH or DH implementation
-- CRYPTO_ALG_TYPE_DIGEST Raw message digest
-
-- CRYPTO_ALG_TYPE_HASH Alias for CRYPTO_ALG_TYPE_DIGEST
+- CRYPTO_ALG_TYPE_HASH Raw message digest
- CRYPTO_ALG_TYPE_SHASH Synchronous multi-block hash
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
index 1d56221dfe35..236c674d6897 100644
--- a/Documentation/crypto/crypto_engine.rst
+++ b/Documentation/crypto/crypto_engine.rst
@@ -1,50 +1,85 @@
-=============
-CRYPTO ENGINE
+.. SPDX-License-Identifier: GPL-2.0
+Crypto Engine
=============
Overview
--------
-The crypto engine API (CE), is a crypto queue manager.
+The crypto engine (CE) API is a crypto queue manager.
Requirement
-----------
-You have to put at start of your tfm_ctx the struct crypto_engine_ctx::
+You must place the structure crypto_engine at the start of your transform
+context your_tfm_ctx:
+
+::
- struct your_tfm_ctx {
- struct crypto_engine_ctx enginectx;
- ...
- };
+ struct your_tfm_ctx {
+ struct crypto_engine engine;
+ ...
+ };
-Why: Since CE manage only crypto_async_request, it cannot know the underlying
-request_type and so have access only on the TFM.
-So using container_of for accessing __ctx is impossible.
-Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
-so it must assume that crypto_engine_ctx is at start of it.
+The crypto engine only manages asynchronous requests in the form of
+crypto_async_request. It cannot know the underlying request type and thus only
+has access to the transform structure. It is not possible to access the context
+using container_of. In addition, the engine knows nothing about your
+structure "``struct your_tfm_ctx``". The engine assumes (requires) the placement
+of the known member ``struct crypto_engine`` at the beginning.
Order of operations
-------------------
-You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
-And start it via crypto_engine_start().
-
-Before transferring any request, you have to fill the enginectx.
-- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
-- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
-- do_one_request: (taking a function pointer) Do encryption for current request
-
-Note: that those three functions get the crypto_async_request associated with the received request.
-So your need to get the original request via container_of(areq, struct yourrequesttype_request, base);
-
-When your driver receive a crypto_request, you have to transfer it to
-the cryptoengine via one of:
-- crypto_transfer_ablkcipher_request_to_engine()
-- crypto_transfer_aead_request_to_engine()
-- crypto_transfer_akcipher_request_to_engine()
-- crypto_transfer_hash_request_to_engine()
-- crypto_transfer_skcipher_request_to_engine()
-
-At the end of the request process, a call to one of the following function is needed:
-- crypto_finalize_ablkcipher_request
-- crypto_finalize_aead_request
-- crypto_finalize_akcipher_request
-- crypto_finalize_hash_request
-- crypto_finalize_skcipher_request
+You are required to obtain a struct crypto_engine via ``crypto_engine_alloc_init()``.
+Start it via ``crypto_engine_start()``. When finished with your work, shut down the
+engine using ``crypto_engine_stop()`` and destroy the engine with
+``crypto_engine_exit()``.
+
+Before transferring any request, you have to fill the context enginectx by
+providing functions for the following:
+
+* ``prepare_crypt_hardware``: Called once before any prepare functions are
+ called.
+
+* ``unprepare_crypt_hardware``: Called once after all unprepare functions have
+ been called.
+
+* ``prepare_cipher_request``/``prepare_hash_request``: Called before each
+ corresponding request is performed. If some processing or other preparatory
+ work is required, do it here.
+
+* ``unprepare_cipher_request``/``unprepare_hash_request``: Called after each
+ request is handled. Clean up / undo what was done in the prepare function.
+
+* ``cipher_one_request``/``hash_one_request``: Handle the current request by
+ performing the operation.
+
+Note that these functions access the crypto_async_request structure
+associated with the received request. You are able to retrieve the original
+request by using:
+
+::
+
+ container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receives a crypto_request, you must transfer it to
+the crypto engine via one of:
+
+* crypto_transfer_ablkcipher_request_to_engine()
+
+* crypto_transfer_aead_request_to_engine()
+
+* crypto_transfer_akcipher_request_to_engine()
+
+* crypto_transfer_hash_request_to_engine()
+
+* crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following functions is needed:
+
+* crypto_finalize_ablkcipher_request()
+
+* crypto_finalize_aead_request()
+
+* crypto_finalize_akcipher_request()
+
+* crypto_finalize_hash_request()
+
+* crypto_finalize_skcipher_request()
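+
+Putting the steps together, a skeleton driver flow might look as follows
+(error handling is trimmed; my_probe(), my_encrypt(), my_complete() and
+my_remove() are hypothetical names for your driver's hooks)::
+
+    #include <crypto/engine.h>
+    #include <crypto/skcipher.h>
+
+    static struct crypto_engine *engine;
+
+    /* Probe: allocate the engine, then start its queue manager. */
+    static int my_probe(struct device *dev)
+    {
+            engine = crypto_engine_alloc_init(dev, true);
+            if (!engine)
+                    return -ENOMEM;
+            return crypto_engine_start(engine);
+    }
+
+    /* Your alg's encrypt hook: hand the request over to the engine. */
+    static int my_encrypt(struct skcipher_request *req)
+    {
+            return crypto_transfer_skcipher_request_to_engine(engine, req);
+    }
+
+    /* Once the hardware is done with a request, finalize it. */
+    static void my_complete(struct skcipher_request *req, int err)
+    {
+            crypto_finalize_skcipher_request(engine, req, err);
+    }
+
+    /* Remove: stop, then destroy the engine. */
+    static void my_remove(void)
+    {
+            crypto_engine_stop(engine);
+            crypto_engine_exit(engine);
+    }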
diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
index 6b458bb2440d..f2aab3dc2b52 100644
--- a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
+++ b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
@@ -66,16 +66,3 @@ sha@f8034000 {
dmas = <&dma1 2 17>;
dma-names = "tx";
};
-
-* Eliptic Curve Cryptography (I2C)
-
-Required properties:
-- compatible : must be "atmel,atecc508a".
-- reg: I2C bus address of the device.
-- clock-frequency: must be present in the i2c controller node.
-
-Example:
-atecc508a@c0 {
- compatible = "atmel,atecc508a";
- reg = <0xC0>;
-};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
index 553b92a7e87b..bc6b4b62df83 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
@@ -5,6 +5,7 @@ Required Properties:
"ti,keystone-gpio": for Keystone 2 66AK2H/K, 66AK2L,
66AK2E SoCs
"ti,k2g-gpio", "ti,keystone-gpio": for 66AK2G
+ "ti,am654-gpio", "ti,keystone-gpio": for TI K3 AM654
- reg: Physical base address of the controller and the size of memory mapped
registers.
@@ -145,3 +146,20 @@ gpio0: gpio@260bf00 {
ti,ngpio = <32>;
ti,davinci-gpio-unbanked = <32>;
};
+
+Example for K3 AM654:
+
+wkup_gpio0: wkup_gpio0@42110000 {
+ compatible = "ti,am654-gpio", "ti,keystone-gpio";
+ reg = <0x42110000 0x100>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-parent = <&intr_wkup_gpio>;
+ interrupts = <59 128>, <59 129>, <59 130>, <59 131>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ ti,ngpio = <56>;
+ ti,davinci-gpio-unbanked = <0>;
+ clocks = <&k3_clks 59 0>;
+ clock-names = "gpio";
+};
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
deleted file mode 100644
index 89058d375b7c..000000000000
--- a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-ARM PL061 GPIO controller
-
-Required properties:
-- compatible : "arm,pl061", "arm,primecell"
-- #gpio-cells : Should be two. The first cell is the pin number and the
- second cell is used to specify optional parameters:
- - bit 0 specifies polarity (0 for normal, 1 for inverted)
-- gpio-controller : Marks the device node as a GPIO controller.
-- interrupts : Interrupt mapping for GPIO IRQ.
-- gpio-ranges : Interaction with the PINCTRL subsystem.
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
new file mode 100644
index 000000000000..313b17229247
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/pl061-gpio.yaml
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/pl061-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM PL061 GPIO controller
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+ - Rob Herring <robh@kernel.org>
+
+# We need a select here so we don't match all nodes with 'arm,primecell'
+select:
+ properties:
+ compatible:
+ contains:
+ const: arm,pl061
+ required:
+ - compatible
+
+properties:
+ $nodename:
+ pattern: "^gpio@[0-9a-f]+$"
+
+ compatible:
+ items:
+ - const: arm,pl061
+ - const: arm,primecell
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ oneOf:
+ - maxItems: 1
+ - maxItems: 8
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 2
+
+ clocks:
+ maxItems: 1
+
+ clock-names: true
+
+ "#gpio-cells":
+ const: 2
+
+ gpio-controller: true
+
+ gpio-ranges:
+ maxItems: 8
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-controller
+ - "#interrupt-cells"
+ - clocks
+ - "#gpio-cells"
+ - gpio-controller
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt b/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
index 69da2115abdc..1cf6182f888c 100644
--- a/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
+++ b/Documentation/devicetree/bindings/i3c/cdns,i3c-master.txt
@@ -38,6 +38,6 @@ Example:
nunchuk: nunchuk@52 {
compatible = "nintendo,nunchuk";
- reg = <0x52 0x80000010 0>;
+ reg = <0x52 0x0 0x10>;
};
};
diff --git a/Documentation/devicetree/bindings/i3c/i3c.txt b/Documentation/devicetree/bindings/i3c/i3c.txt
index ab729a0a86ae..4ffe059f0fec 100644
--- a/Documentation/devicetree/bindings/i3c/i3c.txt
+++ b/Documentation/devicetree/bindings/i3c/i3c.txt
@@ -39,7 +39,9 @@ valid here, but several new properties have been added.
New constraint on existing properties:
--------------------------------------
- reg: contains 3 cells
- + first cell : still encoding the I2C address
+ + first cell : still encoding the I2C address. 10 bit addressing is not
+ supported. Devices with a 10 bit address can't be properly passed
+ through the DEFSLVS command.
+ second cell: shall be 0
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
new file mode 100644
index 000000000000..4e82fd575cec
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
@@ -0,0 +1,29 @@
+Amazon's Annapurna Labs Fabric Interrupt Controller
+
+Required properties:
+
+- compatible: should be "amazon,al-fic"
+- reg: physical base address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: must be 2.
+ First cell defines the index of the interrupt within the controller.
+ Second cell is used to specify the trigger type and must be one of the
+ following:
+ - bits[3:0] trigger type and level flags
+ 1 = low-to-high edge triggered
+ 4 = active high level-sensitive
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupts: describes which input line of the interrupt parent this
+ FIC's output is connected to. This property depends on the parent's
+ binding.
+
+Example:
+
+amazon_fic: interrupt-controller@fd8a8500 {
+ compatible = "amazon,al-fic";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x0 0xfd8a8500 0x0 0x1000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 0x0 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index 1502a51548bb..7d531d5fff29 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -15,6 +15,7 @@ Required properties:
"amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
"amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912)
"amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
+ "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
- reg : Specifies base physical address and size of the registers.
- interrupt-controller : Identifies the node as an interrupt controller.
- #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
index ab921f1698fb..e13405355166 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/csky,mpintc.txt
@@ -6,11 +6,16 @@ C-SKY Multi-processors Interrupt Controller is designed for ck807/ck810/ck860
SMP soc, and it also could be used in non-SMP system.
Interrupt number definition:
-
0-15 : software irq, and we use 15 as our IPI_IRQ.
16-31 : private irq, and we use 16 as the co-processor timer.
31-1024: common irq for soc ip.
+Interrupt trigger mode: (Defined in dt-bindings/interrupt-controller/irq.h)
+ IRQ_TYPE_LEVEL_HIGH (default)
+ IRQ_TYPE_LEVEL_LOW
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+
=============================
intc node bindings definition
=============================
@@ -26,15 +31,22 @@ intc node bindings definition
- #interrupt-cells
Usage: required
Value type: <u32>
- Definition: must be <1>
+ Definition: <2>
- interrupt-controller:
Usage: required
-Examples:
+Examples: ("interrupts = <irq_num IRQ_TYPE_XXX>")
---------
+#include <dt-bindings/interrupt-controller/irq.h>
intc: interrupt-controller {
compatible = "csky,mpintc";
- #interrupt-cells = <1>;
+ #interrupt-cells = <2>;
interrupt-controller;
};
+
+ device: device-example {
+ ...
+ interrupts = <34 IRQ_TYPE_EDGE_RISING>;
+ interrupt-parent = <&intc>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
new file mode 100644
index 000000000000..727b7e4cd6e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,rza1-irqc.txt
@@ -0,0 +1,43 @@
+DT bindings for the Renesas RZ/A1 Interrupt Controller
+
+The RZ/A1 Interrupt Controller is a front-end for the GIC found on Renesas
+RZ/A1 and RZ/A2 SoCs:
+ - IRQ sense select for 8 external interrupts, 1:1-mapped to 8 GIC SPI
+ interrupts,
+ - NMI edge select.
+
+Required properties:
+ - compatible: Must be "renesas,<soctype>-irqc", and "renesas,rza1-irqc" as
+ fallback.
+ Examples with soctypes are:
+ - "renesas,r7s72100-irqc" (RZ/A1H)
+ - "renesas,r7s9210-irqc" (RZ/A2M)
+ - #interrupt-cells: Must be 2 (an interrupt index and flags, as defined
+ in interrupts.txt in this directory)
+ - #address-cells: Must be zero
+ - interrupt-controller: Marks the device as an interrupt controller
+ - reg: Base address and length of the memory resource used by the interrupt
+ controller
+ - interrupt-map: Specifies the mapping from external interrupts to GIC
+ interrupts
+ - interrupt-map-mask: Must be <7 0>
+
+Example:
+
+ irqc: interrupt-controller@fcfef800 {
+ compatible = "renesas,r7s72100-irqc", "renesas,rza1-irqc";
+ #interrupt-cells = <2>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0xfcfef800 0x6>;
+ interrupt-map =
+ <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-map-mask = <7 0>;
+ };
diff --git a/Documentation/devicetree/bindings/leds/leds-lm36274.txt b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
new file mode 100644
index 000000000000..39c230d59a4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm36274.txt
@@ -0,0 +1,85 @@
+* Texas Instruments LM36274 4-Channel LCD Backlight Driver w/Integrated Bias
+
+The LM36274 is an integrated four-channel WLED driver and LCD bias supply.
+The backlight boost provides the power to bias four parallel LED strings with
+up to 29V total output voltage. The 11-bit LED current, from 60 uA to 30 mA,
+is programmable via the I2C bus and/or controlled via a logic-level PWM input.
+
+Parent device properties are documented in
+Documentation/devicetree/bindings/mfd/ti-lmu.txt
+
+Regulator properties are documented in
+Documentation/devicetree/bindings/regulator/lm363x-regulator.txt
+
+Required backlight properties:
+ - compatible:
+ "ti,lm36274-backlight"
+ - reg : 0
+ - #address-cells : 1
+ - #size-cells : 0
+ - led-sources : Indicates which LED strings will be enabled.
+ Values range from 0 to 3; sources are zero based, so the
+ string number is the source value + 1.
+
+Optional backlight properties:
+ - label : see Documentation/devicetree/bindings/leds/common.txt
+ - linux,default-trigger :
+ see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+HVLED strings 1 and 3 are controlled by control bank A, and HVLED string 2
+is controlled by control bank B.
+
+lm36274@11 {
+ compatible = "ti,lm36274";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11>;
+
+ enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+
+ regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,lm363x-regulator";
+
+ enable-gpios = <&pioC 0 GPIO_ACTIVE_HIGH>,
+ <&pioC 1 GPIO_ACTIVE_HIGH>;
+
+ vboost {
+ regulator-name = "lcd_boost";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <7150000>;
+ regulator-always-on;
+ };
+
+ vpos {
+ regulator-name = "lcd_vpos";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <6500000>;
+ };
+
+ vneg {
+ regulator-name = "lcd_vneg";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <6500000>;
+ };
+ };
+
+ backlight {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,lm36274-backlight";
+
+ led@0 {
+ reg = <0>;
+ led-sources = <0 2>;
+ label = "white:backlight_cluster";
+ linux,default-trigger = "backlight";
+ };
+ };
+};
+
+For more product information please see the link below:
+http://www.ti.com/lit/ds/symlink/lm36274.pdf
diff --git a/Documentation/devicetree/bindings/leds/leds-lm3697.txt b/Documentation/devicetree/bindings/leds/leds-lm3697.txt
new file mode 100644
index 000000000000..63992d732959
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm3697.txt
@@ -0,0 +1,73 @@
+* Texas Instruments - LM3697 Highly Efficient White LED Driver
+
+The LM3697 11-bit LED driver provides high-performance backlight dimming
+for 1, 2, or 3 series LED strings while delivering up to 90% efficiency.
+
+This device is suitable for display and keypad lighting.
+
+Required properties:
+ - compatible:
+ "ti,lm3697"
+ - reg : I2C slave address
+ - #address-cells : 1
+ - #size-cells : 0
+
+Optional properties:
+ - enable-gpios : GPIO pin to enable/disable the device
+ - vled-supply : LED supply
+
+Required child properties:
+ - reg : 0 - LED is controlled by bank A
+ 1 - LED is controlled by bank B
+ - led-sources : Indicates which HVLED string is associated with which
+ control bank. This is a zero based property, so
+ HVLED1 = 0, HVLED2 = 1, HVLED3 = 2.
+ Additional information is contained
+ in Documentation/devicetree/bindings/leds/common.txt
+
+Optional child properties:
+ - ti,brightness-resolution - see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+ - ramp-up-us: see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+ - ramp-down-us: see Documentation/devicetree/bindings/mfd/ti-lmu.txt
+ - label : see Documentation/devicetree/bindings/leds/common.txt
+ - linux,default-trigger :
+ see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+HVLED strings 1 and 3 are controlled by control bank A, and HVLED string 2
+is controlled by control bank B.
+
+led-controller@36 {
+ compatible = "ti,lm3697";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x36>;
+
+ enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ vled-supply = <&vbatt>;
+
+ led@0 {
+ reg = <0>;
+ led-sources = <0 2>;
+ ti,brightness-resolution = <2047>;
+ ramp-up-us = <5000>;
+ ramp-down-us = <1000>;
+ label = "white:first_backlight_cluster";
+ linux,default-trigger = "backlight";
+ };
+
+ led@1 {
+ reg = <1>;
+ led-sources = <1>;
+ ti,brightness-resolution = <255>;
+ ramp-up-us = <500>;
+ ramp-down-us = <1000>;
+ label = "white:second_backlight_cluster";
+ linux,default-trigger = "backlight";
+ };
+};
+
+For more product information please see the link below:
+http://www.ti.com/lit/ds/symlink/lm3697.pdf
diff --git a/Documentation/devicetree/bindings/leds/leds-spi-byte.txt b/Documentation/devicetree/bindings/leds/leds-spi-byte.txt
new file mode 100644
index 000000000000..28b6b2d9091e
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-spi-byte.txt
@@ -0,0 +1,44 @@
+* Single Byte SPI LED Device Driver.
+
+The driver can be used for controllers with a very simple SPI protocol:
+- one LED is controlled by a single byte on MOSI
+- the value of the byte gives the brightness between two values (lowest to
+ highest)
+- no return value is necessary (no MISO signal)
+
+The value for lowest and highest brightness is dependent on the device and
+therefore on the compatible string.
+
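+For illustration only, the matching driver logic could be as simple as the
+sketch below (set_brightness() is a hypothetical helper; a real driver
+should use a DMA-safe buffer for the transfer):
+
+    #include <linux/spi/spi.h>
+
+    /* One byte on MOSI sets the brightness; nothing comes back on MISO. */
+    static int set_brightness(struct spi_device *spi, u8 value)
+    {
+            return spi_write(spi, &value, 1);
+    }
+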
+Depending on the compatible string, some special functions (like hardware
+accelerated blinking) might be supported too.
+
+The driver currently only supports one LED. The properties of the LED are
+configured in a sub-node in the device node.
+
+Required properties:
+- compatible: should be one of
+ * "ubnt,acb-spi-led" microcontroller (SONiX 8F26E611LA) based device
+ used for example in Ubiquiti airCube ISP
+
+Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt
+apply.
+
+LED sub-node properties:
+- label:
+ see Documentation/devicetree/bindings/leds/common.txt
+- default-state:
+ see Documentation/devicetree/bindings/leds/common.txt
+ Only "on" and "off" are supported.
+
+Example:
+
+led-controller@0 {
+ compatible = "ubnt,acb-spi-led";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+
+ led {
+ label = "white:status";
+ default-state = "on";
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index 86ca786d54fc..2296b8f24de4 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -8,7 +8,7 @@ TI LMU driver supports lighting devices below.
LM3632 Backlight and regulator
LM3633 Backlight, LED and fault monitor
LM3695 Backlight
- LM3697 Backlight and fault monitor
+ LM36274 Backlight and regulator
Required properties:
- compatible: Should be one of:
@@ -16,15 +16,32 @@ Required properties:
"ti,lm3632"
"ti,lm3633"
"ti,lm3695"
- "ti,lm3697"
+ "ti,lm36274"
- reg: I2C slave address.
0x11 for LM3632
0x29 for LM3631
- 0x36 for LM3633, LM3697
+ 0x36 for LM3633
0x63 for LM3695
+ 0x11 for LM36274
-Optional property:
+Optional properties:
- enable-gpios: A GPIO specifier for hardware enable pin.
+ - ramp-up-us: Current ramping from one brightness level to
+ a higher brightness level.
+ Range from 2048 us to 117.44 s
+ - ramp-down-us: Current ramping from one brightness level to
+ a lower brightness level.
+ Range from 2048 us to 117.44 s
+ - ti,brightness-resolution : This determines whether to use 8-bit or
+ 11-bit brightness mode. If this value is
+ not set, the device defaults to the preferred
+ 8-bit brightness mode per section 7.3.4.1 of
+ the data sheet. This setting can either be in
+ the parent node or in the LED child nodes,
+ depending on whether the part's strings have
+ a common brightness register or individual
+ brightness registers.
+ The values are 255 (8-bit) or 2047 (11-bit).
Required node:
- backlight: All LMU devices have backlight child nodes.
@@ -35,14 +52,15 @@ Optional nodes:
Required properties:
- compatible: Should be one of:
"ti,lm3633-fault-monitor"
- "ti,lm3697-fault-monitor"
- leds: LED properties for LM3633. Please refer to [2].
+ LED properties for LM36274. Please refer to [4].
- regulators: Regulator properties for LM3631 and LM3632.
Please refer to [3].
[1] ../leds/backlight/ti-lmu-backlight.txt
[2] ../leds/leds-lm3633.txt
[3] ../regulator/lm363x-regulator.txt
+[4] ../leds/leds-lm36274.txt
lm3631@29 {
compatible = "ti,lm3631";
@@ -90,7 +108,7 @@ lm3631@29 {
lcd_bl {
led-sources = <0 1>;
- ramp-up-msec = <300>;
+ ramp-up-us = <300000>;
};
};
};
@@ -152,15 +170,15 @@ lm3633@36 {
main {
label = "main_lcd";
led-sources = <1 2>;
- ramp-up-msec = <500>;
- ramp-down-msec = <500>;
+ ramp-up-us = <500000>;
+ ramp-down-us = <500000>;
};
front {
label = "front_lcd";
led-sources = <0>;
- ramp-up-msec = <1000>;
- ramp-down-msec = <0>;
+ ramp-up-us = <1000000>;
+ ramp-down-us = <0>;
};
};
@@ -201,23 +219,51 @@ lm3695@63 {
};
};
-lm3697@36 {
- compatible = "ti,lm3697";
- reg = <0x36>;
+lm36274@11 {
+ compatible = "ti,lm36274";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11>;
enable-gpios = <&pioC 2 GPIO_ACTIVE_HIGH>;
+ regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,lm363x-regulator";
- backlight {
- compatible = "ti,lm3697-backlight";
+ enable-gpios = <&pioC 0 GPIO_ACTIVE_HIGH>,
+ <&pioC 1 GPIO_ACTIVE_HIGH>;
- lcd {
- led-sources = <0 1 2>;
- ramp-up-msec = <200>;
- ramp-down-msec = <200>;
+ vboost {
+ regulator-name = "lcd_boost";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <7150000>;
+ regulator-always-on;
+ };
+
+ vpos {
+ regulator-name = "lcd_vpos";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <6500000>;
+ };
+
+ vneg {
+ regulator-name = "lcd_vneg";
+ regulator-min-microvolt = <4000000>;
+ regulator-max-microvolt = <6500000>;
};
};
- fault-monitor {
- compatible = "ti,lm3697-fault-monitor";
+ backlight {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,lm36274-backlight";
+
+ led@0 {
+ reg = <0>;
+ led-sources = <0 2>;
+ label = "white:backlight_cluster";
+ linux,default-trigger = "backlight";
+ };
};
};
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
new file mode 100644
index 000000000000..d77e3f26f9e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -0,0 +1,21 @@
+* Freescale(NXP) IMX8 DDR performance monitor
+
+Required properties:
+
+- compatible: should be one of:
+ "fsl,imx8-ddr-pmu"
+ "fsl,imx8m-ddr-pmu"
+
+- reg: physical address and size
+
+- interrupts: single interrupt
+ generated by the control block
+
+Example:
+
+ ddr-pmu@5c020000 {
+ compatible = "fsl,imx8-ddr-pmu";
+ reg = <0x5c020000 0x10000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
index 7d9d3f90641b..493bec80d59b 100644
--- a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
@@ -2,10 +2,7 @@ Ingenic JZ47xx PWM Controller
=============================
Required properties:
-- compatible: One of:
- * "ingenic,jz4740-pwm"
- * "ingenic,jz4770-pwm"
- * "ingenic,jz4780-pwm"
+- compatible: Should be "ingenic,jz4740-pwm"
- #pwm-cells: Should be 3. See pwm.txt in this directory for a description
of the cells format.
- clocks : phandle to the external clock.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.txt b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
new file mode 100644
index 000000000000..36447e3c9378
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.txt
@@ -0,0 +1,33 @@
+SiFive PWM controller
+
+Unlike most other PWM controllers, the SiFive PWM controller currently
+supports only one period for all channels in the PWM: all channels must run
+at the same period. The period is also restricted to a limited set of
+achievable values; the driver rounds requests to the nearest achievable period.
+PWM RTL that corresponds to the IP block version numbers can be found
+here:
+
+https://github.com/sifive/sifive-blocks/tree/master/src/main/scala/devices/pwm
+
+Required properties:
+- compatible: Should be "sifive,<chip>-pwm" and "sifive,pwm<version>".
+ Supported compatible strings are: "sifive,fu540-c000-pwm" for the SiFive
+ PWM v0 as integrated onto the SiFive FU540 chip, and "sifive,pwm0" for the
+ SiFive PWM v0 IP block with no chip integration tweaks.
+ Please refer to sifive-blocks-ip-versioning.txt for details.
+- reg: physical base address and length of the controller's registers
+- clocks: Should contain a clock identifier for the PWM's parent clock.
+- #pwm-cells: Should be 3. See pwm.txt in this directory
+ for a description of the cell format.
+- interrupts: one interrupt per PWM channel
+
+Examples:
+
+pwm: pwm@10020000 {
+ compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
+ reg = <0x0 0x10020000 0x0 0x1000>;
+ clocks = <&tlclk>;
+ interrupt-parent = <&plic>;
+ interrupts = <42 43 44 45>;
+ #pwm-cells = <3>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
index bd23302e84be..6521bc44a74e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
@@ -11,8 +11,10 @@ Required parameters:
bindings defined in pwm.txt.
Optional properties:
-- pinctrl-names: Set to "default".
-- pinctrl-0: Phandle pointing to pin configuration node for PWM.
+- pinctrl-names: Set to "default". An additional "sleep" state can be
+ defined to put the pins in sleep state when in low power mode.
+- pinctrl-n: Phandle(s) pointing to pin configuration nodes for PWM,
+ respectively for the "default" and "sleep" states.
Example:
timer@40002400 {
@@ -21,7 +23,8 @@ Example:
pwm {
compatible = "st,stm32-pwm-lp";
#pwm-cells = <3>;
- pinctrl-names = "default";
+ pinctrl-names = "default", "sleep";
pinctrl-0 = <&lppwm1_pins>;
+ pinctrl-1 = <&lppwm1_sleep_pins>;
};
};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
index 3e6d55018d7a..a8690bfa5e1f 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
@@ -8,6 +8,8 @@ Required parameters:
- pinctrl-names: Set to "default".
- pinctrl-0: List of phandles pointing to pin configuration nodes for PWM module.
For Pinctrl properties see ../pinctrl/pinctrl-bindings.txt
+- #pwm-cells: Should be set to 3. This PWM chip uses the default 3 cells
+ bindings defined in pwm.txt.
Optional parameters:
- st,breakinput: One or two <index level filter> to describe break input configurations.
@@ -28,6 +30,7 @@ Example:
pwm {
compatible = "st,stm32-pwm";
+ #pwm-cells = <3>;
pinctrl-0 = <&pwm1_pins>;
pinctrl-names = "default";
st,breakinput = <0 1 5>;
diff --git a/Documentation/devicetree/bindings/regulator/arizona-regulator.txt b/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
index 443564d7784f..69bf41949b01 100644
--- a/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/arizona-regulator.txt
@@ -5,7 +5,8 @@ of analogue I/O.
This document lists regulator specific bindings, see the primary binding
document:
- ../mfd/arizona.txt
+ For Wolfson Microelectronic Arizona codecs: ../mfd/arizona.txt
+ For Cirrus Logic Madera codecs: ../mfd/madera.txt
Optional properties:
- wlf,ldoena : GPIO specifier for the GPIO controlling LDOENA
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
index d289c2f7455a..a650b457085d 100644
--- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
@@ -12,10 +12,13 @@ maintainers:
description:
Any property defined as part of the core regulator binding, defined in
- regulator.txt, can also be used. However a fixed voltage regulator is
+ regulator.yaml, can also be used. However a fixed voltage regulator is
expected to have the regulator-min-microvolt and regulator-max-microvolt
to be the same.
+allOf:
+ - $ref: "regulator.yaml#"
+
properties:
compatible:
const: regulator-fixed
diff --git a/Documentation/devicetree/bindings/regulator/gpio-regulator.txt b/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
deleted file mode 100644
index dd25e73b5d79..000000000000
--- a/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-GPIO controlled regulators
-
-Required properties:
-- compatible : Must be "regulator-gpio".
-- regulator-name : Defined in regulator.txt as optional, but required
- here.
-- gpios : Array of one or more GPIO pins used to select the
- regulator voltage/current listed in "states".
-- states : Selection of available voltages/currents provided by
- this regulator and matching GPIO configurations to
- achieve them. If there are no states in the "states"
- array, use a fixed regulator instead.
-
-Optional properties:
-- enable-gpios : GPIO used to enable/disable the regulator.
- Warning, the GPIO phandle flags are ignored and the
- GPIO polarity is controlled solely by the presence
- of "enable-active-high" DT property. This is due to
- compatibility with old DTs.
-- enable-active-high : Polarity of "enable-gpio" GPIO is active HIGH.
- Default is active LOW.
-- gpios-states : On operating systems, that don't support reading back
- gpio values in output mode (most notably linux), this
- array provides the state of GPIO pins set when
- requesting them from the gpio controller. Systems,
- that are capable of preserving state when requesting
- the lines, are free to ignore this property.
- 0: LOW, 1: HIGH. Default is LOW if nothing else
- is specified.
-- startup-delay-us : Startup time in microseconds.
-- regulator-type : Specifies what is being regulated, must be either
- "voltage" or "current", defaults to voltage.
-
-Any property defined as part of the core regulator binding defined in
-regulator.txt can also be used.
-
-Example:
-
- mmciv: gpio-regulator {
- compatible = "regulator-gpio";
-
- regulator-name = "mmci-gpio-supply";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <2600000>;
- regulator-boot-on;
-
- enable-gpios = <&gpio0 23 0x4>;
- gpios = <&gpio0 24 0x4
- &gpio0 25 0x4>;
- states = <1800000 0x3
- 2200000 0x2
- 2600000 0x1
- 2900000 0x0>;
-
- startup-delay-us = <100000>;
- enable-active-high;
- };
diff --git a/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml b/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml
new file mode 100644
index 000000000000..9d3b28417fb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/gpio-regulator.yaml
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/gpio-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO controlled regulators
+
+maintainers:
+ - Liam Girdwood <lgirdwood@gmail.com>
+ - Mark Brown <broonie@kernel.org>
+
+description:
+ Any property defined as part of the core regulator binding, defined in
+ regulator.yaml, can also be used.
+
+allOf:
+ - $ref: "regulator.yaml#"
+
+properties:
+ compatible:
+ const: regulator-gpio
+
+ regulator-name: true
+
+ enable-gpios:
+ description: GPIO to use to enable/disable the regulator.
+ Warning, the GPIO phandle flags are ignored and the GPIO polarity is
+ controlled solely by the presence of "enable-active-high" DT property.
+ This is due to compatibility with old DTs.
+ maxItems: 1
+
+ gpios:
+ description: Array of one or more GPIO pins used to select the regulator
+ voltage/current listed in "states".
+ minItems: 1
+ maxItems: 8 # Should be enough...
+
+ gpios-states:
+ description: |
+ On operating systems that don't support reading back gpio values in
+ output mode (most notably Linux), this array provides the state of GPIO
+ pins set when requesting them from the gpio controller. Systems that
+ are capable of preserving state when requesting the lines are free to
+ ignore this property.
+ 0: LOW
+ 1: HIGH
+ Default is LOW if nothing else is specified.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ - maxItems: 8
+ items:
+ enum: [ 0, 1 ]
+ default: 0
+
+ states:
+ description: Selection of available voltages/currents provided by this
+ regulator and matching GPIO configurations to achieve them. If there are
+ no states in the "states" array, use a fixed regulator instead.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ - maxItems: 8
+ items:
+ items:
+ - description: Voltage in microvolts
+ - description: GPIO group state value
+
+ startup-delay-us:
+ description: startup time in microseconds
+
+ enable-active-high:
+ description: Polarity of "enable-gpio" GPIO is active HIGH. Default is
+ active LOW.
+ type: boolean
+
+ gpio-open-drain:
+ description:
+ GPIO is open drain type. If this property is missing then default
+ assumption is false.
+ type: boolean
+
+ regulator-type:
+ description: Specifies what is being regulated.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/string
+ - enum:
+ - voltage
+ - current
+ default: voltage
+
+required:
+ - compatible
+ - regulator-name
+ - gpios
+ - states
+
+examples:
+ - |
+ gpio-regulator {
+ compatible = "regulator-gpio";
+
+ regulator-name = "mmci-gpio-supply";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2600000>;
+ regulator-boot-on;
+
+ enable-gpios = <&gpio0 23 0x4>;
+ gpios = <&gpio0 24 0x4
+ &gpio0 25 0x4>;
+ states = <1800000 0x3>,
+ <2200000 0x2>,
+ <2600000 0x1>,
+ <2900000 0x0>;
+
+ startup-delay-us = <100000>;
+ enable-active-high;
+ };
+...
diff --git a/Documentation/devicetree/bindings/regulator/max8660.txt b/Documentation/devicetree/bindings/regulator/max8660.txt
deleted file mode 100644
index 8ba994d8a142..000000000000
--- a/Documentation/devicetree/bindings/regulator/max8660.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Maxim MAX8660 voltage regulator
-
-Required properties:
-- compatible: must be one of "maxim,max8660", "maxim,max8661"
-- reg: I2C slave address, usually 0x34
-- any required generic properties defined in regulator.txt
-
-Example:
-
- i2c_master {
- max8660@34 {
- compatible = "maxim,max8660";
- reg = <0x34>;
-
- regulators {
- regulator@0 {
- regulator-compatible= "V3(DCDC)";
- regulator-min-microvolt = <725000>;
- regulator-max-microvolt = <1800000>;
- };
-
- regulator@1 {
- regulator-compatible= "V4(DCDC)";
- regulator-min-microvolt = <725000>;
- regulator-max-microvolt = <1800000>;
- };
-
- regulator@2 {
- regulator-compatible= "V5(LDO)";
- regulator-min-microvolt = <1700000>;
- regulator-max-microvolt = <2000000>;
- };
-
- regulator@3 {
- regulator-compatible= "V6(LDO)";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3300000>;
- };
-
- regulator@4 {
- regulator-compatible= "V7(LDO)";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <3300000>;
- };
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/regulator/max8660.yaml b/Documentation/devicetree/bindings/regulator/max8660.yaml
new file mode 100644
index 000000000000..9c038698f880
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8660.yaml
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/max8660.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Maxim MAX8660 voltage regulator
+
+maintainers:
+ - Daniel Mack <zonque@gmail.com>
+
+properties:
+ $nodename:
+ pattern: "pmic@[0-9a-f]{1,2}"
+ compatible:
+ enum:
+ - maxim,max8660
+ - maxim,max8661
+
+ reg:
+ maxItems: 1
+
+ regulators:
+ type: object
+
+ patternProperties:
+ "regulator-.+":
+ $ref: "regulator.yaml#"
+
+ additionalProperties: false
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@34 {
+ compatible = "maxim,max8660";
+ reg = <0x34>;
+
+ regulators {
+ regulator-V3 {
+ regulator-compatible= "V3(DCDC)";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ regulator-V4 {
+ regulator-compatible= "V4(DCDC)";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ regulator-V5 {
+ regulator-compatible= "V5(LDO)";
+ regulator-min-microvolt = <1700000>;
+ regulator-max-microvolt = <2000000>;
+ };
+
+ regulator-V6 {
+ regulator-compatible= "V6(LDO)";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ regulator-V7 {
+ regulator-compatible= "V7(LDO)";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+ };
+ };
+ };
+...
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index 406f2e570c50..430b8622bda1 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -4,11 +4,13 @@ Qualcomm SPMI Regulators
Usage: required
Value type: <string>
Definition: must be one of:
+ "qcom,pm8005-regulators"
"qcom,pm8841-regulators"
"qcom,pm8916-regulators"
"qcom,pm8941-regulators"
"qcom,pm8994-regulators"
"qcom,pmi8994-regulators"
+ "qcom,pms405-regulators"
- interrupts:
Usage: optional
@@ -110,6 +112,23 @@ Qualcomm SPMI Regulators
Definition: Reference to regulator supplying the input pin, as
described in the data sheet.
+- vdd_l1_l2-supply:
+- vdd_l3_l8-supply:
+- vdd_l4-supply:
+- vdd_l5_l6-supply:
+- vdd_l10_l11_l12_l13-supply:
+- vdd_l7-supply:
+- vdd_l9-supply:
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+ Usage: optional (pms405 only)
+ Value type: <phandle>
+ Definition: Reference to regulator supplying the input pin, as
+ described in the data sheet.
+
- qcom,saw-reg:
Usage: optional
Value type: <phandle>
@@ -120,6 +139,9 @@ The regulator node houses sub-nodes for each regulator within the device. Each
sub-node is identified using the node's name, with valid values listed for each
of the PMICs below.
+pm8005:
+ s1, s2, s3, s4
+
pm8841:
s1, s2, s3, s4, s5, s6, s7, s8
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index 0a3f087d5844..487ccd8370b3 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -1,139 +1 @@
-Voltage/Current Regulators
-
-Optional properties:
-- regulator-name: A string used as a descriptive name for regulator outputs
-- regulator-min-microvolt: smallest voltage consumers may set
-- regulator-max-microvolt: largest voltage consumers may set
-- regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops
-- regulator-min-microamp: smallest current consumers may set
-- regulator-max-microamp: largest current consumers may set
-- regulator-input-current-limit-microamp: maximum input current regulator allows
-- regulator-always-on: boolean, regulator should never be disabled
-- regulator-boot-on: bootloader/firmware enabled regulator
-- regulator-allow-bypass: allow the regulator to go into bypass mode
-- regulator-allow-set-load: allow the regulator performance level to be configured
-- <name>-supply: phandle to the parent supply/regulator node
-- regulator-ramp-delay: ramp delay for regulator(in uV/us)
- For hardware which supports disabling ramp rate, it should be explicitly
- initialised to zero (regulator-ramp-delay = <0>) for disabling ramp delay.
-- regulator-enable-ramp-delay: The time taken, in microseconds, for the supply
- rail to reach the target voltage, plus/minus whatever tolerance the board
- design requires. This property describes the total system ramp time
- required due to the combination of internal ramping of the regulator itself,
- and board design issues such as trace capacitance and load on the supply.
-- regulator-settling-time-us: Settling time, in microseconds, for voltage
- change if regulator have the constant time for any level voltage change.
- This is useful when regulator have exponential voltage change.
-- regulator-settling-time-up-us: Settling time, in microseconds, for voltage
- increase if the regulator needs a constant time to settle after voltage
- increases of any level. This is useful for regulators with exponential
- voltage changes.
-- regulator-settling-time-down-us: Settling time, in microseconds, for voltage
- decrease if the regulator needs a constant time to settle after voltage
- decreases of any level. This is useful for regulators with exponential
- voltage changes.
-- regulator-soft-start: Enable soft start so that voltage ramps slowly
-- regulator-state-standby sub-root node for Standby mode
- : equivalent with standby Linux sleep state, which provides energy savings
- with a relatively quick transition back time.
-- regulator-state-mem sub-root node for Suspend-to-RAM mode
- : suspend to memory, the device goes to sleep, but all data stored in memory,
- only some external interrupt can wake the device.
-- regulator-state-disk sub-root node for Suspend-to-DISK mode
- : suspend to disk, this state operates similarly to Suspend-to-RAM,
- but includes a final step of writing memory contents to disk.
-- regulator-state-[mem/disk/standby] node has following common properties:
- - regulator-on-in-suspend: regulator should be on in suspend state.
- - regulator-off-in-suspend: regulator should be off in suspend state.
- - regulator-suspend-min-microvolt: minimum voltage may be set in
- suspend state.
- - regulator-suspend-max-microvolt: maximum voltage may be set in
- suspend state.
- - regulator-suspend-microvolt: the default voltage which regulator
- would be set in suspend. This property is now deprecated, instead
- setting voltage for suspend mode via the API which regulator
- driver provides is recommended.
- - regulator-changeable-in-suspend: whether the default voltage and
- the regulator on/off in suspend can be changed in runtime.
- - regulator-mode: operating mode in the given suspend state.
- The set of possible operating modes depends on the capabilities of
- every hardware so the valid modes are documented on each regulator
- device tree binding document.
-- regulator-initial-mode: initial operating mode. The set of possible operating
- modes depends on the capabilities of every hardware so each device binding
- documentation explains which values the regulator supports.
-- regulator-allowed-modes: list of operating modes that software is allowed to
- configure for the regulator at run-time. Elements may be specified in any
- order. The set of possible operating modes depends on the capabilities of
- every hardware so each device binding document explains which values the
- regulator supports.
-- regulator-system-load: Load in uA present on regulator that is not captured by
- any consumer request.
-- regulator-pull-down: Enable pull down resistor when the regulator is disabled.
-- regulator-over-current-protection: Enable over current protection.
-- regulator-active-discharge: tristate, enable/disable active discharge of
- regulators. The values are:
- 0: Disable active discharge.
- 1: Enable active discharge.
- Absence of this property will leave configuration to default.
-- regulator-coupled-with: Regulators with which the regulator
- is coupled. The linkage is 2-way - all coupled regulators should be linked
- with each other. A regulator should not be coupled with its supplier.
-- regulator-coupled-max-spread: Array of maximum spread between voltages of
- coupled regulators in microvolts, each value in the array relates to the
- corresponding couple specified by the regulator-coupled-with property.
-- regulator-max-step-microvolt: Maximum difference between current and target
- voltages that can be changed safely in a single step.
-
-Deprecated properties:
-- regulator-compatible: If a regulator chip contains multiple
- regulators, and if the chip's binding contains a child node that
- describes each regulator, then this property indicates which regulator
- this child node is intended to configure. If this property is missing,
- the node's name will be used instead.
-
-Example:
-
- xyzreg: regulator@0 {
- regulator-min-microvolt = <1000000>;
- regulator-max-microvolt = <2500000>;
- regulator-always-on;
- vin-supply = <&vin>;
-
- regulator-state-mem {
- regulator-on-in-suspend;
- };
- };
-
-Regulator Consumers:
-Consumer nodes can reference one or more of its supplies/
-regulators using the below bindings.
-
-- <name>-supply: phandle to the regulator node
-
-These are the same bindings that a regulator in the above
-example used to reference its own supply, in which case
-its just seen as a special case of a regulator being a
-consumer itself.
-
-Example of a consumer device node (mmc) referencing two
-regulators (twl_reg1 and twl_reg2),
-
- twl_reg1: regulator@0 {
- ...
- ...
- ...
- };
-
- twl_reg2: regulator@1 {
- ...
- ...
- ...
- };
-
- mmc: mmc@0 {
- ...
- ...
- vmmc-supply = <&twl_reg1>;
- vmmcaux-supply = <&twl_reg2>;
- };
+This file has moved to regulator.yaml.
diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml
new file mode 100644
index 000000000000..02c3043ce419
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Voltage/Current Regulators
+
+maintainers:
+ - Liam Girdwood <lgirdwood@gmail.com>
+ - Mark Brown <broonie@kernel.org>
+
+properties:
+ regulator-name:
+ description: A string used as a descriptive name for regulator outputs
+ $ref: "/schemas/types.yaml#/definitions/string"
+
+ regulator-min-microvolt:
+ description: smallest voltage consumers may set
+
+ regulator-max-microvolt:
+ description: largest voltage consumers may set
+
+ regulator-microvolt-offset:
+ description: Offset applied to voltages to compensate for voltage drops
+
+ regulator-min-microamp:
+ description: smallest current consumers may set
+
+ regulator-max-microamp:
+ description: largest current consumers may set
+
+ regulator-input-current-limit-microamp:
+ description: maximum input current regulator allows
+
+ regulator-always-on:
+ description: boolean, regulator should never be disabled
+ type: boolean
+
+ regulator-boot-on:
+ description: bootloader/firmware enabled regulator
+ type: boolean
+
+ regulator-allow-bypass:
+ description: allow the regulator to go into bypass mode
+ type: boolean
+
+ regulator-allow-set-load:
+ description: allow the regulator performance level to be configured
+ type: boolean
+
+ regulator-ramp-delay:
+ description: ramp delay for regulator (in uV/us). For hardware which
+ supports disabling the ramp rate, it should be explicitly initialised
+ to zero (regulator-ramp-delay = <0>) to disable the ramp delay.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ regulator-enable-ramp-delay:
+ description: The time taken, in microseconds, for the supply rail to
+ reach the target voltage, plus/minus whatever tolerance the board
+ design requires. This property describes the total system ramp time
+ required due to the combination of internal ramping of the regulator
+ itself, and board design issues such as trace capacitance and load
+ on the supply.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ regulator-settling-time-us:
+ description: Settling time, in microseconds, for a voltage change if
+ the regulator needs a constant time to settle after voltage changes
+ of any level. This is useful for regulators with exponential voltage
+ changes.
+
+ regulator-settling-time-up-us:
+ description: Settling time, in microseconds, for voltage increase if
+ the regulator needs a constant time to settle after voltage increases
+ of any level. This is useful for regulators with exponential voltage
+ changes.
+
+ regulator-settling-time-down-us:
+ description: Settling time, in microseconds, for voltage decrease if
+ the regulator needs a constant time to settle after voltage decreases
+ of any level. This is useful for regulators with exponential voltage
+ changes.
+
+ regulator-soft-start:
+ description: Enable soft start so that voltage ramps slowly
+ type: boolean
+
+ regulator-initial-mode:
+ description: initial operating mode. The set of possible operating modes
+ depends on the capabilities of the hardware, so each device binding's
+ documentation explains which values the regulator supports.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ regulator-allowed-modes:
+ description: list of operating modes that software is allowed to configure
+ for the regulator at run-time. Elements may be specified in any order.
+ The set of possible operating modes depends on the capabilities of the
+ hardware, so each device binding document explains which values the
+ regulator supports.
+ $ref: "/schemas/types.yaml#/definitions/uint32-array"
+
+ regulator-system-load:
+ description: Load in uA present on regulator that is not captured by
+ any consumer request.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ regulator-pull-down:
+ description: Enable pull down resistor when the regulator is disabled.
+ type: boolean
+
+ regulator-over-current-protection:
+ description: Enable over current protection.
+ type: boolean
+
+ regulator-active-discharge:
+ description: |
+ tristate, enable/disable active discharge of regulators. The values are:
+ 0: Disable active discharge.
+ 1: Enable active discharge.
+ Absence of this property will leave configuration to default.
+ allOf:
+ - $ref: "/schemas/types.yaml#/definitions/uint32"
+ - enum: [ 0, 1 ]
+
+ regulator-coupled-with:
+ description: Regulators with which the regulator is coupled. The linkage
+ is 2-way - all coupled regulators should be linked with each other.
+ A regulator should not be coupled with its supplier.
+ $ref: "/schemas/types.yaml#/definitions/phandle-array"
+
+ regulator-coupled-max-spread:
+ description: Array of maximum spread between voltages of coupled regulators
+ in microvolts, each value in the array relates to the corresponding
+ couple specified by the regulator-coupled-with property.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ regulator-max-step-microvolt:
+ description: Maximum difference between current and target voltages
+ that can be changed safely in a single step.
+
+patternProperties:
+ ".*-supply$":
+ description: Input supply phandle(s) for this node
+
+ "regulator-state-(standby|mem|disk)":
+ type: object
+ description:
+ sub-nodes for regulator state in Standby, Suspend-to-RAM, and
+ Suspend-to-Disk modes, equivalent to the standby, mem, and disk
+ Linux sleep states respectively.
+
+ properties:
+ regulator-on-in-suspend:
+ description: regulator should be on in suspend state.
+ type: boolean
+
+ regulator-off-in-suspend:
+ description: regulator should be off in suspend state.
+ type: boolean
+
+ regulator-suspend-min-microvolt:
+ description: minimum voltage may be set in suspend state.
+
+ regulator-suspend-max-microvolt:
+ description: maximum voltage may be set in suspend state.
+
+ regulator-suspend-microvolt:
+ description: the default voltage to which the regulator is set in
+ suspend. This property is now deprecated; setting the suspend-mode
+ voltage via the API which the regulator driver provides is
+ recommended instead.
+
+ regulator-changeable-in-suspend:
+ description: whether the default voltage and the regulator on/off
+ in suspend can be changed in runtime.
+ type: boolean
+
+ regulator-mode:
+ description: operating mode in the given suspend state. The set
+ of possible operating modes depends on the capabilities of the
+ hardware, so the valid modes are documented in each regulator's
+ device tree binding document.
+ $ref: "/schemas/types.yaml#/definitions/uint32"
+
+ additionalProperties: false
+
+examples:
+ - |
+ xyzreg: regulator@0 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ vin-supply = <&vin>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
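+  - |
+    /* A second, hedged sketch exercising regulator-coupled-with and
+     * regulator-coupled-max-spread; the labels, unit addresses and
+     * voltages are illustrative only.
+     */
+    corereg: regulator@1 {
+        regulator-min-microvolt = <800000>;
+        regulator-max-microvolt = <1100000>;
+        regulator-coupled-with = <&gpureg>;
+        regulator-coupled-max-spread = <300000>;
+    };
+
+    gpureg: regulator@2 {
+        regulator-min-microvolt = <800000>;
+        regulator-max-microvolt = <1100000>;
+        regulator-coupled-with = <&corereg>;
+        regulator-coupled-max-spread = <300000>;
+    };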
+
+...
diff --git a/Documentation/devicetree/bindings/regulator/slg51000.txt b/Documentation/devicetree/bindings/regulator/slg51000.txt
new file mode 100644
index 000000000000..aa0733e49b90
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/slg51000.txt
@@ -0,0 +1,88 @@
+* Dialog Semiconductor SLG51000 Voltage Regulator
+
+Required properties:
+- compatible : Should be "dlg,slg51000" for SLG51000
+- reg : Specifies the I2C slave address.
+- xxx-supply: Input voltage supply regulator for ldo3 to ldo7.
+ These entries are required if the corresponding regulators are enabled;
+ an absence of these properties can cause the regulator registration to
+ fail. If an input supply is powered through a battery or an always-on
+ supply, these properties are still required, with a proper node handle
+ to that always-on power supply.
+ vin3-supply: Input supply for ldo3
+ vin4-supply: Input supply for ldo4
+ vin5-supply: Input supply for ldo5
+ vin6-supply: Input supply for ldo6
+ vin7-supply: Input supply for ldo7
+
+Optional properties:
+- interrupt-parent : Specifies the reference to the interrupt controller.
+- interrupts : IRQ line information.
+- dlg,cs-gpios : Specify a valid GPIO for chip select
+
+Sub-nodes:
+- regulators : This node defines the settings for the regulators.
+ The content of the sub-node is defined by the standard binding
+ for regulators; see regulator.txt.
+
+ The SLG51000 regulators are bound using their names listed below:
+ ldo1
+ ldo2
+ ldo3
+ ldo4
+ ldo5
+ ldo6
+ ldo7
+
+Optional properties for regulators:
+- enable-gpios : Specify a valid GPIO for platform control of the regulator.
+
+Example:
+ pmic: slg51000@75 {
+ compatible = "dlg,slg51000";
+ reg = <0x75>;
+
+ regulators {
+ ldo1 {
+ regulator-name = "ldo1";
+ regulator-min-microvolt = <2400000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo2 {
+ regulator-name = "ldo2";
+ regulator-min-microvolt = <2400000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo3 {
+ regulator-name = "ldo3";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3750000>;
+ };
+
+ ldo4 {
+ regulator-name = "ldo4";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3750000>;
+ };
+
+ ldo5 {
+ regulator-name = "ldo5";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ ldo6 {
+ regulator-name = "ldo6";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ ldo7 {
+ regulator-name = "ldo7";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3750000>;
+ };
+ };
+ };
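+
+Example of platform control of a regulator (a hedged sketch: the GPIO
+controller phandle and pin number are illustrative only):
+
+	ldo3 {
+		regulator-name = "ldo3";
+		regulator-min-microvolt = <1200000>;
+		regulator-max-microvolt = <3750000>;
+		enable-gpios = <&gpio 6 GPIO_ACTIVE_HIGH>;
+	};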
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt b/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
new file mode 100644
index 000000000000..479ad4c8758e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.txt
@@ -0,0 +1,18 @@
+STM32 BOOSTER - Booster for ADC analog input switches
+
+Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
+to supply ADC analog input switches.
+
+Required properties:
+- compatible: Should be one of:
+ "st,stm32h7-booster"
+ "st,stm32mp1-booster"
+- st,syscfg: Phandle to system configuration controller.
+- vdda-supply: Phandle to the vdda input analog voltage.
+
+Example:
+ booster: regulator-booster {
+ compatible = "st,stm32mp1-booster";
+ st,syscfg = <&syscfg>;
+ vdda-supply = <&vdda>;
+ };
diff --git a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
index 0014da9145af..c223e54452da 100644
--- a/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,iproc-rng200.txt
@@ -2,6 +2,7 @@ HWRNG support for the iproc-rng200 driver
Required properties:
- compatible : Must be one of:
+ "brcm,bcm7211-rng200"
"brcm,bcm7278-rng200"
"brcm,iproc-rng200"
- reg : base address and size of control register block
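
A minimal node using the new compatible might look like the following
sketch; the register address and size are illustrative only, not taken
from this binding:

	rng@7e104000 {
		compatible = "brcm,bcm7211-rng200";
		reg = <0x7e104000 0x28>;
	};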
diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
new file mode 100644
index 000000000000..d57659996d62
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.txt
@@ -0,0 +1,25 @@
+NXP System Counter Module (sys_ctr)
+
+The system counter (sys_ctr) is a programmable system counter which provides
+a shared time base to Cortex A15, A7, A53, A73, etc. It is intended for use in
+applications where the counter is always powered and supports multiple,
+unrelated clocks. The compare frame inside can be used for timer purposes.
+
+Required properties:
+
+- compatible : should be "nxp,sysctr-timer"
+- reg : Specifies the base physical address and size of the compare
+ frame and the counter control, read & compare registers.
+- interrupts : should be the first compare frame's interrupt
+- clocks : Specifies the counter clock.
+- clock-names: Specifies the name of this module's clock.
+
+Example:
+
+ system_counter: timer@306a0000 {
+ compatible = "nxp,sysctr-timer";
+ reg = <0x306a0000 0x20000>; /* system-counter-rd & compare */
+ clocks = <&clk_8m>;
+ clock-names = "per";
+ interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 747fd3f689dc..2e742d399e87 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -52,6 +52,10 @@ properties:
- at,24c08
# i2c trusted platform module (TPM)
- atmel,at97sc3204t
+ # i2c h/w symmetric crypto module
+ - atmel,atsha204a
+ # i2c h/w elliptic curve crypto module
+ - atmel,atecc508a
# CM32181: Ambient Light Sensor
- capella,cm32181
# CM3232: Ambient Light Sensor
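
As trivial I2C devices, the two new crypto modules need nothing beyond a
compatible string and a bus address; a hedged sketch (the 0x64 address is
illustrative):

	atsha204a@64 {
		compatible = "atmel,atsha204a";
		reg = <0x64>;
	};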
diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 5e4d8aa68913..9559aa3cbcef 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -283,8 +283,6 @@ To summarize::
gpiod_set_value(desc, 1); default (active high) high
gpiod_set_value(desc, 0); active low high
gpiod_set_value(desc, 1); active low low
- gpiod_set_value(desc, 0); default (active high) low
- gpiod_set_value(desc, 1); default (active high) high
gpiod_set_value(desc, 0); open drain low
gpiod_set_value(desc, 1); open drain high impedance
gpiod_set_value(desc, 0); open source high impedance
@@ -366,7 +364,7 @@ accessed sequentially.
The functions take three arguments:
* array_size - the number of array elements
* desc_array - an array of GPIO descriptors
- * array_info - optional information obtained from gpiod_array_get()
+ * array_info - optional information obtained from gpiod_get_array()
* value_bitmap - a bitmap to store the GPIOs' values (get) or
a bitmap of values to assign to the GPIOs (set)
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 1ce7fcd0f989..4af9aae724f0 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -235,7 +235,7 @@ means that a pull up or pull-down resistor is available on the output of the
GPIO line, and this resistor is software controlled.
In discrete designs, a pull-up or pull-down resistor is simply soldered on
-the circuit board. This is not something we deal or model in software. The
+the circuit board. This is not something we deal with or model in software. The
most you will think about these lines is that they will very likely be
configured as open drain or open source (see the section above).
@@ -292,18 +292,18 @@ We can divide GPIO irqchips in two broad categories:
- HIERARCHICAL INTERRUPT CHIPS: this means that each GPIO line has a dedicated
irq line to a parent interrupt controller one level up. There is no need
- to inquire the GPIO hardware to figure out which line has figured, but it
- may still be necessary to acknowledge the interrupt and set up the
- configuration such as edge sensitivity.
+ to inquire the GPIO hardware to figure out which line has fired, but it
+ may still be necessary to acknowledge the interrupt and set up configuration
+ such as edge sensitivity.
Realtime considerations: a realtime compliant GPIO driver should not use
spinlock_t or any sleepable APIs (like PM runtime) as part of its irqchip
implementation.
-- spinlock_t should be replaced with raw_spinlock_t [1].
+- spinlock_t should be replaced with raw_spinlock_t.[1]
- If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks
- on an irqchip. Create the callbacks if needed [2].
+ on an irqchip. Create the callbacks if needed.[2]
Cascaded GPIO irqchips
@@ -361,7 +361,7 @@ Cascaded GPIO irqchips usually fall in one of three categories:
Realtime considerations: this kind of handlers will be forced threaded on -RT,
and as result the IRQ core will complain that generic_handle_irq() is called
- with IRQ enabled and the same work around as for "CHAINED GPIO irqchips" can
+ with IRQ enabled and the same work-around as for "CHAINED GPIO irqchips" can
be applied.
- NESTED THREADED GPIO IRQCHIPS: these are off-chip GPIO expanders and any
@@ -418,7 +418,7 @@ symbol:
If there is a need to exclude certain GPIO lines from the IRQ domain handled by
these helpers, we can set .irq.need_valid_mask of the gpiochip before
-[devm_]gpiochip_add_data() is called. This allocates an .irq.valid_mask with as
+``[devm_]gpiochip_add_data()`` is called. This allocates an .irq.valid_mask with as
many bits set as there are GPIO lines in the chip, each bit representing line
0..n-1. Drivers can exclude GPIO lines by clearing bits from this mask. The mask
must be filled in before gpiochip_irqchip_add() or gpiochip_irqchip_add_nested()
diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst
index 30e6aa7e160b..5158577bc29b 100644
--- a/Documentation/driver-api/s390-drivers.rst
+++ b/Documentation/driver-api/s390-drivers.rst
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
although they are not the focus of this document.
Some additional information can also be found in the kernel source under
-Documentation/s390/driver-model.txt.
+Documentation/s390/driver-model.rst.
The css bus
===========
@@ -38,7 +38,7 @@ into several categories:
* Standard I/O subchannels, for use by the system. They have a child
device on the ccw bus and are described below.
* I/O subchannels bound to the vfio-ccw driver. See
- Documentation/s390/vfio-ccw.txt.
+ Documentation/s390/vfio-ccw.rst.
* Message subchannels. No Linux driver currently exists.
* CHSC subchannels (at most one). The chsc subchannel driver can be used
to send asynchronous chsc commands.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 66cad5c86171..a226061fa109 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -45,6 +45,7 @@ Table of Contents
3.9 /proc/<pid>/map_files - Information about memory mapped files
3.10 /proc/<pid>/timerslack_ns - Task timerslack value
3.11 /proc/<pid>/patch_state - Livepatch patch operation state
+ 3.12 /proc/<pid>/arch_status - Task architecture specific information
4 Configuring procfs
4.1 Mount options
@@ -1948,6 +1949,45 @@ patched. If the patch is being enabled, then the task has already been
patched. If the patch is being disabled, then the task hasn't been
unpatched yet.
+3.12 /proc/<pid>/arch_status - task architecture specific status
+-------------------------------------------------------------------
+When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the
+architecture specific status of the task.
+
+Example
+-------
+ $ cat /proc/6753/arch_status
+ AVX512_elapsed_ms: 8
+
+Description
+-----------
+
+x86 specific entries:
+---------------------
+ AVX512_elapsed_ms:
+ ------------------
+ If AVX512 is supported on the machine, this entry shows the milliseconds
+ elapsed since the last time AVX512 usage was recorded. The recording
+ happens on a best effort basis when a task is scheduled out. This means
+ that the value depends on two factors:
+
+ 1) The time which the task spent on the CPU without being scheduled
+ out. With CPU isolation and a single runnable task this can take
+ several seconds.
+
+ 2) The time since the task was scheduled out last. Depending on the
+ reason for being scheduled out (time slice exhausted, syscall ...)
+ this can be an arbitrarily long time.
+
+ As a consequence the value cannot be considered precise and authoritative
+ information. The application which uses this information has to be aware
+ of the overall scenario on the system in order to determine whether a
+ task is a real AVX512 user or not. Precise information can be obtained
+ with performance counters.
+
+ A special value of '-1' indicates that no AVX512 usage was recorded, so
+ the task is unlikely to be an AVX512 user; but depending on the workload
+ and the scheduling scenario, this could also be the kind of false
+ negative mentioned above.
------------------------------------------------------------------------------
Configuring procfs
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index d06e9a59a9f4..cad797a8a39e 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -98,7 +98,7 @@ A memory policy with a valid NodeList will be saved, as specified, for
use at file creation time. When a task allocates a file in the file
system, the mount option memory policy will be applied with a NodeList,
if any, modified by the calling task's cpuset constraints
-[See Documentation/cgroup-v1/cpusets.txt] and any optional flags, listed
+[See Documentation/cgroup-v1/cpusets.rst] and any optional flags, listed
below. If the resulting NodeLists is the empty set, the effective memory
policy for the file will revert to "default" policy.
diff --git a/Documentation/fmc/API.txt b/Documentation/fmc/API.txt
deleted file mode 100644
index 06b06b92c794..000000000000
--- a/Documentation/fmc/API.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Functions Exported by fmc.ko
-****************************
-
-The FMC core exports the usual 4 functions that are needed for a bus to
-work, and a few more:
-
- int fmc_driver_register(struct fmc_driver *drv);
- void fmc_driver_unregister(struct fmc_driver *drv);
- int fmc_device_register(struct fmc_device *fmc);
- void fmc_device_unregister(struct fmc_device *fmc);
-
- int fmc_device_register_n(struct fmc_device **fmc, int n);
- void fmc_device_unregister_n(struct fmc_device **fmc, int n);
-
- uint32_t fmc_readl(struct fmc_device *fmc, int offset);
- void fmc_writel(struct fmc_device *fmc, uint32_t val, int off);
- void *fmc_get_drvdata(struct fmc_device *fmc);
- void fmc_set_drvdata(struct fmc_device *fmc, void *data);
-
- int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw,
- int sdb_entry);
-
-The data structure that describes a device is detailed in *note FMC
-Device::, the one that describes a driver is detailed in *note FMC
-Driver::. Please note that structures of type fmc_device must be
-allocated by the caller, but must not be released after unregistering.
-The fmc-bus itself takes care of releasing the structure when their use
-count reaches zero - actually, the device model does that on our behalf.
-
-The functions to register and unregister n devices are meant to be used
-by carriers that host more than one mezzanine. The devices must all be
-registered at the same time because if the FPGA is reprogrammed, all
-devices in the array are affected. Usually, the driver matching the
-first device will reprogram the FPGA, so other devices must know they
-are already driven by a reprogrammed FPGA.
-
-If a carrier hosts slots that are driven by different FPGA devices, it
-should register as a group only mezzanines that are driven by the same
-FPGA, for the reason outlined above.
-
-Finally, the fmc_reprogram function calls the reprogram method (see
-*note The API Offered by Carriers:: and also scans the memory area for
-an SDB tree. You can pass -1 as sdb_entry to disable such scan.
-Otherwise, the function fails if no tree is found at the specified
-entry point. The function is meant to factorize common code, and by
-the time you read this it is already used by the spec-sw and fine-delay
-modules.
diff --git a/Documentation/fmc/FMC-and-SDB.txt b/Documentation/fmc/FMC-and-SDB.txt
deleted file mode 100644
index fa14e0b24521..000000000000
--- a/Documentation/fmc/FMC-and-SDB.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-
-FMC (FPGA Mezzanine Card) is the standard we use for our I/O devices,
-in the context of White Rabbit and related hardware.
-
-In our I/O environments we need to write drivers for each mezzanine
-card, and such drivers must work regardless of the carrier being used.
-To achieve this, we abstract the FMC interface.
-
-We have a carrier for PCI-E called SPEC and one for VME called SVEC,
-but more are planned. Also, we support stand-alone devices (usually
-plugged on a SPEC card), controlled through Etherbone, developed by GSI.
-
-Code and documentation for the FMC bus was born as part of the spec-sw
-project, but now it lives in its own project. Other projects, i.e.
-software support for the various carriers, should include this as a
-submodule.
-
-The most up to date version of code and documentation is always
-available from the repository you can clone from:
-
- git://ohwr.org/fmc-projects/fmc-bus.git (read-only)
- git@ohwr.org:fmc-projects/fmc-bus.git (read-write for developers)
-
-Selected versions of the documentation, as well as complete tar
-archives for selected revisions are placed to the Files section of the
-project: `http://www.ohwr.org/projects/fmc-bus/files'
-
-
-What is FMC
-***********
-
-FMC, as said, stands for "FPGA Mezzanine Card". It is a standard
-developed by the VME consortium called VITA (VMEbus International Trade
-Association) and ratified by ANSI, the American National Standards
-Institute. The official documentation is called "ANSI-VITA 57.1".
-
-The FMC card is an almost square PCB, around 70x75 millimeters, that is
-called mezzanine in this document. It usually lives plugged into
-another PCB for power supply and control; such bigger circuit board is
-called carrier from now on, and a single carrier may host more than one
-mezzanine.
-
-In the typical application the mezzanine is mostly analog while the
-carrier is mostly digital, and hosts an FPGA that must be configured to
-match the specific mezzanine and the desired application. Thus, you may
-need to load different FPGA images to drive different instances of the
-same mezzanine.
-
-FMC, as such, is not a bus in the usual meaning of the term, because
-most carriers have only one connector, and carriers with several
-connectors have completely separate electrical connections to them.
-This package, however, implements a bus as a software abstraction.
-
-
-What is SDB
-***********
-
-SDB (Self Describing Bus) is a set of data structures that we use for
-enumerating the internal structure of an FPGA image. We also use it as
-a filesystem inside the FMC EEPROM.
-
-SDB is not mandatory for use of this FMC kernel bus, but if you have SDB
-this package can make good use of it. SDB itself is developed in the
-fpga-config-space OHWR project. The link to the repository is
-`git://ohwr.org/hdl-core-lib/fpga-config-space.git' and what is used in
-this project lives in the sdbfs subdirectory in there.
-
-SDB support for FMC is described in *note FMC Identification:: and
-*note SDB Support::
-
-
-SDB Support
-***********
-
-The fmc.ko bus driver exports a few functions to help drivers taking
-advantage of the SDB information that may be present in your own FPGA
-memory image.
-
-The module exports the following functions, in the special header
-<linux/fmc-sdb.h>. The linux/ prefix in the name is there because we
-plan to submit it upstream in the future, and don't want to force
-changes on our drivers if that happens.
-
- int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address);
- void fmc_show_sdb_tree(struct fmc_device *fmc);
- signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor,
- uint32_t device, unsigned long *sz);
- int fmc_free_sdb_tree(struct fmc_device *fmc);
diff --git a/Documentation/fmc/carrier.txt b/Documentation/fmc/carrier.txt
deleted file mode 100644
index 5e4f1dd3e98b..000000000000
--- a/Documentation/fmc/carrier.txt
+++ /dev/null
@@ -1,311 +0,0 @@
-FMC Device
-**********
-
-Within the Linux bus framework, the FMC device is created and
-registered by the carrier driver. For example, the PCI driver for the
-SPEC card fills a data structure for each SPEC that it drives, and
-registers an associated FMC device for each card. The SVEC driver can
-do exactly the same for the VME carrier (actually, it should do it
-twice, because the SVEC carries two FMC mezzanines). Similarly, an
-Etherbone driver will be able to register its own FMC devices, offering
-communication primitives through frame exchange.
-
-The contents of the EEPROM within the FMC are used for identification
-purposes, i.e. for matching the device with its own driver. For this
-reason the device structure includes a complete copy of the EEPROM
-(actually, the carrier driver may choose whether or not to return it -
-for example we most likely won't have the whole EEPROM available for
-Etherbone devices).
-
-The following listing shows the current structure defining a device.
-Please note that all the machinery is in place but some details may
-still change in the future. For this reason, there is a version field
-at the beginning of the structure. As usual, the minor number will
-change for compatible changes (like a new flag) and the major number
-will increase when an incompatible change happens (for example, a
-change in layout of some fmc data structures). Device writers should
-just set it to the value FMC_VERSION, and be ready to get back -EINVAL
-at registration time.
-
- struct fmc_device {
- unsigned long version;
- unsigned long flags;
- struct module *owner; /* char device must pin it */
- struct fmc_fru_id id; /* for EEPROM-based match */
- struct fmc_operations *op; /* carrier-provided */
- int irq; /* according to host bus. 0 == none */
- int eeprom_len; /* Usually 8kB, may be less */
- int eeprom_addr; /* 0x50, 0x52 etc */
- uint8_t *eeprom; /* Full contents or leading part */
- char *carrier_name; /* "SPEC" or similar, for special use */
- void *carrier_data; /* "struct spec *" or equivalent */
- __iomem void *fpga_base; /* May be NULL (Etherbone) */
- __iomem void *slot_base; /* Set by the driver */
- struct fmc_device **devarray; /* Allocated by the bus */
- int slot_id; /* Index in the slot array */
- int nr_slots; /* Number of slots in this carrier */
- unsigned long memlen; /* Used for the char device */
- struct device dev; /* For Linux use */
- struct device *hwdev; /* The underlying hardware device */
- unsigned long sdbfs_entry;
- struct sdb_array *sdb;
- uint32_t device_id; /* Filled by the device */
- char *mezzanine_name; /* Defaults to ``fmc'' */
- void *mezzanine_data;
- };
-
-The meaning of most fields is summarized in the code comment above.
-
-The following fields must be filled by the carrier driver before
-registration:
-
- * version: must be set to FMC_VERSION.
-
- * owner: set to MODULE_OWNER.
-
- * op: the operations to act on the device.
-
- * irq: number for the mezzanine; may be zero.
-
- * eeprom_len: length of the following array.
-
- * eeprom_addr: 0x50 for first mezzanine and so on.
-
- * eeprom: the full content of the I2C EEPROM.
-
- * carrier_name.
-
- * carrier_data: a unique pointer for the carrier.
-
- * fpga_base: the I/O memory address (may be NULL).
-
- * slot_id: the index of this slot (starting from zero).
-
- * memlen: if fpga_base is valid, the length of I/O memory.
-
- * hwdev: to be used in some dev_err() calls.
-
- * device_id: a slot-specific unique integer number.
-
-
-Please note that the carrier should read its own EEPROM memory before
-registering the device, as well as fill all other fields listed above.
-
-The following fields should not be assigned, because they are filled
-later by either the bus or the device driver:
-
- * flags.
-
- * fru_id: filled by the bus, parsing the eeprom.
-
- * slot_base: filled and used by the driver, if useful to it.
-
- * devarray: an array of all mezzanines driven by a single FPGA.
-
- * nr_slots: set by the core at registration time.
-
- * dev: used by Linux.
-
- * sdb: FPGA contents, scanned according to driver's directions.
-
- * sdbfs_entry: SDB entry point in EEPROM: autodetected.
-
- * mezzanine_data: available for the driver.
-
- * mezzanine_name: filled by fmc-bus during identification.
-
-
-Note: mezzanine_data may be redundant, because Linux offers the drvdata
-approach, so the field may be removed in later versions of this bus
-implementation.
-
-As I write this, the SPEC carrier is already completely functional in
-the fmc-bus environment, and is a good reference to look at.
-
-
-The API Offered by Carriers
-===========================
-
-The carrier provides a number of methods by means of the
-`fmc_operations' structure, which currently is defined like this
-(again, it is a moving target, please refer to the header rather than
-this document):
-
- struct fmc_operations {
- uint32_t (*readl)(struct fmc_device *fmc, int offset);
- void (*writel)(struct fmc_device *fmc, uint32_t value, int offset);
- int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw);
- int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv);
- int (*irq_request)(struct fmc_device *fmc, irq_handler_t h,
- char *name, int flags);
- void (*irq_ack)(struct fmc_device *fmc);
- int (*irq_free)(struct fmc_device *fmc);
- int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio,
- int ngpio);
- int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l);
- int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l);
- };
-
-The individual methods perform the following tasks:
-
-`readl'
-`writel'
- These functions access FPGA registers by whatever means the
- carrier offers. They are not expected to fail, and most of the time
- they will just make a memory access to the host bus. If the
- carrier provides a fpga_base pointer, the driver may use direct
- access through that pointer. For this reason the header offers the
- inline functions fmc_readl and fmc_writel that access fpga_base if
- the respective method is NULL. A driver that wants to be portable
- and efficient should use fmc_readl and fmc_writel. For Etherbone,
- or other non-local carriers, error-management is still to be
- defined.
-
-`validate'
- Module parameters are used to manage different applications for
- two or more boards of the same kind. Validation is based on the
- busid module parameter, if provided, and returns the matching
- index in the associated array. See *note Module Parameters:: in in
- doubt. If no match is found, `-ENOENT' is returned; if the user
- didn't pass `busid=', all devices will pass validation. The value
- returned by the validate method can be used as index into other
- parameters (for example, some drivers use the `lm32=' parameter in
- this way). Such "generic parameters" are documented in *note
- Module Parameters::, below. The validate method is used by
- `fmc-trivial.ko', described in *note fmc-trivial::.
-
-`reprogram'
- The carrier enumerates FMC devices by loading a standard (or
- golden) FPGA binary that allows EEPROM access. Each driver, then,
- will need to reprogram the FPGA by calling this function. If the
- name argument is NULL, the carrier should reprogram the golden
- binary. If the gateware name has been overridden through module
- parameters (in a carrier-specific way) the file loaded will match
- the parameters. Per-device gateware names can be specified using
- the `gateware=' parameter, see *note Module Parameters::. Note:
- Clients should call the new helper, fmc_reprogram, which both
- calls this method and parses the SDB tree of the FPGA.
-
-`irq_request'
-`irq_ack'
-`irq_free'
- Interrupt management is carrier-specific, so it is abstracted as
- operations. The interrupt number is listed in the device
- structure, and for the mezzanine driver the number is only
- informative. The handler will receive the fmc pointer as dev_id;
- the flags argument is passed to the Linux request_irq function,
- but fmc-specific flags may be added in the future. You'll most
- likely want to pass the `IRQF_SHARED' flag.
-
-`gpio_config'
- The method allows the driver to configure a GPIO pin in the carrier, and read
- its current value if it is configured as input. See *note The GPIO
- Abstraction:: for details.
-
-`read_ee'
-`write_ee'
- Read or write the EEPROM. The functions are expected to be only
- called before reprogramming and the carrier should refuse them
- with `ENODEV' after reprogramming. The offset is expected to be
- within 8kB (the current size), but addresses up to 1MB are
- reserved to fit bigger I2C devices in the future. Carriers may
- offer access to other internal flash memories using these same
- methods: for example the SPEC driver may define that its carrier
- I2C memory is seen at offset 1M and the internal SPI flash is seen
- at offset 16M. This multiplexing of several flash memories in the
- same address space is carrier-specific and should only be used
- by a driver that has verified the `carrier_name' field.
-
-
-
-The GPIO Abstraction
-====================
-
-Support for GPIO pins in the fmc-bus environment is not very
-straightforward and deserves special discussion.
-
-While the general idea of a carrier-independent driver seems to fly,
-configuration of specific signals within the carrier needs at least
-some knowledge of the carrier itself. For this reason, the specific
-driver can request to configure carrier-specific GPIO pins, numbered
-from 0 to at most 4095. Configuration is performed by passing a
-pointer to an array of struct fmc_gpio items, as well as the length of
-the array. This is the data structure:
-
- struct fmc_gpio {
- char *carrier_name;
- int gpio;
- int _gpio; /* internal use by the carrier */
- int mode; /* GPIOF_DIR_OUT etc, from <linux/gpio.h> */
- int irqmode; /* IRQF_TRIGGER_LOW and so on */
- };
-
-By specifying a carrier_name for each pin, the driver may access
-different pins in different carriers. The gpio_config method is
-expected to return the number of pins successfully configured, ignoring
-requests for other carriers. However, if no pin is configured (because
-no structure at all refers to the current carrier_name), the operation
-returns an error so the caller will know that it is running under a
-yet-unsupported carrier.
-
-So, for example, a driver that has been developed and tested on both
-the SPEC and the SVEC may request configuration of two different GPIO
-pins, and expect one such configuration to succeed - if none succeeds
-it most likely means that the current carrier is a still-unknown one.
-
-If, however, your GPIO pin has a specific known role, you can pass a
-special number in the gpio field, using one of the following macros:
-
- #define FMC_GPIO_RAW(x) (x) /* 4096 of them */
- #define FMC_GPIO_IRQ(x) ((x) + 0x1000) /* 256 of them */
- #define FMC_GPIO_LED(x) ((x) + 0x1100) /* 256 of them */
- #define FMC_GPIO_KEY(x) ((x) + 0x1200) /* 256 of them */
- #define FMC_GPIO_TP(x) ((x) + 0x1300) /* 256 of them */
- #define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */
-
-Use of virtual GPIO numbers (anything but FMC_GPIO_RAW) is allowed
-provided the carrier_name field in the data structure is left
-unspecified (NULL). Each carrier is responsible for providing a mapping
-between virtual and physical GPIO numbers. The carrier may then use the
-_gpio field to cache the result of this mapping.
-
-All carriers must map their I/O lines to the sets above starting from
-zero. The SPEC, for example, maps interrupt pins 0 and 1, and test
-points 0 through 3 (even if the test points on the PCB are called
-5,6,7,8).
-
-If, for example, a driver requires a free LED and a test point (for a
-scope probe to be plugged at some point during development) it may ask
-for FMC_GPIO_LED(0) and FMC_GPIO_TP(0). Each carrier will provide
-suitable GPIO pins. Clearly, the person running the drivers will know
-the order used by the specific carrier driver in assigning leds and
-testpoints, so to make a carrier-dependent use of the diagnostic tools.
-
-In theory, some form of autodetection should be possible: a driver like
-the wr-nic (which uses IRQ(1) on the SPEC card) should configure
-IRQ(0), make a test with software-generated interrupts and configure
-IRQ(1) if the test fails. This probing step should be used because even
-if the wr-nic gateware is known to use IRQ1 on the SPEC, the driver
-should be carrier-independent and thus use IRQ(0) as a first bet -
-actually, the knowledge that IRQ0 may fail is carrier-dependent
-information, but using it doesn't make the driver unsuitable for other
-carriers.
-
-The return value of gpio_config is defined as follows:
-
- * If no pin in the array can be used by the carrier, `-ENODEV'.
-
- * If at least one virtual GPIO number cannot be mapped, `-ENOENT'.
-
- * On success, 0 or positive. The value returned is the number of
- high input bits (if no input is configured, the value for success
- is 0).
-
-While I admit the procedure is not completely straightforward, it
-allows configuration, input and output with a single carrier operation.
-Given the typical use case of FMC devices, GPIO operations are not
-expected to ever be in hot paths, and GPIO access so far has only been
-used to configure the interrupt pin, mode and polarity. Especially
-reading inputs is not expected to be common. If your device has GPIO
-capabilities in the hot path, you should consider using the kernel's
-GPIO mechanisms.
diff --git a/Documentation/fmc/fmc-chardev.txt b/Documentation/fmc/fmc-chardev.txt
deleted file mode 100644
index d9ccb278e597..000000000000
--- a/Documentation/fmc/fmc-chardev.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-fmc-chardev
-===========
-
-This is a simple generic driver, that allows user access by means of a
-character device (actually, one for each mezzanine it takes hold of).
-
-The char device is created as a misc device. Its name in /dev (as
-created by udev) is the same name as the underlying FMC device. Thus,
-the name can be a silly fmc-0000 look-alike if the device has no
-identifiers nor bus_id, a more specific fmc-0400 if the device has a
-bus-specific address but no associated name, or something like
-fdelay-0400 if the FMC core can rely on both a mezzanine name and a bus
-address.
-
-Currently the driver only supports read and write: you can lseek to the
-desired address and read or write a register.
-
-The driver assumes all registers are 32-bit in size, and only accepts a
-single read or write per system call. However, as a result of Unix read
-and write semantics, users can simply fread or fwrite bigger areas in
-order to dump or store bigger memory areas.
-
-There is currently no support for mmap, user-space interrupt management
-and DMA buffers. They may be added in later versions, if the need
-arises.
-
-The example below shows raw access to a SPEC card programmed with its
-golden FPGA file, that features an SDB structure at offset 256 - i.e.
-64 words. The mezzanine's EEPROM in this case is not programmed, so the
-default name is fmc-<bus><devfn>, and there are two cards in the system:
-
- spusa.root# insmod fmc-chardev.ko
- [ 1073.339332] spec 0000:02:00.0: Driver has no ID: matches all
- [ 1073.345051] spec 0000:02:00.0: Created misc device "fmc-0200"
- [ 1073.350821] spec 0000:04:00.0: Driver has no ID: matches all
- [ 1073.356525] spec 0000:04:00.0: Created misc device "fmc-0400"
- spusa.root# ls -l /dev/fmc*
- crw------- 1 root root 10, 58 Nov 20 19:23 /dev/fmc-0200
- crw------- 1 root root 10, 57 Nov 20 19:23 /dev/fmc-0400
- spusa.root# dd bs=4 skip=64 count=1 if=/dev/fmc-0200 2> /dev/null | od -t x1z
- 0000000 2d 42 44 53 >-BDS<
- 0000004
-
-The simple program tools/fmc-mem in this package can access an FMC char
-device and read or write a word or a whole area. Actually, the program
-is not specific to FMC at all, it just uses lseek, read and write.
-
-Its first argument is the device name, the second the offset, the third
-(if any) the value to write and the optional last argument that must
-begin with "+" is the number of bytes to read or write. In case of
-repeated reading data is written to stdout; repeated writes read from
-stdin and the value argument is ignored.
-
-The following examples show reading the SDB magic number and the first
-SDB record from a SPEC device programmed with its golden image:
-
- spusa.root# ./fmc-mem /dev/fmc-0200 100
- 5344422d
- spusa.root# ./fmc-mem /dev/fmc-0200 100 +40 | od -Ax -t x1z
- 000000 2d 42 44 53 00 01 02 00 00 00 00 00 00 00 00 00 >-BDS............<
- 000010 00 00 00 00 ff 01 00 00 00 00 00 00 51 06 00 00 >............Q...<
- 000020 c9 42 a5 e6 02 00 00 00 11 05 12 20 2d 34 42 57 >.B......... -4BW<
- 000030 73 6f 72 43 72 61 62 73 49 53 47 2d 00 20 20 20 >sorCrabsISG-. <
- 000040
diff --git a/Documentation/fmc/fmc-fakedev.txt b/Documentation/fmc/fmc-fakedev.txt
deleted file mode 100644
index e85b74a4ae30..000000000000
--- a/Documentation/fmc/fmc-fakedev.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-fmc-fakedev
-===========
-
-This package includes a software-only device, called fmc-fakedev, which
-is able to register up to 4 mezzanines (by default it registers one).
-Unlike the SPEC driver, which creates an FMC device for each PCI cards
-it manages, this module creates a single instance of its set of
-mezzanines.
-
-It is meant as the simplest possible example of how a driver should be
-written, and it includes a fake EEPROM image (built using the tools
-described in *note FMC Identification::), which by default is
-replicated for each fake mezzanine.
-
-You can also use this device to verify the match algorithms, by asking
-it to test your own EEPROM image. You can provide the image by means of
-the eeprom= module parameter: the new EEPROM image is loaded, as usual,
-by means of the firmware loader. This example shows the defaults and a
-custom EEPROM image:
-
- spusa.root# insmod fmc-fakedev.ko
- [ 99.971247] fake-fmc-carrier: mezzanine 0
- [ 99.975393] Manufacturer: fake-vendor
- [ 99.979624] Product name: fake-design-for-testing
- spusa.root# rmmod fmc-fakedev
- spusa.root# insmod fmc-fakedev.ko eeprom=fdelay-eeprom.bin
- [ 121.447464] fake-fmc-carrier: Mezzanine 0: eeprom "fdelay-eeprom.bin"
- [ 121.462725] fake-fmc-carrier: mezzanine 0
- [ 121.466858] Manufacturer: CERN
- [ 121.470477] Product name: FmcDelay1ns4cha
- spusa.root# rmmod fmc-fakedev
-
-After loading the device, you can use the write_ee method to modify its
-own internal fake EEPROM: whenever the image is overwritten starting at
-offset 0, the module will unregister and re-register the FMC device.
-This is shown in fmc-write-eeprom.txt.
diff --git a/Documentation/fmc/fmc-trivial.txt b/Documentation/fmc/fmc-trivial.txt
deleted file mode 100644
index d1910bc67159..000000000000
--- a/Documentation/fmc/fmc-trivial.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-fmc-trivial
-===========
-
-The simple module fmc-trivial is just a simple client that registers an
-interrupt handler. I used it to verify the basic mechanism of the FMC
-bus and how interrupts worked.
-
-The module implements the generic FMC parameters, so it can program a
-different gateware file in each card. The whole list of parameters it
-accepts are:
-
-`busid='
-`gateware='
- Generic parameters. See mezzanine.txt
-
-
-This driver is worth reading, in my opinion.
diff --git a/Documentation/fmc/fmc-write-eeprom.txt b/Documentation/fmc/fmc-write-eeprom.txt
deleted file mode 100644
index e0a9712156aa..000000000000
--- a/Documentation/fmc/fmc-write-eeprom.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-fmc-write-eeprom
-================
-
-This module is designed to load a binary file from /lib/firmware and to
-write it to the internal EEPROM of the mezzanine card. This driver uses
-the `busid' generic parameter.
-
-Overwriting the EEPROM is not something you should do daily, and it is
-expected to only happen during manufacturing. For this reason, the
-module makes it unlikely for the random user to change a working EEPROM.
-
-However, since the EEPROM may include application-specific information
-other than the identification, later versions of this package added
-write-support through sysfs. See *note Accessing the EEPROM::.
-
-To avoid damaging the EEPROM content, the module takes the following
-measures:
-
- * It accepts a `file=' argument (within /lib/firmware) and if no
- such argument is received, it doesn't write anything to EEPROM
- (i.e. there is no default file name).
-
- * If the file name ends with `.bin' it is written verbatim starting
- at offset 0.
-
- * If the file name ends with `.tlv' it is interpreted as
- type-length-value (i.e., it allows writev(2)-like operation).
-
- * If the file name doesn't match any of the patterns above, it is
- ignored and no write is performed.
-
- * Only cards listed with `busid=' are written to. If no busid is
- specified, no programming is done (and the probe function of the
- driver will fail).
-
-
-Each TLV tuple is formatted in this way: the header is 5 bytes,
-followed by data. The first byte is `w' for write, the next two bytes
-represent the address, in little-endian byte order, and the next two
-represent the data length, in little-endian order. The length does not
-include the header (it is the actual number of bytes to be written).
-
-This is a real example that writes 5 bytes at position 0x110:
-
- spusa.root# od -t x1 -Ax /lib/firmware/try.tlv
- 000000 77 10 01 05 00 30 31 32 33 34
- 00000a
- spusa.root# insmod /tmp/fmc-write-eeprom.ko busid=0x0200 file=try.tlv
- [19983.391498] spec 0000:03:00.0: write 5 bytes at 0x0110
- [19983.414615] spec 0000:03:00.0: write_eeprom: success
-
-Please note that you'll most likely want to use SDBFS to build your
-EEPROM image, at least if your mezzanines are being used in the White
-Rabbit environment. For this reason the TLV format is not expected to
-be used much and is not expected to be developed further.
-
-If you want to try reflashing fake EEPROM devices, you can use the
-fmc-fakedev.ko module (see *note fmc-fakedev::). Whenever you change
-the image starting at offset 0, it will deregister and register again
-after two seconds. Please note, however, that if fmc-write-eeprom is
-still loaded, the system will associate it to the new device, which
-will be reprogrammed and thus will be unloaded after two seconds. The
-following example removes the module after it reflashed fakedev the
-first time.
-
- spusa.root# insmod fmc-fakedev.ko
- [ 72.984733] fake-fmc: Manufacturer: fake-vendor
- [ 72.989434] fake-fmc: Product name: fake-design-for-testing
- spusa.root# insmod fmc-write-eeprom.ko busid=0 file=fdelay-eeprom.bin; \
- rmmod fmc-write-eeprom
- [ 130.874098] fake-fmc: Matching a generic driver (no ID)
- [ 130.887845] fake-fmc: programming 6155 bytes
- [ 130.894567] fake-fmc: write_eeprom: success
- [ 132.895794] fake-fmc: Manufacturer: CERN
- [ 132.899872] fake-fmc: Product name: FmcDelay1ns4cha
-
-
-Accessing the EEPROM
-=====================
-
-The bus creates a sysfs binary file called eeprom for each mezzanine it
-knows about:
-
- spusa.root# cd /sys/bus/fmc/devices; ls -l */eeprom
- -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcAdc100m14b4cha-0800/eeprom
- -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDelay1ns4cha-0200/eeprom
- -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDio5cha-0400/eeprom
-
-Everybody can read the files and the superuser can also modify them, but
-the operation may fail depending on the carrier driver, if the carrier is
-unable to access the I2C bus. For example, the spec driver can access the bus
-only with its golden gateware: after a mezzanine driver reprogrammed
-the FPGA with a custom circuit, the carrier is unable to access the
-EEPROM and returns ENOTSUPP.
-
-An alternative way to write the EEPROM is the mezzanine driver
-fmc-write-eeprom (See *note fmc-write-eeprom::), but the procedure is
-more complex.
diff --git a/Documentation/fmc/identifiers.txt b/Documentation/fmc/identifiers.txt
deleted file mode 100644
index 3bb577ff0d52..000000000000
--- a/Documentation/fmc/identifiers.txt
+++ /dev/null
@@ -1,168 +0,0 @@
-FMC Identification
-******************
-
-The FMC standard requires every compliant mezzanine to carry
-identification information in an I2C EEPROM. The information must be
-laid out according to the "IPMI Platform Management FRU Information",
-where IPMI is a lie I'd better not expand, and FRU means "Field
-Replaceable Unit".
-
-The FRU information is an intricate unreadable binary blob that must
-live at offset 0 of the EEPROM, and typically extends for a few hundred
-bytes. The standard allows the application to use all the remaining
-storage area of the EEPROM as it wants.
-
-This chapter explains how to create your own EEPROM image and how to
-write it in your mezzanine, as well as how devices and drivers are
-paired at run time. EEPROM programming uses tools that are part of this
-package and SDB (part of the fpga-config-space package).
-
-The first sections are only interesting for manufacturers who need to
-write the EEPROM. If you are just a software developer writing an FMC
-device or driver, you may jump straight to *note SDB Support::.
-
-
-Building the FRU Structure
-==========================
-
-If you want to know the internals of the FRU structure and despair, you
-can retrieve the document from
-`http://download.intel.com/design/servers/ipmi/FRU1011.pdf' . The
-standard is awful and difficult without reason, so we only support the
-minimum mandatory subset - we create a simple structure and parse it
-back at run time, but we are not able to either generate or parse more
-arcane features like non-english languages and 6-bit text. If you need
-more items of the FRU standard for your boards, please submit patches.
-
-This package includes the Python script that Matthieu Cattin wrote to
-generate the FRU binary blob, based on a helper library, libipmi, by
-Manohar Vanga and Matthieu himself. I changed the test script to
-receive parameters from the command line or from the environment (the
-command line takes precedence).
-
-To make a long story short, in order to build a standard-compliant
-binary file to be burned in your EEPROM, you need the following items:
-
- Environment Opt Official Name Default
----------------------------------------------------------------------
- FRU_VENDOR -v "Board Manufacturer" fmc-example
- FRU_NAME -n "Board Product Name" mezzanine
- FRU_SERIAL -s "Board Serial Number" 0001
- FRU_PART -p "Board Part Number" sample-part
- FRU_OUTPUT -o not applicable /dev/stdout
-
-The "Official Name" above is what you find in the FRU official
-documentation, chapter 11, page 7 ("Board Info Area Format"). The
-output option is used to save the generated binary to a specific file
-name instead of stdout.
-
-You can pass the items to the FRU generator either in the environment
-or on the command line. This package currently has no support for
-specifying power consumption or similar data, but I plan to add it as
-soon as I find some time for that.
-
-FIXME: consumption etc for FRU are here or in PTS?
-
-The following example creates a binary image for a specific board:
-
- ./tools/fru-generator -v CERN -n FmcAdc100m14b4cha \
- -s HCCFFIA___-CR000003 -p EDA-02063-V5-0 > eeprom.bin
-
-The following example shows a script that builds several binary EEPROM
-images for a series of boards, changing the serial number for each of
-them. The script uses a mix of environment variables and command line
-options, and uses the same string patterns shown above.
-
- #!/bin/sh
-
- export FRU_VENDOR="CERN"
- export FRU_NAME="FmcAdc100m14b4cha"
- export FRU_PART="EDA-02063-V5-0"
-
- serial="HCCFFIA___-CR"
-
- for number in $(seq 1 50); do
- # build number-string "ns"
- ns="$(printf %06d $number)"
- ./fru-generator -s "${serial}${ns}" > eeprom-${ns}.bin
- done
-
-
-Using SDB-FS in the EEPROM
-==========================
-
-If you want to use SDB as a filesystem in the EEPROM device within the
-mezzanine, you should create one such filesystem using gensdbfs, from
-the fpga-config-space package on OHWR.
-
-By using an SDB filesystem you can cluster several files in a single
-EEPROM, so both the host system and a soft-core running in the FPGA (if
-any) can access extra production-time information.
-
-We chose to use SDB as a storage filesystem because the format is very
-simple, and both the host system and the soft-core will likely already
-include support code for such format. The SDB library offered by the
-fpga-config-space is less than 1kB under LM32, so it proves quite up to
-the task.
-
-The SDB entry point (which acts as a directory listing) cannot live at
-offset zero in the flash device, because the FRU information must live
-there. To avoid wasting precious storage space while still allowing
-for more-than-minimal FRU structures, fmc.ko will look for the SDB
-record at addresses 256, 512 and 1024.
-
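-For illustration, the lookup can be modelled by a few lines of C (a
-sketch only; fmc.ko performs the equivalent check internally, and the
-function name is made up). The magic bytes are the ASCII string
-"SDB-" that opens every SDB record:
-
-	#include <string.h>
-
-	/* Return the SDB entry point offset, or -1 if none is found */
-	static int sdb_find_entry(const unsigned char *eeprom)
-	{
-		/* 256, 512 and 1024: the offsets listed above */
-		static const int offsets[] = { 0x100, 0x200, 0x400 };
-		int i;
-
-		for (i = 0; i < 3; i++)
-			if (!memcmp(eeprom + offsets[i], "SDB-", 4))
-				return offsets[i];
-		return -1;
-	}
-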
-In order to generate the complete EEPROM image you'll need a
-configuration file for gensdbfs: you tell the program where to place
-the sdb entry point, and you must force the FRU data file to be placed
-at the beginning of the storage device. If needed, you can also place
-other files at a special offset (we sometimes do it for backward
-compatibility with drivers we wrote before implementing SDB for flash
-memory).
-
-The directory tools/sdbfs of this package includes a well-commented
-example that you may want to use as a starting point (the comments are
-in the file called --SDB-CONFIG--). Reading the documentation for
-gensdbfs is a suggested first step anyway.
-
-This package (generic FMC bus support) only accesses two files in the
-EEPROM: the FRU information, at offset zero, with a suggested filename
-of IPMI-FRU, and the short name for the mezzanine, in a file called
-name. The IPMI-FRU name is not mandatory, but a strongly suggested
-choice; the name filename is mandatory, because this is the preferred
-short name used by the FMC core. For example, a name of "fdelay" may
-supplement a Product Name like "FmcDelay1ns4cha" - exactly as
-demonstrated in `tools/sdbfs'.
-
-Note: SDB access to flash memory is not yet supported, so the short
-name currently in use is just the "Product Name" FRU string.
-
-The example in tools/sdbfs includes an extra file that is needed by
-the fine-delay driver and must live at the known address 0x1800. By
-running gensdbfs on that directory you can output your binary EEPROM
-image (here below spusa$ is the shell prompt):
-
- spusa$ ../fru-generator -v CERN -n FmcDelay1ns4cha -s proto-0 \
- -p EDA-02267-V3 > IPMI-FRU
- spusa$ ls -l
- total 16
- -rw-rw-r-- 1 rubini staff 975 Nov 19 18:08 --SDB-CONFIG--
- -rw-rw-r-- 1 rubini staff 216 Nov 19 18:13 IPMI-FRU
- -rw-rw-r-- 1 rubini staff 11 Nov 19 18:04 fd-calib
- -rw-rw-r-- 1 rubini staff 7 Nov 19 18:04 name
- spusa$ sudo gensdbfs . /lib/firmware/fdelay-eeprom.bin
- spusa$ sdb-read -l -e 0x100 /lib/firmware/fdelay-eeprom.bin
- /home/rubini/wip/sdbfs/userspace/sdb-read: listing format is to be defined
- 46696c6544617461:2e202020 00000100-000018ff .
- 46696c6544617461:6e616d65 00000200-00000206 name
- 46696c6544617461:66642d63 00001800-000018ff fd-calib
- 46696c6544617461:49504d49 00000000-000000d7 IPMI-FRU
- spusa$ ../fru-dump /lib/firmware/fdelay-eeprom.bin
- /lib/firmware/fdelay-eeprom.bin: manufacturer: CERN
- /lib/firmware/fdelay-eeprom.bin: product-name: FmcDelay1ns4cha
- /lib/firmware/fdelay-eeprom.bin: serial-number: proto-0
- /lib/firmware/fdelay-eeprom.bin: part-number: EDA-02267-V3
-
-As expected, the output file is both a proper sdbfs object and an IPMI
-FRU information blob. The fd-calib file lives at offset 0x1800 and is
-over-allocated to 256 bytes, according to the configuration file for
-gensdbfs.
diff --git a/Documentation/fmc/mezzanine.txt b/Documentation/fmc/mezzanine.txt
deleted file mode 100644
index 87910dbfc91e..000000000000
--- a/Documentation/fmc/mezzanine.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-FMC Driver
-**********
-
-An FMC driver is concerned with the specific mezzanine and associated
-gateware. As such, it is expected to be independent of the carrier
-being used: it will perform I/O accesses only by means of
-carrier-provided functions.
-
-The matching between device and driver is based on the content of the
-EEPROM (as mandated by the FMC standard) or by the actual cores
-configured in the FPGA; the latter technique is used when the FPGA is
-already programmed when the device is registered to the bus core.
-
-In some special cases it is possible for a driver to directly access
-FPGA registers, by means of the `fpga_base' field of the device
-structure. This may be needed for high-bandwidth peripherals like fast
-ADC cards. If the device module registered a remote device (for example
-by means of Etherbone), the `fpga_base' pointer will be NULL.
-Therefore, drivers must be ready to deal with NULL base pointers, and
-fail gracefully. Most drivers, however, are not expected to access the
-pointer directly but to use fmc_readl and fmc_writel instead, which
-will work in any case.
-
-In even more special cases, the driver may access carrier-specific
-functionality: the `carrier_name' string allows the driver to check
-which is the current carrier and make use of the `carrier_data'
-pointer. We chose to use carrier names rather than numeric identifiers
-for greater flexibility, but also to avoid a central registry within
-the `fmc.h' file - we hope other users will exploit our framework with
-their own carriers. An example use of carrier names is in GPIO setup
-(see *note The GPIO Abstraction::), although the name match is not
-expected to be performed by the driver. If you depend on specific
-carriers, please check the carrier name and fail gracefully if your
-driver finds it is running in a yet-unknown-to-it environment.
-
-
-ID Table
-========
-
-Like most other Linux drivers, an FMC driver must list all the devices
-which it is able to drive. This is usually done by means of a device
-table, but in FMC we can match hardware based either on the contents of
-their EEPROM or on the actual FPGA cores that can be enumerated.
-Therefore, we have two tables of identifiers.
-
-Matching of FRU information depends on two names, the manufacturer (or
-vendor) and the device (see *note FMC Identification::); for
-flexibility during production (i.e. before writing to the EEPROM) the
-bus supports a catch-all driver that specifies NULL strings. For this
-reason, the table is specified as pointer-and-length, not as a
-null-terminated array - the entry with NULL names can be a valid entry.
-
-Matching on FPGA cores depends on two numeric fields: the 64-bit vendor
-number and the 32-bit device number. Support for matching based on
-class is not yet implemented. Each device is expected to be uniquely
-identified by an array of cores (it matches if all of the cores are
-instantiated), and for consistency the list is passed as
-pointer-and-length. Several similar devices can be driven by the same
-driver, and thus the driver specifies an array of such arrays.
-
-The complete set of involved data structures is thus the following:
-
- struct fmc_fru_id { char *manufacturer; char *product_name; };
- struct fmc_sdb_one_id { uint64_t vendor; uint32_t device; };
- struct fmc_sdb_id { struct fmc_sdb_one_id *cores; int cores_nr; };
-
- struct fmc_device_id {
- struct fmc_fru_id *fru_id; int fru_id_nr;
- struct fmc_sdb_id *sdb_id; int sdb_id_nr;
- };
-
-A better reference, with full explanation, is the <linux/fmc.h> header.
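-
-For illustration, a hypothetical driver matching one mezzanine by its
-FRU strings could fill these structures as follows (the identifiers
-are invented for the example, reusing names shown elsewhere in this
-documentation):
-
-	static struct fmc_fru_id my_fru_id[] = {
-		{
-			.manufacturer = "CERN",
-			.product_name = "FmcDelay1ns4cha",
-		},
-	};
-
-	static struct fmc_device_id my_id_table = {
-		.fru_id = my_fru_id,
-		.fru_id_nr = ARRAY_SIZE(my_fru_id),
-	};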
-
-
-Module Parameters
-=================
-
-Most of the FMC drivers need the same set of kernel parameters. This
-package includes support to implement common parameters by means of
-fields in the `fmc_driver' structure and simple macro definitions.
-
-The parameters are carrier-specific, in that they rely on the busid
-concept, which varies among carriers. For the SPEC, the identifier is a
-PCI bus and devfn number, 16 bits wide in total; drivers for other
-carriers will most likely offer something similar but not identical,
-and some code duplication is unavoidable.
-
-This is the list of parameters that are common to several modules; to
-see how they are actually used, please look at spec-trivial.c.
-
-`busid='
- This is an array of integers, listing carrier-specific
- identification numbers. For PCI, for example, `0x0400' represents
- bus 4, slot 0. If any such ID is specified, the driver will only
- accept to drive cards that appear in the list (even if the FMC ID
- matches). This is accomplished by the validate carrier method.
-
-`gateware='
- The argument is an array of strings. If no busid= is specified,
- the first string of gateware= is used for all cards; otherwise the
- identifiers and gateware names are paired one by one, in the order
- specified.
-
-`show_sdb='
- For modules supporting it, this parameter asks to show the SDB
- internal structure by means of kernel messages. It is disabled by
- default because those lines tend to hide more important messages,
- if you look at the system console while loading the drivers.
- Note: the parameter is being obsoleted, because fmc.ko itself now
- supports dump_sdb= that applies to every client driver.
-
-
-For example, if you are using the trivial driver to load two different
-gateware files to two different cards, you can use the following
-parameters, after looking up the PCI identifiers. This has been tested
-with a SPEC carrier.
-
- insmod fmc-trivial.ko \
- busid=0x0200,0x0400 \
- gateware=fmc/fine-delay.bin,fmc/simple-dio.bin
-
-Please note that not all sub-modules support all of those parameters.
-You can use modinfo to check what is supported by each module.
diff --git a/Documentation/fmc/parameters.txt b/Documentation/fmc/parameters.txt
deleted file mode 100644
index 59edf088e3a4..000000000000
--- a/Documentation/fmc/parameters.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Module Parameters in fmc.ko
-***************************
-
-The core driver receives two module parameters, meant to help debugging
-client modules. Both parameters can be modified by writing to
-/sys/module/fmc/parameters/, because they are used when client drivers
-and devices are registered, not when fmc.ko is loaded.
-
-`dump_eeprom='
- If not zero, the parameter asks the bus controller to dump the
- EEPROM of any device that is registered, using printk.
-
-`dump_sdb='
- If not zero, the parameter prints the SDB tree of every FPGA whose
- gateware is loaded by fmc_reprogram(). If greater than one, it asks to dump
- the binary content of SDB records. This currently only dumps the
- top-level SDB array, though.
-
-
-EEPROM dumping avoids repeating lines, since most of the content is
-usually empty, with all bits either one or zero. This is an example of the
-output:
-
- [ 6625.850480] spec 0000:02:00.0: FPGA programming successful
- [ 6626.139949] spec 0000:02:00.0: Manufacturer: CERN
- [ 6626.144666] spec 0000:02:00.0: Product name: FmcDelay1ns4cha
- [ 6626.150370] FMC: mezzanine 0: 0000:02:00.0 on SPEC
- [ 6626.155179] FMC: dumping eeprom 0x2000 (8192) bytes
- [ 6626.160087] 0000: 01 00 00 01 00 0b 00 f3 01 0a 00 a5 85 87 c4 43
- [ 6626.167069] 0010: 45 52 4e cf 46 6d 63 44 65 6c 61 79 31 6e 73 34
- [ 6626.174019] 0020: 63 68 61 c7 70 72 6f 74 6f 2d 30 cc 45 44 41 2d
- [ 6626.180975] 0030: 30 32 32 36 37 2d 56 33 da 32 30 31 32 2d 31 31
- [...]
- [ 6626.371366] 0200: 66 64 65 6c 61 79 0a 00 00 00 00 00 00 00 00 00
- [ 6626.378359] 0210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
- [ 6626.385361] [...]
- [ 6626.387308] 1800: 70 6c 61 63 65 68 6f 6c 64 65 72 ff ff ff ff ff
- [ 6626.394259] 1810: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
- [ 6626.401250] [...]
-
-The dump of SDB looks like the following; the example shows the simple
-golden gateware for the SPEC card, removing the leading timestamps to
-fit the page:
-
- spec 0000:02:00.0: SDB: 00000651:e6a542c9 WB4-Crossbar-GSI
- spec 0000:02:00.0: SDB: 0000ce42:ff07fc47 WR-Periph-Syscon (00000000-000000ff)
- FMC: mezzanine 0: 0000:02:00.0 on SPEC
- FMC: poor dump of sdb first level:
- 0000: 53 44 42 2d 00 02 01 00 00 00 00 00 00 00 00 00
- 0010: 00 00 00 00 00 00 01 ff 00 00 00 00 00 00 06 51
- 0020: e6 a5 42 c9 00 00 00 02 20 12 05 11 57 42 34 2d
- 0030: 43 72 6f 73 73 62 61 72 2d 47 53 49 20 20 20 00
- 0040: 00 00 01 01 00 00 00 07 00 00 00 00 00 00 00 00
- 0050: 00 00 00 00 00 00 00 ff 00 00 00 00 00 00 ce 42
- 0060: ff 07 fc 47 00 00 00 01 20 12 03 05 57 52 2d 50
- 0070: 65 72 69 70 68 2d 53 79 73 63 6f 6e 20 20 20 01
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 6cced88de6da..75ef063622d2 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -679,7 +679,7 @@ status as "unknown". The available commands are:
sysfs notes:
The ThinkLight sysfs interface is documented by the LED class
-documentation, in Documentation/leds/leds-class.txt. The ThinkLight LED name
+documentation, in Documentation/leds/leds-class.rst. The ThinkLight LED name
is "tpacpi::thinklight".
Due to limitations in the sysfs LED class, if the status of the ThinkLight
@@ -779,7 +779,7 @@ All of the above can be turned on and off and can be made to blink.
sysfs notes:
The ThinkPad LED sysfs interface is described in detail by the LED class
-documentation, in Documentation/leds/leds-class.txt.
+documentation, in Documentation/leds/leds-class.rst.
The LEDs are named (in LED ID order, from 0 to 12):
"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
diff --git a/Documentation/leds/index.rst b/Documentation/leds/index.rst
new file mode 100644
index 000000000000..9885f7c1b75d
--- /dev/null
+++ b/Documentation/leds/index.rst
@@ -0,0 +1,25 @@
+:orphan:
+
+====
+LEDs
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ leds-class
+ leds-class-flash
+ ledtrig-oneshot
+ ledtrig-transient
+ ledtrig-usbport
+
+ uleds
+
+ leds-blinkm
+ leds-lm3556
+ leds-lp3944
+ leds-lp5521
+ leds-lp5523
+ leds-lp5562
+ leds-lp55xx
+ leds-mlxcpld
diff --git a/Documentation/leds/leds-blinkm.txt b/Documentation/leds/leds-blinkm.rst
index 9dd92f4cf4e1..c74b5bc877b1 100644
--- a/Documentation/leds/leds-blinkm.txt
+++ b/Documentation/leds/leds-blinkm.rst
@@ -1,3 +1,7 @@
+==================
+Leds BlinkM driver
+==================
+
The leds-blinkm driver supports the devices of the BlinkM family.
They are RGB-LED modules driven by a (AT)tiny microcontroller and
@@ -14,35 +18,36 @@ The interface this driver provides is 2-fold:
a) LED class interface for use with triggers
############################################
-The registration follows the scheme:
-blinkm-<i2c-bus-nr>-<i2c-device-nr>-<color>
+The registration follows the scheme::
+
+ blinkm-<i2c-bus-nr>-<i2c-device-nr>-<color>
-$ ls -h /sys/class/leds/blinkm-6-*
-/sys/class/leds/blinkm-6-9-blue:
-brightness device max_brightness power subsystem trigger uevent
+ $ ls -h /sys/class/leds/blinkm-6-*
+ /sys/class/leds/blinkm-6-9-blue:
+ brightness device max_brightness power subsystem trigger uevent
-/sys/class/leds/blinkm-6-9-green:
-brightness device max_brightness power subsystem trigger uevent
+ /sys/class/leds/blinkm-6-9-green:
+ brightness device max_brightness power subsystem trigger uevent
-/sys/class/leds/blinkm-6-9-red:
-brightness device max_brightness power subsystem trigger uevent
+ /sys/class/leds/blinkm-6-9-red:
+ brightness device max_brightness power subsystem trigger uevent
(the same appears under /sys/bus/i2c/devices/6-0009/leds)
We can control the red, green and blue colors separately and
assign triggers to each color.
-E.g.:
+E.g.::
-$ cat blinkm-6-9-blue/brightness
-05
+ $ cat blinkm-6-9-blue/brightness
+ 05
-$ echo 200 > blinkm-6-9-blue/brightness
-$
+ $ echo 200 > blinkm-6-9-blue/brightness
+ $
-$ modprobe ledtrig-heartbeat
-$ echo heartbeat > blinkm-6-9-green/trigger
-$
+ $ modprobe ledtrig-heartbeat
+ $ echo heartbeat > blinkm-6-9-green/trigger
+ $
b) Sysfs group to control rgb, fade, hsb, scripts ...
@@ -52,29 +57,28 @@ This extended interface is available as folder blinkm
in the sysfs folder of the I2C device.
E.g. below /sys/bus/i2c/devices/6-0009/blinkm
-$ ls -h /sys/bus/i2c/devices/6-0009/blinkm/
-blue green red test
+ $ ls -h /sys/bus/i2c/devices/6-0009/blinkm/
+ blue green red test
Currently supported is just setting red, green and blue,
plus a test sequence.
-E.g.:
+E.g.::
-$ cat *
-00
-00
-00
-#Write into test to start test sequence!#
+ $ cat *
+ 00
+ 00
+ 00
+ #Write into test to start test sequence!#
-$ echo 1 > test
-$
+ $ echo 1 > test
+ $
-$ echo 255 > red
-$
+ $ echo 255 > red
+ $
as of 6/2012
dl9pf <at> gmx <dot> de
-
diff --git a/Documentation/leds/leds-class-flash.txt b/Documentation/leds/leds-class-flash.rst
index 8da3c6f4b60b..6ec12c5a1a0e 100644
--- a/Documentation/leds/leds-class-flash.txt
+++ b/Documentation/leds/leds-class-flash.rst
@@ -1,9 +1,9 @@
-
+==============================
Flash LED handling under Linux
==============================
Some LED devices provide two modes - torch and flash. In the LED subsystem
-those modes are supported by LED class (see Documentation/leds/leds-class.txt)
+those modes are supported by LED class (see Documentation/leds/leds-class.rst)
and LED Flash class respectively. The torch mode related features are enabled
by default and the flash ones only if a driver declares it by setting the
LED_DEV_CAP_FLASH flag.
@@ -14,6 +14,7 @@ registered in the LED subsystem with led_classdev_flash_register function.
Following sysfs attributes are exposed for controlling flash LED devices:
(see Documentation/ABI/testing/sysfs-class-led-flash)
+
- flash_brightness
- max_flash_brightness
- flash_timeout
@@ -31,30 +32,46 @@ be defined in the kernel config.
The driver must call the v4l2_flash_init function to get registered in the
V4L2 subsystem. The function takes six arguments:
-- dev : flash device, e.g. an I2C device
-- of_node : of_node of the LED, may be NULL if the same as device's
-- fled_cdev : LED flash class device to wrap
-- iled_cdev : LED flash class device representing indicator LED associated with
- fled_cdev, may be NULL
-- ops : V4L2 specific ops
- * external_strobe_set - defines the source of the flash LED strobe -
+
+- dev:
+ flash device, e.g. an I2C device
+- of_node:
+ of_node of the LED, may be NULL if the same as device's
+- fled_cdev:
+ LED flash class device to wrap
+- iled_cdev:
+ LED flash class device representing indicator LED associated with
+ fled_cdev, may be NULL
+- ops:
+ V4L2 specific ops
+
+ * external_strobe_set
+ defines the source of the flash LED strobe -
V4L2_CID_FLASH_STROBE control or external source, typically
a sensor, which makes it possible to synchronise the flash
strobe start with exposure start,
- * intensity_to_led_brightness and led_brightness_to_intensity - perform
+ * intensity_to_led_brightness and led_brightness_to_intensity
+ perform
enum led_brightness <-> V4L2 intensity conversion in a device
specific manner - they can be used for devices with non-linear
LED current scale.
-- config : configuration for V4L2 Flash sub-device
- * dev_name - the name of the media entity, unique in the system,
- * flash_faults - bitmask of flash faults that the LED flash class
+- config:
+ configuration for V4L2 Flash sub-device
+
+ * dev_name
+ the name of the media entity, unique in the system,
+ * flash_faults
+ bitmask of flash faults that the LED flash class
device can report; corresponding LED_FAULT* bit definitions are
available in <linux/led-class-flash.h>,
- * torch_intensity - constraints for the LED in TORCH mode
+ * torch_intensity
+ constraints for the LED in TORCH mode
in microamperes,
- * indicator_intensity - constraints for the indicator LED
+ * indicator_intensity
+ constraints for the indicator LED
in microamperes,
- * has_external_strobe - determines whether the flash strobe source
+ * has_external_strobe
+ determines whether the flash strobe source
can be switched to external,
On remove the v4l2_flash_release function has to be called, which takes one
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.rst
index 8b39cc6b03ee..df0120a1ee3c 100644
--- a/Documentation/leds/leds-class.txt
+++ b/Documentation/leds/leds-class.rst
@@ -1,4 +1,4 @@
-
+========================
LED handling under Linux
========================
@@ -43,7 +43,7 @@ LED Device Naming
Is currently of the form:
-"devicename:colour:function"
+ "devicename:colour:function"
There have been calls for LED properties such as colour to be exported as
individual led class attributes. As a solution which doesn't incur as much
@@ -57,9 +57,12 @@ Brightness setting API
LED subsystem core exposes the following API for setting brightness:
- - led_set_brightness : it is guaranteed not to sleep, passing LED_OFF stops
+ - led_set_brightness:
+ it is guaranteed not to sleep, passing LED_OFF stops
blinking,
- - led_set_brightness_sync : for use cases when immediate effect is desired -
+
+ - led_set_brightness_sync:
+ for use cases when immediate effect is desired -
it can block the caller for the time required for accessing
device registers and can sleep, passing LED_OFF stops hardware
blinking, returns -EBUSY if software blink fallback is enabled.
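+
+A minimal usage sketch, assuming `led_cdev` points to a registered
+struct led_classdev (the two calls are the APIs just listed)::
+
+	/* safe in atomic context: guaranteed not to sleep */
+	led_set_brightness(led_cdev, LED_FULL);
+
+	/* may sleep; use where an immediate effect is required */
+	ret = led_set_brightness_sync(led_cdev, LED_OFF);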
@@ -70,7 +73,7 @@ LED registration API
A driver wanting to register a LED classdev for use by other drivers /
userspace needs to allocate and fill a led_classdev struct and then call
-[devm_]led_classdev_register. If the non devm version is used the driver
+`[devm_]led_classdev_register`. If the non devm version is used the driver
must call led_classdev_unregister from its remove function before
free-ing the led_classdev struct.
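+
+A hedged sketch of this pattern (the driver and callback names are
+invented for illustration)::
+
+	static void my_led_set(struct led_classdev *cdev,
+			       enum led_brightness value)
+	{
+		/* write `value' to the hardware */
+	}
+
+	static struct led_classdev my_led = {
+		.name		= "myboard:green:status",
+		.max_brightness	= LED_FULL,
+		.brightness_set	= my_led_set,
+	};
+
+	/* from probe(); the devm variant needs no explicit unregister */
+	ret = devm_led_classdev_register(dev, &my_led);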
@@ -94,7 +97,7 @@ with brightness value LED_OFF, which should stop any software
timers that may have been required for blinking.
The blink_set() function should choose a user friendly blinking value
-if it is called with *delay_on==0 && *delay_off==0 parameters. In this
+if it is called with `*delay_on==0` && `*delay_off==0` parameters. In this
case the driver should give back the chosen value through delay_on and
delay_off parameters to the leds subsystem.
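+
+For example, a driver might honour this convention as follows (a
+sketch; the 500 ms default is arbitrary)::
+
+	static int my_blink_set(struct led_classdev *cdev,
+				unsigned long *delay_on,
+				unsigned long *delay_off)
+	{
+		if (*delay_on == 0 && *delay_off == 0) {
+			/* report the chosen values back through the pointers */
+			*delay_on = 500;
+			*delay_off = 500;
+		}
+		/* program the hardware blink registers here */
+		return 0;
+	}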
diff --git a/Documentation/leds/leds-lm3556.txt b/Documentation/leds/leds-lm3556.rst
index 62278e871b50..1ef17d7d800e 100644
--- a/Documentation/leds/leds-lm3556.txt
+++ b/Documentation/leds/leds-lm3556.rst
@@ -1,68 +1,118 @@
+========================
Kernel driver for lm3556
========================
-*Texas Instrument:
- 1.5 A Synchronous Boost LED Flash Driver w/ High-Side Current Source
+* Texas Instrument:
+ 1.5 A Synchronous Boost LED Flash Driver w/ High-Side Current Source
* Datasheet: http://www.national.com/ds/LM/LM3556.pdf
Authors:
- Daniel Jeong
+ - Daniel Jeong
+
Contact:Daniel Jeong(daniel.jeong-at-ti.com, gshark.jeong-at-gmail.com)
Description
-----------
There are 3 functions in LM3556, Flash, Torch and Indicator.
-FLASH MODE
+Flash Mode
+^^^^^^^^^^
+
In Flash Mode, the LED current source (LED) provides 16 target current
levels from 93.75 mA to 1500 mA. The Flash currents are adjusted via the
CURRENT CONTROL REGISTER (0x09). Flash mode is activated by the ENABLE
REGISTER (0x0A), or by pulling the STROBE pin HIGH.
+
LM3556 Flash can be controlled through the /sys/class/leds/flash/brightness file.
+
* if the STROBE pin is enabled, the examples below control brightness only, and
-ON / OFF will be controlled by STROBE pin.
+ ON / OFF will be controlled by the STROBE pin.
Flash Example:
-OFF : #echo 0 > sys/class/leds/flash/brightness
-93.75 mA: #echo 1 > sys/class/leds/flash/brightness
-... .....
-1500 mA: #echo 16 > sys/class/leds/flash/brightness
-TORCH MODE
+OFF::
+
+ #echo 0 > sys/class/leds/flash/brightness
+
+93.75 mA::
+
+ #echo 1 > sys/class/leds/flash/brightness
+
+...
+
+1500 mA::
+
+ #echo 16 > sys/class/leds/flash/brightness
+
+Torch Mode
+^^^^^^^^^^
+
In Torch Mode, the current source (LED) is programmed via the CURRENT
CONTROL REGISTER (0x09). Torch Mode is activated by the ENABLE REGISTER
(0x0A) or by the hardware TORCH input.
+
LM3556 torch can be controlled through the /sys/class/leds/torch/brightness file.
* if the TORCH pin is enabled, the examples below control brightness only,
and ON / OFF will be controlled by the TORCH pin.
Torch Example:
-OFF : #echo 0 > sys/class/leds/torch/brightness
-46.88 mA: #echo 1 > sys/class/leds/torch/brightness
-... .....
-375 mA : #echo 8 > sys/class/leds/torch/brightness
-INDICATOR MODE
+OFF::
+
+ #echo 0 > sys/class/leds/torch/brightness
+
+46.88 mA::
+
+ #echo 1 > sys/class/leds/torch/brightness
+
+...
+
+375 mA::
+
+ #echo 8 > sys/class/leds/torch/brightness
+
+Indicator Mode
+^^^^^^^^^^^^^^
+
Indicator pattern can be set through the /sys/class/leds/indicator/pattern file,
and 4 patterns are pre-defined in the indicator_pattern array.
+
According to the N-Blank, Pulse time and N Period values, different
patterns will be generated. If you want new patterns for your own device,
change the indicator_pattern array with your own values and INDIC_PATTERN_SIZE.
+
Please refer to the datasheet for more details about N-Blank, Pulse time and N Period.
Indicator pattern example:
-pattern 0: #echo 0 > sys/class/leds/indicator/pattern
-....
-pattern 3: #echo 3 > sys/class/leds/indicator/pattern
+
+pattern 0::
+
+ #echo 0 > sys/class/leds/indicator/pattern
+
+...
+
+pattern 3::
+
+ #echo 3 > sys/class/leds/indicator/pattern
Indicator brightness can be controlled through the
/sys/class/leds/indicator/brightness file.
Example:
-OFF : #echo 0 > sys/class/leds/indicator/brightness
-5.86 mA : #echo 1 > sys/class/leds/indicator/brightness
-........
-46.875mA : #echo 8 > sys/class/leds/indicator/brightness
+
+OFF::
+
+ #echo 0 > sys/class/leds/indicator/brightness
+
+5.86 mA::
+
+ #echo 1 > sys/class/leds/indicator/brightness
+
+...
+
+46.875mA::
+
+ #echo 8 > sys/class/leds/indicator/brightness
Notes
-----
@@ -70,7 +120,8 @@ Driver expects it is registered using the i2c_board_info mechanism.
To register the chip at address 0x63 on a specific adapter, set the platform data
according to include/linux/platform_data/leds-lm3556.h and set the i2c board info
-Example:
+Example::
+
static struct i2c_board_info board_i2c_ch4[] __initdata = {
{
I2C_BOARD_INFO(LM3556_NAME, 0x63),
@@ -80,6 +131,7 @@ Example:
and register it in the platform init function
-Example:
+Example::
+
board_register_i2c_bus(4, 400,
board_i2c_ch4, ARRAY_SIZE(board_i2c_ch4));
diff --git a/Documentation/leds/leds-lp3944.txt b/Documentation/leds/leds-lp3944.rst
index e88ac3b60c08..c2f87dc1a3a9 100644
--- a/Documentation/leds/leds-lp3944.txt
+++ b/Documentation/leds/leds-lp3944.rst
@@ -1,14 +1,20 @@
+====================
Kernel driver lp3944
====================
* National Semiconductor LP3944 Fun-light Chip
+
Prefix: 'lp3944'
+
Addresses scanned: None (see the Notes section below)
- Datasheet: Publicly available at the National Semiconductor website
- http://www.national.com/pf/LP/LP3944.html
+
+ Datasheet:
+
+ Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LP/LP3944.html
Authors:
- Antonio Ospite <ospite@studenti.unina.it>
+ Antonio Ospite <ospite@studenti.unina.it>
Description
@@ -19,8 +25,11 @@ is used as a led controller.
The DIM modes are used to set _blink_ patterns for leds, the pattern is
specified supplying two parameters:
- - period: from 0s to 1.6s
- - duty cycle: percentage of the period the led is on, from 0 to 100
+
+ - period:
+ from 0s to 1.6s
+ - duty cycle:
+ percentage of the period the led is on, from 0 to 100
Setting a led in DIM0 or DIM1 mode makes it blink according to the pattern.
See the datasheet for details.
@@ -35,7 +44,7 @@ The chip is used mainly in embedded contexts, so this driver expects it is
registered using the i2c_board_info mechanism.
To register the chip at address 0x60 on adapter 0, set the platform data
-according to include/linux/leds-lp3944.h, set the i2c board info:
+according to include/linux/leds-lp3944.h, set the i2c board info::
static struct i2c_board_info a910_i2c_board_info[] __initdata = {
{
@@ -44,7 +53,7 @@ according to include/linux/leds-lp3944.h, set the i2c board info:
},
};
-and register it in the platform init function
+and register it in the platform init function::
i2c_register_board_info(0, a910_i2c_board_info,
ARRAY_SIZE(a910_i2c_board_info));
diff --git a/Documentation/leds/leds-lp5521.rst b/Documentation/leds/leds-lp5521.rst
new file mode 100644
index 000000000000..0432615b083d
--- /dev/null
+++ b/Documentation/leds/leds-lp5521.rst
@@ -0,0 +1,115 @@
+========================
+Kernel driver for lp5521
+========================
+
+* National Semiconductor LP5521 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5521.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+
+LP5521 can drive up to 3 channels. Leds can be controlled directly via
+the led class control interface. Channels have generic names:
+lp5521:channelx, where x is 0 .. 2
+
+All three channels can also be controlled using the engine micro programs.
+More details of the instructions can be found in the public data sheet.
+
+LP5521 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode and enginex_load
+ Control interface for the engines:
+
+ x is 1 .. 3
+
+ enginex_mode:
+ disabled, load, run
+ enginex_load:
+ store program (visible only in engine load mode)
+
+ Example (start to blink the channel 2 led)::
+
+ cd /sys/class/leds/lp5521:channel2/device
+ echo "load" > engine3_mode
+ echo "037f4d0003ff6000" > engine3_load
+ echo "run" > engine3_mode
+
+ To stop the engine::
+
+ echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+
+For the details, please refer to the 'firmware' section in leds-lp55xx.rst
+
+sysfs contains a selftest entry.
+
+The test communicates with the chip and checks that
+the clock mode is automatically set to the requested one.
+
+Each channel has its own led current settings.
+
+- /sys/class/leds/lp5521:channel0/led_current - RW
+- /sys/class/leds/lp5521:channel0/max_current - RO
+
+Format: 10x mA, i.e. 10 means 1.0 mA
+
+example platform data::
+
+ static struct lp55xx_led_config lp5521_led_config[] = {
+ {
+ .name = "red",
+ .chan_nr = 0,
+ .led_current = 50,
+ .max_current = 130,
+ }, {
+ .name = "green",
+ .chan_nr = 1,
+ .led_current = 0,
+ .max_current = 130,
+ }, {
+ .name = "blue",
+ .chan_nr = 2,
+ .led_current = 0,
+ .max_current = 130,
+ }
+ };
+
+ static int lp5521_setup(void)
+ {
+ /* setup HW resources */
+ }
+
+ static void lp5521_release(void)
+ {
+ /* Release HW resources */
+ }
+
+ static void lp5521_enable(bool state)
+ {
+ /* Control of chip enable signal */
+ }
+
+ static struct lp55xx_platform_data lp5521_platform_data = {
+ .led_config = lp5521_led_config,
+ .num_channels = ARRAY_SIZE(lp5521_led_config),
+ .clock_mode = LP55XX_CLOCK_EXT,
+ .setup_resources = lp5521_setup,
+ .release_resources = lp5521_release,
+ .enable = lp5521_enable,
+ };
+
+Note:
+ chan_nr can have values between 0 and 2.
+ The name of each channel can be configurable.
+ If the name field is not defined, the default name will be set to 'xxxx:channelN'
+ (xxxx: pdata->label or i2c client name, N: channel number)
+
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt
deleted file mode 100644
index d08d8c179f85..000000000000
--- a/Documentation/leds/leds-lp5521.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-Kernel driver for lp5521
-========================
-
-* National Semiconductor LP5521 led driver chip
-* Datasheet: http://www.national.com/pf/LP/LP5521.html
-
-Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
-Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
-
-Description
------------
-
-LP5521 can drive up to 3 channels. Leds can be controlled directly via
-the led class control interface. Channels have generic names:
-lp5521:channelx, where x is 0 .. 2
-
-All three channels can be also controlled using the engine micro programs.
-More details of the instructions can be found from the public data sheet.
-
-LP5521 has the internal program memory for running various LED patterns.
-There are two ways to run LED patterns.
-
-1) Legacy interface - enginex_mode and enginex_load
- Control interface for the engines:
- x is 1 .. 3
- enginex_mode : disabled, load, run
- enginex_load : store program (visible only in engine load mode)
-
- Example (start to blink the channel 2 led):
- cd /sys/class/leds/lp5521:channel2/device
- echo "load" > engine3_mode
- echo "037f4d0003ff6000" > engine3_load
- echo "run" > engine3_mode
-
- To stop the engine:
- echo "disabled" > engine3_mode
-
-2) Firmware interface - LP55xx common interface
- For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-sysfs contains a selftest entry.
-The test communicates with the chip and checks that
-the clock mode is automatically set to the requested one.
-
-Each channel has its own led current settings.
-/sys/class/leds/lp5521:channel0/led_current - RW
-/sys/class/leds/lp5521:channel0/max_current - RO
-Format: 10x mA i.e 10 means 1.0 mA
-
-example platform data:
-
-Note: chan_nr can have values between 0 and 2.
-The name of each channel can be configurable.
-If the name field is not defined, the default name will be set to 'xxxx:channelN'
-(XXXX : pdata->label or i2c client name, N : channel number)
-
-static struct lp55xx_led_config lp5521_led_config[] = {
- {
- .name = "red",
- .chan_nr = 0,
- .led_current = 50,
- .max_current = 130,
- }, {
- .name = "green",
- .chan_nr = 1,
- .led_current = 0,
- .max_current = 130,
- }, {
- .name = "blue",
- .chan_nr = 2,
- .led_current = 0,
- .max_current = 130,
- }
-};
-
-static int lp5521_setup(void)
-{
- /* setup HW resources */
-}
-
-static void lp5521_release(void)
-{
- /* Release HW resources */
-}
-
-static void lp5521_enable(bool state)
-{
- /* Control of chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5521_platform_data = {
- .led_config = lp5521_led_config,
- .num_channels = ARRAY_SIZE(lp5521_led_config),
- .clock_mode = LP55XX_CLOCK_EXT,
- .setup_resources = lp5521_setup,
- .release_resources = lp5521_release,
- .enable = lp5521_enable,
-};
-
-If the current is set to 0 in the platform data, that channel is
-disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5523.rst b/Documentation/leds/leds-lp5523.rst
new file mode 100644
index 000000000000..7d7362a1dd57
--- /dev/null
+++ b/Documentation/leds/leds-lp5523.rst
@@ -0,0 +1,147 @@
+========================
+Kernel driver for lp5523
+========================
+
+* National Semiconductor LP5523 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5523.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+LP5523 can drive up to 9 channels. Leds can be controlled directly via
+the led class control interface.
+The name of each channel is configurable in the platform data - name and label.
+There are three options to make the channel name.
+
+a) Define the 'name' in the platform data
+
+To make a specific channel name, use the 'name' platform data.
+
+- /sys/class/leds/R1 (name: 'R1')
+- /sys/class/leds/B1 (name: 'B1')
+
+b) Use the 'label' with no 'name' field
+
+For one device name with a channel number, use 'label'.
+
+- /sys/class/leds/RGB:channelN (label: 'RGB', N: 0 ~ 8)
+
+c) Default
+
+If both fields are NULL, 'lp5523' is used by default.
+
+- /sys/class/leds/lp5523:channelN (N: 0 ~ 8)
+
+LP5523 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode, enginex_load and enginex_leds
+
+ Control interface for the engines:
+
+ x is 1 .. 3
+
+ enginex_mode:
+ disabled, load, run
+ enginex_load:
+ microcode load
+ enginex_leds:
+ led mux control
+
+ ::
+
+ cd /sys/class/leds/lp5523:channel2/device
+ echo "load" > engine3_mode
+ echo "9d80400004ff05ff437f0000" > engine3_load
+ echo "111111111" > engine3_leds
+ echo "run" > engine3_mode
+
+ To stop the engine::
+
+ echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+
+For the details, please refer to the 'firmware' section in leds-lp55xx.rst
+
+LP5523 has three master faders. If a channel is mapped to one of
+the master faders, its output is dimmed based on the value of the master
+fader.
+
+For example::
+
+ echo "123000123" > master_fader_leds
+
+creates the following channel-fader mappings::
+
+ channel 0,6 to master_fader1
+ channel 1,7 to master_fader2
+ channel 2,8 to master_fader3
+
+Then, to have 25% of the original output on channel 0,6::
+
+ echo 64 > master_fader1
+
+To have 0% of the original output (i.e. no output) channel 1,7::
+
+ echo 0 > master_fader2
+
+To have 100% of the original output (i.e. no dimming) on channel 2,8::
+
+ echo 255 > master_fader3
+
+To clear all master fader controls::
+
+ echo "000000000" > master_fader_leds
+
+Selftest always uses the current from the platform data.
+
+Each channel contains led current settings.
+
+- /sys/class/leds/lp5523:channel2/led_current - RW
+- /sys/class/leds/lp5523:channel2/max_current - RO
+
+Format: 10x mA, i.e. 10 means 1.0 mA
+
+Example platform data::
+
+ static struct lp55xx_led_config lp5523_led_config[] = {
+ {
+ .name = "D1",
+ .chan_nr = 0,
+ .led_current = 50,
+ .max_current = 130,
+ },
+ ...
+ {
+ .chan_nr = 8,
+ .led_current = 50,
+ .max_current = 130,
+ }
+ };
+
+ static int lp5523_setup(void)
+ {
+ /* Setup HW resources */
+ }
+
+ static void lp5523_release(void)
+ {
+ /* Release HW resources */
+ }
+
+ static void lp5523_enable(bool state)
+ {
+ /* Control chip enable signal */
+ }
+
+ static struct lp55xx_platform_data lp5523_platform_data = {
+ .led_config = lp5523_led_config,
+ .num_channels = ARRAY_SIZE(lp5523_led_config),
+ .clock_mode = LP55XX_CLOCK_EXT,
+ .setup_resources = lp5523_setup,
+ .release_resources = lp5523_release,
+ .enable = lp5523_enable,
+ };
+
+Note:
+ chan_nr can have values between 0 and 8.
diff --git a/Documentation/leds/leds-lp5523.txt b/Documentation/leds/leds-lp5523.txt
deleted file mode 100644
index 0961a060fc4d..000000000000
--- a/Documentation/leds/leds-lp5523.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-Kernel driver for lp5523
-========================
-
-* National Semiconductor LP5523 led driver chip
-* Datasheet: http://www.national.com/pf/LP/LP5523.html
-
-Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
-Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
-
-Description
------------
-LP5523 can drive up to 9 channels. Leds can be controlled directly via
-the led class control interface.
-The name of each channel is configurable in the platform data - name and label.
-There are three options to make the channel name.
-
-a) Define the 'name' in the platform data
-To make specific channel name, then use 'name' platform data.
-/sys/class/leds/R1 (name: 'R1')
-/sys/class/leds/B1 (name: 'B1')
-
-b) Use the 'label' with no 'name' field
-For one device name with channel number, then use 'label'.
-/sys/class/leds/RGB:channelN (label: 'RGB', N: 0 ~ 8)
-
-c) Default
-If both fields are NULL, 'lp5523' is used by default.
-/sys/class/leds/lp5523:channelN (N: 0 ~ 8)
-
-LP5523 has the internal program memory for running various LED patterns.
-There are two ways to run LED patterns.
-
-1) Legacy interface - enginex_mode, enginex_load and enginex_leds
- Control interface for the engines:
- x is 1 .. 3
- enginex_mode : disabled, load, run
- enginex_load : microcode load
- enginex_leds : led mux control
-
- cd /sys/class/leds/lp5523:channel2/device
- echo "load" > engine3_mode
- echo "9d80400004ff05ff437f0000" > engine3_load
- echo "111111111" > engine3_leds
- echo "run" > engine3_mode
-
- To stop the engine:
- echo "disabled" > engine3_mode
-
-2) Firmware interface - LP55xx common interface
- For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-LP5523 has three master faders. If a channel is mapped to one of
-the master faders, its output is dimmed based on the value of the master
-fader.
-
-For example,
-
- echo "123000123" > master_fader_leds
-
-creates the following channel-fader mappings:
-
- channel 0,6 to master_fader1
- channel 1,7 to master_fader2
- channel 2,8 to master_fader3
-
-Then, to have 25% of the original output on channel 0,6:
-
- echo 64 > master_fader1
-
-To have 0% of the original output (i.e. no output) channel 1,7:
-
- echo 0 > master_fader2
-
-To have 100% of the original output (i.e. no dimming) on channel 2,8:
-
- echo 255 > master_fader3
-
-To clear all master fader controls:
-
- echo "000000000" > master_fader_leds
-
-Selftest uses always the current from the platform data.
-
-Each channel contains led current settings.
-/sys/class/leds/lp5523:channel2/led_current - RW
-/sys/class/leds/lp5523:channel2/max_current - RO
-Format: 10x mA i.e 10 means 1.0 mA
-
-Example platform data:
-
-Note - chan_nr can have values between 0 and 8.
-
-static struct lp55xx_led_config lp5523_led_config[] = {
- {
- .name = "D1",
- .chan_nr = 0,
- .led_current = 50,
- .max_current = 130,
- },
-...
- {
- .chan_nr = 8,
- .led_current = 50,
- .max_current = 130,
- }
-};
-
-static int lp5523_setup(void)
-{
- /* Setup HW resources */
-}
-
-static void lp5523_release(void)
-{
- /* Release HW resources */
-}
-
-static void lp5523_enable(bool state)
-{
- /* Control chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5523_platform_data = {
- .led_config = lp5523_led_config,
- .num_channels = ARRAY_SIZE(lp5523_led_config),
- .clock_mode = LP55XX_CLOCK_EXT,
- .setup_resources = lp5523_setup,
- .release_resources = lp5523_release,
- .enable = lp5523_enable,
-};
diff --git a/Documentation/leds/leds-lp5562.rst b/Documentation/leds/leds-lp5562.rst
new file mode 100644
index 000000000000..79bbb2487ff6
--- /dev/null
+++ b/Documentation/leds/leds-lp5562.rst
@@ -0,0 +1,137 @@
+========================
+Kernel driver for lp5562
+========================
+
+* TI LP5562 LED Driver
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+===========
+
+ LP5562 can drive up to 4 channels: R/G/B and White.
+ LEDs can be controlled directly via the led class control interface.
+
+ All four channels can be also controlled using the engine micro programs.
+ LP5562 has the internal program memory for running various LED patterns.
+ For the details, please refer to the 'firmware' section in leds-lp55xx.rst
+
+Device attribute
+================
+
+engine_mux
+ 3 Engines are allocated in LP5562, but the number of channels is 4.
+ Therefore each channel should be mapped to the engine number.
+
+ Value: RGB or W
+
+ This attribute is used for programming LED data with the firmware interface.
+ Unlike the LP5521/LP5523/55231, LP5562 has a unique feature for the engine mux,
+ so an additional sysfs attribute is required.
+
+ LED Map
+
+ ===== === ===============================
+ Red ... Engine 1 (fixed)
+ Green ... Engine 2 (fixed)
+ Blue ... Engine 3 (fixed)
+ White ... Engine 1 or 2 or 3 (selective)
+ ===== === ===============================
+
+How to load the program data using engine_mux
+=============================================
+
+ Before loading the LP5562 program data, engine_mux should be written between
+ the engine selection and loading the firmware.
+ Engine mux has two different modes, RGB and W.
+ RGB is used for loading RGB program data, W is used for W program data.
+
+ For example, to run a blinking green channel pattern::
+
+ echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel
+ echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB
+ echo 1 > /sys/class/firmware/lp5562/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+ echo 0 > /sys/class/firmware/lp5562/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+ To run a blinking white pattern::
+
+ echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
+ echo 1 > /sys/class/firmware/lp5562/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+ echo 0 > /sys/class/firmware/lp5562/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+How to load the predefined patterns
+===================================
+
+ Please refer to 'leds-lp55xx.rst'.
+
+Setting Current of Each Channel
+===============================
+
+ Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
+ The 'led_current' and 'max_current' are used.
+
+Example of Platform data
+========================
+
+::
+
+ static struct lp55xx_led_config lp5562_led_config[] = {
+ {
+ .name = "R",
+ .chan_nr = 0,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "G",
+ .chan_nr = 1,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "B",
+ .chan_nr = 2,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "W",
+ .chan_nr = 3,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ };
+
+ static int lp5562_setup(void)
+ {
+ /* setup HW resources */
+ }
+
+ static void lp5562_release(void)
+ {
+ /* Release HW resources */
+ }
+
+ static void lp5562_enable(bool state)
+ {
+ /* Control of chip enable signal */
+ }
+
+ static struct lp55xx_platform_data lp5562_platform_data = {
+ .led_config = lp5562_led_config,
+ .num_channels = ARRAY_SIZE(lp5562_led_config),
+ .setup_resources = lp5562_setup,
+ .release_resources = lp5562_release,
+ .enable = lp5562_enable,
+ };
+
+To configure the platform-specific data, the lp55xx_platform_data structure is used.
+
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt
deleted file mode 100644
index 5a823ff6b393..000000000000
--- a/Documentation/leds/leds-lp5562.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-Kernel driver for LP5562
-========================
-
-* TI LP5562 LED Driver
-
-Author: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
-
- LP5562 can drive up to 4 channels. R/G/B and White.
- LEDs can be controlled directly via the led class control interface.
-
- All four channels can be also controlled using the engine micro programs.
- LP5562 has the internal program memory for running various LED patterns.
- For the details, please refer to 'firmware' section in leds-lp55xx.txt
-
-Device attribute: engine_mux
-
- 3 Engines are allocated in LP5562, but the number of channel is 4.
- Therefore each channel should be mapped to the engine number.
- Value : RGB or W
-
- This attribute is used for programming LED data with the firmware interface.
- Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux,
- so additional sysfs is required.
-
- LED Map
- Red ... Engine 1 (fixed)
- Green ... Engine 2 (fixed)
- Blue ... Engine 3 (fixed)
- White ... Engine 1 or 2 or 3 (selective)
-
-How to load the program data using engine_mux
-
- Before loading the LP5562 program data, engine_mux should be written between
- the engine selection and loading the firmware.
- Engine mux has two different mode, RGB and W.
- RGB is used for loading RGB program data, W is used for W program data.
-
- For example, run blinking green channel pattern,
- echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel
- echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB
- echo 1 > /sys/class/firmware/lp5562/loading
- echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
- echo 0 > /sys/class/firmware/lp5562/loading
- echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
- To run a blinking white pattern,
- echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
- echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
- echo 1 > /sys/class/firmware/lp5562/loading
- echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
- echo 0 > /sys/class/firmware/lp5562/loading
- echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-How to load the predefined patterns
-
- Please refer to 'leds-lp55xx.txt"
-
-Setting Current of Each Channel
-
- Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
- The 'led_current' and 'max_current' are used.
-
-(Example of Platform data)
-
-To configure the platform specific data, lp55xx_platform_data structure is used.
-
-static struct lp55xx_led_config lp5562_led_config[] = {
- {
- .name = "R",
- .chan_nr = 0,
- .led_current = 20,
- .max_current = 40,
- },
- {
- .name = "G",
- .chan_nr = 1,
- .led_current = 20,
- .max_current = 40,
- },
- {
- .name = "B",
- .chan_nr = 2,
- .led_current = 20,
- .max_current = 40,
- },
- {
- .name = "W",
- .chan_nr = 3,
- .led_current = 20,
- .max_current = 40,
- },
-};
-
-static int lp5562_setup(void)
-{
- /* setup HW resources */
-}
-
-static void lp5562_release(void)
-{
- /* Release HW resources */
-}
-
-static void lp5562_enable(bool state)
-{
- /* Control of chip enable signal */
-}
-
-static struct lp55xx_platform_data lp5562_platform_data = {
- .led_config = lp5562_led_config,
- .num_channels = ARRAY_SIZE(lp5562_led_config),
- .setup_resources = lp5562_setup,
- .release_resources = lp5562_release,
- .enable = lp5562_enable,
-};
-
-If the current is set to 0 in the platform data, that channel is
-disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp55xx.rst b/Documentation/leds/leds-lp55xx.rst
new file mode 100644
index 000000000000..632e41cec0b5
--- /dev/null
+++ b/Documentation/leds/leds-lp55xx.rst
@@ -0,0 +1,224 @@
+=================================================
+LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
+=================================================
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
+
+ Register access via the I2C
+ Device initialization/deinitialization
+ Create LED class devices for multiple output channels
+ Device attributes for user-space interface
+ Program memory for running LED patterns
+
+The LP55xx common driver provides these features using exported functions.
+
+ lp55xx_init_device() / lp55xx_deinit_device()
+ lp55xx_register_leds() / lp55xx_unregister_leds()
+ lp55xx_register_sysfs() / lp55xx_unregister_sysfs()
+
+( Driver Structure Data )
+
+In the lp55xx common driver, two different data structures are used.
+
+* lp55xx_led
+ per-channel control of the output LEDs, such as led current and channel index.
+* lp55xx_chip
+ general chip control, such as the I2C access and platform data.
+
+For example, LP5521 has a maximum of 3 LED channels, while
+LP5523/55231 has 9 output channels::
+
+ lp55xx_chip for LP5521 ... lp55xx_led #1
+ lp55xx_led #2
+ lp55xx_led #3
+
+ lp55xx_chip for LP5523 ... lp55xx_led #1
+ lp55xx_led #2
+ .
+ .
+ lp55xx_led #9
+
+( Chip Dependent Code )
+
+To support device specific configurations, the special structure
+'lp55xx_device_config' is used; a hedged sketch follows the list below.
+
+ - Maximum number of channels
+ - Reset command, chip enable command
+ - Chip specific initialization
+ - Brightness control register access
+ - Setting LED output current
+ - Program memory address access for running patterns
+ - Additional device specific attributes
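+
+A sketch of such a per-chip configuration (the field names below are
+illustrative paraphrases of the list above, not guaranteed to match
+the header exactly; see drivers/leds/leds-lp55xx-common.h for the real
+structure)::
+
+  static struct lp55xx_device_config my_chip_config = {
+	.max_channel      = 9,              /* maximum number of channels */
+	.post_init_device = my_chip_init,   /* chip specific initialization */
+	.set_led_current  = my_set_current, /* setting LED output current */
+	.run_engine       = my_run_engine,  /* running LED patterns */
+  };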
+
+( Firmware Interface )
+
+LP55xx family devices have the internal program memory for running
+various LED patterns.
+
+This pattern data is either saved as a file in user-land, or a
+hex byte string is written into the memory through the I2C.
+
+LP55xx common driver supports the firmware interface.
+
+LP55xx chips have three program engines.
+
+To load and run the pattern, the programming sequence is as follows.
+
+ (1) Select an engine number (1/2/3)
+ (2) Mode change to load
+ (3) Write pattern data into selected area
+ (4) Mode change to run
+
+The LP55xx common driver provides simple interfaces as below.
+
+select_engine:
+ Select which engine is used for running program
+run_engine:
+ Start program which is loaded via the firmware interface
+firmware:
+ Load program data
+
+In case of LP5523, one more command is required, 'enginex_leds'.
+It is used for selecting LED output(s) at each engine number.
+For more details, please refer to 'leds-lp5523.rst'.
+
+For example, run blinking pattern in engine #1 of LP5521::
+
+ echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo 1 > /sys/class/firmware/lp5521/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+ echo 0 > /sys/class/firmware/lp5521/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+For example, run blinking pattern in engine #3 of LP55231
+
+Two LEDs are configured as pattern output channels::
+
+ echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo 1 > /sys/class/firmware/lp55231/loading
+ echo "9d0740ff7e0040007e00a0010000" > /sys/class/firmware/lp55231/data
+ echo 0 > /sys/class/firmware/lp55231/loading
+ echo "000001100" > /sys/bus/i2c/devices/xxxx/engine3_leds
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+To start blinking patterns in engine #2 and #3 simultaneously::
+
+ for idx in 2 3
+ do
+ echo $idx > /sys/class/leds/red/device/select_engine
+ sleep 0.1
+ echo 1 > /sys/class/firmware/lp5521/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+ echo 0 > /sys/class/firmware/lp5521/loading
+ done
+ echo 1 > /sys/class/leds/red/device/run_engine
+
+Here is another example for LP5523.
+
+Full LED strings are selected by 'engine2_leds'::
+
+ echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo 1 > /sys/class/firmware/lp5523/loading
+ echo "9d80400004ff05ff437f0000" > /sys/class/firmware/lp5523/data
+ echo 0 > /sys/class/firmware/lp5523/loading
+ echo "111111111" > /sys/bus/i2c/devices/xxxx/engine2_leds
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+As soon as 'loading' is set to 0, the registered callback is called.
+Inside the callback, the selected engine is loaded and the program memory
+is updated. To run the programmed pattern, the 'run_engine' attribute
+should be enabled.
+
+The pattern sequence of the LP8501 is similar to the LP5523.
+
+However, the pattern data is chip specific.
+
+Ex 1) Engine 1 is used::
+
+ echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo 1 > /sys/class/firmware/lp8501/loading
+ echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+ echo 0 > /sys/class/firmware/lp8501/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+Ex 2) Engine 2 and 3 are used at the same time::
+
+ echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+ sleep 1
+ echo 1 > /sys/class/firmware/lp8501/loading
+ echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+ echo 0 > /sys/class/firmware/lp8501/loading
+ sleep 1
+ echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+ sleep 1
+ echo 1 > /sys/class/firmware/lp8501/loading
+ echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+ echo 0 > /sys/class/firmware/lp8501/loading
+ sleep 1
+ echo 1 > /sys/class/leds/d1/device/run_engine
+
+( 'run_engine' and 'firmware_cb' )
+
+The sequence of running the program data is common.
+
+But each device has its own specific register addresses for commands.
+
+To support this, 'run_engine' and 'firmware_cb' are configurable in each driver.
+
+run_engine:
+ Control the selected engine
+firmware_cb:
+ The callback function run after the firmware has been loaded.
+
+ It issues the chip specific commands for loading and updating the
+ program memory.
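+
+As a sketch, a chip-specific callback might look as below (the names are
+made up; the real callbacks live in the individual chip drivers)::
+
+ static void example_firmware_loaded(struct lp55xx_chip *chip)
+ {
+	const struct firmware *fw = chip->fw;
+
+	if (fw->size > EXAMPLE_PROGRAM_LENGTH) {
+		dev_err(&chip->cl->dev, "firmware data size overflow\n");
+		return;
+	}
+
+	/* switch the selected engine to load mode, then write the
+	 * pattern bytes into its program memory area */
+	example_load_engine(chip);
+	example_update_program_memory(chip, fw->data, fw->size);
+ }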
+
+( Predefined pattern data )
+
+Without the firmware interface, the LP55xx driver provides another method
+for loading an LED pattern: the 'predefined' pattern.
+
+A predefined pattern is defined in the platform data and can be loaded
+(one or more of them) via the sysfs if needed.
+
+To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
+configured.
+
+Example of predefined pattern data::
+
+ /* mode_1: blinking data */
+ static const u8 mode_1[] = {
+ 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
+ };
+
+ /* mode_2: always on */
+ static const u8 mode_2[] = { 0x40, 0xFF, };
+
+ struct lp55xx_predef_pattern board_led_patterns[] = {
+ {
+ .r = mode_1,
+ .size_r = ARRAY_SIZE(mode_1),
+ },
+ {
+ .b = mode_2,
+ .size_b = ARRAY_SIZE(mode_2),
+ },
+ };
+
+ struct lp55xx_platform_data lp5562_pdata = {
+ ...
+ .patterns = board_led_patterns,
+ .num_patterns = ARRAY_SIZE(board_led_patterns),
+ };
+
+Then, mode_1 and mode_2 can be run through the sysfs::
+
+ echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern
+ echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on
+
+To stop the running pattern::
+
+ echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt
deleted file mode 100644
index e23fa91ea722..000000000000
--- a/Documentation/leds/leds-lp55xx.txt
+++ /dev/null
@@ -1,194 +0,0 @@
-LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
-=================================================
-
-Authors: Milo(Woogyom) Kim <milo.kim@ti.com>
-
-Description
------------
-LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
-
- Register access via the I2C
- Device initialization/deinitialization
- Create LED class devices for multiple output channels
- Device attributes for user-space interface
- Program memory for running LED patterns
-
-The LP55xx common driver provides these features using exported functions.
- lp55xx_init_device() / lp55xx_deinit_device()
- lp55xx_register_leds() / lp55xx_unregister_leds()
- lp55xx_regsister_sysfs() / lp55xx_unregister_sysfs()
-
-( Driver Structure Data )
-
-In lp55xx common driver, two different data structure is used.
-
-o lp55xx_led
- control multi output LED channels such as led current, channel index.
-o lp55xx_chip
- general chip control such like the I2C and platform data.
-
-For example, LP5521 has maximum 3 LED channels.
-LP5523/55231 has 9 output channels.
-
-lp55xx_chip for LP5521 ... lp55xx_led #1
- lp55xx_led #2
- lp55xx_led #3
-
-lp55xx_chip for LP5523 ... lp55xx_led #1
- lp55xx_led #2
- .
- .
- lp55xx_led #9
-
-( Chip Dependent Code )
-
-To support device specific configurations, special structure
-'lpxx_device_config' is used.
-
- Maximum number of channels
- Reset command, chip enable command
- Chip specific initialization
- Brightness control register access
- Setting LED output current
- Program memory address access for running patterns
- Additional device specific attributes
-
-( Firmware Interface )
-
-LP55xx family devices have the internal program memory for running
-various LED patterns.
-This pattern data is saved as a file in the user-land or
-hex byte string is written into the memory through the I2C.
-LP55xx common driver supports the firmware interface.
-
-LP55xx chips have three program engines.
-To load and run the pattern, the programming sequence is following.
- (1) Select an engine number (1/2/3)
- (2) Mode change to load
- (3) Write pattern data into selected area
- (4) Mode change to run
-
-The LP55xx common driver provides simple interfaces as below.
-select_engine : Select which engine is used for running program
-run_engine : Start program which is loaded via the firmware interface
-firmware : Load program data
-
-In case of LP5523, one more command is required, 'enginex_leds'.
-It is used for selecting LED output(s) at each engine number.
-In more details, please refer to 'leds-lp5523.txt'.
-
-For example, run blinking pattern in engine #1 of LP5521
-echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp5521/loading
-echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
-echo 0 > /sys/class/firmware/lp5521/loading
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-For example, run blinking pattern in engine #3 of LP55231
-Two LEDs are configured as pattern output channels.
-echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp55231/loading
-echo "9d0740ff7e0040007e00a0010000" > /sys/class/firmware/lp55231/data
-echo 0 > /sys/class/firmware/lp55231/loading
-echo "000001100" > /sys/bus/i2c/devices/xxxx/engine3_leds
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-To start blinking patterns in engine #2 and #3 simultaneously,
-for idx in 2 3
-do
- echo $idx > /sys/class/leds/red/device/select_engine
- sleep 0.1
- echo 1 > /sys/class/firmware/lp5521/loading
- echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
- echo 0 > /sys/class/firmware/lp5521/loading
-done
-echo 1 > /sys/class/leds/red/device/run_engine
-
-Here is another example for LP5523.
-Full LED strings are selected by 'engine2_leds'.
-echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp5523/loading
-echo "9d80400004ff05ff437f0000" > /sys/class/firmware/lp5523/data
-echo 0 > /sys/class/firmware/lp5523/loading
-echo "111111111" > /sys/bus/i2c/devices/xxxx/engine2_leds
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-As soon as 'loading' is set to 0, registered callback is called.
-Inside the callback, the selected engine is loaded and memory is updated.
-To run programmed pattern, 'run_engine' attribute should be enabled.
-
-The pattern sequence of LP8501 is similar to LP5523.
-However pattern data is specific.
-Ex 1) Engine 1 is used
-echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
-
-Ex 2) Engine 2 and 3 are used at the same time
-echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
-sleep 1
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-sleep 1
-echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
-sleep 1
-echo 1 > /sys/class/firmware/lp8501/loading
-echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
-echo 0 > /sys/class/firmware/lp8501/loading
-sleep 1
-echo 1 > /sys/class/leds/d1/device/run_engine
-
-( 'run_engine' and 'firmware_cb' )
-The sequence of running the program data is common.
-But each device has own specific register addresses for commands.
-To support this, 'run_engine' and 'firmware_cb' are configurable in each driver.
-run_engine : Control the selected engine
-firmware_cb : The callback function after loading the firmware is done.
- Chip specific commands for loading and updating program memory.
-
-( Predefined pattern data )
-
-Without the firmware interface, LP55xx driver provides another method for
-loading a LED pattern. That is 'predefined' pattern.
-A predefined pattern is defined in the platform data and load it(or them)
-via the sysfs if needed.
-To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
-configured.
-
- Example of predefined pattern data:
-
- /* mode_1: blinking data */
- static const u8 mode_1[] = {
- 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
- };
-
- /* mode_2: always on */
- static const u8 mode_2[] = { 0x40, 0xFF, };
-
- struct lp55xx_predef_pattern board_led_patterns[] = {
- {
- .r = mode_1,
- .size_r = ARRAY_SIZE(mode_1),
- },
- {
- .b = mode_2,
- .size_b = ARRAY_SIZE(mode_2),
- },
- }
-
- struct lp55xx_platform_data lp5562_pdata = {
- ...
- .patterns = board_led_patterns,
- .num_patterns = ARRAY_SIZE(board_led_patterns),
- };
-
-Then, mode_1 and mode_2 can be run via through the sysfs.
-
- echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern
- echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on
-
-To stop running pattern,
- echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/leds/leds-mlxcpld.rst b/Documentation/leds/leds-mlxcpld.rst
new file mode 100644
index 000000000000..528582429e0b
--- /dev/null
+++ b/Documentation/leds/leds-mlxcpld.rst
@@ -0,0 +1,118 @@
+=======================================
+Kernel driver for Mellanox systems LEDs
+=======================================
+
+Provide system LED support for the next Mellanox systems:
+"msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
+"msn2410", "msb7800", "msn2740", "msn2100".
+
+Description
+-----------
+Driver provides the following LEDs for the systems "msx6710", "msx6720",
+"msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740":
+
+ - mlxcpld:fan1:green
+ - mlxcpld:fan1:red
+ - mlxcpld:fan2:green
+ - mlxcpld:fan2:red
+ - mlxcpld:fan3:green
+ - mlxcpld:fan3:red
+ - mlxcpld:fan4:green
+ - mlxcpld:fan4:red
+ - mlxcpld:psu:green
+ - mlxcpld:psu:red
+ - mlxcpld:status:green
+ - mlxcpld:status:red
+
+ "status"
+ - CPLD reg offset: 0x20
+ - Bits [3:0]
+
+ "psu"
+ - CPLD reg offset: 0x20
+ - Bits [7:4]
+
+ "fan1"
+ - CPLD reg offset: 0x21
+ - Bits [3:0]
+
+ "fan2"
+ - CPLD reg offset: 0x21
+ - Bits [7:4]
+
+ "fan3"
+ - CPLD reg offset: 0x22
+ - Bits [3:0]
+
+ "fan4"
+ - CPLD reg offset: 0x22
+ - Bits [7:4]
+
+ Color mask for all the above LEDs:
+
+ [bit3,bit2,bit1,bit0] or
+ [bit7,bit6,bit5,bit4]:
+
+ - [0,0,0,0] = LED OFF
+ - [0,1,0,1] = Red static ON
+ - [1,1,0,1] = Green static ON
+ - [0,1,1,0] = Red blink 3Hz
+ - [1,1,1,0] = Green blink 3Hz
+ - [0,1,1,1] = Red blink 6Hz
+ - [1,1,1,1] = Green blink 6Hz
+
+Driver provides the following LEDs for the system "msn2100":
+
+ - mlxcpld:fan:green
+ - mlxcpld:fan:red
+ - mlxcpld:psu1:green
+ - mlxcpld:psu1:red
+ - mlxcpld:psu2:green
+ - mlxcpld:psu2:red
+ - mlxcpld:status:green
+ - mlxcpld:status:red
+ - mlxcpld:uid:blue
+
+ "status"
+ - CPLD reg offset: 0x20
+ - Bits [3:0]
+
+ "fan"
+ - CPLD reg offset: 0x21
+ - Bits [3:0]
+
+ "psu1"
+ - CPLD reg offset: 0x23
+ - Bits [3:0]
+
+ "psu2"
+ - CPLD reg offset: 0x23
+ - Bits [7:4]
+
+ "uid"
+ - CPLD reg offset: 0x24
+ - Bits [3:0]
+
+ Color mask for all the above LEDs, except uid:
+
+ [bit3,bit2,bit1,bit0] or
+ [bit7,bit6,bit5,bit4]:
+
+ - [0,0,0,0] = LED OFF
+ - [0,1,0,1] = Red static ON
+ - [1,1,0,1] = Green static ON
+ - [0,1,1,0] = Red blink 3Hz
+ - [1,1,1,0] = Green blink 3Hz
+ - [0,1,1,1] = Red blink 6Hz
+ - [1,1,1,1] = Green blink 6Hz
+
+ Color mask for uid LED:
+ [bit3,bit2,bit1,bit0]:
+
+ - [0,0,0,0] = LED OFF
+ - [1,1,0,1] = Blue static ON
+ - [1,1,1,0] = Blue blink 3Hz
+ - [1,1,1,1] = Blue blink 6Hz
+
+Driver supports HW blinking at 3Hz and 6Hz frequency (50% duty cycle).
+For 3Hz the on/off time is about 167 msec; for 6Hz it is about 83 msec.
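+
+Since the driver registers ordinary LED class devices, the LEDs are
+controlled through the standard sysfs interface (an illustrative sketch;
+actual LED names depend on the system)::
+
+ echo 1 > /sys/class/leds/mlxcpld:status:green/brightness  # static ON
+ echo 0 > /sys/class/leds/mlxcpld:status:green/brightness  # OFF
+ echo timer > /sys/class/leds/mlxcpld:psu:red/trigger      # blink via
+                                                           # timer trigger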
diff --git a/Documentation/leds/leds-mlxcpld.txt b/Documentation/leds/leds-mlxcpld.txt
deleted file mode 100644
index a0e8fd457117..000000000000
--- a/Documentation/leds/leds-mlxcpld.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-Kernel driver for Mellanox systems LEDs
-=======================================
-
-Provide system LED support for the nex Mellanox systems:
-"msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
-"msn2410", "msb7800", "msn2740", "msn2100".
-
-Description
------------
-Driver provides the following LEDs for the systems "msx6710", "msx6720",
-"msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740":
- mlxcpld:fan1:green
- mlxcpld:fan1:red
- mlxcpld:fan2:green
- mlxcpld:fan2:red
- mlxcpld:fan3:green
- mlxcpld:fan3:red
- mlxcpld:fan4:green
- mlxcpld:fan4:red
- mlxcpld:psu:green
- mlxcpld:psu:red
- mlxcpld:status:green
- mlxcpld:status:red
-
- "status"
- CPLD reg offset: 0x20
- Bits [3:0]
-
- "psu"
- CPLD reg offset: 0x20
- Bits [7:4]
-
- "fan1"
- CPLD reg offset: 0x21
- Bits [3:0]
-
- "fan2"
- CPLD reg offset: 0x21
- Bits [7:4]
-
- "fan3"
- CPLD reg offset: 0x22
- Bits [3:0]
-
- "fan4"
- CPLD reg offset: 0x22
- Bits [7:4]
-
- Color mask for all the above LEDs:
- [bit3,bit2,bit1,bit0] or
- [bit7,bit6,bit5,bit4]:
- [0,0,0,0] = LED OFF
- [0,1,0,1] = Red static ON
- [1,1,0,1] = Green static ON
- [0,1,1,0] = Red blink 3Hz
- [1,1,1,0] = Green blink 3Hz
- [0,1,1,1] = Red blink 6Hz
- [1,1,1,1] = Green blink 6Hz
-
-Driver provides the following LEDs for the system "msn2100":
- mlxcpld:fan:green
- mlxcpld:fan:red
- mlxcpld:psu1:green
- mlxcpld:psu1:red
- mlxcpld:psu2:green
- mlxcpld:psu2:red
- mlxcpld:status:green
- mlxcpld:status:red
- mlxcpld:uid:blue
-
- "status"
- CPLD reg offset: 0x20
- Bits [3:0]
-
- "fan"
- CPLD reg offset: 0x21
- Bits [3:0]
-
- "psu1"
- CPLD reg offset: 0x23
- Bits [3:0]
-
- "psu2"
- CPLD reg offset: 0x23
- Bits [7:4]
-
- "uid"
- CPLD reg offset: 0x24
- Bits [3:0]
-
- Color mask for all the above LEDs, excepted uid:
- [bit3,bit2,bit1,bit0] or
- [bit7,bit6,bit5,bit4]:
- [0,0,0,0] = LED OFF
- [0,1,0,1] = Red static ON
- [1,1,0,1] = Green static ON
- [0,1,1,0] = Red blink 3Hz
- [1,1,1,0] = Green blink 3Hz
- [0,1,1,1] = Red blink 6Hz
- [1,1,1,1] = Green blink 6Hz
-
- Color mask for uid LED:
- [bit3,bit2,bit1,bit0]:
- [0,0,0,0] = LED OFF
- [1,1,0,1] = Blue static ON
- [1,1,1,0] = Blue blink 3Hz
- [1,1,1,1] = Blue blink 6Hz
-
-Driver supports HW blinking at 3Hz and 6Hz frequency (50% duty cycle).
-For 3Hz duty cylce is about 167 msec, for 6Hz is about 83 msec.
diff --git a/Documentation/leds/ledtrig-oneshot.txt b/Documentation/leds/ledtrig-oneshot.rst
index fe57474a12e2..69fa3ea1d554 100644
--- a/Documentation/leds/ledtrig-oneshot.txt
+++ b/Documentation/leds/ledtrig-oneshot.rst
@@ -1,3 +1,4 @@
+====================
One-shot LED Trigger
====================
@@ -17,27 +18,27 @@ additional "invert" property specifies if the LED has to stay off (normal) or
on (inverted) when not rearmed.
The trigger can be activated from user space on led class devices as shown
-below:
+below::
echo oneshot > trigger
This adds sysfs attributes to the LED that are documented in:
Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
-Example use-case: network devices, initialization:
+Example use-case: network devices, initialization::
echo oneshot > trigger # set trigger for this led
echo 33 > delay_on # blink at 1 / (33 + 33) Hz on continuous traffic
echo 33 > delay_off
-interface goes up:
+interface goes up::
echo 1 > invert # set led as normally-on, turn the led on
-packet received/transmitted:
+packet received/transmitted::
echo 1 > shot # led starts blinking, ignored if already blinking
-interface goes down
+interface goes down::
echo 0 > invert # set led as normally-off, turn the led off
diff --git a/Documentation/leds/ledtrig-transient.txt b/Documentation/leds/ledtrig-transient.rst
index 3bd38b487df1..d921dc830cd0 100644
--- a/Documentation/leds/ledtrig-transient.txt
+++ b/Documentation/leds/ledtrig-transient.rst
@@ -1,3 +1,4 @@
+=====================
LED Transient Trigger
=====================
@@ -62,12 +63,13 @@ non-transient state. When driver gets suspended, irrespective of the transient
state, the LED state changes to LED_OFF.
Transient trigger can be enabled and disabled from user space on led class
-devices, that support this trigger as shown below:
+devices, that support this trigger as shown below::
-echo transient > trigger
-echo none > trigger
+ echo transient > trigger
+ echo none > trigger
-NOTE: Add a new property trigger state to control the state.
+NOTE:
+ Add a new property trigger state to control the state.
This trigger exports three properties, activate, state, and duration. When
transient trigger is activated these properties are set to default values.
@@ -79,7 +81,8 @@ transient trigger is activated these properties are set to default values.
- state allows user to specify a transient state to be held for the specified
duration.
- activate - one shot timer activate mechanism.
+ activate
+ - one shot timer activate mechanism.
1 when activated, 0 when deactivated.
default value is zero when transient trigger is enabled,
to allow duration to be set.
@@ -89,12 +92,14 @@ transient trigger is activated these properties are set to default values.
deactivated state indicates that there is no active timer
running.
- duration - one shot timer value. When activate is set, duration value
+ duration
+ - one shot timer value. When activate is set, duration value
is used to start a timer that runs once. This value doesn't
get changed by the trigger unless user does a set via
echo new_value > duration
- state - transient state to be held. It has two values 0 or 1. 0 maps
+ state
+ - transient state to be held. It has two values 0 or 1. 0 maps
to LED_OFF and 1 maps to LED_FULL. The specified state is
held for the duration of the one shot timer and then the
state gets changed to the non-transient state which is the
@@ -114,39 +119,49 @@ When timer expires activate goes back to deactivated state, duration is left
at the set value to be used when activate is set at a future time. This will
allow user app to set the time once and activate it to run it once for the
specified value as needed. When timer expires, state is restored to the
-non-transient state which is the inverse of the transient state.
-
- echo 1 > activate - starts timer = duration when duration is not 0.
- echo 0 > activate - cancels currently running timer.
- echo n > duration - stores timer value to be used upon next
- activate. Currently active timer if
- any, continues to run for the specified time.
- echo 0 > duration - stores timer value to be used upon next
- activate. Currently active timer if any,
- continues to run for the specified time.
- echo 1 > state - stores desired transient state LED_FULL to be
+non-transient state which is the inverse of the transient state:
+
+ ================= ===============================================
+ echo 1 > activate starts timer = duration when duration is not 0.
+ echo 0 > activate cancels currently running timer.
+ echo n > duration stores timer value to be used upon next
+ activate. Currently active timer if
+ any, continues to run for the specified time.
+ echo 0 > duration stores timer value to be used upon next
+ activate. Currently active timer if any,
+ continues to run for the specified time.
+ echo 1 > state stores desired transient state LED_FULL to be
held for the specified duration.
- echo 0 > state - stores desired transient state LED_OFF to be
+ echo 0 > state stores desired transient state LED_OFF to be
held for the specified duration.
+ ================= ===============================================
+
+What is not supported
+=====================
-What is not supported:
-======================
- Timer activation is one shot and extending and/or shortening the timer
is not supported.
-Example use-case 1:
+Examples
+========
+
+use-case 1::
+
echo transient > trigger
echo n > duration
echo 1 > state
-repeat the following step as needed:
+
+repeat the following step as needed::
+
echo 1 > activate - start timer = duration to run once
echo 1 > activate - start timer = duration to run once
echo none > trigger
This trigger is intended to be used for the following example use cases:
+
- Control of vibrate (phones, tablets etc.) hardware by user space app.
- Use of LED by user space app as activity indicator.
- Use of LED by user space app as a kind of watchdog indicator -- as
- long as the app is alive, it can keep the LED illuminated, if it dies
- the LED will be extinguished automatically.
+ long as the app is alive, it can keep the LED illuminated, if it dies
+ the LED will be extinguished automatically.
- Use by any user space app that needs a transient GPIO output.
diff --git a/Documentation/leds/ledtrig-usbport.txt b/Documentation/leds/ledtrig-usbport.rst
index 69f54bfb4789..37c2505bfd57 100644
--- a/Documentation/leds/ledtrig-usbport.txt
+++ b/Documentation/leds/ledtrig-usbport.rst
@@ -1,3 +1,4 @@
+====================
USB port LED trigger
====================
@@ -10,14 +11,18 @@ listed as separated entries in a "ports" subdirectory. Selecting is handled by
echoing "1" to a chosen port.
Please note that this trigger allows selecting multiple USB ports for a single
-LED. This can be useful in two cases:
+LED.
+
+This can be useful in two cases:
1) Device with single USB LED and few physical ports
+====================================================
In such a case LED will be turned on as long as there is at least one connected
USB device.
2) Device with a physical port handled by few controllers
+=========================================================
Some devices may have one controller per PHY standard. E.g. USB 3.0 physical
port may be handled by ohci-platform, ehci-platform and xhci-hcd. If there is
@@ -25,14 +30,14 @@ only one LED user will most likely want to assign ports from all 3 hubs.
This trigger can be activated from user space on led class devices as shown
-below:
+below::
echo usbport > trigger
This adds sysfs attributes to the LED that are documented in:
Documentation/ABI/testing/sysfs-class-led-trigger-usbport
-Example use-case:
+Example use-case::
echo usbport > trigger
echo 1 > ports/usb1-port1
diff --git a/Documentation/leds/uleds.txt b/Documentation/leds/uleds.rst
index 13e375a580f9..83221098009c 100644
--- a/Documentation/leds/uleds.txt
+++ b/Documentation/leds/uleds.rst
@@ -1,3 +1,4 @@
+==============
Userspace LEDs
==============
@@ -10,12 +11,12 @@ Usage
When the driver is loaded, a character device is created at /dev/uleds. To
create a new LED class device, open /dev/uleds and write a uleds_user_dev
-structure to it (found in kernel public header file linux/uleds.h).
+structure to it (found in kernel public header file linux/uleds.h)::
#define LED_MAX_NAME_SIZE 64
struct uleds_user_dev {
- char name[LED_MAX_NAME_SIZE];
+ char name[LED_MAX_NAME_SIZE];
};
A new LED class device will be created with the name given. The name can be
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 39fae143c9cb..f189d130e543 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -15,34 +15,48 @@ tens of thousands of) instantiations. For example a lock in the inode
struct is one class, while each inode has its own instantiation of that
lock class.
-The validator tracks the 'state' of lock-classes, and it tracks
-dependencies between different lock-classes. The validator maintains a
-rolling proof that the state and the dependencies are correct.
-
-Unlike an lock instantiation, the lock-class itself never goes away: when
-a lock-class is used for the first time after bootup it gets registered,
-and all subsequent uses of that lock-class will be attached to this
-lock-class.
+The validator tracks the 'usage state' of lock-classes, and it tracks
+the dependencies between different lock-classes. Lock usage indicates
+how a lock is used with regard to its IRQ contexts, while lock
+dependency can be understood as lock order, where L1 -> L2 suggests that
+a task is attempting to acquire L2 while holding L1. From lockdep's
+perspective, the two locks (L1 and L2) are not necessarily related; that
+dependency just means that the order has happened at least once. The
+validator continually proves that the recorded usages and dependencies
+are correct, and emits a splat when they are not.
+
+A lock-class's behavior is constructed by its instances collectively:
+when the first instance of a lock-class is used after bootup the class
+gets registered, then all (subsequent) instances will be mapped to the
+class and hence their usages and dependencies will contribute to those of
+the class. A lock-class does not go away when a lock instance does, but
+it can be removed if the memory space of the lock class (static or
+dynamic) is reclaimed; this happens for example when a module is
+unloaded or a workqueue is destroyed.
State
-----
-The validator tracks lock-class usage history into 4 * nSTATEs + 1 separate
-state bits:
+The validator tracks lock-class usage history and divides the usage into
+(4 usages * n STATEs + 1) categories:
+where the 4 usages can be:
- 'ever held in STATE context'
- 'ever held as readlock in STATE context'
- 'ever held with STATE enabled'
- 'ever held as readlock with STATE enabled'
-Where STATE can be either one of (kernel/locking/lockdep_states.h)
- - hardirq
- - softirq
+where the n STATEs are coded in kernel/locking/lockdep_states.h and as of
+now they include:
+- hardirq
+- softirq
+where the last 1 category is:
- 'ever used' [ == !unused ]
-When locking rules are violated, these state bits are presented in the
-locking error messages, inside curlies. A contrived example:
+When locking rules are violated, these usage bits are presented in the
+locking error messages, inside curlies, with a total of 2 * n STATEs bits.
+A contrived example:
modprobe/2287 is trying to acquire lock:
(&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
@@ -51,28 +65,67 @@ locking error messages, inside curlies. A contrived example:
(&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
-The bit position indicates STATE, STATE-read, for each of the states listed
-above, and the character displayed in each indicates:
+For a given lock, the bit positions from left to right indicate the usage
+of the lock and of its readlock (if one exists), for each of the n STATEs
+listed above respectively, and the character displayed at each bit
+position indicates:
'.' acquired while irqs disabled and not in irq context
'-' acquired in irq context
'+' acquired with irqs enabled
'?' acquired in irq context with irqs enabled.
-Unused mutexes cannot be part of the cause of an error.
+The bits are illustrated with an example:
+
+ (&sio_locks[i].lock){-.-.}, at: [<c02867fd>] mutex_lock+0x21/0x24
+ ||||
+ ||| \-> softirq disabled and not in softirq context
+ || \--> acquired in softirq context
+ | \---> hardirq disabled and not in hardirq context
+ \----> acquired in hardirq context
+
+
+For a given STATE, whether the lock is ever acquired in that STATE
+context and whether that STATE is enabled yields four possible cases as
+shown in the table below. The bit character indicates exactly which
+case holds for the lock as of the reporting time.
+
+ -------------------------------------------
+ | | irq enabled | irq disabled |
+ |-------------------------------------------|
+ | ever in irq | ? | - |
+ |-------------------------------------------|
+ | never in irq | + | . |
+ -------------------------------------------
+
+The character '-' implies that irqs are disabled, because otherwise the
+character '?' would have been shown instead. A similar deduction applies
+to '+' as well.
+
+Unused locks (e.g., mutexes) cannot be part of the cause of an error.
Single-lock state rules:
------------------------
+A lock is irq-safe if it was ever used in an irq context, while a lock
+is irq-unsafe if it was ever acquired with irqs enabled.
+
A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
-following states are exclusive, and only one of them is allowed to be
-set for any lock-class:
+following states must be exclusive: only one of them is allowed to be set
+for any lock-class based on its usage:
+
+ <hardirq-safe> or <hardirq-unsafe>
+ <softirq-safe> or <softirq-unsafe>
- <hardirq-safe> and <hardirq-unsafe>
- <softirq-safe> and <softirq-unsafe>
+This is because if a lock can be used in irq context (irq-safe) then it
+must never be acquired with irqs enabled (irq-unsafe). Otherwise a
+deadlock may happen: if the context is interrupted after this lock has
+been acquired but before it is released, the interrupt handler may
+attempt to acquire the same lock a second time, which creates a
+deadlock, referred to as a lock recursion deadlock.
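+
+For illustration (a contrived sketch, not actual lockdep output):
+
+	spin_lock(&lock);		/* process context, irqs enabled */
+	    <interrupt>
+		spin_lock(&lock);	/* irq context: waits forever */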
-The validator detects and reports lock usage that violate these
+The validator detects and reports lock usage that violates these
single-lock state rules.
Multi-lock dependency rules:
@@ -81,15 +134,18 @@ Multi-lock dependency rules:
The same lock-class must not be acquired twice, because this could lead
to lock recursion deadlocks.
-Furthermore, two locks may not be taken in different order:
+Furthermore, two locks cannot be taken in inverse order:
<L1> -> <L2>
<L2> -> <L1>
-because this could lead to lock inversion deadlocks. (The validator
-finds such dependencies in arbitrary complexity, i.e. there can be any
-other locking sequence between the acquire-lock operations, the
-validator will still track all dependencies between locks.)
+because this could lead to a deadlock - referred to as a lock inversion
+deadlock - as the attempts to acquire the two locks form a circle, which
+could leave the two contexts waiting for each other permanently. The
+validator will find such a dependency circle of arbitrary complexity,
+i.e., there can be any other locking sequence between the acquire-lock
+operations; the validator will still determine whether these locks can
+be acquired in a circular fashion.
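+
+For instance (an illustrative sketch):
+
+	CPU0			CPU1
+	----			----
+	lock(L1);		lock(L2);
+	lock(L2);		lock(L1);
+
+CPU0 waits for L2 while holding L1, and CPU1 waits for L1 while holding
+L2, so neither context can ever make progress.
+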
Furthermore, the following usage based lock dependencies are not allowed
between any two lock-classes:
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index f70ebcdfe592..e4e07c8ab89e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,7 +3,7 @@
============================
By: David Howells <dhowells@redhat.com>
- Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 18735dc460a0..0a18075c485e 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with isdn4k-utils.
====================== =============== ========================================
GNU C 4.6 gcc --version
GNU make 3.81 make --version
-binutils 2.20 ld -v
+binutils 2.21 ld -v
flex 2.5.35 flex --version
bison 2.0 bison --version
util-linux 2.10o fdformat --version
@@ -77,9 +77,7 @@ You will need GNU make 3.81 or later to build the kernel.
Binutils
--------
-The build system has, as of 4.13, switched to using thin archives (`ar T`)
-rather than incremental linking (`ld -r`) for built-in.a intermediate steps.
-This requires binutils 2.20 or newer.
+Binutils 2.21 or newer is needed to build the kernel.
pkg-config
----------
diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 8fbf0aa3ba2d..ab62f1bb0366 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt
@@ -65,6 +65,10 @@ period). struct pwm_args contains 2 fields (period and polarity) and should
be used to set the initial PWM config (usually done in the probe function
of the PWM user). PWM arguments are retrieved with pwm_get_args().
+All consumers should really be reconfiguring the PWM upon resume as
+appropriate. This is the only way to ensure that everything is resumed in
+the proper order.
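+
+For example, a consumer might reconfigure in its resume callback (an
+illustrative sketch; the 'foo' driver and its cached 'state' field are
+assumptions)::
+
+	static int foo_resume(struct device *dev)
+	{
+		struct foo *foo = dev_get_drvdata(dev);
+
+		/* re-apply the cached PWM configuration */
+		return pwm_apply_state(foo->pwm, &foo->state);
+	}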
+
Using PWMs with the sysfs interface
-----------------------------------
@@ -141,6 +145,9 @@ The implementation of ->get_state() (a method used to retrieve initial PWM
state) is also encouraged for the same reason: letting the PWM user know
about the current PWM state would allow him to avoid glitches.
+Drivers should not implement any power management. In other words,
+consumers should implement it as described in the "Using PWMs" section.
+
Locking
-------
diff --git a/Documentation/s390/3270.txt b/Documentation/s390/3270.rst
index 7c715de99774..e09e77954238 100644
--- a/Documentation/s390/3270.txt
+++ b/Documentation/s390/3270.rst
@@ -1,13 +1,17 @@
+===============================
IBM 3270 Display System support
+===============================
This file describes the driver that supports local channel attachment
of IBM 3270 devices. It consists of three sections:
+
* Introduction
* Installation
* Operation
-INTRODUCTION.
+Introduction
+============
This paper describes installing and operating 3270 devices under
Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
@@ -17,12 +21,12 @@ twenty and thirty years ago.
You may have 3270s in-house and not know it. If you're using the
VM-ESA operating system, define a 3270 to your virtual machine by using
the command "DEF GRAF <hex-address>" This paper presumes you will be
-defining four 3270s with the CP/CMS commands
+defining four 3270s with the CP/CMS commands:
- DEF GRAF 620
- DEF GRAF 621
- DEF GRAF 622
- DEF GRAF 623
+ - DEF GRAF 620
+ - DEF GRAF 621
+ - DEF GRAF 622
+ - DEF GRAF 623
Your network connection from VM-ESA allows you to use x3270, tn3270, or
another 3270 emulator, started from an xterm window on your PC or
@@ -34,7 +38,8 @@ This paper covers installation of the driver and operation of a
dialed-in x3270.
-INSTALLATION.
+Installation
+============
You install the driver by installing a patch, doing a kernel build, and
running the configuration script (config3270.sh, in this directory).
@@ -59,13 +64,15 @@ Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only
at boot time to a 3270 if it is a 3215.
In brief, these are the steps:
+
1. Install the tub3270 patch
- 2. (If a module) add a line to a file in /etc/modprobe.d/*.conf
+ 2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
3. (If VM) define devices with DEF GRAF
4. Reboot
5. Configure
To test that everything works, assuming VM and x3270,
+
1. Bring up an x3270 window.
2. Use the DIAL command in that window.
3. You should immediately see a Linux login screen.
@@ -74,7 +81,8 @@ Here are the installation steps in detail:
1. The 3270 driver is a part of the official Linux kernel
source. Build a tree with the kernel source and any necessary
- patches. Then do
+ patches. Then do::
+
make oldconfig
(If you wish to disable 3215 console support, edit
.config; change CONFIG_TN3215's value to "n";
@@ -84,20 +92,22 @@ Here are the installation steps in detail:
make modules_install
2. (Perform this step only if you have configured tub3270 as a
- module.) Add a line to a file /etc/modprobe.d/*.conf to automatically
+ module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically
load the driver when it's needed. With this line added, you will see
login prompts appear on your 3270s as soon as boot is complete (or
with emulated 3270s, as soon as you dial into your vm guest using the
command "DIAL <vmguestname>"). Since the line-mode major number is
- 227, the line to add should be:
+ 227, the line to add should be::
+
alias char-major-227 tub3270
3. Define graphic devices to your vm guest machine, if you
haven't already. Define them before you reboot (reipl):
- DEFINE GRAF 620
- DEFINE GRAF 621
- DEFINE GRAF 622
- DEFINE GRAF 623
+
+ - DEFINE GRAF 620
+ - DEFINE GRAF 621
+ - DEFINE GRAF 622
+ - DEFINE GRAF 623
4. Reboot. The reboot process scans hardware devices, including
3270s, and this enables the tub3270 driver once loaded to respond
@@ -107,21 +117,23 @@ Here are the installation steps in detail:
5. Run the 3270 configuration script config3270. It is
distributed in this same directory, Documentation/s390, as
- config3270.sh. Inspect the output script it produces,
+ config3270.sh. Inspect the output script it produces,
/tmp/mkdev3270, and then run that script. This will create the
necessary character special device files and make the necessary
changes to /etc/inittab.
Then notify /sbin/init that /etc/inittab has changed, by issuing
- the telinit command with the q operand:
+ the telinit command with the q operand::
+
cd Documentation/s390
sh config3270.sh
sh /tmp/mkdev3270
telinit q
- This should be sufficient for your first time. If your 3270
+ This should be sufficient for your first time. If your 3270
configuration has changed and you're reusing config3270, you
- should follow these steps:
+ should follow these steps::
+
Change 3270 configuration
Reboot
Run config3270 and /tmp/mkdev3270
@@ -132,8 +144,10 @@ Here are the testing steps in detail:
1. Bring up an x3270 window, or use an actual hardware 3278 or
3279, or use the 3270 emulator of your choice. You would be
running the emulator on your PC or workstation. You would use
- the command, for example,
+ the command, for example::
+
x3270 vm-esa-domain-name &
+
if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
default model number. The driver does not take advantage of
extended attributes.
@@ -144,7 +158,8 @@ Here are the testing steps in detail:
2. Use the DIAL command instead of the LOGIN command to connect
to one of the virtual 3270s you defined with the DEF GRAF
- commands:
+ commands::
+
dial my-vm-guest-name
3. You should immediately see a login prompt from your
@@ -171,14 +186,17 @@ Here are the testing steps in detail:
Wrong major number? Wrong minor number? There's your
problem!
- D. Do you get the message
+ D. Do you get the message::
+
"HCPDIA047E my-vm-guest-name 0620 does not exist"?
+
If so, you must issue the command "DEF GRAF 620" from your VM
3215 console and then reboot the system.
OPERATION.
+==========
The driver defines three areas on the 3270 screen: the log area, the
input area, and the status area.
@@ -203,8 +221,10 @@ which indicates no scrolling will occur. (If you hit ENTER with "Linux
Running" and nothing typed, the application receives a newline.)
You may change the scrolling timeout value. For example, the following
-command line:
+command line::
+
echo scrolltime=60 > /proc/tty/driver/tty3270
+
changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
you wish to prevent scrolling entirely.
@@ -228,7 +248,8 @@ cause an EOF also by typing "^D" and hitting ENTER.
No PF key is preassigned to cause a job suspension, but you may cause a
job suspension by typing "^Z" and hitting ENTER. You may wish to
assign this function to a PF key. To make PF7 cause job suspension,
-execute the command:
+execute the command::
+
echo pf7=^z > /proc/tty/driver/tty3270
If the input you type does not end with the two characters "^n", the
@@ -243,8 +264,10 @@ command is entered into the stack only when the input area is not made
invisible (such as for password entry) and it is not identical to the
current top entry. PF10 rotates backward through the command stack;
PF11 rotates forward. You may assign the backward function to any PF
-key (or PA key, for that matter), say, PA3, with the command:
+key (or PA key, for that matter), say, PA3, with the command::
+
echo -e pa3=\\033k > /proc/tty/driver/tty3270
+
This assigns the string ESC-k to PA3. Similarly, the string ESC-j
performs the forward function. (Rationale: In bash with vi-mode line
editing, ESC-k and ESC-j retrieve backward and forward history.
@@ -252,15 +275,19 @@ Suggestions welcome.)
Is a stack size of twenty commands not to your liking? Change it on
the fly. To change to saving the last 100 commands, execute the
-command:
+command::
+
echo recallsize=100 > /proc/tty/driver/tty3270
Have a command you issue frequently? Assign it to a PF or PA key! Use
-the command
- echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+the command::
+
+ echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+
to execute the commands mkdir foobar and cd foobar immediately when you
hit PF24. Want to see the command line first, before you execute it?
-Use the -n option of the echo command:
+Use the -n option of the echo command::
+
echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
deleted file mode 100644
index 5ae7f868a007..000000000000
--- a/Documentation/s390/Debugging390.txt
+++ /dev/null
@@ -1,2142 +0,0 @@
-
- Debugging on Linux for s/390 & z/Architecture
- by
- Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
- Best viewed with fixed width fonts
-
-Overview of Document:
-=====================
-This document is intended to give a good overview of how to debug Linux for
-s/390 and z/Architecture. It is not intended as a complete reference and not a
-tutorial on the fundamentals of C & assembly. It doesn't go into
-390 IO in any detail. It is intended to complement the documents in the
-reference section below & any other worthwhile references you get.
-
-It is intended like the Enterprise Systems Architecture/390 Reference Summary
-to be printed out & used as a quick cheat sheet self help style reference when
-problems occur.
-
-Contents
-========
-Register Set
-Address Spaces on Intel Linux
-Address Spaces on Linux for s/390 & z/Architecture
-The Linux for s/390 & z/Architecture Kernel Task Structure
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-A sample program with comments
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-Debugging under VM
-s/390 & z/Architecture IO Overview
-Debugging IO on s/390 & z/Architecture under VM
-GDB on s/390 & z/Architecture
-Stack chaining in gdb by hand
-Examining core dumps
-ldd
-Debugging modules
-The proc file system
-SysRq
-References
-Special Thanks
-
-Register Set
-============
-The current architectures have the following registers.
-
-16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
-r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
-
-16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
-kernel usage only, used for memory management, interrupt control, debugging
-control etc.
-
-16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
-normally not used by normal programs but potentially could be used as
-temporary storage. These registers have a 1:1 association with general
-purpose registers and are designed to be used in the so-called access
-register mode to select different address spaces.
-Access register 0 (and access register 1 on z/Architecture, which needs a
-64 bit pointer) is currently used by the pthread library as a pointer to
-the current running threads private area.
-
-16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating
-point format compliant on G5 upwards & a Floating point control reg (FPC)
-4 64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
-Note:
-Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines,
-( provided the kernel is configured for this ).
-
-
-The PSW is the most important register on the machine it
-is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
-a program counter (pc), condition code register,memory space designator.
-In IBM standard notation I am counting bit 0 as the MSB.
-It has several advantages over a normal program counter
-in that you can change address translation & program counter
-in a single instruction. To change address translation,
-e.g. switching address translation off requires that you
-have a logical=physical mapping for the address you are
-currently running at.
-
- Bit Value
-s/390 z/Architecture
-0 0 Reserved ( must be 0 ) otherwise specification exception occurs.
-
-1 1 Program Event Recording 1 PER enabled,
- PER is used to facilitate debugging e.g. single stepping.
-
-2-4 2-4 Reserved ( must be 0 ).
-
-5 5 Dynamic address translation 1=DAT on.
-
-6 6 Input/Output interrupt Mask
-
-7 7 External interrupt Mask used primarily for interprocessor
- signalling and clock interrupts.
-
-8-11 8-11 PSW Key used for complex memory protection mechanism
- (not used under linux)
-
-12 12 1 on s/390 0 on z/Architecture
-
-13 13 Machine Check Mask 1=enable machine check interrupts
-
-14 14 Wait State. Set this to 1 to stop the processor except for
- interrupts and give time to other LPARS. Used in CPU idle in
- the kernel to increase overall usage of processor resources.
-
-15 15 Problem state ( if set to 1 certain instructions are disabled )
- all linux user programs run with this bit 1
- ( useful info for debugging under VM ).
-
-16-17 16-17 Address Space Control
-
- 00 Primary Space Mode:
- The register CR1 contains the primary address-space control ele-
- ment (PASCE), which points to the primary space region/segment
- table origin.
-
- 01 Access register mode
-
- 10 Secondary Space Mode:
- The register CR7 contains the secondary address-space control
- element (SASCE), which points to the secondary space region or
- segment table origin.
-
- 11 Home Space Mode:
- The register CR13 contains the home space address-space control
- element (HASCE), which points to the home space region/segment
- table origin.
-
- See "Address Spaces on Linux for s/390 & z/Architecture" below
- for more information about address space usage in Linux.
-
-18-19 18-19 Condition codes (CC)
-
-20 20 Fixed point overflow mask if 1=FPU exceptions for this event
- occur ( normally 0 )
-
-21 21 Decimal overflow mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-22 22 Exponent underflow mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-23 23 Significance Mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-24-31 24-30 Reserved Must be 0.
-
- 31 Extended Addressing Mode
- 32 Basic Addressing Mode
- Used to set addressing mode
- PSW 31 PSW 32
- 0 0 24 bit
- 0 1 31 bit
- 1 1 64 bit
-
-32 1=31 bit addressing mode 0=24 bit addressing mode (for backward
- compatibility), linux always runs with this bit set to 1
-
-33-64 Instruction address.
- 33-63 Reserved must be 0
- 64-127 Address
- In 24 bits mode bits 64-103=0 bits 104-127 Address
- In 31 bits mode bits 64-96=0 bits 97-127 Address
- Note: unlike 31 bit mode on s/390 bit 96 must be zero
- when loading the address with LPSWE otherwise a
- specification exception occurs, LPSW is fully backward
- compatible.
-
-
-Prefix Page(s)
---------------
-This per cpu memory area is too intimately tied to the processor not to mention.
-It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
-z/Architecture and is exchanged with one page on s/390 or two pages on
-z/Architecture in absolute storage by the set prefix instruction during Linux
-startup.
-This page is mapped to a different prefix for each processor in an SMP
-configuration (assuming the OS designer is sane of course).
-Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
-z/Architecture are used by the processor itself for holding such information
-as exception indications and entry points for exceptions.
-Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
-z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
-0x1000, too, which is used by Linux).
-The closest thing to this on traditional architectures is the interrupt
-vector table. This is a good thing & does simplify some of the kernel coding
-however it means that we now cannot catch stray NULL pointers in the
-kernel without hard coded checks.
-
-
-
-Address Spaces on Intel Linux
-=============================
-
-The traditional Intel Linux is approximately mapped as follows forgive
-the ascii art.
-0xFFFFFFFF 4GB Himem *****************
- * *
- * Kernel Space *
- * *
- ***************** ****************
-User Space Himem * User Stack * * *
-(typically 0xC0000000 3GB ) ***************** * *
- * Shared Libs * * Next Process *
- ***************** * to *
- * * <== * Run * <==
- * User Program * * *
- * Data BSS * * *
- * Text * * *
- * Sections * * *
-0x00000000 ***************** ****************
-
-Now it is easy to see that on Intel it is quite easy to recognise a kernel
-address as being one greater than user space himem (in this case 0xC0000000),
-and addresses of less than this are the ones in the current running program on
-this processor (if an smp box).
-If using the virtual machine ( VM ) as a debugger it is quite difficult to
-know which user process is running as the address space you are looking at
-could be from any process in the run queue.
-
-The limitation of Intels addressing technique is that the linux
-kernel uses a very simple real address to virtual addressing technique
-of Real Address=Virtual Address-User Space Himem.
-This means that on Intel the kernel linux can typically only address
-Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
-can typically use.
-They can lower User Himem to 2GB or lower & thus be
-able to use 2GB of RAM however this shrinks the maximum size
-of User Space from 3GB to 2GB they have a no win limit of 4GB unless
-they go to 64 Bit.
-
-
-On 390 our limitations & strengths make us slightly different.
-For backward compatibility we are only allowed use 31 bits (2GB)
-of our 32 bit addresses, however, we use entirely separate address
-spaces for the user & kernel.
-
-This means we can support 2GB of non Extended RAM on s/390, & more
-with the Extended memory management swap device &
-currently 4TB of physical memory currently on z/Architecture.
-
-
-Address Spaces on Linux for s/390 & z/Architecture
-==================================================
-
-Our addressing scheme is basically as follows:
-
- Primary Space Home Space
-Himem 0x7fffffff 2GB on s/390 ***************** ****************
-currently 0x3ffffffffff (2^42)-1 * User Stack * * *
-on z/Architecture. ***************** * *
- * Shared Libs * * *
- ***************** * *
- * * * Kernel *
- * User Program * * *
- * Data BSS * * *
- * Text * * *
- * Sections * * *
-0x00000000 ***************** ****************
-
-This also means that we need to look at the PSW problem state bit and the
-addressing mode to decide whether we are looking at user or kernel space.
-
-User space runs in primary address mode (or access register mode within
-the vdso code).
-
-The kernel usually also runs in home space mode, however when accessing
-user space the kernel switches to primary or secondary address mode if
-the mvcos instruction is not available or if a compare-and-swap (futex)
-instruction on a user space address is performed.
-
-When also looking at the ASCE control registers, this means:
-
-User space:
-- runs in primary or access register mode
-- cr1 contains the user asce
-- cr7 contains the user asce
-- cr13 contains the kernel asce
-
-Kernel space:
-- runs in home space mode
-- cr1 contains the user or kernel asce
- -> the kernel asce is loaded when a uaccess requires primary or
- secondary address mode
-- cr7 contains the user or kernel asce, (changed with set_fs())
-- cr13 contains the kernel asce
-
-In case of uaccess the kernel changes to:
-- primary space mode in case of a uaccess (copy_to_user) and uses
- e.g. the mvcp instruction to access user space. However the kernel
- will stay in home space mode if the mvcos instruction is available
-- secondary space mode in case of futex atomic operations, so that the
- instructions come from primary address space and data from secondary
- space
-
-In case of KVM, the kernel runs in home space mode, but cr1 gets switched
-to contain the gmap asce before the SIE instruction gets executed. When
-the SIE instruction is finished, cr1 will be switched back to contain the
-user asce.
-
-
-Virtual Addresses on s/390 & z/Architecture
-===========================================
-
-A virtual address on s/390 is made up of 3 parts
-The SX (segment index, roughly corresponding to the PGD & PMD in Linux
-terminology) being bits 1-11.
-The PX (page index, corresponding to the page table entry (pte) in Linux
-terminology) being bits 12-19.
-The remaining bits BX (the byte index are the offset in the page )
-i.e. bits 20 to 31.
-
-On z/Architecture in linux we currently make up an address from 4 parts.
-The region index bits (RX) 0-32 we currently use bits 22-32
-The segment index (SX) being bits 33-43
-The page index (PX) being bits 44-51
-The byte index (BX) being bits 52-63
-
-Notes:
-1) s/390 has no PMD so the PMD is really the PGD also.
-A lot of this stuff is defined in pgtable.h.
-
-2) Also seeing as s/390's page indexes are only 1k in size
-(bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
-to make the best use of memory by updating 4 segment indices
-entries each time we mess with a PMD & use offsets
-0,1024,2048 & 3072 in this page as for our segment indexes.
-On z/Architecture our page indexes are now 2k in size
-( bits 12-19 x 8 bytes per pte ) we do a similar trick
-but only mess with 2 segment indices each time we mess with
-a PMD.
-
-3) As z/Architecture supports up to a massive 5-level page table lookup we
-can only use 3 currently on Linux ( as this is all the generic kernel
-currently supports ) however this may change in future
-this allows us to access ( according to my sums )
-4TB of virtual storage per process i.e.
-4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
-enough for another 2 or 3 of years I think :-).
-to do this we use a region-third-table designation type in
-our address space control registers.
-
-
-The Linux for s/390 & z/Architecture Kernel Task Structure
-==========================================================
-Each process/thread under Linux for S390 has its own kernel task_struct
-defined in linux/include/linux/sched.h
-The S390 on initialisation & resuming of a process on a cpu sets
-the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
-(which we use for per-processor globals).
-
-The kernel stack pointer is intimately tied with the task structure for
-each processor as follows.
-
- s/390
- ************************
- * 1 page kernel stack *
- * ( 4K ) *
- ************************
- * 1 page task_struct *
- * ( 4K ) *
-8K aligned ************************
-
- z/Architecture
- ************************
- * 2 page kernel stack *
- * ( 8K ) *
- ************************
- * 2 page task_struct *
- * ( 8K ) *
-16K aligned ************************
-
-What this means is that we don't need to dedicate any register or global
-variable to point to the current running process & can retrieve it with the
-following very simple construct for s/390 & one very similar for z/Architecture.
-
-static inline struct task_struct * get_current(void)
-{
- struct task_struct *current;
- __asm__("lhi %0,-8192\n\t"
- "nr %0,15"
- : "=r" (current) );
- return current;
-}
-
-i.e. just anding the current kernel stack pointer with the mask -8192.
-Thankfully because Linux doesn't have support for nested IO interrupts
-& our devices have large buffers can survive interrupts being shut for
-short amounts of time we don't need a separate stack for interrupts.
-
-
-
-
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-=================================================================
-Overview:
----------
-This is the code that gcc produces at the top & the bottom of
-each function. It usually is fairly consistent & similar from
-function to function & if you know its layout you can probably
-make some headway in finding the ultimate cause of a problem
-after a crash without a source level debugger.
-
-Note: To follow stackframes requires a knowledge of C or Pascal &
-limited knowledge of one assembly language.
-
-It should be noted that there are some differences between the
-s/390 and z/Architecture stack layouts as the z/Architecture stack layout
-didn't have to maintain compatibility with older linkage formats.
-
-Glossary:
----------
-alloca:
-This is a built in compiler function for runtime allocation
-of extra space on the caller's stack which is obviously freed
-up on function exit ( e.g. the caller may choose to allocate nothing
-or a buffer of 4k if required for temporary purposes ); it generates
-very efficient code ( a few cycles ) when compared to alternatives
-like malloc.
-
-automatics: These are local variables on the stack,
-i.e. they aren't in registers & they aren't static.
-
-back-chain:
-This is a pointer to the stack pointer before entering a
-framed function's ( see frameless function ) prologue, got by
-dereferencing the address of the current stack pointer,
-i.e. got by accessing the 32 bit value at the stack pointer's
-current location.
-
-base-pointer:
-This is a pointer to the back of the literal pool which
-is an area just behind each procedure used to store constants
-in each function.
-
-call-clobbered: The caller probably needs to save these registers if there
-is something of value in them, on the stack or elsewhere, before making a
-call to another procedure so that it can restore them later.
-
-epilogue:
-The code generated by the compiler to return to the caller.
-
-frameless-function
-A frameless function in Linux for s390 & z/Architecture is one which doesn't
-need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
-given to it by the caller.
-A frameless function never:
-1) Sets up a back chain.
-2) Calls alloca.
-3) Calls other normal functions
-4) Has automatics.
-
-GOT-pointer:
-This is a pointer to the global-offset-table in ELF
-( Executable and Linkable Format, Linux's most common executable format );
-all globals & shared library objects are found using this pointer.
-
-lazy-binding
-ELF shared libraries are typically only loaded when routines in the shared
-library are actually first called at runtime. This is lazy binding.
-
-procedure-linkage-table
-This is a table found from the GOT which contains pointers to routines
-in other shared libraries which can't be called by easier means.
-
-prologue:
-The code generated by the compiler to set up the stack frame.
-
-outgoing-args:
-This is an extra area allocated on the stack of the calling function if the
-parameters for the callees cannot all be put in registers; the same
-area can be reused by each function the caller calls.
-
-routine-descriptor:
-A COFF executable format based concept of a procedure reference
-actually being 8 bytes or more as opposed to a simple pointer to the routine.
-This is typically defined as follows:
-Routine Descriptor offset 0=Pointer to Function
-Routine Descriptor offset 4=Pointer to Table of Contents
-The table of contents/TOC is roughly equivalent to a GOT pointer
-& it means that shared libraries etc. can be shared between several
-environments each with their own TOC.
-
-
-static-chain: This is used in nested functions, a concept adopted from Pascal
-by gcc & not used in ANSI C or C++ ( although quite useful ); basically it
-is a pointer used to reference local variables of enclosing functions.
-You might come across this stuff once or twice in your lifetime.
-
-e.g.
-The function below should return 11 though gcc may get upset & toss warnings
-about unused variables.
-int FunctionA(int a)
-{
-        int b;
-        void FunctionC(int c) /* gcc nested functions need a return type */
-        {
-                b=c+1;
-        }
-        FunctionC(10);
-        return(b);
-}
-
-
-s/390 & z/Architecture Register usage
-=====================================
-r0 used by syscalls/assembly call-clobbered
-r1 used by syscalls/assembly call-clobbered
-r2 argument 0 / return value 0 call-clobbered
-r3 argument 1 / return value 1 (if long long) call-clobbered
-r4 argument 2 call-clobbered
-r5 argument 3 call-clobbered
-r6 argument 4 saved
-r7 pointer-to arguments 5 to ... saved
-r8 this & that saved
-r9 this & that saved
-r10 static-chain ( if nested function ) saved
-r11 frame-pointer ( if function used alloca ) saved
-r12 got-pointer saved
-r13 base-pointer saved
-r14 return-address saved
-r15 stack-pointer saved
-
-f0 argument 0 / return value ( float/double ) call-clobbered
-f2 argument 1 call-clobbered
-f4 z/Architecture argument 2 saved
-f6 z/Architecture argument 3 saved
-The remaining floating point registers
-f1,f3,f5 & f7-f15 are call-clobbered.
-
-Notes:
-------
-1) The only requirement is that registers which are used
-by the callee are saved, e.g. the compiler is perfectly
-capable of using r11 for purposes other than a frame
-pointer if a frame pointer is not needed.
-2) In functions with variable arguments e.g. printf the calling procedure
-is identical to one without variable arguments & the same number of
-parameters. However, the prologue of this function is somewhat more
-hairy owing to it having to move these parameters to the stack to
-get va_start, va_arg & va_end to work.
-3) Access registers are currently unused by gcc but are used in
-the kernel. Possibilities exist to use them at the moment for
-temporary storage but it isn't recommended.
-4) Only 4 of the floating point registers are used for
-parameter passing as older machines such as G3 only have only 4
-& it keeps the stack frame compatible with other compilers.
-However with IEEE floating point emulation under linux on the
-older machines you are free to use the other 12.
-5) A long long or double parameter cannot have the
-first 4 bytes in a register & the second four bytes in the
-outgoing args area. It must be purely in the outgoing args
-area if crossing this boundary.
-6) Floating point parameters are mixed with outgoing args
-on the outgoing args area in the order they are passed in as parameters.
-7) Floating point arguments 2 & 3 are saved in the outgoing args area for
-z/Architecture
-
-
-Stack Frame Layout
-------------------
-s/390 z/Architecture
-0 0 back chain ( a 0 here signifies end of back chain )
-4 8 eos ( end of stack, not used on Linux for S390, used in other linkage formats )
-8 16 glue used in other s/390 linkage formats for saved routine descriptors etc.
-12 24 glue used in other s/390 linkage formats for saved routine descriptors etc.
-16 32 scratch area
-20 40 scratch area
-24 48 saved r6 of caller function
-28 56 saved r7 of caller function
-32 64 saved r8 of caller function
-36 72 saved r9 of caller function
-40 80 saved r10 of caller function
-44 88 saved r11 of caller function
-48 96 saved r12 of caller function
-52 104 saved r13 of caller function
-56 112 saved r14 of caller function
-60 120 saved r15 of caller function
-64 128 saved f4 of caller function
-72 136 saved f6 of caller function
-80 undefined
-96 160 outgoing args passed from caller to callee
-96+x 160+x possible stack alignment ( 8 bytes desirable )
-96+x+y 160+x+y alloca space of caller ( if used )
-96+x+y+z 160+x+y+z automatics of caller ( if used )
-0 back-chain
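-
-For illustration, here is a rough C sketch of the 31 bit s/390 frame above
-( the struct & field names are made up, only the offsets matter ):
-
-struct s390_stack_frame {
-        unsigned int  back_chain;    /* offset  0: 0 ends the chain    */
-        unsigned int  eos;           /* offset  4: unused on Linux     */
-        unsigned int  glue[2];       /* offset  8: other linkages only */
-        unsigned int  scratch[2];    /* offset 16                      */
-        unsigned int  gprs[10];      /* offset 24: saved r6-r15        */
-        double        fprs[2];       /* offset 64: saved f4 & f6       */
-        unsigned char undefined[16]; /* offset 80                      */
-        /* outgoing args, alignment, alloca space & automatics         */
-        /* of the caller follow from offset 96                         */
-};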
-
-A sample program with comments.
-===============================
-
-Comments on the function test
------------------------------
-1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
-used ( :-( ).
-2) This is a frameless function & no stack frame is bought.
-3) The compiler was clever enough to recognise that it could return the
-value in r2 as well as use it for the passed in parameter ( :-) ).
-4) The basr ( branch & save ) trick works as follows: the instruction
-has a special case where r0 as the branch operand ( as with some other
-instruction operands ) is understood as the literal value 0 & no branch
-is taken ( some RISC architectures also do this ). So we fall through to
-the next instruction with its address in r13; now we subtract the size of
-the function prologue we have executed + the size of the literal pool to
-get to the top of the literal pool.
-0040037c int test(int b)
-{ # Function prologue below
- 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
- 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
- 400382: a7 da ff fa ahi %r13,-6 # basr trick
- return(5+b);
- # Huge main program
- 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
-
- # Function epilogue below
- 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
- 40038e: 07 fe br %r14 # return
-}
-
-Comments on the function main
------------------------------
-1) The compiler did this function optimally ( 8-) )
-
-Literal pool for main.
-400390: ff ff ff ec .long 0xffffffec
-main(int argc,char *argv[])
-{ # Function prologue below
- 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
- 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
- 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
- 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
- 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
- 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
-
- return(test(5)); # Main Program Below
- 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
- # literal pool
- 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
- 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
- # address using branch & save instruction.
-
- # Function Epilogue below
- 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
- 4003b8: 07 fe br %r14 # return to do program exit
-}
-
-
-Compiler updates
-----------------
-
-main(int argc,char *argv[])
-{
- 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
- 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
- 400504: 00 40 04 f4 .long 0x004004f4
- # compiler now puts the constant pool in the code so it saves an instruction
- 400508: 18 0f lr %r0,%r15
- 40050a: a7 fa ff a0 ahi %r15,-96
- 40050e: 50 00 f0 00 st %r0,0(%r15)
- return(test(5));
- 400512: 58 10 d0 00 l %r1,0(%r13)
- 400516: a7 28 00 05 lhi %r2,5
- 40051a: 0d e1 basr %r14,%r1
- # compiler adds 1 extra instruction to the epilogue; this is done to
- # avoid processor pipeline stalls owing to data dependencies on g5 &
- # above, as register 14 in the old code was needed directly after being
- # loaded by the lm %r11,%r15,140(%r15) for the br %r14.
- 40051c: 58 40 f0 98 l %r4,152(%r15)
- 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
- 400524: 07 f4 br %r4
-}
-
-
-Hartmut ( our compiler developer ) also has been threatening to take out the
-stack backchain in optimised code as this also causes pipeline stalls, you
-have been warned.
-
-64 bit z/Architecture code disassembly
---------------------------------------
-
-If you understand the stuff above you'll understand the stuff
-below too, so I'll avoid repeating myself & just say that
-some of the instructions have g's on the end of them to indicate
-they are 64 bit & the stack offsets are bigger;
-the only other difference you'll find between 32 & 64 bit is that
-we now use f4 & f6 for floating point arguments on 64 bit.
-00000000800005b0 <test>:
-int test(int b)
-{
- return(5+b);
- 800005b0: a7 2a 00 05 ahi %r2,5
- 800005b4: b9 14 00 22 lgfr %r2,%r2 # sign extend the int return value to 64 bit
- 800005b8: 07 fe br %r14
- 800005ba: 07 07 bcr 0,%r7
-
-
-}
-
-00000000800005bc <main>:
-main(int argc,char *argv[])
-{
- 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
- 800005c2: b9 04 00 1f lgr %r1,%r15
- 800005c6: a7 fb ff 60 aghi %r15,-160
- 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
- return(test(5));
- 800005d0: a7 29 00 05 lghi %r2,5
- # brasl allows jumps > 64k & is overkill here, bras would do fine
- 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
- 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
- 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
- 800005e6: 07 f4 br %r4
-}
-
-
-
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-====================================================================
--gdwarf-2 now works; it should be considered the default debugging
-format for s/390 & z/Architecture as it is more reliable for debugging
-shared libraries, & normal -g debugging works much better now
-thanks to the IBM Java compiler developers' bug reports.
-
-This is typically done by adding/appending the flags -g or -gdwarf-2 to the
-CFLAGS & LDFLAGS variables in the Makefile of the program concerned.
-
-If you are using gdb & would like accurate displays of registers &
-stack traces, compile without optimisation, i.e. make sure
-that there is no -O2 or similar on the CFLAGS line of the Makefile &
-in the emitted gcc commands; obviously this will produce worse code
-( not advisable for shipment ) but it is an aid to the debugging process.
-
-This aids debugging because the compiler will copy parameters passed in
-in registers onto the stack so backtracing & looking at passed in
-parameters will work, however some larger programs which use inline functions
-will not compile without optimisation.
-
-Debugging with optimisation has since been much improved after fixing
-some bugs; please make sure you are using gdb-5.0 or later, developed
-after Nov'2000.
-
-
-
-Debugging under VM
-==================
-
-Notes
------
-Addresses & values in the VM debugger are always hex, never decimal.
-Address ranges are of the format <HexValue1>-<HexValue2> or
-<HexValue1>.<HexValue2>
-For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
-or 2000.1000
-
-The VM Debugger is case insensitive.
-
-VM's strengths are usually other debuggers' weaknesses: you can get at any
-resource no matter how sensitive, e.g. memory management resources, or change
-address translation in the PSW. For kernel hacking you will reap dividends if
-you get good at it.
-
-The VM Debugger displays operators but not operands, & it packs the
-useful information it displays onto a single line, as the author of the code
-probably felt that it was a good idea not to go over the 80 columns on the
-screen. This isn't as unintuitive as it may seem as the s/390 instructions
-are easy to decode mentally and you can make a good guess at a lot of them
-as all the operands are nibble ( half byte ) aligned.
-So if you have an objdump listing by hand, it is quite easy to follow, and if
-you don't have an objdump listing keep a copy of the s/390 Reference Summary
-or alternatively the s/390 principles of operation next to you.
-e.g. even I can guess that
-0001AFF8' LR 180F CC 0
-is a ( load register ) lr r0,r15
-
-Also it is very easy to tell the length of a 390 instruction from the 2 most
-significant bits in the instruction (not that this info is really useful except
-if you are trying to make sense of a hexdump of code).
-Here is a table:
-Bits Instruction Length
-------------------------------------------
-00 2 Bytes
-01 4 Bytes
-10 4 Bytes
-11 6 Bytes
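-
-As a small illustrative sketch ( not kernel code ), the rule in the table
-boils down to a lookup on the top 2 bits of the first opcode byte:
-
-#include <stdio.h>
-
-static int insn_length(unsigned char opcode)
-{
-        static const int len[4] = { 2, 4, 4, 6 };
-        return len[opcode >> 6];
-}
-
-int main(void)
-{
-        /* 0x18 (LR) -> 2, 0x58 (L) -> 4, 0x90 (STM) -> 4, 0xEB -> 6 */
-        printf("%d %d %d %d\n", insn_length(0x18), insn_length(0x58),
-               insn_length(0x90), insn_length(0xEB));
-        return 0;
-}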
-
-The debugger also displays other useful info on the same line such as the
-addresses being operated on destination addresses of branches & condition codes.
-e.g.
-00019736' AHI A7DAFF0E CC 1
-000198BA' BRC A7840004 -> 000198C2' CC 0
-000198CE' STM 900EF068 >> 0FA95E78 CC 2
-
-
-
-Useful VM debugger commands
----------------------------
-
-I suppose I'd better mention this before I start:
-to list the currently active traces do
-Q TR
-there can be a maximum of 255 of these per set
-( more about trace sets later ).
-To stop traces issue a
-TR END.
-To delete a particular breakpoint issue
-TR DEL <breakpoint number>
-
-The PA1 key drops to CP mode so you can issue debugger commands.
-Doing alt c ( on my 3270 console at least ) clears the screen, &
-hitting b <enter> comes back to the running operating system
-from cp mode ( in our case linux ).
-It is typically useful to add shortcuts to your profile.exec file
-if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
-Here are a few from mine.
-/* this gives me command history on issuing f12 */
-set pf12 retrieve
-/* this continues */
-set pf8 imm b
-/* goes to trace set a */
-set pf1 imm tr goto a
-/* goes to trace set b */
-set pf2 imm tr goto b
-/* goes to trace set c */
-set pf3 imm tr goto c
-
-
-
-Instruction Tracing
--------------------
-Setting a simple breakpoint
-TR I PSWA <address>
-To debug a particular function try
-TR I R <function address range>
-TR I on its own will single step.
-TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
-e.g.
-TR I DATA 4D R 0197BC.4000
-will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
-If you were so inclined you could add traces for all branch instructions &
-suffix them with RUN so you would have a backtrace on screen
-when a program crashes.
-TR BR <INTO OR FROM> will trace branches into or out of an address.
-e.g.
-TR BR INTO 0 is often quite useful if a program is getting awkward & deciding
-to branch to 0 & crashing, as this will stop at the address before it jumps to 0.
-TR I R <address range> RUN cmd d g
-single steps a range of addresses but stays running &
-displays the gprs on each step.
-
-
-
-Displaying & modifying Registers
---------------------------------
-D G will display all the gprs
-Adding an extra G to all the commands is necessary to access the full 64 bit
-content in VM on z/Architecture. Obviously this isn't required for access
-registers as these are still 32 bit.
-e.g. DGG instead of DG
-D X will display all the control registers
-D AR will display all the access registers
-D AR4-7 will display access registers 4 to 7
-CPU ALL D G will display the GPRs of all CPUs in the configuration
-D PSW will display the current PSW
-st PSW 2000 will put the value 2000 into the PSW &
-will probably crash your machine.
-D PREFIX displays the prefix offset
-
-
-Displaying Memory
------------------
-To display memory mapped using the current PSW's mapping try
-D <range>
-To make VM display a message each time it hits a particular address &
-continue, suffix the relevant trace command with RUN ( see Hints below ).
-D I<range> will disassemble/display a range of instructions.
-ST <addr> <32 bit word> will store a 32 bit word at a 32 bit aligned address.
-D T<range> will display the EBCDIC in an address ( if you are that way inclined ).
-D R<range> will display real addresses ( without DAT ) but with prefixing.
-There are other complex options to display; if you need to get at say home space
-but are in primary space the easiest thing to do is to temporarily
-modify the PSW to the other addressing mode, display the stuff & then
-restore it.
-
-
-
-Hints
------
-If you want to issue a debugger command without halting your virtual machine
-with the PA1 key, try prefixing the command with #CP e.g.
-#cp tr i pswa 2000
-Also, suffixing most debugger commands with RUN will cause them not
-to stop, just display the mnemonic at the current instruction on the console.
-If you have several breakpoints you want to put into your program &
-you get fed up of cross referencing with System.map,
-you can do the following trick for several symbols.
-grep do_signal System.map
-which emits the following among other things
-0001f4e0 T do_signal
-now you can do
-
-TR I PSWA 0001f4e0 cmd msg * do_signal
-This sends a message to your own console each time do_signal is entered.
-( As an aside I wrote a perl script once which automatically generated a REXX
-script with breakpoints on every kernel procedure; this isn't a good idea
-because there are thousands of these routines & VM can only set 255 breakpoints
-at a time, so you'd nearly have to spend as long pruning the file down as you
-would entering the msgs by hand ); however, the trick might be useful for a single
-object file. In the 3270 terminal emulator x3270 there is a very useful option
-in the file menu called "Save Screen In File" - this is very good for keeping a
-copy of traces.
-
-From CMS help <command name> will give you online help on a particular command.
-e.g.
-HELP DISPLAY
-
-Also CMS has a file called profile.exec which automatically gets called
-on startup of CMS ( like autoexec.bat ). To keep the DOS analogy going,
-CP has a feature similar to doskey; it may be useful for you to
-use profile.exec to define some keystrokes.
-e.g.
-SET PF9 IMM B
-This does a single step in VM on pressing F9.
-SET PF10 ^
-This sets up the ^ key,
-which can be used for ^c (ctrl-c) & ^z (ctrl-z) which can't be typed directly
-into some 3270 consoles.
-SET PF11 ^-
-This types the starting keystrokes for a sysrq see SysRq below.
-SET PF12 RETRIEVE
-This retrieves command history on pressing F12.
-
-
-Sometimes in VM the display is set up to scroll automatically; this
-can be very annoying if there are messages you wish to look at.
-To stop this do
-TERM MORE 255 255
-This will nearly stop automatic screen updates, however it will
-cause a denial of service if lots of messages go to the 3270 console,
-so it would be foolish to use this as the default on a production machine.
-
-
-Tracing particular processes
-----------------------------
-The kernel's text segment is intentionally at an address in memory where it
-will very seldom collide with text segments of user programs ( thanks Martin );
-this simplifies debugging the kernel.
-However it is quite common for user processes to have addresses which collide;
-this can make debugging a particular process under VM painful under normal
-circumstances as the process may change when doing a
-TR I R <address range>.
-Thankfully after reading VM's online help I figured out how to debug
-a particular process.
-
-Your first problem is to find the STD ( segment table designation )
-of the program you wish to debug.
-There are several ways you can do this; here are a few.
-1) objdump --syms <program to be debugged> | grep main
-To get the address of main in the program.
-tr i pswa <address of main>
-Start the program; if VM drops to CP on what looks like the entry
-point of the main function this is most likely the process you wish to debug.
-Now do a D X13, or D XG13 on z/Architecture.
-On 31 bit the STD is bits 1-19 ( the STO, segment table origin )
-& 25-31 ( the STL, segment table length ) of CR13.
-Now type
-TR I R STD <CR13's value> 0.7fffffff
-e.g.
-TR I R STD 8F32E1FF 0.7fffffff
-Another very useful variation is
-TR STORE INTO STD <CR13's value> <address range>
-for finding out when a particular variable changes.
-
-An alternative way of finding the STD of a currently running process
-is to do the following ( this method is more complex but
-could be quite convenient if you aren't updating the kernel much &
-so your kernel structures will stay constant for a reasonable period of
-time ).
-
-grep task /proc/<pid>/status
-from this you should see something like
-task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
-This now gives you a pointer to the task structure.
-Now make CC:="s390-gcc -g" kernel/sched.s
-To get the task_struct stabinfo.
-( task_struct is defined in include/linux/sched.h ).
-Now we want to look at
-task->active_mm->pgd
-on my machine the active_mm in the task structure stab is
-active_mm:(4,12),672,32
-its offset is 672/8=84=0x54
-the pgd member in the mm_struct stab is
-pgd:(4,6)=*(29,5),96,32
-so its offset is 96/8=12=0xc
-
-so we'll
-hexdump -s 0xf160054 /dev/mem | more
-i.e. task_struct+active_mm offset
-to look at the active_mm member,
-we get something like
-f160054 0fee cc60 0019 e334 0000 0000 0000 0011
-now do
-hexdump -s 0x0feecc6c /dev/mem | more
-i.e. active_mm+pgd offset
-& we get something like
-feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
-now do
-TR I R STD <pgd|0x7f> 0.7fffffff
-i.e. the 0x7f is added because the pgd only
-gives the segment table origin & we need to set the low bits
-to the maximum possible segment table length.
-TR I R STD 0f2c007f 0.7fffffff
-on z/Architecture you'll probably need to do
-TR I R STD <pgd|0x7> 0.ffffffffffffffff
-to set the TableType to 0x1 & the Table length to 3.
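-
-A tiny illustrative sketch of the 31 bit STD arithmetic above ( the values
-are taken from the hexdumps; this is not kernel code ):
-
-#include <stdio.h>
-
-int main(void)
-{
-        unsigned int pgd = 0x0f2c0000; /* segment table origin from the dump */
-        unsigned int std = pgd | 0x7f; /* or in the max segment table length */
-        printf("TR I R STD %08x 0.7fffffff\n", std); /* 0f2c007f */
-        return 0;
-}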
-
-
-
-Tracing Program Exceptions
---------------------------
-If you get a crash which says something like
-illegal operation or specification exception followed by a register dump,
-you can restart linux & trace these using the tr prog <range or value> trace
-option.
-
-
-The most common ones you will normally be tracing for are:
-1=operation exception
-2=privileged operation exception
-4=protection exception
-5=addressing exception
-6=specification exception
-10=segment translation exception
-11=page translation exception
-
-The full list of these is on page 22 of the current s/390 Reference Summary.
-e.g.
-tr prog 10 will trace segment translation exceptions.
-tr prog on its own will trace all program interruption codes.
-
-Trace Sets
-----------
-On starting VM you are initially in the INITIAL trace set.
-You can do a Q TR to verify this.
-If you have a complex tracing situation where you wish, for instance, to wait
-till a driver is open before you start tracing IO, but know in your
-heart that you are going to have to make several runs through the code till you
-have a clue what's going on, what you can do is:
-TR I PSWA <Driver open address>
-hit b to continue till breakpoint
-reach the breakpoint
-now do your
-TR GOTO B
-TR IO 7c08-7c09 inst int run
-or whatever the IO channels you wish to trace are & hit b
-
-To get back to the initial trace set do
-TR GOTO INITIAL
-& the TR I PSWA <Driver open address> will be the only active breakpoint again.
-
-
-Tracing linux syscalls under VM
--------------------------------
-Syscalls are implemented on Linux for S390 by the Supervisor call instruction
-(SVC). There are 256 possibilities of these as the instruction is made up of
-an 0xA opcode with the second byte being the syscall number. They are traced
-using the simple command:
-TR SVC <Optional value or range>
-the syscalls are defined in linux/arch/s390/include/asm/unistd.h
-e.g. to trace all file opens just do
-TR SVC 5 ( as this is the syscall number of open )
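-
-As a small illustrative sketch ( not kernel code ) of the instruction
-format just described, decoding the 0A05 you would see in such a trace:
-
-#include <stdio.h>
-
-int main(void)
-{
-        unsigned char insn[2] = { 0x0a, 0x05 };
-
-        if (insn[0] == 0x0a)
-                printf("svc %d\n", insn[1]); /* prints: svc 5 ( open ) */
-        return 0;
-}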
-
-
-SMP Specific commands
----------------------
-To find out how many cpus you have:
-Q CPUS displays all the CPUs available to your virtual machine.
-To find the cpu that VM debugger commands are currently directed at, do Q CPU.
-To change the cpu that VM debugger commands are directed at, do
-CPU <desired cpu no>
-
-On an SMP guest, to issue a command to all CPUs try prefixing the command with
-cpu all. To issue a command to a particular cpu try cpu <cpu number> e.g.
-CPU 01 TR I R 2000.3000
-If you are running on a guest with several cpus & you have an IO related
-problem & cannot follow the flow of code, but you know it isn't smp related:
-from the bash prompt issue
-shutdown -h now or halt,
-do a Q CPUS to find out how many cpus you have &
-detach each one of them from cp except cpu 0
-by issuing a
-DETACH CPU 01-(number of cpus in configuration),
-& boot linux again.
-TR SIGP will trace inter processor signal processor instructions.
-DEFINE CPU 01-(number in configuration)
-will get your guests cpus back.
-
-
-Help for displaying ascii textstrings
--------------------------------------
-On the very latest VM nuclei VM can now display ascii
-( thanks Neale for the hint ) by doing
-D TX<lowaddr>.<len>
-e.g.
-D TX0.100
-
-Alternatively
-=============
-Under older VM debuggers ( I love EBCDIC too ) you can use the following little
-program which converts a command line of hex digits to ascii text. It can be
-compiled under linux and you can copy the hex digits from your x3270 terminal
-to your xterm if you are debugging from a linuxbox.
-
-This is quite useful when looking at a parameter passed in as a text string
-under VM ( unless you are good at decoding ASCII in your head ).
-
-e.g. consider tracing an open syscall
-TR SVC 5
-We have stopped at a breakpoint
-000151B0' SVC 0A05 -> 0001909A' CC 0
-
-D 20.8 to check the SVC old psw in the prefix area & see whether it was from userspace
-(for the layout of the prefix area consult the "Fixed Storage Locations"
-chapter of the s/390 Reference Summary if you have it available).
-V00000020 070C2000 800151B2
-The problem state bit wasn't set & it's also too early in the boot sequence
-for it to be a userspace SVC; if it was, we would have to temporarily switch
-the psw to user space addressing so we could get at the first parameter of the
-open in gpr2.
-Next do a
-D G2
-GPR 2 = 00014CB4
-Now display what gpr2 is pointing to
-D 00014CB4.20
-V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
-V00014CC4 FC00014C B4001001 E0001000 B8070707
-Now copy the text till the first 00 hex ( which is the end of the string )
-to an xterm & do hex2ascii on it.
-hex2ascii 2F646576 2F636F6E 736F6C65 00
-outputs
-Decoded Hex:=/ d e v / c o n s o l e 0x00
-We were opening the console device.
-
-You can compile the code below yourself for practice :-).
-/*
- * hex2ascii.c
- * a useful little tool for converting a hexadecimal command line to ascii
- *
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
- */
-#include <stdio.h>
-#include <string.h> /* needed for strcmp & strlen */
-
-int main(int argc,char *argv[])
-{
- int cnt1,cnt2,len,toggle=0;
- int startcnt=1;
- unsigned char c,hex;
-
- if(argc>1&&(strcmp(argv[1],"-a")==0))
- startcnt=2;
- printf("Decoded Hex:=");
- for(cnt1=startcnt;cnt1<argc;cnt1++)
- {
- len=strlen(argv[cnt1]);
- for(cnt2=0;cnt2<len;cnt2++)
- {
- c=argv[cnt1][cnt2];
- if(c>='0'&&c<='9')
- c=c-'0';
- if(c>='A'&&c<='F')
- c=c-'A'+10;
- if(c>='a'&&c<='f')
- c=c-'a'+10;
- switch(toggle)
- {
- case 0:
- hex=c<<4;
- toggle=1;
- break;
- case 1:
- hex+=c;
- if(hex<32||hex>127)
- {
- if(startcnt==1)
- printf("0x%02X ",(int)hex);
- else
- printf(".");
- }
- else
- {
- printf("%c",hex);
- if(startcnt==1)
- printf(" ");
- }
- toggle=0;
- break;
- }
- }
- }
- printf("\n");
- return 0;
-}
-
-
-
-
-Stack tracing under VM
-----------------------
-A basic backtrace
------------------
-
-Here are the tricks I use; 9 times out of 10 they work pretty well.
-
-When your backchain reaches a dead end
---------------------------------------
-This can happen when an exception happens in the kernel and the kernel is
-entered twice. If you reach the NULL pointer at the end of the back chain you
-should be able to sniff further back if you follow the following tricks:
-1) A kernel address should be easy to recognise since it is in
-primary space, the problem state bit isn't set, & also
-the hi bit of the address is set.
-2) Another backchain should also be easy to recognise since it is an
-address pointing to another address approximately 100 bytes or 0x70 hex
-behind the current stackpointer.
-
-
-Here is some practice.
-Boot the kernel & hit PA1 at some random time.
-d g to display the gprs; this should display something like
-GPR 0 = 00000001 00156018 0014359C 00000000
-GPR 4 = 00000001 001B8888 000003E0 00000000
-GPR 8 = 00100080 00100084 00000000 000FE000
-GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
-Note that GPR14 is a return address, but as we are real men we are going to
-trace the stack.
-Display 0x40 bytes after the stack pointer:
-
-V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
-V000FFEE8 00000000 00000000 000003E0 00000000
-V000FFEF8 00100080 00100084 00000000 000FE000
-V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
-
-
-Ah, now look at what's in sp+56 (sp+0x38): this is 8001B36A, our saved r14, if
-you look above at our stackframe, & it also agrees with GPR14.
-
-Now backchain:
-d 000FFF38.40
-We are now taking the contents of SP to get our first backchain.
-
-V000FFF38 000FFFA0 00000000 00014995 00147094
-V000FFF48 00147090 001470A0 000003E0 00000000
-V000FFF58 00100080 00100084 00000000 001BF1D0
-V000FFF68 00010400 800149BA 80014CA6 000FFF38
-
-This displays a 2nd return address of 80014CA6
-
-now do d 000FFFA0.40 for our 3rd backchain
-
-V000FFFA0 04B52002 0001107F 00000000 00000000
-V000FFFB0 00000000 00000000 FF000000 0001107F
-V000FFFC0 00000000 00000000 00000000 00000000
-V000FFFD0 00010400 80010802 8001085A 000FFFA0
-
-
-our 3rd return address is 8001085A
-
-as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
-kernel entry routines for the sake of optimisation don't set up a backchain.
-
-now look at System.map to see if the addresses make any sense.
-
-grep -i 0001b3 System.map
-outputs among other things
-0001b304 T cpu_idle
-so 8001B36A
-is cpu_idle+0x66 ( quiet, the cpu is asleep, don't wake it )
-
-
-grep -i 00014 System.map
-produces among other things
-00014a78 T start_kernel
-so 0014CA6 is start_kernel+some hex number I can't add in my head.
-
-grep -i 00108 System.map
-this produces
-00010800 T _stext
-so 8001085A is _stext+0x5a
-
-Congrats, you've done your first backchain.
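-
-For off-line practice, here is a minimal C sketch of the walk we just did
-by hand ( 31 bit offsets: backchain at 0, saved r14 at 56; the fabricated
-"stack" array & the addresses in it are only there so the example runs
-anywhere ):
-
-#include <stdio.h>
-
-static void walk(const unsigned int *stack, unsigned int sp)
-{
-        while (sp) {
-                const unsigned int *frame = stack + sp / 4;
-                /* mask off the 31 bit addressing mode bit */
-                printf("return address %08x\n", frame[14] & 0x7fffffff);
-                sp = frame[0]; /* 0 signifies the end of the chain */
-        }
-}
-
-int main(void)
-{
-        unsigned int stack[34] = { 0 };
-
-        stack[8/4 + 0]   = 72;         /* frame at 8: backchain to 72 */
-        stack[8/4 + 14]  = 0x80014CA6; /* saved r14, start_kernel+x   */
-        stack[72/4 + 14] = 0x8001085A; /* saved r14, _stext+0x5a      */
-        walk(stack, 8);                /* backchain at 72 stays 0     */
-        return 0;
-}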
-
-
-
-s/390 & z/Architecture IO Overview
-==================================
-
-I am not going to give a course in 390 IO architecture as this would take me
-quite a while and I'm no expert. Instead I'll give a 390 IO architecture
-summary for Dummies. If you have the s/390 principles of operation available
-read this instead. If nothing else you may find a few useful keywords in here
-and be able to use them on a web search engine to find more useful information.
-
-Unlike other bus architectures modern 390 systems do their IO using mostly
-fibre optics and devices such as tapes and disks can be shared between several
-mainframes. Also S390 can support up to 65536 devices while a high end PC based
-system might be choking with around 64.
-
-Here is some of the common IO terminology:
-
-Subchannel:
-This is the logical number most IO commands use to talk to an IO device. There
-can be up to 0x10000 (65536) of these in a configuration, typically there are a
-few hundred. Under VM for simplicity they are allocated contiguously, however
-on the native hardware they are not. They typically stay consistent between
-boots provided no new hardware is inserted or removed.
-Under Linux for s390 we use these as IRQ's and also when issuing an IO command
-(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
-START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
-of the device we wish to talk to. The most important of these instructions are
-START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
-completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
-up to 8 channel paths to a device, this offers redundancy if one is not
-available.
-
-Device Number:
-This number remains static and is closely tied to the hardware. There are 65536
-of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
-8 least significant bits. These remain static even if more devices are inserted or
-removed from the hardware. There is a 1 to 1 mapping between subchannels and
-device numbers, provided devices aren't inserted or removed.
-
-Channel Control Words:
-CCWs are linked lists of instructions initially pointed to by an operation
-request block (ORB), which is given to the START SUBCHANNEL (SSCH)
-instruction along with the subchannel number for the IO subsystem to process
-while the CPU continues executing normal code.
-CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
-Format 1 (31 bit). These are typically used to issue read and write (and many
-other) instructions. They consist of a length field and an absolute address
-field.
-Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
-when the channel is idle, and the second for device end (secondary status).
-Sometimes you get both concurrently. You check how the IO went on by issuing a
-TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
-response block (IRB). If you get channel and device end status in the IRB
-without channel checks etc. your IO probably went okay. If you didn't you
-probably need to examine the IRB, extended status word etc.
-If an error occurs, more sophisticated control units have a facility known as
-concurrent sense. This means that if an error occurs Extended sense information
-will be presented in the Extended status word in the IRB. If not you have to
-issue a subsequent SENSE CCW command after the test subchannel.
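-
-For illustration, a rough sketch of how a format 1 CCW could be described
-in C, following the layout just outlined ( a command code, flags, a length
-field & an absolute data address ):
-
-struct ccw1 {
-        unsigned char  cmd_code; /* e.g. a read or write command    */
-        unsigned char  flags;    /* chaining flags etc.             */
-        unsigned short count;    /* byte count ( the length field ) */
-        unsigned int   cda;      /* absolute address of the data    */
-} __attribute__ ((packed,aligned(8)));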
-
-
-TPI (Test pending interrupt) can also be used for polled IO, but in
-multitasking multiprocessor systems it isn't recommended except for
-checking special cases (i.e. non looping checks for pending IO etc.).
-
-Store Subchannel and Modify Subchannel can be used to examine and modify
-operating characteristics of a subchannel (e.g. channel paths).
-
-Other IO related Terms:
-Sysplex: S390's Clustering Technology
-QDIO: S390's new high speed IO architecture to support devices such as gigabit
-ethernet, this architecture is also designed to be forward compatible with
-upcoming 64 bit machines.
-
-
-General Concepts
-
-Input Output Processors (IOPs) are responsible for communicating between
-the mainframe CPUs & the channels & relieve the mainframe CPUs from the
-burden of communicating with IO devices directly; this allows the CPUs to
-concentrate on data processing.
-
-IOPs can use one or more links ( known as channel paths ) to talk to each
-IO device. An IOP first checks for path availability & chooses an available
-one, then starts ( & sometimes terminates ) IO.
-There are two types of channel path: ESCON & the Parallel IO interface.
-
-IO devices are attached to control units, control units provide the
-logic to interface the channel paths & channel path IO protocols to
-the IO devices, they can be integrated with the devices or housed separately
-& often talk to several similar devices ( typical examples would be raid
-controllers or a control unit which connects to 1000 3270 terminals ).
-
-
- +---------------------------------------------------------------+
- | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
- | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
- | | | | | | | | | | Memory | | Storage | |
- | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
- |---------------------------------------------------------------+
- | IOP | IOP | IOP |
- |---------------------------------------------------------------
- | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
- ----------------------------------------------------------------
- || ||
- || Bus & Tag Channel Path || ESCON
- || ====================== || Channel
- || || || || Path
- +----------+ +----------+ +----------+
- | | | | | |
- | CU | | CU | | CU |
- | | | | | |
- +----------+ +----------+ +----------+
- | | | | |
-+----------+ +----------+ +----------+ +----------+ +----------+
-|I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
-+----------+ +----------+ +----------+ +----------+ +----------+
- CPU = Central Processing Unit
- C = Channel
- IOP = Input Output Processor
- CU = Control Unit
-
-The 390 IO systems come in 2 flavours; the current 390 machines support both:
-
-The older 360 & 370 interface, sometimes called the Parallel I/O interface,
-sometimes called Bus and Tag & sometimes Original Equipment Manufacturers
-Interface (OEMI).
-
-This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
-and control lines on the "Tag" cable. These can operate in byte multiplex mode
-for sharing between several slow devices, or in burst mode, monopolizing the
-channel for the whole burst. Up to 256 devices can be addressed on one of these
-cables. These cables are about one inch in diameter. The maximum unextended
-length supported by these cables is 125 Meters, but this can be extended up to
-2km with a fibre optic channel extender such as a 3044. The maximum burst speed
-supported is 4.5 megabytes per second. However, some really old processors
-support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
-One of these paths can be daisy chained to up to 8 control units.
-
-
-ESCON ( if fibre optic it is also called FICON )
-was introduced by IBM in 1990. It has 2 fibre optic cables and uses either LEDs
-or lasers for communication at a signaling rate of up to 200 megabits/sec. As
-10 bits are transferred for every 8 bits of info this drops to 160 megabits/sec
-and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
-operates in burst mode.
-
-ESCON's typical max cable length is 3km for the LED version and 20km for the
-laser version known as XDF ( extended distance facility ). This can be further
-extended by using an ESCON director which triples the above mentioned ranges.
-Unlike Bus & Tag, as ESCON is serial it uses a packet switching architecture;
-the standard Bus & Tag control protocol is however present within the packets.
-Up to 256 devices can be attached to each control unit that uses one of these
-interfaces.
-
-Common 390 Devices include:
-Network adapters, typically OSA2, 3172's, 2116's & OSA-E gigabit ethernet adapters.
-Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
-DASD's direct access storage devices ( otherwise known as hard disks ).
-Tape Drives.
-CTC ( Channel to Channel Adapters ),
-ESCON or Parallel Cables used as a very high speed serial link
-between 2 machines.
-
-
-Debugging IO on s/390 & z/Architecture under VM
-===============================================
-
-Now we are ready to go on with the IO tracing commands under VM.
-
-A few self explanatory queries:
-Q OSA
-Q CTC
-Q DISK ( This command is CMS specific )
-Q DASD
-
-
-
-
-
-
-Q OSA on my machine returns
-OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
-OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
-OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
-OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
-
-If you have a guest with certain privileges you may be able to see devices
-which don't belong to you. To avoid this, add the option V.
-e.g.
-Q V OSA
-
-Now using the device numbers returned by this command we will
-trace the IO starting up on the first devices 7c08 & 7c09.
-In our simplest case we can trace the
-start subchannels,
-like TR SSCH 7C08-7C09,
-or the halt subchannels,
-with TR HSCH 7C08-7C09;
-MSCH's & STSCH's, I think you can guess the rest.
-
-A good trick is tracing all the IO's and CCWS and spooling them into the reader
-of another VM guest so he can ftp the logfile back to his own machine. I'll do
-a small bit of this and give you a look at the output.
-
-1) Spool stdout to VM reader
-SP PRT TO (another vm guest ) or * for the local vm guest
-2) Fill the reader with the trace
-TR IO 7c08-7c09 INST INT CCW PRT RUN
-3) Start up linux
-i 00c
-4) Finish the trace
-TR END
-5) close the reader
-C PRT
-6) list reader contents
-RDRLIST
-7) copy it to linux4's minidisk
-RECEIVE / LOG TXT A1 ( replace
-8) filel & press F11 to look at it
-You should see something like:
-
-00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
- CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
- CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
- IDAL 43D8AFE8
- IDAL 0FB76000
-00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
-00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
- CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
- KEY 0 FPI C0 CC 0 CTLS 4007
-00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
-
-If you don't like messing up your reader ( because you possibly booted from it )
-you can alternatively spool it to another guest's reader.
-
-
-Other common VM device related commands
----------------------------------------------
-These commands are listed only because they have
-been of use to me in the past & may be of use to
-you too. For more complete info on each of the commands
-type HELP <command> from CMS.
-DET <devno range>
-detaches devices.
-ATT <devno range> <guest>
-attaches a device to a guest ( * for your own guest ).
-READY <devno> causes VM to issue a fake interrupt.
-
-The VARY command is normally only available to VM administrators.
-VARY ON PATH <path> TO <devno range>
-VARY OFF PATH <PATH> FROM <devno range>
-This is used to switch on or off channel paths to devices.
-
-Q CHPID <channel path ID>
-This displays the state of devices using this channel path.
-D SCHIB <subchannel>
-This displays the subchannel information SCHIB block for the device;
-this I believe is also only available to administrators.
-DEFINE CTC <devno>
-defines a virtual CTC channel to channel connection;
-2 need to be defined on each guest for the CTC driver to use.
-COUPLE devno userid remote devno
-Joins a local virtual device to a remote virtual device
-( commonly used for the CTC driver ).
-
-Building a VM ramdisk under CMS which linux can use:
-def vfb-<blocksize> <subchannel> <number blocks>
-blocksize is commonly 4096 for linux.
-Formatting it:
-format <subchannel> <drive letter e.g. x> (blksize <blocksize>
-
-Sharing a disk between multiple guests
-LINK userid devno1 devno2 mode password
-
-
-
-GDB on S390
-===========
-N.B. if compiling for debugging, gdb works better without optimisation
-( see Compiling programs for debugging above ).
-
-invocation
-----------
-gdb <victim program> <optional corefile>
-
-Online help
------------
-help: gives help on commands
-e.g.
-help
-help display
-Note gdb's online help is very good, use it.
-
-
-Assembly
---------
-info registers: displays registers other than floating point.
-info all-registers: displays floating points as well.
-disassemble: disassembles
-e.g.
-disassemble without parameters will disassemble the current function
-disassemble $pc $pc+10
-
-Viewing & modifying variables
------------------------------
-print or p: displays variable or register
-e.g. p/x $sp will display the stack pointer
-
-display: prints variable or register each time program stops
-e.g.
-display/x $pc will display the program counter
-display argc
-
-undisplay: undoes displays.
-
-info breakpoints: shows all current breakpoints
-
-info stack: shows stack back trace (if this doesn't work too well, I'll show
-you the stacktrace by hand below).
-
-info locals: displays local variables.
-
-info args: display current procedure arguments.
-
-set args: will set argc & argv each time the victim program is invoked.
-
-set <variable>=value
-set argc=100
-set $pc=0
-
-
-
-Modifying execution
--------------------
-step: steps n lines of sourcecode
-step steps 1 line.
-step 100 steps 100 lines of code.
-
-next: like step except this will not step into subroutines
-
-stepi: steps a single machine code instruction.
-e.g. stepi 100
-
-nexti: steps a single machine code instruction but will not step into
-subroutines.
-
-finish: will run until exit of the current routine
-
-run: (re)starts a program
-
-cont: continues a program
-
-quit: exits gdb.
-
-
-breakpoints
-------------
-
-break
-sets a breakpoint
-e.g.
-
-break main
-
-break *$pc
-
-break *0x400618
-
-Here's a really useful one for large programs
-rbr
-Set a breakpoint for all functions matching REGEXP
-e.g.
-rbr 390
-will set a breakpoint on all functions with 390 in their name.
-
-info breakpoints
-lists all breakpoints
-
-delete: delete breakpoint by number or delete them all
-e.g.
-delete 1 will delete the first breakpoint
-delete will delete them all
-
-watch: This will set a watchpoint ( usually hardware assisted ) which
-will watch a variable till it changes,
-e.g.
-watch cnt will watch the variable cnt till it changes.
-As an aside, unfortunately gdb's architecture independent watchpoint code
-is inconsistent & not very good; watchpoints usually work but not always.
-
-info watchpoints: Display currently active watchpoints
-
-condition: ( another useful one )
-Specify breakpoint number N to break only if COND is true.
-Usage is `condition N COND', where N is an integer and COND is an
-expression to be evaluated whenever breakpoint N is reached.
-
-
-
-User defined functions/macros
------------------------------
-define: ( Note this is very, very useful, simple & powerful )
-usage: define <name> <list of commands> end
-
-examples which you should consider putting into .gdbinit in your home directory
-define d
-stepi
-disassemble $pc $pc+10
-end
-
-define e
-nexti
-disassemble $pc $pc+10
-end
-
-
-Other hard to classify stuff
-----------------------------
-signal n:
-sends the victim program a signal.
-e.g. signal 3 will send a SIGQUIT.
-
-info signals:
-what gdb does when the victim receives certain signals.
-
-list:
-e.g.
-list lists current function source
-list 1,10 list first 10 lines of current file.
-list test.c:1,10
-
-
-directory:
-Adds directories to be searched for source if gdb cannot find the source.
-(note it is a bit sensitive about slashes)
-e.g. To add the root of the filesystem to the searchpath do
-directory //
-
-
-call <function>
-This calls a function in the victim program; this is pretty powerful.
-e.g.
-(gdb) call printf("hello world")
-outputs:
-$1 = 11
-
-You might now be thinking that the line above didn't work, something extra had
-to be done.
-(gdb) call fflush(stdout)
-hello world$2 = 0
-As an aside the debugger also calls malloc & free under the hood
-to make space for the "hello world" string.
-
-
-
-hints
------
-1) command completion works just like bash
-( if you are a bad typist like me this really helps )
-e.g. hit br <TAB> & cursor up & down :-).
-
-2) if you have a debugging problem that takes a few steps to recreate,
-put the steps into a file called .gdbinit in your current working directory;
-if you have defined a few extra useful user-defined commands, put these in
-your home directory & they will be read each time gdb is launched.
-
-A typical .gdbinit file might be.
-break main
-run
-break runtime_exception
-cont
-
-
-stack chaining in gdb by hand
------------------------------
-This is done using the same trick described for VM:
-p/x (*($sp+56))&0x7fffffff gets the first return address from the backchain.
-
-For z/Architecture
-Replace 56 with 112 & ignore the &0x7fffffff
-in the macros below & do nasty casts to longs like the following
-as gdb unfortunately deals with printed arguments as ints which
-messes up everything.
-i.e. here is a 3rd backchain dereference
-p/x *(long *)(***(long ***)$sp+112)
-
-
-this outputs
-$5 = 0x528f18
-on my machine.
-Now you can use
-info symbol (*($sp+56))&0x7fffffff
-you might see something like:
-rl_getc + 36 in section .text, telling you what is located at address 0x528f18.
-Now do.
-p/x (*(*$sp+56))&0x7fffffff
-This outputs
-$6 = 0x528ed0
-Now do.
-info symbol (*(*$sp+56))&0x7fffffff
-rl_read_key + 180 in section .text
-now do
-p/x (*(**$sp+56))&0x7fffffff
-& so on.
-
-Disassembling instructions without debug info
----------------------------------------------
-gdb typically complains if there is a lack of debugging
-symbols in the disassemble command with
-"No function contains specified address." To get around
-this do
-x/<number lines to disassemble>xi <address>
-e.g.
-x/20xi 0x400730
-
-
-
-Note: Remember gdb has history just like bash you don't need to retype the
-whole line just use the up & down arrows.
-
-
-
-For more info
--------------
-From your linuxbox do
-man gdb or info gdb.
-
-core dumps
-----------
-What is a core dump?
-A core dump is a file generated by the kernel (if allowed) which contains the
-registers and all active pages of the program which has crashed.
-From this file gdb will allow you to look at the registers, stack trace and
-memory of the program as if it just crashed on your system. It is usually
-called core and created in the current working directory.
-This is very useful in that a customer can mail a core dump to a technical
-support department and the technical support department can reconstruct what
-happened, provided they have an identical copy of this program with debugging
-symbols compiled in and the source base of this build is available.
-In short it is far more useful than something like a crash log could ever hope
-to be.
-
-Why have I never seen one?
-Probably because you haven't used the command
-ulimit -c unlimited in bash
-to allow core dumps, now do
-ulimit -a
-to verify that the limit was accepted.
-
-A sample core dump
-To create this I'm going to do
-ulimit -c unlimited
-gdb
-to launch gdb ( my victim app. ); now be bad & do the following from another
-telnet/xterm session to the same machine:
-ps -aux | grep gdb
-kill -SIGSEGV <gdb's pid>
-or alternatively use killall -SIGSEGV gdb if you have the killall command.
-Now look at the core dump.
-./gdb core
-Displays the following
-GNU gdb 4.18
-Copyright 1998 Free Software Foundation, Inc.
-GDB is free software, covered by the GNU General Public License, and you are
-welcome to change it and/or distribute copies of it under certain conditions.
-Type "show copying" to see the conditions.
-There is absolutely no warranty for GDB. Type "show warranty" for details.
-This GDB was configured as "s390-ibm-linux"...
-Core was generated by `./gdb'.
-Program terminated with signal 11, Segmentation fault.
-Reading symbols from /usr/lib/libncurses.so.4...done.
-Reading symbols from /lib/libm.so.6...done.
-Reading symbols from /lib/libc.so.6...done.
-Reading symbols from /lib/ld-linux.so.2...done.
-#0 0x40126d1a in read () from /lib/libc.so.6
-Setting up the environment for debugging gdb.
-Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
-Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
-(top-gdb) info stack
-#0 0x40126d1a in read () from /lib/libc.so.6
-#1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
-#2 0x528ed0 in rl_read_key () at input.c:381
-#3 0x5167e6 in readline_internal_char () at readline.c:454
-#4 0x5168ee in readline_internal_charloop () at readline.c:507
-#5 0x51692c in readline_internal () at readline.c:521
-#6 0x5164fe in readline (prompt=0x7ffff810)
- at readline.c:349
-#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
- annotation_suffix=0x4d6b44 "prompt") at top.c:2091
-#8 0x4d6cf0 in command_loop () at top.c:1345
-#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
-
-
-LDD
-===
-This is a program which lists the shared libraries which a program needs.
-Note you also get the relocations of the shared library text segments which
-help when using objdump --source.
-e.g.
- ldd ./gdb
-outputs
-libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
-libm.so.6 => /lib/libm.so.6 (0x4005e000)
-libc.so.6 => /lib/libc.so.6 (0x40084000)
-/lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
-
-
-Debugging shared libraries
-==========================
-Most programs use shared libraries; however it can be very painful
-when you single step instruction into a function like printf for the
-first time & you end up in functions like _dl_runtime_resolve. This is
-the ld.so doing lazy binding; lazy binding is a concept in ELF where
-shared library functions are not loaded into memory unless they are
-actually used, great for saving memory but a pain to debug.
-To get around this either relink the program with -static, or exit gdb, type
-export LD_BIND_NOW=true ( this will stop lazy binding ) & restart
-gdb'ing the program in question.
-
-
-
-Debugging modules
-=================
-As modules are dynamically loaded into the kernel their address can be
-anywhere; to get around this use the -m option with insmod to emit a load
-map which can be piped into a file if required.
-
-The proc file system
-====================
-What is it?
-It is a filesystem created by the kernel with files which are created on demand
-by the kernel if read, or which can be used to modify kernel parameters;
-it is a powerful concept.
-
-e.g.
-
-cat /proc/sys/net/ipv4/ip_forward
-On my machine outputs
-0
-telling me ip_forwarding is not on; to switch it on I can do
-echo 1 > /proc/sys/net/ipv4/ip_forward
-cat it again
-cat /proc/sys/net/ipv4/ip_forward
-On my machine now outputs
-1
-IP forwarding is on.
-There is a lot of useful info in here best found by going in and having a look
-around, so I'll take you through some entries I consider important.
-
-All the processes running on the machine have their own entry defined by
-/proc/<pid>
-So let's have a look at the init process.
-cd /proc/1
-
-cat cmdline
-emits
-init [2]
-
-cd /proc/1/fd
-This contains numerical entries of all the open files;
-some of these you can cat, e.g. stdout ( fd 1 ).
-
-cat /proc/29/maps
-on my machine emits
-
-00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
-00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
-0047e000-00492000 rwxp 00000000 00:00 0
-40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
-40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
-40016000-40017000 rwxp 00000000 00:00 0
-40017000-40018000 rw-p 00000000 00:00 0
-40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
-4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
-4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
-4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
-40111000-40114000 rw-p 00000000 00:00 0
-40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
-4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
-7fffd000-80000000 rwxp ffffe000 00:00 0
-
-
-Showing us the shared libraries the process uses, where they are in memory
-& the memory access permissions for each virtual memory area.
-
-/proc/1/cwd is a softlink to the current working directory.
-/proc/1/root is the root of the filesystem for this process.
-
-/proc/1/mem is the current running processes memory which you
-can read & write to like a file.
-strace uses this sometimes as it is a bit faster than the
-rather inefficient ptrace interface for peeking at DATA.
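-
-A minimal sketch of peeking at a process through /proc/<pid>/mem ( the pid
-& hex address arguments are made up for illustration; modern kernels will
-also demand ptrace permission on the target ):
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-int main(int argc, char *argv[])
-{
-        char path[64];
-        unsigned char buf[16];
-        ssize_t i, n;
-
-        if (argc < 3)
-                return 1;
-        snprintf(path, sizeof(path), "/proc/%s/mem", argv[1]);
-        int fd = open(path, O_RDONLY);
-        if (fd < 0) {
-                perror("open");
-                return 1;
-        }
-        n = pread(fd, buf, sizeof(buf), strtoul(argv[2], NULL, 16));
-        for (i = 0; i < n; i++)
-                printf("%02x ", buf[i]);
-        printf("\n");
-        close(fd);
-        return 0;
-}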
-
-
-cat status
-
-Name: init
-State: S (sleeping)
-Pid: 1
-PPid: 0
-Uid: 0 0 0 0
-Gid: 0 0 0 0
-Groups:
-VmSize: 408 kB
-VmLck: 0 kB
-VmRSS: 208 kB
-VmData: 24 kB
-VmStk: 8 kB
-VmExe: 368 kB
-VmLib: 0 kB
-SigPnd: 0000000000000000
-SigBlk: 0000000000000000
-SigIgn: 7fffffffd7f0d8fc
-SigCgt: 00000000280b2603
-CapInh: 00000000fffffeff
-CapPrm: 00000000ffffffff
-CapEff: 00000000fffffeff
-
-User PSW: 070de000 80414146
-task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
-User GPRS:
-00000400 00000000 0000000b 7ffffa90
-00000000 00000000 00000000 0045d9f4
-0045cafc 7ffffa90 7fffff18 0045cb08
-00010400 804039e8 80403af8 7ffff8b0
-User ACRS:
-00000000 00000000 00000000 00000000
-00000001 00000000 00000000 00000000
-00000000 00000000 00000000 00000000
-00000000 00000000 00000000 00000000
-Kernel BackChain CallChain BackChain CallChain
- 004b7ca8 8002bd0c 004b7d18 8002b92c
- 004b7db8 8005cd50 004b7e38 8005d12a
- 004b7f08 80019114
-Showing among other things memory usage, the status of some signals,
-& the process's registers from the kernel task_struct,
-as well as a backchain, which may be useful if a process crashes
-in the kernel for some unknown reason.
-
-Some driver debugging techniques
-================================
-debug feature
--------------
-Some of our drivers now support a "debug feature" in
-/proc/s390dbf; see s390dbf.txt in the linux/Documentation directory
-for more info.
-e.g.
-to switch on the lcs "debug feature":
-echo 5 > /proc/s390dbf/lcs/level
-& then, after the error has occurred:
-cat /proc/s390dbf/lcs/sprintf >/logfile
-The logfile now contains some information which may help
-tech support resolve a problem in the field.
-
-
-
-high level debugging network drivers
-------------------------------------
-ifconfig is quite a useful command;
-it gives the current state of network drivers.
-
-If you suspect your network device driver is dead
-one way to check is type
-ifconfig <network device>
-e.g. tr0
-You should see something like
-tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
- inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
- UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
- RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
- TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
- collisions:0 txqueuelen:100
-
-If the device doesn't say UP,
-try
-/etc/rc.d/init.d/network start
-( this starts the network stack & hopefully calls ifconfig tr0 up ).
-ifconfig looks at the output of /proc/net/dev & presents it in a more
-readable form.
-Now ping the device from a machine in the same subnet.
-If the RX packets count & TX packets count don't increment you probably
-have problems.
-Next,
-cat /proc/net/arp
-Do you see any hardware addresses in the cache? If not, you may have problems.
-Next try
-ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of
-ifconfig. Do you see any replies from machines other than the local machine?
-If not, you may have problems. Also, if the TX packets count in ifconfig
-hasn't incremented either, you have serious problems in your driver
-(e.g. the txbusy field of the network device being stuck on ),
-or you may have multiple network devices connected.
-
-
-chandev
--------
-There is a new device layer for channel devices, some
-drivers e.g. lcs are registered with this layer.
-If the device uses the channel device layer you'll be
-able to find what interrupts it uses & the current state
-of the device.
-See the manpage chandev.8 & type cat /proc/chandev for more info.
-
-
-SysRq
-=====
-This is now supported by linux for s/390 & z/Architecture.
-To enable it, compile the kernel with
-Kernel Hacking -> Magic SysRq Key Enabled
-& do
-echo "1" > /proc/sys/kernel/sysrq
-Also type
-echo "8" >/proc/sys/kernel/printk
-to make printk output go to the console.
-On 390 all commands are prefixed with
-^-
-e.g.
-^-t will show tasks.
-^-? or some unknown command will display help.
-The sysrq key reading is very picky ( I have to type the keys in an
- xterm session & paste them into the x3270 console )
-& it may be wise to predefine the keys as described in the VM hints above.
-
-This is particularly useful for syncing disks, unmounting & rebooting
-if the machine gets partially hung.
-
-Read Documentation/admin-guide/sysrq.rst for more info
-
-References:
-===========
-Enterprise Systems Architecture Reference Summary
-Enterprise Systems Architecture Principles of Operation
-Hartmut Penners s390 stack frame sheet.
-IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
-Various bits of man & info pages of Linux.
-Linux & GDB source.
-Various info & man pages.
-CMS Help on tracing commands.
-Linux for s/390 Elf Application Binary Interface
-Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
-z/Architecture Principles of Operation SA22-7832-00
-Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
-Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
-
-Special Thanks
-==============
-Special thanks to Neale Ferguson, who maintains a much
-prettier HTML version of this page at
-http://linuxvm.org/penguinvm/
-& to Bob Grainger, Stefan Bader & others for reporting bugs.
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.rst
index 480a78ef5a1e..7006d8209d2e 100644
--- a/Documentation/s390/cds.txt
+++ b/Documentation/s390/cds.rst
@@ -1,14 +1,18 @@
+===========================
Linux for S/390 and zSeries
+===========================
Common Device Support (CDS)
Device Driver I/O Support Routines
-Authors : Ingo Adlung
- Cornelia Huck
+Authors:
+ - Ingo Adlung
+ - Cornelia Huck
Copyright, IBM Corp. 1999-2002
Introduction
+============
This document describes the common device support routines for Linux/390.
Different from other hardware architectures, ESA/390 has defined a unified
@@ -27,18 +31,20 @@ Operation manual (IBM Form. No. SA22-7201).
In order to build common device support for ESA/390 I/O interfaces, a
functional layer was introduced that provides generic I/O access methods to
-the hardware.
+the hardware.
-The common device support layer comprises the I/O support routines defined
-below. Some of them implement common Linux device driver interfaces, while
+The common device support layer comprises the I/O support routines defined
+below. Some of them implement common Linux device driver interfaces, while
some of them are ESA/390 platform specific.
Note:
-In order to write a driver for S/390, you also need to look into the interface
-described in Documentation/s390/driver-model.txt.
+ In order to write a driver for S/390, you also need to look into the interface
+ described in Documentation/s390/driver-model.rst.
Note for porting drivers from 2.4:
+
The major changes are:
+
* The functions use a ccw_device instead of an irq (subchannel).
* All drivers must define a ccw_driver (see driver-model.rst) and the associated
functions.
@@ -57,19 +63,16 @@ The major changes are:
ccw_device_get_ciw()
get commands from extended sense data.
-ccw_device_start()
-ccw_device_start_timeout()
-ccw_device_start_key()
-ccw_device_start_key_timeout()
+ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
initiate an I/O request.
ccw_device_resume()
resume channel program execution.
-ccw_device_halt()
+ccw_device_halt()
terminate the current I/O request processed on the device.
-do_IRQ()
+do_IRQ()
generic interrupt routine. This function is called by the interrupt entry
routine whenever an I/O interrupt is presented to the system. The do_IRQ()
routine determines the interrupt status and calls the device specific
@@ -82,12 +85,15 @@ first level interrupt handler only and does not comprise a device driver
callable interface. Instead, the functional description of do_IO() also
describes the input to the device specific interrupt handler.
-Note: All explanations apply also to the 64 bit architecture s390x.
+Note:
+ All explanations apply also to the 64 bit architecture s390x.
Common Device Support (CDS) for Linux/390 Device Drivers
+========================================================
General Information
+-------------------
The following chapters describe the I/O related interface routines the
Linux/390 common device support (CDS) provides to allow for device specific
@@ -101,6 +107,7 @@ can be found in the architecture specific C header file
linux/arch/s390/include/asm/irq.h.
Overview of CDS interface concepts
+----------------------------------
Different to other hardware platforms, the ESA/390 architecture doesn't define
interrupt lines managed by a specific interrupt controller and bus systems
@@ -126,7 +133,7 @@ has to call every single device driver registered on this IRQ in order to
determine the device driver owning the device that raised the interrupt.
Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
-For internal use of the common I/O layer, these are still there. However,
+For internal use of the common I/O layer, these are still there. However,
device drivers should use the new calling interface via the ccw_device only.
During its startup the Linux/390 system checks for peripheral devices. Each
@@ -134,7 +141,7 @@ of those devices is uniquely defined by a so called subchannel by the ESA/390
channel subsystem. While the subchannel numbers are system generated, each
subchannel also takes a user defined attribute, the so called device number.
Both subchannel number and device number cannot exceed 65535. During sysfs
-initialisation, the information about control unit type and device types that
+initialisation, the information about control unit type and device types that
imply specific I/O commands (channel command words - CCWs) in order to operate
the device are gathered. Device drivers can retrieve this set of hardware
information during their initialization step to recognize the devices they
@@ -164,18 +171,26 @@ get_ciw() - get command information word
This call enables a device driver to get information about supported commands
from the extended SenseID data.
-struct ciw *
-ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+::
-cdev - The ccw_device for which the command is to be retrieved.
-cmd - The command type to be retrieved.
+ struct ciw *
+ ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+
+==== ========================================================
+cdev The ccw_device for which the command is to be retrieved.
+cmd The command type to be retrieved.
+==== ========================================================
ccw_device_get_ciw() returns:
-NULL - No extended data available, invalid device or command not found.
-!NULL - The command requested.
+===== ================================================================
+ NULL No extended data available, invalid device or command not found.
+!NULL The command requested.
+===== ================================================================
+
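+As an illustration (not part of the original text; CIW_TYPE_RCD and the
+field names are assumptions based on the common I/O headers), a driver
+might look up a command like this::
+
+	struct ciw *ciw = ccw_device_get_ciw(cdev, CIW_TYPE_RCD);
+
+	if (ciw)
+		/* ciw->cmd can now be used in a channel program */ ;
+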
+::
-ccw_device_start() - Initiate I/O Request
+ ccw_device_start() - Initiate I/O Request
The ccw_device_start() routine is the I/O request front-end processor. All
device driver I/O requests must be issued using this routine. A device driver
@@ -186,93 +201,105 @@ This description also covers the status information passed to the device
driver's interrupt handler as this is related to the rules (flags) defined
with the associated I/O request when calling ccw_device_start().
-int ccw_device_start(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- unsigned long flags);
-int ccw_device_start_timeout(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- unsigned long flags,
- int expires);
-int ccw_device_start_key(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- __u8 key,
- unsigned long flags);
-int ccw_device_start_key_timeout(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- __u8 key,
- unsigned long flags,
- int expires);
-
-cdev : ccw_device the I/O is destined for
-cpa : logical start address of channel program
-user_intparm : user specific interrupt information; will be presented
- back to the device driver's interrupt handler. Allows a
- device driver to associate the interrupt with a
- particular I/O request.
-lpm : defines the channel path to be used for a specific I/O
- request. A value of 0 will make cio use the opm.
-key : the storage key to use for the I/O (useful for operating on a
- storage with a storage key != default key)
-flag : defines the action to be performed for I/O processing
-expires : timeout value in jiffies. The common I/O layer will terminate
- the running program after this and call the interrupt handler
- with ERR_PTR(-ETIMEDOUT) as irb.
-
-Possible flag values are :
-
-DOIO_ALLOW_SUSPEND - channel program may become suspended
-DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually
- this implies the channel program might
- become modified
-DOIO_SUPPRESS_INTER - don't call the handler on intermediate status
-
-The cpa parameter points to the first format 1 CCW of a channel program :
-
-struct ccw1 {
- __u8 cmd_code;/* command code */
- __u8 flags; /* flags, like IDA addressing, etc. */
- __u16 count; /* byte count */
- __u32 cda; /* data address */
-} __attribute__ ((packed,aligned(8)));
-
-with the following CCW flags values defined :
-
-CCW_FLAG_DC - data chaining
-CCW_FLAG_CC - command chaining
-CCW_FLAG_SLI - suppress incorrect length
-CCW_FLAG_SKIP - skip
-CCW_FLAG_PCI - PCI
-CCW_FLAG_IDA - indirect addressing
-CCW_FLAG_SUSPEND - suspend
+::
+
+ int ccw_device_start(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags);
+ int ccw_device_start_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags,
+ int expires);
+ int ccw_device_start_key(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags);
+ int ccw_device_start_key_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags,
+ int expires);
+
+============= =============================================================
+cdev ccw_device the I/O is destined for
+cpa logical start address of channel program
+user_intparm user specific interrupt information; will be presented
+ back to the device driver's interrupt handler. Allows a
+ device driver to associate the interrupt with a
+ particular I/O request.
+lpm defines the channel path to be used for a specific I/O
+ request. A value of 0 will make cio use the opm.
+key the storage key to use for the I/O (useful for operating on a
+ storage with a storage key != default key)
+flag defines the action to be performed for I/O processing
+expires timeout value in jiffies. The common I/O layer will terminate
+ the running program after this and call the interrupt handler
+ with ERR_PTR(-ETIMEDOUT) as irb.
+============= =============================================================
+
+Possible flag values are:
+
+========================= =============================================
+DOIO_ALLOW_SUSPEND channel program may become suspended
+DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually
+ this implies the channel program might
+ become modified
+DOIO_SUPPRESS_INTER don't call the handler on intermediate status
+========================= =============================================
+
+The cpa parameter points to the first format 1 CCW of a channel program::
+
+ struct ccw1 {
+ __u8 cmd_code;/* command code */
+ __u8 flags; /* flags, like IDA addressing, etc. */
+ __u16 count; /* byte count */
+ __u32 cda; /* data address */
+ } __attribute__ ((packed,aligned(8)));
+
+with the following CCW flags values defined:
+
+=================== =========================
+CCW_FLAG_DC data chaining
+CCW_FLAG_CC command chaining
+CCW_FLAG_SLI suppress incorrect length
+CCW_FLAG_SKIP skip
+CCW_FLAG_PCI PCI
+CCW_FLAG_IDA indirect addressing
+CCW_FLAG_SUSPEND suspend
+=================== =========================
Via ccw_device_set_options(), the device driver may specify the following
options for the device:
-DOIO_EARLY_NOTIFICATION - allow for early interrupt notification
-DOIO_REPORT_ALL - report all interrupt conditions
+========================= ======================================
+DOIO_EARLY_NOTIFICATION allow for early interrupt notification
+DOIO_REPORT_ALL report all interrupt conditions
+========================= ======================================
-The ccw_device_start() function returns :
+The ccw_device_start() function returns:
- 0 - successful completion or request successfully initiated
--EBUSY - The device is currently processing a previous I/O request, or there is
- a status pending at the device.
--ENODEV - cdev is invalid, the device is not operational or the ccw_device is
- not online.
+======== ======================================================================
+ 0 successful completion or request successfully initiated
+ -EBUSY The device is currently processing a previous I/O request, or there is
+ a status pending at the device.
+-ENODEV cdev is invalid, the device is not operational or the ccw_device is
+ not online.
+======== ======================================================================
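+
+As an illustration only (not from the original interface description), a
+driver might build a one-CCW channel program and start it roughly as
+follows; the command code, buffer and my_request cookie are hypothetical::
+
+	static struct ccw1 ccw;	/* struct ccw1 is 8 byte aligned (see above) */
+	unsigned long flags;
+	int rc;
+
+	ccw.cmd_code = 0x02;			/* hypothetical read command */
+	ccw.flags    = CCW_FLAG_SLI;		/* suppress incorrect length */
+	ccw.count    = 4096;			/* byte count */
+	ccw.cda      = (__u32)(unsigned long) buffer;	/* data address */
+
+	/* must be called disabled and with the ccw device lock held,
+	   see the usage notes below */
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	rc = ccw_device_start(cdev, &ccw, (unsigned long) my_request, 0x00, 0);
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+	if (rc)
+		/* -EBUSY or -ENODEV, see the table above */ ;
+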
When the I/O request completes, the CDS first level interrupt handler will
accumulate the status in a struct irb and then call the device interrupt handler.
-The intparm field will contain the value the device driver has associated with a
-particular I/O request. If a pending device status was recognized,
+The intparm field will contain the value the device driver has associated with a
+particular I/O request. If a pending device status was recognized,
intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
by an alert status notification. In any case this status is not related to the
current (last) I/O request. In case of a delayed status notification no special
@@ -282,9 +309,11 @@ never started, even though ccw_device_start() returned with successful completio
The irb may contain an error value, and the device driver should check for this
first:
--ETIMEDOUT: the common I/O layer terminated the request after the specified
- timeout value
--EIO: the common I/O layer terminated the request due to an error state
+========== =================================================================
+-ETIMEDOUT the common I/O layer terminated the request after the specified
+ timeout value
+-EIO the common I/O layer terminated the request due to an error state
+========== =================================================================
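+
+A sketch (an assumption, not from the original text) of the corresponding
+check at the top of a driver's interrupt handler::
+
+	static void my_int_handler(struct ccw_device *cdev,
+				   unsigned long intparm, struct irb *irb)
+	{
+		if (IS_ERR(irb)) {
+			if (PTR_ERR(irb) == -ETIMEDOUT)
+				/* the expires value given to
+				   ccw_device_start_timeout() ran out */ ;
+			return;
+		}
+		/* normal status processing follows */
+	}
+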
If the concurrent sense flag in the extended status word (esw) in the irb is
set, the field erw.scnt in the esw describes the number of device specific
@@ -294,6 +323,7 @@ sensing by the device driver itself is required.
The device interrupt handler can use the following definitions to investigate
the primary unit check source coded in sense byte 0 :
+======================= ====
SNS0_CMD_REJECT 0x80
SNS0_INTERVENTION_REQ 0x40
SNS0_BUS_OUT_CHECK 0x20
@@ -301,36 +331,41 @@ SNS0_EQUIPMENT_CHECK 0x10
SNS0_DATA_CHECK 0x08
SNS0_OVERRUN 0x04
SNS0_INCOMPL_DOMAIN 0x01
+======================= ====
Depending on the device status, multiple of those values may be set together.
Please refer to the device specific documentation for details.
The irb->scsw.cstat field provides the (accumulated) subchannel status :
-SCHN_STAT_PCI - program controlled interrupt
-SCHN_STAT_INCORR_LEN - incorrect length
-SCHN_STAT_PROG_CHECK - program check
-SCHN_STAT_PROT_CHECK - protection check
-SCHN_STAT_CHN_DATA_CHK - channel data check
-SCHN_STAT_CHN_CTRL_CHK - channel control check
-SCHN_STAT_INTF_CTRL_CHK - interface control check
-SCHN_STAT_CHAIN_CHECK - chaining check
+========================= ============================
+SCHN_STAT_PCI program controlled interrupt
+SCHN_STAT_INCORR_LEN incorrect length
+SCHN_STAT_PROG_CHECK program check
+SCHN_STAT_PROT_CHECK protection check
+SCHN_STAT_CHN_DATA_CHK channel data check
+SCHN_STAT_CHN_CTRL_CHK channel control check
+SCHN_STAT_INTF_CTRL_CHK interface control check
+SCHN_STAT_CHAIN_CHECK chaining check
+========================= ============================
The irb->scsw.dstat field provides the (accumulated) device status :
-DEV_STAT_ATTENTION - attention
-DEV_STAT_STAT_MOD - status modifier
-DEV_STAT_CU_END - control unit end
-DEV_STAT_BUSY - busy
-DEV_STAT_CHN_END - channel end
-DEV_STAT_DEV_END - device end
-DEV_STAT_UNIT_CHECK - unit check
-DEV_STAT_UNIT_EXCEP - unit exception
+===================== =================
+DEV_STAT_ATTENTION attention
+DEV_STAT_STAT_MOD status modifier
+DEV_STAT_CU_END control unit end
+DEV_STAT_BUSY busy
+DEV_STAT_CHN_END channel end
+DEV_STAT_DEV_END device end
+DEV_STAT_UNIT_CHECK unit check
+DEV_STAT_UNIT_EXCEP unit exception
+===================== =================
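+
+An illustrative fragment (not from the original text; whether the sense
+bytes live in irb->ecw is an assumption) testing the accumulated status::
+
+	if (irb->scsw.cstat & (SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHN_CTRL_CHK))
+		/* channel problem, maybe retry on another path */ ;
+	if (irb->scsw.dstat & DEV_STAT_UNIT_CHECK)
+		/* examine sense byte 0, e.g. for SNS0_CMD_REJECT */ ;
+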
Please see the ESA/390 Principles of Operation manual for details on the
individual flag meanings.
-Usage Notes :
+Usage Notes:
ccw_device_start() must be called disabled and with the ccw device lock held.
@@ -374,32 +409,39 @@ secondary status without error (alert status) is presented, this indicates
successful completion for all overlapping ccw_device_start() requests that have
been issued since the last secondary (final) status.
-Channel programs that intend to set the suspend flag on a channel command word
-(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
-suspend flag will cause a channel program check. At the time the channel program
-becomes suspended an intermediate interrupt will be generated by the channel
+Channel programs that intend to set the suspend flag on a channel command word
+(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
+suspend flag will cause a channel program check. At the time the channel program
+becomes suspended an intermediate interrupt will be generated by the channel
subsystem.
-ccw_device_resume() - Resume Channel Program Execution
+ccw_device_resume() - Resume Channel Program Execution
-If a device driver chooses to suspend the current channel program execution by
-setting the CCW suspend flag on a particular CCW, the channel program execution
-is suspended. In order to resume channel program execution the CIO layer
-provides the ccw_device_resume() routine.
+If a device driver chooses to suspend the current channel program execution by
+setting the CCW suspend flag on a particular CCW, the channel program execution
+is suspended. In order to resume channel program execution the CIO layer
+provides the ccw_device_resume() routine.
-int ccw_device_resume(struct ccw_device *cdev);
+::
-cdev - ccw_device the resume operation is requested for
+ int ccw_device_resume(struct ccw_device *cdev);
+
+==== ================================================
+cdev ccw_device the resume operation is requested for
+==== ================================================
The ccw_device_resume() function returns:
- 0 - suspended channel program is resumed
--EBUSY - status pending
--ENODEV - cdev invalid or not-operational subchannel
--EINVAL - resume function not applicable
--ENOTCONN - there is no I/O request pending for completion
+========= ==============================================
+ 0 suspended channel program is resumed
+ -EBUSY status pending
+ -ENODEV cdev invalid or not-operational subchannel
+ -EINVAL resume function not applicable
+-ENOTCONN there is no I/O request pending for completion
+========= ==============================================
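+
+An illustrative fragment (assuming ccw and cdev as in the
+ccw_device_start() sketch above)::
+
+	/* the suspend flag requires the I/O to have been started
+	   with DOIO_ALLOW_SUSPEND */
+	ccw.flags |= CCW_FLAG_SUSPEND;
+
+	/* later, after the intermediate interrupt, resume execution */
+	rc = ccw_device_resume(cdev);
+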
Usage Notes:
+
Please have a look at the ccw_device_start() usage notes for more details on
suspended channel programs.
@@ -412,22 +454,28 @@ command is provided.
ccw_device_halt() must be called disabled and with the ccw device lock held.
-int ccw_device_halt(struct ccw_device *cdev,
- unsigned long intparm);
+::
+
+ int ccw_device_halt(struct ccw_device *cdev,
+ unsigned long intparm);
-cdev : ccw_device the halt operation is requested for
-intparm : interruption parameter; value is only used if no I/O
- is outstanding, otherwise the intparm associated with
- the I/O request is returned
+======= =====================================================
+cdev ccw_device the halt operation is requested for
+intparm interruption parameter; value is only used if no I/O
+ is outstanding, otherwise the intparm associated with
+ the I/O request is returned
+======= =====================================================
-The ccw_device_halt() function returns :
+The ccw_device_halt() function returns:
- 0 - request successfully initiated
--EBUSY - the device is currently busy, or status pending.
--ENODEV - cdev invalid.
--EINVAL - The device is not operational or the ccw device is not online.
+======= ==============================================================
+ 0 request successfully initiated
+-EBUSY the device is currently busy, or status pending.
+-ENODEV cdev invalid.
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
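+
+For instance (illustrative only), ending an outstanding network read might
+look like::
+
+	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	rc = ccw_device_halt(cdev, (unsigned long) my_request);
+	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+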
-Usage Notes :
+Usage Notes:
A device driver may write a never-ending channel program by writing a channel
program that at its end loops back to its beginning by means of a transfer in
@@ -438,25 +486,34 @@ can then perform an appropriate action. Prior to interrupt of an outstanding
read to a network device (with or without PCI flag) a ccw_device_halt()
is required to end the pending operation.
-ccw_device_clear() - Terminage I/O Request Processing
+::
+
+ ccw_device_clear() - Terminate I/O Request Processing
In order to terminate all I/O processing at the subchannel, the clear subchannel
(CSCH) command is used. It can be issued via ccw_device_clear().
ccw_device_clear() must be called disabled and with the ccw device lock held.
-int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
+::
+
+ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
-cdev: ccw_device the clear operation is requested for
-intparm: interruption parameter (see ccw_device_halt())
+======= ===============================================
+cdev ccw_device the clear operation is requested for
+intparm interruption parameter (see ccw_device_halt())
+======= ===============================================
The ccw_device_clear() function returns:
- 0 - request successfully initiated
--ENODEV - cdev invalid
--EINVAL - The device is not operational or the ccw device is not online.
+======= ==============================================================
+ 0 request successfully initiated
+-ENODEV cdev invalid
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
Miscellaneous Support Routines
+------------------------------
This chapter describes various routines to be used in a Linux/390 device
driver programming environment.
@@ -466,7 +523,8 @@ get_ccwdev_lock()
Get the address of the device specific lock. This is then used in
spin_lock() / spin_unlock() calls.
+::
-__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
+ __u8 ccw_device_get_path_mask(struct ccw_device *cdev);
Get the mask of the path currently available for cdev.
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/common_io.rst
index 6e0f63f343b4..846485681ce7 100644
--- a/Documentation/s390/CommonIO
+++ b/Documentation/s390/common_io.rst
@@ -1,5 +1,9 @@
-S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
-============================================================================
+======================
+S/390 common I/O-Layer
+======================
+
+command line parameters, procfs and debugfs entries
+===================================================
Command line parameters
-----------------------
@@ -13,7 +17,7 @@ Command line parameters
device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
The given devices will be ignored by the common I/O-layer; no detection
- and device sensing will be done on any of those devices. The subchannel to
+ and device sensing will be done on any of those devices. The subchannel to
which the device in question is attached will be treated as if no device was
attached.
@@ -28,14 +32,20 @@ Command line parameters
keywords can be used to refer to the CCW based boot device and CCW console
device respectively (these are probably useful only when combined with the '!'
operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
- The command line is parsed from left to right.
+ The command line is parsed from left to right.
+
+ For example::
- For example,
cio_ignore=0.0.0023-0.0.0042,0.0.4711
+
will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
0.0.4711, if detected.
- As another example,
+
+ As another example::
+
cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
+
will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
By default, no devices are ignored.
@@ -48,40 +58,45 @@ Command line parameters
Lists the ranges of devices (by bus id) which are ignored by common I/O.
- You can un-ignore certain or all devices by piping to /proc/cio_ignore.
- "free all" will un-ignore all ignored devices,
+ You can un-ignore certain or all devices by piping to /proc/cio_ignore.
+ "free all" will un-ignore all ignored devices,
"free <device range>, <device range>, ..." will un-ignore the specified
devices.
For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
+
- echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
- echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
0.0.0041;
- - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
+ - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
devices.
- When a device is un-ignored, device recognition and sensing is performed and
+ When a device is un-ignored, device recognition and sensing is performed and
the device driver will be notified if possible, so the device will become
available to the system. Note that un-ignoring is performed asynchronously.
- You can also add ranges of devices to be ignored by piping to
+ You can also add ranges of devices to be ignored by piping to
/proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
specified devices.
Note: While already known devices can be added to the list of devices to be
- ignored, there will be no effect on then. However, if such a device
+ ignored, there will be no effect on them. However, if such a device
disappears and then reappears, it will then be ignored. To make
known devices go away, you need the "purge" command (see below).
- For example,
+ For example::
+
"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
+
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
devices.
- You can remove already known but now ignored devices via
+ You can remove already known but now ignored devices via::
+
"echo purge > /proc/cio_ignore"
+
All devices ignored but still registered and not online (= not in use)
will be deregistered and thus removed from the system.
@@ -115,11 +130,11 @@ debugfs entries
Various debug messages from the common I/O-layer.
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
- Logs the calling of functions in the common I/O-layer and, if applicable,
+ Logs the calling of functions in the common I/O-layer and, if applicable,
which subchannel they were called for, as well as dumps of some data
structures (like irb in an error case).
- The level of logging can be changed to be more or less verbose by piping to
+ The level of logging can be changed to be more or less verbose by piping to
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
- documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
+ documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
for details.
diff --git a/Documentation/s390/DASD b/Documentation/s390/dasd.rst
index 9963f1e9c98a..9e22247285c8 100644
--- a/Documentation/s390/DASD
+++ b/Documentation/s390/dasd.rst
@@ -1,4 +1,6 @@
+==================
DASD device driver
+==================
S/390's disk devices (DASDs) are managed by Linux via the DASD device
driver. It is valid for all types of DASDs and represents them to
@@ -14,14 +16,14 @@ parameters are to be given in hexadecimal notation without a leading
If you supply kernel parameters the different instances are processed
in order of appearance and a minor number is reserved for any device
covered by the supplied range up to 64 volumes. Additional DASDs are
-ignored. If you do not supply the 'dasd=' kernel parameter at all, the
+ignored. If you do not supply the 'dasd=' kernel parameter at all, the
DASD driver registers all supported DASDs of your system to a minor
number in ascending order of the subchannel number.
The driver currently supports ECKD-devices and there are stubs for
support of the FBA and CKD architectures. For the FBA architecture
only some smart data structures are missing to make the support
-complete.
+complete.
We performed our testing on 3380 and 3390 type disks of different
sizes, under VM and on the bare hardware (LPAR), using internal disks
of the multiprise as well as a RAMAC virtual array. Disks exported by
@@ -34,19 +36,22 @@ accessibility of the DASD from other OSs. In a later stage we will
provide support of partitions, maybe VTOC oriented or using a kind of
partition table in the label record.
-USAGE
+Usage
+=====
-Low-level format (?CKD only)
For using an ECKD-DASD as a Linux harddisk you have to low-level
format the tracks by issuing the BLKDASDFORMAT-ioctl on that
device. This will erase any data on that volume including IBM volume
-labels, VTOCs etc. The ioctl may take a 'struct format_data *' or
-'NULL' as an argument.
-typedef struct {
+labels, VTOCs etc. The ioctl may take a `struct format_data *` or
+'NULL' as an argument::
+
+ typedef struct {
int start_unit;
int stop_unit;
int blksize;
-} format_data_t;
+ } format_data_t;
+
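+A hypothetical user space sketch (not part of the original text; the
+device node name and the header providing BLKDASDFORMAT are assumptions)::
+
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <sys/ioctl.h>
+
+	int main(void)
+	{
+		int fd = open("/dev/dda", O_RDWR);
+		format_data_t f = { 0, 100, 1024 };	/* tracks 0-100, 1kB blocks */
+
+		if (fd < 0 || ioctl(fd, BLKDASDFORMAT, &f) < 0)
+			perror("BLKDASDFORMAT");
+		return 0;
+	}
+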
When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
and stop_unit are the first and last track to be formatted. If
@@ -56,17 +61,23 @@ up to the last track. blksize can be any power of two between 512 and
1kB blocks anyway and you gain approx. 50% of capacity increasing your
blksize from 512 byte to 1kB.
--Make a filesystem
+Make a filesystem
+=================
+
Then you can mk??fs the filesystem of your choice on that volume or
partition. For reasons of sanity you should build your filesystem on
-the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
+the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
but may be sure that you can reuse your data after introduction of a
real partition table.
-BUGS:
+Bugs
+====
+
- Performance sometimes is rather low because we don't fully exploit clustering
-TODO-List:
+TODO-List
+=========
+
- Add IBM'S Disk layout to genhd
- Enhance driver to use more than one major number
- Enable usage as a module
diff --git a/Documentation/s390/debugging390.rst b/Documentation/s390/debugging390.rst
new file mode 100644
index 000000000000..d49305fd5e1a
--- /dev/null
+++ b/Documentation/s390/debugging390.rst
@@ -0,0 +1,2613 @@
+=============================================
+Debugging on Linux for s/390 & z/Architecture
+=============================================
+
+Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+
+Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
+
+.. Best viewed with fixed width fonts
+
+Overview of Document:
+=====================
+This document is intended to give a good overview of how to debug Linux for
+s/390 and z/Architecture. It is not intended as a complete reference nor as a
+tutorial on the fundamentals of C & assembly. It doesn't go into
+390 IO in any detail. It is intended to complement the documents in the
+reference section below & any other worthwhile references you get.
+
+It is intended, like the Enterprise Systems Architecture/390 Reference Summary,
+to be printed out & used as a quick cheat-sheet, self-help style reference
+when problems occur.
+
+.. Contents
+ ========
+ Register Set
+ Address Spaces on Intel Linux
+ Address Spaces on Linux for s/390 & z/Architecture
+ The Linux for s/390 & z/Architecture Kernel Task Structure
+ Register Usage & Stackframes on Linux for s/390 & z/Architecture
+ A sample program with comments
+ Compiling programs for debugging on Linux for s/390 & z/Architecture
+ Debugging under VM
+ s/390 & z/Architecture IO Overview
+ Debugging IO on s/390 & z/Architecture under VM
+ GDB on s/390 & z/Architecture
+ Stack chaining in gdb by hand
+ Examining core dumps
+ ldd
+ Debugging modules
+ The proc file system
+ SysRq
+ References
+ Special Thanks
+
+Register Set
+============
+The current architectures have the following registers.
+
+16 General purpose registers, 32 bit on s/390 and 64 bit on z/Architecture,
+r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
+
+16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
+kernel usage only, used for memory management, interrupt control, debugging
+control etc.
+
+16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
+not normally used by user programs but which could potentially be used as
+temporary storage. These registers have a 1:1 association with general
+purpose registers and are designed to be used in the so-called access
+register mode to select different address spaces.
+Access register 0 (and access register 1 on z/Architecture, which needs a
+64 bit pointer) is currently used by the pthread library as a pointer to
+the currently running thread's private area.
+
+16 64-bit floating point registers (fp0-fp15), IEEE & HFP floating
+point format compliant on G5 upwards, & a floating point control reg (FPC).
+
+4 64-bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
+
+Note:
+ Linux (currently) always uses IEEE & emulates G5 IEEE format on older
+ machines ( provided the kernel is configured for this ).
+
+
+The PSW is the most important register on the machine. It
+is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
+a program counter (pc), condition code register & memory space designator.
+In IBM standard notation I am counting bit 0 as the MSB.
+It has several advantages over a normal program counter
+in that you can change address translation & program counter
+in a single instruction. To change address translation,
+e.g. switching address translation off, requires that you
+have a logical=physical mapping for the address you are
+currently running at.
+
++-------------------------+-------------------------------------------------+
+| Bit | |
++--------+----------------+ Value |
+| s/390 | z/Architecture | |
++========+================+=================================================+
+| 0 | 0 | Reserved (must be 0) otherwise specification |
+| | | exception occurs. |
++--------+----------------+-------------------------------------------------+
+| 1 | 1 | Program Event Recording 1 PER enabled, |
+| | | PER is used to facilitate debugging e.g. |
+| | | single stepping. |
++--------+----------------+-------------------------------------------------+
+| 2-4 | 2-4 | Reserved (must be 0). |
++--------+----------------+-------------------------------------------------+
+| 5 | 5 | Dynamic address translation 1=DAT on. |
++--------+----------------+-------------------------------------------------+
+| 6 | 6 | Input/Output interrupt Mask |
++--------+----------------+-------------------------------------------------+
+| 7 | 7 | External interrupt Mask used primarily for |
+| | | interprocessor signalling and clock interrupts. |
++--------+----------------+-------------------------------------------------+
+| 8-11 | 8-11 | PSW Key used for complex memory protection |
+| | | mechanism (not used under linux) |
++--------+----------------+-------------------------------------------------+
+| 12 | 12 | 1 on s/390 0 on z/Architecture |
++--------+----------------+-------------------------------------------------+
+| 13 | 13 | Machine Check Mask 1=enable machine check |
+| | | interrupts |
++--------+----------------+-------------------------------------------------+
+| 14 | 14 | Wait State. Set this to 1 to stop the processor |
+| | | except for interrupts and give time to other |
+| | | LPARS. Used in CPU idle in the kernel to |
+| | | increase overall usage of processor resources. |
++--------+----------------+-------------------------------------------------+
+| 15 | 15 | Problem state (if set to 1 certain instructions |
+| | | are disabled). All linux user programs run with |
+| | | this bit 1 (useful info for debugging under VM).|
++--------+----------------+-------------------------------------------------+
+| 16-17 | 16-17 | Address Space Control |
+| | | |
+| | | 00 Primary Space Mode: |
+| | | |
+| | | The register CR1 contains the primary |
+| | | address-space control element (PASCE), which |
+| | | points to the primary space region/segment |
+| | | table origin. |
+| | | |
+| | | 01 Access register mode |
+| | | |
+| | | 10 Secondary Space Mode: |
+| | | |
+| | | The register CR7 contains the secondary |
+| | | address-space control element (SASCE), which |
+| | | points to the secondary space region or |
+| | | segment table origin. |
+| | | |
+| | | 11 Home Space Mode: |
+| | | |
+| | | The register CR13 contains the home space |
+| | | address-space control element (HASCE), which |
+| | | points to the home space region/segment |
+| | | table origin. |
+| | | |
+| | | See "Address Spaces on Linux for s/390 & |
+| | | z/Architecture" below for more information |
+| | | about address space usage in Linux. |
++--------+----------------+-------------------------------------------------+
+| 18-19 | 18-19 | Condition codes (CC) |
++--------+----------------+-------------------------------------------------+
+| 20 | 20 | Fixed point overflow mask if 1=FPU exceptions |
+| | | for this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 21 | 21 | Decimal overflow mask if 1=FPU exceptions for |
+| | | this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 22 | 22 | Exponent underflow mask if 1=FPU exceptions |
+| | | for this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 23 | 23 | Significance Mask if 1=FPU exceptions for this |
+| | | event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 24-31 | 24-30 | Reserved Must be 0. |
+| +----------------+-------------------------------------------------+
+| | 31 | Extended Addressing Mode |
+| +----------------+-------------------------------------------------+
+| | 32 | Basic Addressing Mode |
+| | | |
+| | | Used to set addressing mode |
+| | | |
+| | | +---------+----------+----------+ |
+| | | | PSW 31 | PSW 32 | | |
+| | | +---------+----------+----------+ |
+| | | | 0 | 0 | 24 bit | |
+| | | +---------+----------+----------+ |
+| | | | 0 | 1 | 31 bit | |
+| | | +---------+----------+----------+ |
+| | | | 1 | 1 | 64 bit | |
+| | | +---------+----------+----------+ |
++--------+----------------+-------------------------------------------------+
+| 32 | | 1=31 bit addressing mode 0=24 bit addressing |
+| | | mode (for backward compatibility), linux |
+| | | always runs with this bit set to 1 |
++--------+----------------+-------------------------------------------------+
+| 33-64 | | Instruction address. |
+| +----------------+-------------------------------------------------+
+| | 33-63 | Reserved must be 0 |
+| +----------------+-------------------------------------------------+
+| | 64-127 | Address |
+| | | |
+| | | - In 24 bits mode bits 64-103=0 bits 104-127 |
+| | | Address |
+| | | - In 31 bits mode bits 64-96=0 bits 97-127 |
+| | | Address |
+| | | |
+| | | Note: |
+| | | unlike 31 bit mode on s/390 bit 96 must be |
+| | | zero when loading the address with LPSWE |
+| | | otherwise a specification exception occurs, |
+| | | LPSW is fully backward compatible. |
++--------+----------------+-------------------------------------------------+
+
+Prefix Page(s)
+--------------
+This per cpu memory area is too intimately tied to the processor not to
+mention it here.
+It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
+z/Architecture and is exchanged with one page on s/390 or two pages on
+z/Architecture in absolute storage by the set prefix instruction during Linux
+startup.
+
+This page is mapped to a different prefix for each processor in an SMP
+configuration (assuming the OS designer is sane of course).
+
+Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
+z/Architecture are used by the processor itself for holding such information
+as exception indications and entry points for exceptions.
+
+Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
+z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
+0x1000, too, which is used by Linux).
+
+The closest thing to this on traditional architectures is the interrupt
+vector table. This is a good thing & does simplify some of the kernel coding;
+however, it means that we now cannot catch stray NULL pointers in the
+kernel without hard coded checks.
+
+
+
+Address Spaces on Intel Linux
+=============================
+
+The traditional Intel Linux is approximately mapped as follows
+(forgive the ascii art)::
+
+ 0xFFFFFFFF 4GB Himem *****************
+ * *
+ * Kernel Space *
+ * *
+ ***************** ****************
+ User Space Himem * User Stack * * *
+ (typically 0xC0000000 3GB ) ***************** * *
+ * Shared Libs * * Next Process *
+ ***************** * to *
+ * * <== * Run * <==
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+ 0x00000000 ***************** ****************
+
+Now it is easy to see that on Intel a kernel address is recognisable,
+as it is greater than user space himem (in this case 0xC0000000);
+addresses less than this belong to the current running program on
+this processor (if an smp box).
+
+If using the virtual machine ( VM ) as a debugger it is quite difficult to
+know which user process is running as the address space you are looking at
+could be from any process in the run queue.
+
+The limitation of Intel's addressing technique is that the linux
+kernel uses a very simple real address to virtual addressing technique
+of Real Address=Virtual Address-User Space Himem.
+This means that on Intel the linux kernel can typically only address
+Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
+can typically use.
+
+They can lower User Himem to 2GB or lower & thus be
+able to use 2GB of RAM; however, this shrinks the maximum size
+of User Space from 3GB to 2GB. Either way there is a no-win limit of
+4GB unless they go to 64 Bit.
+
+
+On 390 our limitations & strengths make us slightly different.
+For backward compatibility we are only allowed to use 31 bits (2GB)
+of our 32 bit addresses; however, we use entirely separate address
+spaces for the user & kernel.
+
+This means we can support 2GB of non Extended RAM on s/390, & more
+with the Extended memory management swap device, &
+currently 4TB of physical memory on z/Architecture.
+
+
+Address Spaces on Linux for s/390 & z/Architecture
+==================================================
+
+Our addressing scheme is basically as follows::
+
+ Primary Space Home Space
+ Himem 0x7fffffff 2GB on s/390 ***************** ****************
+ currently 0x3ffffffffff (2^42)-1 * User Stack * * *
+ on z/Architecture. ***************** * *
+ * Shared Libs * * *
+ ***************** * *
+ * * * Kernel *
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+ 0x00000000 ***************** ****************
+
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually also runs in home space mode, however when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+
+- runs in home space mode
+- cr1 contains the user or kernel asce
+
+ - the kernel asce is loaded when a uaccess requires primary or
+ secondary address mode
+
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+
+- primary space mode in case of a uaccess (copy_to_user) and uses
+ e.g. the mvcp instruction to access user space. However the kernel
+ will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+ instructions come from primary address space and data from secondary
+ space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
+
+Virtual Addresses on s/390 & z/Architecture
+===========================================
+
+A virtual address on s/390 is made up of 3 parts:
+The SX (segment index, roughly corresponding to the PGD & PMD in Linux
+terminology), being bits 1-11.
+
+The PX (page index, corresponding to the page table entry (pte) in Linux
+terminology) being bits 12-19.
+
+The remaining bits, BX (the byte index), are the offset in the page,
+i.e. bits 20 to 31.
+
+On z/Architecture in linux we currently make up an address from 4 parts.
+
+- The region index bits (RX) 0-32 we currently use bits 22-32
+- The segment index (SX) being bits 33-43
+- The page index (PX) being bits 44-51
+- The byte index (BX) being bits 52-63
+
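+A small illustrative helper (not in the original text) that splits a
+z/Architecture virtual address into the four fields listed above::
+
+	#include <stdio.h>
+
+	void decode_va(unsigned long addr)
+	{
+		unsigned long bx = addr & 0xfff;	 /* bits 52-63 */
+		unsigned long px = (addr >> 12) & 0xff;	 /* bits 44-51 */
+		unsigned long sx = (addr >> 20) & 0x7ff; /* bits 33-43 */
+		unsigned long rx = (addr >> 31) & 0x7ff; /* bits 22-32 */
+
+		printf("rx=%lx sx=%lx px=%lx bx=%lx\n", rx, sx, px, bx);
+	}
+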
+Notes:
+ 1) s/390 has no PMD so the PMD is really the PGD also.
+ A lot of this stuff is defined in pgtable.h.
+
+ 2) Also, seeing as s/390's page tables are only 1k in size
+    (bits 12-19 x 4 bytes per pte) we use one 4k page
+    to make the best use of memory, updating 4 segment index
+    entries each time we mess with a PMD & using offsets
+    0,1024,2048 & 3072 in this page for our segment indexes.
+    On z/Architecture our page tables are now 2k in size
+    (bits 12-19 x 8 bytes per pte) so we do a similar trick
+    but only mess with 2 segment index entries each time we mess
+    with a PMD.
+
+ 3) As z/Architecture supports up to a massive 5-level page table lookup, we
+    can only use 3 levels currently on Linux ( as this is all the generic
+    kernel currently supports ); however, this may change in future.
+    This allows us to access ( according to my sums )
+    4TB of virtual storage per process, i.e.
+    4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
+    enough for another 2 or 3 years I think :-).
+    To do this we use a region-third-table designation type in
+    our address space control registers.
+
+
+The Linux for s/390 & z/Architecture Kernel Task Structure
+==========================================================
+Each process/thread under Linux for S390 has its own kernel task_struct,
+defined in linux/include/linux/sched.h.
+On initialisation & resuming of a process on a cpu, S390 Linux sets
+the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
+(which we use for per-processor globals).
+
+The kernel stack pointer is intimately tied with the task structure for
+each processor as follows::
+
+ s/390
+ ************************
+ * 1 page kernel stack *
+ * ( 4K ) *
+ ************************
+ * 1 page task_struct *
+ * ( 4K ) *
+ 8K aligned ************************
+
+ z/Architecture
+ ************************
+ * 2 page kernel stack *
+ * ( 8K ) *
+ ************************
+ * 2 page task_struct *
+ * ( 8K ) *
+ 16K aligned ************************
+
+What this means is that we don't need to dedicate any register or global
+variable to point to the current running process & can retrieve it with the
+following very simple construct for s/390 & one very similar for
+z/Architecture::
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+ __asm__("lhi %0,-8192\n\t"
+ "nr %0,15"
+ : "=r" (current) );
+ return current;
+ }
+
+i.e. just anding the current kernel stack pointer with the mask -8192.
+Thankfully, because Linux doesn't have support for nested IO interrupts
+& our devices have large buffers that can survive interrupts being shut off
+for short amounts of time, we don't need a separate stack for interrupts.
+
+
+
+
+Register Usage & Stackframes on Linux for s/390 & z/Architecture
+=================================================================
+Overview:
+---------
+This is the code that gcc produces at the top & the bottom of
+each function. It usually is fairly consistent & similar from
+function to function, & if you know its layout you can probably
+make some headway in finding the ultimate cause of a problem
+after a crash without a source level debugger.
+
+Note: To follow stackframes requires a knowledge of C or Pascal &
+limited knowledge of one assembly language.
+
+It should be noted that there are some differences between the
+s/390 and z/Architecture stack layouts as the z/Architecture stack layout
+didn't have to maintain compatibility with older linkage formats.
+
+Glossary:
+---------
+alloca:
+ This is a built-in compiler function for runtime allocation
+ of extra space on the caller's stack, which is obviously freed
+ up on function exit ( e.g. the caller may choose to allocate nothing
+ or a buffer of 4k if required for temporary purposes ); it generates
+ very efficient code ( a few cycles ) when compared to alternatives
+ like malloc.
+
+automatics:
+ These are local variables on the stack, i.e. they aren't in registers &
+ they aren't static.
+
+back-chain:
+ This is a pointer to the stack pointer before entering a
+ framed function's prologue ( see frameless function ), got by
+ dereferencing the address of the current stack pointer,
+ i.e. by accessing the 32 bit value at the stack pointer's
+ current location.
+
+base-pointer:
+ This is a pointer to the back of the literal pool which
+ is an area just behind each procedure used to store constants
+ in each function.
+
+call-clobbered:
+ The caller probably needs to save these registers, if there
+ is something of value in them, on the stack or elsewhere before making a
+ call to another procedure so that it can restore them later.
+
+epilogue:
+ The code generated by the compiler to return to the caller.
+
+frameless-function:
+ A frameless function in Linux for s390 & z/Architecture is one which doesn't
+ need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
+ given to it by the caller.
+
+ A frameless function never:
+
+ 1) Sets up a back chain.
+ 2) Calls alloca.
+ 3) Calls other normal functions
+ 4) Has automatics.
+
+GOT-pointer:
+ This is a pointer to the global-offset-table in ELF
+ ( Executable and Linkable Format, Linux's most common executable format );
+ all globals & shared library objects are found using this pointer.
+
+lazy-binding
+ ELF shared libraries are typically only loaded when routines in the shared
+ library are actually first called at runtime. This is lazy binding.
+
+procedure-linkage-table
+ This is a table found from the GOT which contains pointers to routines
+ in other shared libraries which can't be called by easier means.
+
+prologue:
+ The code generated by the compiler to set up the stack frame.
+
+outgoing-args:
+ This is an extra area allocated on the stack of the calling function if the
+ parameters for the callee cannot all be put in registers; the same
+ area can be reused by each function the caller calls.
+
+routine-descriptor:
+ A COFF executable format based concept of a procedure reference
+ actually being 8 bytes or more as opposed to a simple pointer to the routine.
+ This is typically defined as follows:
+
+ - Routine Descriptor offset 0=Pointer to Function
+ - Routine Descriptor offset 4=Pointer to Table of Contents
+
+ The table of contents/TOC is roughly equivalent to a GOT pointer,
+ & it means that shared libraries etc. can be shared between several
+ environments each with their own TOC.
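+
+ Expressed as a C structure this layout might look like ( a sketch of the
+ description above, not a definition taken from any header )::
+
+    struct routine_descriptor {
+        void *function; /* offset 0: pointer to the function */
+        void *toc;      /* offset 4: pointer to the table of contents */
+    };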
+
+static-chain:
+ This is used in nested functions, a concept adopted from Pascal
+ by gcc & not used in ANSI C or C++ ( although quite useful ); basically it
+ is a pointer used to reference local variables of enclosing functions.
+ You might come across this stuff once or twice in your lifetime.
+
+ e.g.
+
+ The function below should return 11 though gcc may get upset & toss warnings
+ about unused variables::
+
+    int FunctionA(int a)
+    {
+        int b;
+        void FunctionC(int c)
+        {
+            b = c + 1;
+        }
+        FunctionC(10);
+        return (b);
+    }
+
+
+s/390 & z/Architecture Register usage
+=====================================
+
+======== ========================================== ===============
+r0 used by syscalls/assembly call-clobbered
+r1 used by syscalls/assembly call-clobbered
+r2 argument 0 / return value 0 call-clobbered
+r3 argument 1 / return value 1 (if long long) call-clobbered
+r4 argument 2 call-clobbered
+r5 argument 3 call-clobbered
+r6 argument 4 saved
+r7 pointer-to arguments 5 to ... saved
+r8 this & that saved
+r9 this & that saved
+r10 static-chain ( if nested function ) saved
+r11 frame-pointer ( if function used alloca ) saved
+r12 got-pointer saved
+r13 base-pointer saved
+r14 return-address saved
+r15 stack-pointer saved
+
+f0 argument 0 / return value ( float/double ) call-clobbered
+f2 argument 1 call-clobbered
+f4 z/Architecture argument 2 saved
+f6 z/Architecture argument 3 saved
+======== ========================================== ===============
+
+The remaining floating point registers
+f1,f3,f5 & f7-f15 are call-clobbered.
+
+Notes:
+------
+1) The only requirement is that registers which are used
+   by the callee are saved, e.g. the compiler is perfectly
+   capable of using r11 for purposes other than a frame
+   pointer if a frame pointer is not needed.
+2) In functions with variable arguments e.g. printf the calling procedure
+ is identical to one without variable arguments & the same number of
+ parameters. However, the prologue of this function is somewhat more
+ hairy owing to it having to move these parameters to the stack to
+ get va_start, va_arg & va_end to work.
+3) Access registers are currently unused by gcc but are used in
+ the kernel. Possibilities exist to use them at the moment for
+ temporary storage but it isn't recommended.
+4) Only 4 of the floating point registers are used for
+   parameter passing as older machines such as G3 have only 4
+   & it keeps the stack frame compatible with other compilers.
+   However with IEEE floating point emulation under linux on the
+   older machines you are free to use the other 12.
+5) A long long or double parameter cannot have the
+   first 4 bytes in a register & the second four bytes in the
+   outgoing args area. It must be purely in the outgoing args
+   area if crossing this boundary.
+6) Floating point parameters are mixed with outgoing args
+   on the outgoing args area in the order they are passed in as parameters.
+7) Floating point arguments 2 & 3 are saved in the outgoing args area for
+   z/Architecture.
+
+
+Stack Frame Layout
+------------------
+
+========= ============== ======================================================
+s/390 z/Architecture
+========= ============== ======================================================
+0 0 back chain ( a 0 here signifies end of back chain )
+4 8 eos ( end of stack, not used on Linux for S390 used
+ in other linkage formats )
+8 16 glue used in other s/390 linkage formats for saved
+ routine descriptors etc.
+12 24 glue used in other s/390 linkage formats for saved
+ routine descriptors etc.
+16 32 scratch area
+20 40 scratch area
+24 48 saved r6 of caller function
+28 56 saved r7 of caller function
+32 64 saved r8 of caller function
+36 72 saved r9 of caller function
+40 80 saved r10 of caller function
+44 88 saved r11 of caller function
+48 96 saved r12 of caller function
+52 104 saved r13 of caller function
+56 112 saved r14 of caller function
+60 120 saved r15 of caller function
+64 128 saved f4 of caller function
+72 136 saved f6 of caller function
+80 144 undefined
+96 160 outgoing args passed from caller to callee
+96+x 160+x possible stack alignment ( 8 bytes desirable )
+96+x+y 160+x+y alloca space of caller ( if used )
+96+x+y+z 160+x+y+z automatics of caller ( if used )
+0 back-chain
+========= ============== ======================================================
+
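+For reference, the 31 bit layout above can be written as a C structure
+( a sketch only; the field names are made up & the kernel has its own
+definitions )::
+
+    struct stack_frame {
+        unsigned long back_chain;  /* +0: 0 signifies end of back chain */
+        unsigned long eos;         /* +4: unused on Linux */
+        unsigned long glue[2];     /* +8: other linkage formats only */
+        unsigned long scratch[2];  /* +16 */
+        unsigned long gprs[10];    /* +24: saved r6-r15 of the caller */
+        double fprs[2];            /* +64: saved f4 & f6 of the caller */
+    };
+
+On z/Architecture the longs become 8 bytes & all the offsets double, as in
+the table.
+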
+A sample program with comments.
+===============================
+
+Comments on the function test
+-----------------------------
+1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
+   used ( :-( ).
+2) This is a frameless function & no new stack frame is acquired.
+3) The compiler was clever enough to recognise that it could return the
+   value in r2 as well as use it for the passed in parameter ( :-) ).
+4) The basr ( branch & save register ) trick works as follows: the instruction
+   has a special case, r0 as the second operand is understood as
+   the literal value 0 rather than a branch target ( some RISC architectures
+   also do this ), so no branch is taken & the address of the next instruction
+   is in r13. We now subtract the size of the function prologue we have
+   executed & the size of the literal pool to get to the top of the literal
+   pool::
+
+
+ 0040037c int test(int b)
+ { # Function prologue below
+ 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
+ 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
+ 400382: a7 da ff fa ahi %r13,-6 # basr trick
+ return(5+b);
+ # Huge main program
+ 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
+
+ # Function epilogue below
+ 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
+ 40038e: 07 fe br %r14 # return
+ }
+
+Comments on the function main
+-----------------------------
+1) The compiler did this function optimally ( 8-) )::
+
+ Literal pool for main.
+ 400390: ff ff ff ec .long 0xffffffec
+ main(int argc,char *argv[])
+ { # Function prologue below
+ 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
+ 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
+ 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
+ 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
+ 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
+ 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
+
+ return(test(5)); # Main Program Below
+ 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
+ # literal pool
+ 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
+ 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
+ # address using branch & save instruction.
+
+ # Function Epilogue below
+ 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
+ 4003b8: 07 fe br %r14 # return to do program exit
+ }
+
+
+Compiler updates
+----------------
+
+::
+
+ main(int argc,char *argv[])
+ {
+ 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
+ 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
+ 400504: 00 40 04 f4 .long 0x004004f4
+ # compiler now puts the constant pool in the code so it saves an instruction
+ 400508: 18 0f lr %r0,%r15
+ 40050a: a7 fa ff a0 ahi %r15,-96
+ 40050e: 50 00 f0 00 st %r0,0(%r15)
+ return(test(5));
+ 400512: 58 10 d0 00 l %r1,0(%r13)
+ 400516: a7 28 00 05 lhi %r2,5
+ 40051a: 0d e1 basr %r14,%r1
+ # compiler adds 1 extra instruction to the epilogue. This is done to
+ # avoid processor pipeline stalls owing to data dependencies on g5 &
+ # above, as register 14 in the old code was needed directly after being
+ # loaded by the lm %r11,%r15,140(%r15) for the br %r14.
+ 40051c: 58 40 f0 98 l %r4,152(%r15)
+ 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
+ 400524: 07 f4 br %r4
+ }
+
+
+Hartmut ( our compiler developer ) has also been threatening to take out the
+stack backchain in optimised code as this also causes pipeline stalls; you
+have been warned.
+
+64 bit z/Architecture code disassembly
+--------------------------------------
+
+If you understand the stuff above you'll understand the stuff
+below too, so I'll avoid repeating myself & just say that
+some of the instructions have g's on the end of them to indicate
+they are 64 bit & the stack offsets are bigger;
+the only other difference you'll find between 32 & 64 bit is that
+we now use f4 & f6 for floating point arguments on 64 bit::
+
+ 00000000800005b0 <test>:
+ int test(int b)
+ {
+ return(5+b);
+ 800005b0: a7 2a 00 05 ahi %r2,5
+ 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
+ 800005b8: 07 fe br %r14
+ 800005ba: 07 07 bcr 0,%r7
+
+
+ }
+
+ 00000000800005bc <main>:
+ main(int argc,char *argv[])
+ {
+ 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
+ 800005c2: b9 04 00 1f lgr %r1,%r15
+ 800005c6: a7 fb ff 60 aghi %r15,-160
+ 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
+ return(test(5));
+ 800005d0: a7 29 00 05 lghi %r2,5
+ # brasl allows jumps > 64k & is overkill here, bras would do fine
+ 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
+ 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
+ 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
+ 800005e6: 07 f4 br %r4
+ }
+
+
+
+Compiling programs for debugging on Linux for s/390 & z/Architecture
+====================================================================
+-gdwarf-2 now works & should be considered the default debugging
+format for s/390 & z/Architecture as it is more reliable for debugging
+shared libraries; normal -g debugging also works much better now
+thanks to the IBM Java compiler developers' bug reports.
+
+This is typically done by adding/appending the flags -g or -gdwarf-2 to the
+CFLAGS & LDFLAGS variables in the Makefile of the program concerned.
+
+If using gdb & you would like accurate displays of registers &
+stack traces, compile without optimisation, i.e. make sure
+that there is no -O2 or similar on the CFLAGS line of the Makefile &
+in the emitted gcc commands. Obviously this will produce worse code
+( not advisable for shipment ) but it is an aid to the debugging process.
+
+This aids debugging because the compiler will copy parameters passed in
+registers onto the stack so backtracing & looking at passed in
+parameters will work; however some larger programs which use inline functions
+will not compile without optimisation.
+
+Debugging with optimisation has improved much since some bugs were
+fixed; please make sure you are using gdb-5.0 or later, developed
+after Nov'2000.
+
+
+
+Debugging under VM
+==================
+
+Notes
+-----
+Addresses & values in the VM debugger are always hex, never decimal.
+Address ranges are of the format <HexValue1>-<HexValue2> or
+<HexValue1>.<HexValue2>.
+For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
+or 2000.1000.
+
+The VM Debugger is case insensitive.
+
+VM's strengths are usually other debuggers' weaknesses: you can get at any
+resource no matter how sensitive, e.g. memory management resources, or change
+address translation in the PSW. For kernel hacking you will reap dividends if
+you get good at it.
+
+The VM Debugger displays operators but not operands, & also displays
+useful information on the same line; the author of the code probably
+felt that it was a good idea not to go over the 80 columns on the screen.
+This isn't as unintuitive as it may seem as the s/390 instructions are easy to
+decode mentally and you can make a good guess at a lot of them as all the
+operands are nibble ( half byte ) aligned.
+So if you have an objdump listing to hand, it is quite easy to follow, and if
+you don't have an objdump listing keep a copy of the s/390 Reference Summary
+or alternatively the s/390 Principles of Operation next to you,
+e.g. even I can guess that
+0001AFF8' LR 180F CC 0
+is a ( load register ) lr r0,r15
+
+Also it is very easy to tell the length of a 390 instruction from the 2 most
+significant bits in the instruction ( not that this info is really useful
+except if you are trying to make sense of a hexdump of code ).
+Here is a table:
+
+======================= ==================
+Bits Instruction Length
+======================= ==================
+00 2 Bytes
+01 4 Bytes
+10 4 Bytes
+11 6 Bytes
+======================= ==================
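+
+If you ever need to do this programmatically, the table above expressed as
+code might look like this minimal C sketch::
+
+    /* decode the length of a s/390 instruction from its first byte:
+       bits 00 -> 2 bytes, 01 or 10 -> 4 bytes, 11 -> 6 bytes */
+    static int insn_length(unsigned char first_byte)
+    {
+        switch (first_byte >> 6) {
+        case 0:
+            return 2;
+        case 1:
+        case 2:
+            return 4;
+        default:
+            return 6;
+        }
+    }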
+
+The debugger also displays other useful info on the same line such as the
+addresses being operated on, destination addresses of branches & condition
+codes.
+e.g.::
+
+ 00019736' AHI A7DAFF0E CC 1
+ 000198BA' BRC A7840004 -> 000198C2' CC 0
+ 000198CE' STM 900EF068 >> 0FA95E78 CC 2
+
+
+
+Useful VM debugger commands
+---------------------------
+
+I suppose I'd better mention this before I start:
+to list the current active traces do::
+
+ Q TR
+
+there can be a maximum of 255 of these per set
+( more about trace sets later ).
+
+To stop traces issue a::
+
+ TR END.
+
+To delete a particular breakpoint issue::
+
+ TR DEL <breakpoint number>
+
+The PA1 key drops to CP mode so you can issue debugger commands.
+Doing alt c ( on my 3270 console at least ) clears the screen.
+
+Hitting b <enter> comes back to the running operating system
+from cp mode ( in our case linux ).
+
+It is typically useful to add shortcuts to your profile.exec file
+if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
+Here are a few from mine::
+
+ /* this gives me command history on issuing f12 */
+ set pf12 retrieve
+ /* this continues */
+ set pf8 imm b
+ /* goes to trace set a */
+ set pf1 imm tr goto a
+ /* goes to trace set b */
+ set pf2 imm tr goto b
+ /* goes to trace set c */
+ set pf3 imm tr goto c
+
+
+
+Instruction Tracing
+-------------------
+Setting a simple breakpoint::
+
+ TR I PSWA <address>
+
+To debug a particular function try::
+
+ TR I R <function address range>
+ TR I on its own will single step.
+ TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
+
+e.g.::
+
+ TR I DATA 4D R 0197BC.4000
+
+will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
+
+if you were inclined you could add traces for all branch instructions &
+suffix them with the run prefix so you would have a backtrace on screen
+when a program crashes::
+
+ TR BR <INTO OR FROM> will trace branches into or out of an address.
+
+e.g.::
+
+ TR BR INTO 0
+
+is often quite useful if a program is getting awkward & deciding
+to branch to 0 & crashing, as this will stop at the address before it jumps to 0.
+
+::
+
+ TR I R <address range> RUN cmd d g
+
+single steps a range of addresses but stays running &
+displays the gprs on each step.
+
+
+
+Displaying & modifying Registers
+--------------------------------
+D G
+ will display all the gprs
+
+Adding an extra G to all the commands is necessary to access the full 64 bit
+content in VM on z/Architecture. Obviously this isn't required for access
+registers as these are still 32 bit.
+
+e.g.
+
+DGG
+ instead of DG
+
+D X
+ will display all the control registers
+D AR
+ will display all the access registers
+D AR4-7
+ will display access registers 4 to 7
+CPU ALL D G
+ will display the GPRs of all CPUs in the configuration
+D PSW
+ will display the current PSW
+st PSW 2000
+ will put the value 2000 into the PSW & crash your machine.
+D PREFIX
+ displays the prefix offset
+
+
+Displaying Memory
+-----------------
+To display memory mapped using the current PSW's mapping try::
+
+ D <range>
+
+To make VM display a message each time it hits a particular address and
+continue try:
+
+D I<range>
+ will disassemble/display a range of instructions.
+
+ST addr 32 bit word
+ will store a 32 bit word at a 32 bit aligned address
+D T<range>
+ will display the EBCDIC in an address (if you are that way inclined)
+D R<range>
+ will display real addresses ( without DAT ) but with prefixing.
+
+There are other complex options to display; if you need to get at say home
+space but are in primary space, the easiest thing to do is to temporarily
+modify the PSW to the other addressing mode, display the stuff & then
+restore it.
+
+
+
+Hints
+-----
+If you want to issue a debugger command without halting your virtual machine
+with the PA1 key try prefixing the command with #CP e.g.::
+
+ #cp tr i pswa 2000
+
+also suffixing most debugger commands with RUN will cause them not
+to stop, just display the mnemonic at the current instruction on the console.
+
+If you have several breakpoints you want to put into your program &
+you get fed up with cross referencing with System.map
+you can do the following trick for several symbols.
+
+::
+
+ grep do_signal System.map
+
+which emits the following among other things::
+
+ 0001f4e0 T do_signal
+
+now you can do::
+
+ TR I PSWA 0001f4e0 cmd msg * do_signal
+
+This sends a message to your own console each time do_signal is entered.
+( As an aside I wrote a perl script once which automatically generated a REXX
+script with breakpoints on every kernel procedure, this isn't a good idea
+because there are thousands of these routines & VM can only set 255 breakpoints
+at a time so you nearly had to spend as long pruning the file down as you would
+entering the msgs by hand), however, the trick might be useful for a single
+object file. In the 3270 terminal emulator x3270 there is a very useful option
+in the file menu called "Save Screen In File" - this is very good for keeping a
+copy of traces.
+
+From CMS help <command name> will give you online help on a particular command.
+e.g.::
+
+ HELP DISPLAY
+
+Also CP has a file called profile.exec which automatically gets called
+on startup of CMS ( like autoexec.bat, to keep on the DOS analogy ).
+CP also has a feature similar to doskey; it may be useful for you to
+use profile.exec to define some keystrokes, e.g.:
+
+SET PF9 IMM B
+ This does a single step in VM on pressing F9.
+
+SET PF10 ^
+ This sets up the ^ key,
+ which can be used for ^c (ctrl-c) & ^z (ctrl-z) which can't be typed
+ directly into some 3270 consoles.
+
+SET PF11 ^-
+ This types the starting keystrokes for a sysrq, see SysRq below.
+SET PF12 RETRIEVE
+ This retrieves command history on pressing F12.
+
+
+Sometimes in VM the display is set up to scroll automatically;
+this can be very annoying if there are messages you wish to look at.
+To stop this do:
+
+TERM MORE 255 255
+ This will nearly stop automatic screen updates, however it will
+ cause a denial of service if lots of messages go to the 3270 console,
+ so it would be foolish to use this as the default on a production machine.
+
+
+Tracing particular processes
+----------------------------
+The kernel's text segment is intentionally at an address in memory where it
+will very seldom collide with text segments of user programs ( thanks Martin );
+this simplifies debugging the kernel.
+However it is quite common for user processes to have addresses which collide;
+this can make debugging a particular process under VM painful under normal
+circumstances as the process may change when doing a::
+
+ TR I R <address range>.
+
+Thankfully after reading VM's online help I figured out how to debug
+a particular process.
+
+Your first problem is to find the STD ( segment table designation )
+of the program you wish to debug.
+There are several ways you can do this; here are a few:
+
+Run::
+
+ objdump --syms <program to be debugged> | grep main
+
+To get the address of main in the program. Then::
+
+ tr i pswa <address of main>
+
+Start the program, if VM drops to CP on what looks like the entry
+point of the main function this is most likely the process you wish to debug.
+Now do a D X13 or D XG13 on z/Architecture.
+
+On 31 bit the STD is bits 1-19 ( the STO segment table origin )
+& 25-31 ( the STL segment table length ) of CR13.
+
+now type::
+
+ TR I R STD <CR13's value> 0.7fffffff
+
+e.g.::
+
+ TR I R STD 8F32E1FF 0.7fffffff
+
+Another very useful variation is::
+
+ TR STORE INTO STD <CR13's value> <address range>
+
+for finding out when a particular variable changes.
+
+An alternative way of finding the STD of a currently running process
+is to do the following, ( this method is more complex but
+could be quite convenient if you aren't updating the kernel much &
+so your kernel structures will stay constant for a reasonable period of
+time ).
+
+::
+
+ grep task /proc/<pid>/status
+
+from this you should see something like::
+
+ task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
+
+This now gives you a pointer to the task structure.
+
+Now make::
+
+ CC:="s390-gcc -g" kernel/sched.s
+
+To get the task_struct stabinfo.
+
+( task_struct is defined in include/linux/sched.h ).
+
+Now we want to look at
+task->active_mm->pgd
+
+on my machine the active_mm in the task structure stab is
+active_mm:(4,12),672,32
+
+its offset is 672/8=84=0x54
+
+the pgd member in the mm_struct stab is
+pgd:(4,6)=*(29,5),96,32
+so its offset is 96/8=12=0xc
+
+so we'll::
+
+ hexdump -s 0xf160054 /dev/mem | more
+
+i.e. task_struct+active_mm offset
+to look at the active_mm member::
+
+ f160054 0fee cc60 0019 e334 0000 0000 0000 0011
+
+::
+
+ hexdump -s 0x0feecc6c /dev/mem | more
+
+i.e. active_mm+pgd offset, we get something like::
+
+ feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
+
+now do::
+
+ TR I R STD <pgd|0x7f> 0.7fffffff
+
+i.e. the 0x7f is added because the pgd only
+gives the page table origin & we need to set the low bits
+to the maximum possible segment table length.
+
+::
+
+ TR I R STD 0f2c007f 0.7fffffff
+
+on z/Architecture you'll probably need to do::
+
+ TR I R STD <pgd|0x7> 0.ffffffffffffffff
+
+to set the TableType to 0x1 & the Table length to 3.
+
+
+
+Tracing Program Exceptions
+--------------------------
+If you get a crash which says something like
+"illegal operation" or "specification exception" followed by a register dump,
+you can restart linux & trace these using the tr prog <range or value> trace
+option.
+
+
+The most common ones you will normally be tracing for are:
+
+- 1=operation exception
+- 2=privileged operation exception
+- 4=protection exception
+- 5=addressing exception
+- 6=specification exception
+- 10=segment translation exception
+- 11=page translation exception
+
+The full list of these is on page 22 of the current s/390 Reference Summary.
+e.g.
+
+tr prog 10 will trace segment translation exceptions.
+
+tr prog on its own will trace all program interruption codes.
+
+Trace Sets
+----------
+On starting VM you are initially in the INITIAL trace set.
+You can do a Q TR to verify this.
+Suppose you have a complex tracing situation where you wish to wait, for
+instance, till a driver is open before you start tracing IO, but know in your
+heart that you are going to have to make several runs through the code till you
+have a clue what's going on.
+
+What you can do is::
+
+ TR I PSWA <Driver open address>
+
+hit b to continue till breakpoint
+
+reach the breakpoint
+
+now do your::
+
+ TR GOTO B
+ TR IO 7c08-7c09 inst int run
+
+or whatever the IO channels you wish to trace are & hit b
+
+To get back to the initial trace set do::
+
+ TR GOTO INITIAL
+
+& the TR I PSWA <Driver open address> will be the only active breakpoint again.
+
+
+Tracing linux syscalls under VM
+-------------------------------
+Syscalls are implemented on Linux for S390 by the Supervisor call instruction
+(SVC). There are 256 possibilities of these as the instruction is made up of a
+0xA opcode with the second byte being the syscall number. They are traced
+using the simple command::
+
+ TR SVC <Optional value or range>
+
+the syscalls are defined in linux/arch/s390/include/asm/unistd.h
+e.g. to trace all file opens just do::
+
+ TR SVC 5 ( as this is the syscall number of open )
+
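+For practice, a trivial victim program ( a minimal sketch ) which triggers
+this SVC by opening a file might look like::
+
+    #include <fcntl.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        /* open(2) is syscall 5, so TR SVC 5 should stop here
+           when this runs under VM */
+        int fd = open("/dev/console", O_RDONLY);
+
+        if (fd >= 0)
+            close(fd);
+        return 0;
+    }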
+
+SMP Specific commands
+---------------------
+To find out how many cpus you have,
+Q CPUS displays all the CPUs available to your virtual machine.
+To find the cpu that current VM debugger commands are being directed at,
+do Q CPU. To change the cpu that current VM debugger commands are directed at,
+do::
+
+ CPU <desired cpu no>
+
+On an SMP guest, to issue a command to all CPUs try prefixing the command with
+cpu all. To issue a command to a particular cpu try cpu <cpu number>, e.g.::
+
+ CPU 01 TR I R 2000.3000
+
+If you are running on a guest with several cpus & you have an IO related
+problem & cannot follow the flow of code, but you know it isn't smp related,
+then from the bash prompt issue::
+
+ shutdown -h now
+
+or halt.
+
+do a::
+
+ Q CPUS
+
+to find out how many cpus you have, then detach each one of them from cp except
+cpu 0 by issuing a::
+
+ DETACH CPU 01-(number of cpus in configuration)
+
+& boot linux again.
+
+TR SIGP
+ will trace inter processor signal processor instructions.
+
+DEFINE CPU 01-(number in configuration)
+ will get your guests cpus back.
+
+
+Help for displaying ascii textstrings
+-------------------------------------
+On the very latest VM nucleuses VM can now display ascii
+( thanks Neale for the hint ) by doing::
+
+ D TX<lowaddr>.<len>
+
+e.g.::
+
+ D TX0.100
+
+Alternatively
+=============
+Under older VM debuggers ( I love EBCDIC too ) you can use the following little
+program, which converts a command line of hex digits to ascii text. It can be
+compiled under linux and you can copy the hex digits from your x3270 terminal
+to your xterm if you are debugging from a linuxbox.
+
+This is quite useful when looking at a parameter passed in as a text string
+under VM ( unless you are good at decoding ASCII in your head ).
+
+e.g. consider tracing an open syscall::
+
+ TR SVC 5
+
+We have stopped at a breakpoint::
+
+ 000151B0' SVC 0A05 -> 0001909A' CC 0
+
+D 20.8 to check the SVC old psw in the prefix area & see whether it was from
+userspace ( for the layout of the prefix area consult the "Fixed Storage
+Locations" chapter of the s/390 Reference Summary if you have it available ).
+
+::
+
+ V00000020 070C2000 800151B2
+
+The problem state bit wasn't set & it's also too early in the boot sequence
+for it to be a userspace SVC; if it was, we would have to temporarily switch
+the psw to user space addressing so we could get at the first parameter of the
+open in gpr2.
+
+Next do a::
+
+ D G2
+ GPR 2 = 00014CB4
+
+Now display what gpr2 is pointing to::
+
+ D 00014CB4.20
+ V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
+ V00014CC4 FC00014C B4001001 E0001000 B8070707
+
+Now copy the text till the first 00 hex ( which is the end of the string )
+to an xterm & do hex2ascii on it::
+
+ hex2ascii 2F646576 2F636F6E 736F6C65 00
+
+outputs::
+
+ Decoded Hex:=/ d e v / c o n s o l e 0x00
+
+We were opening the console device.
+
+You can compile the code below yourself for practice :-).
+
+::
+
+ /*
+  * hex2ascii.c
+  * a useful little tool for converting a hexadecimal command line to ascii
+  *
+  * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+  * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
+  */
+ #include <stdio.h>
+ #include <string.h>
+
+ int main(int argc, char *argv[])
+ {
+     int cnt1, cnt2, len, toggle = 0;
+     int startcnt = 1;
+     unsigned char c, hex;
+
+     if (argc > 1 && (strcmp(argv[1], "-a") == 0))
+         startcnt = 2;
+     printf("Decoded Hex:=");
+     for (cnt1 = startcnt; cnt1 < argc; cnt1++) {
+         len = strlen(argv[cnt1]);
+         for (cnt2 = 0; cnt2 < len; cnt2++) {
+             c = argv[cnt1][cnt2];
+             /* convert one ascii hex digit to its value 0-15 */
+             if (c >= '0' && c <= '9')
+                 c = c - '0';
+             if (c >= 'A' && c <= 'F')
+                 c = c - 'A' + 10;
+             if (c >= 'a' && c <= 'f')
+                 c = c - 'a' + 10;
+             switch (toggle) {
+             case 0:
+                 /* first digit of a pair: the high nibble */
+                 hex = c << 4;
+                 toggle = 1;
+                 break;
+             case 1:
+                 /* second digit completes the byte */
+                 hex += c;
+                 if (hex < 32 || hex > 127) {
+                     if (startcnt == 1)
+                         printf("0x%02X ", (int)hex);
+                     else
+                         printf(".");
+                 } else {
+                     printf("%c", hex);
+                     if (startcnt == 1)
+                         printf(" ");
+                 }
+                 toggle = 0;
+                 break;
+             }
+         }
+     }
+     printf("\n");
+     return 0;
+ }
+
+
+
+
+Stack tracing under VM
+----------------------
+A basic backtrace
+-----------------
+
+Here are the tricks I use; 9 times out of 10 they work pretty well.
+
+When your backchain reaches a dead end
+--------------------------------------
+This can happen when an exception happens in the kernel and the kernel is
+entered twice. If you reach the NULL pointer at the end of the back chain you
+should be able to sniff further back if you follow the following tricks
+( see the sketch below ):
+
+1) A kernel address should be easy to recognise since it is in
+   primary space, the problem state bit isn't set & also
+   the hi bit of the address is set.
+2) Another backchain should also be easy to recognise since it is an
+   address pointing to another address approximately 100 bytes or 0x70 hex
+   behind the current stackpointer.
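+
+To make the layout concrete, here is a minimal C sketch of such a backchain
+walk ( assuming the 31 bit stack frame layout described earlier; this is an
+illustration, not kernel code )::
+
+    #include <stdio.h>
+
+    static void backtrace(unsigned long sp)
+    {
+        while (sp) {
+            /* offset 56 holds the saved r14, i.e. the return address;
+               mask off the 31 bit addressing mode bit */
+            unsigned long r14 = *(unsigned long *)(sp + 56);
+
+            printf("return address %08lx\n", r14 & 0x7fffffff);
+            /* offset 0 holds the backchain, 0 signifies the end */
+            sp = *(unsigned long *)sp;
+        }
+    }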
+
+
+Here is some practice.
+
+boot the kernel & hit PA1 at some random time
+
+d g to display the gprs, this should display something like::
+
+ GPR 0 = 00000001 00156018 0014359C 00000000
+ GPR 4 = 00000001 001B8888 000003E0 00000000
+ GPR 8 = 00100080 00100084 00000000 000FE000
+ GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
+
+Note that GPR14 is a return address but as we are real men we are going to
+trace the stack instead.
+Display 0x40 bytes after the stack pointer::
+
+ V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
+ V000FFEE8 00000000 00000000 000003E0 00000000
+ V000FFEF8 00100080 00100084 00000000 000FE000
+ V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
+
+
+Ah, now look at what's in sp+56 (sp+0x38): this is 8001B36A, our saved r14 if
+you look above at our stackframe, & it also agrees with GPR14.
+
+now backchain::
+
+ d 000FFF38.40
+
+we are now taking the contents of SP to get our first backchain::
+
+ V000FFF38 000FFFA0 00000000 00014995 00147094
+ V000FFF48 00147090 001470A0 000003E0 00000000
+ V000FFF58 00100080 00100084 00000000 001BF1D0
+ V000FFF68 00010400 800149BA 80014CA6 000FFF38
+
+This displays a 2nd return address of 80014CA6
+
+now do::
+
+ d 000FFFA0.40
+
+for our 3rd backchain::
+
+ V000FFFA0 04B52002 0001107F 00000000 00000000
+ V000FFFB0 00000000 00000000 FF000000 0001107F
+ V000FFFC0 00000000 00000000 00000000 00000000
+ V000FFFD0 00010400 80010802 8001085A 000FFFA0
+
+
+our 3rd return address is 8001085A
+
+as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
+kernel entry routines don't, for the sake of optimisation, set up a backchain.
+
+now look at System.map to see if the addresses make any sense::
+
+ grep -i 0001b3 System.map
+
+outputs among other things::
+
+ 0001b304 T cpu_idle
+
+so 8001B36A
+is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
+
+::
+
+ grep -i 00014 System.map
+
+produces among other things::
+
+ 00014a78 T start_kernel
+
+so 0014CA6 is start_kernel+0x22e ( a hex number I can't add in my head ).
+
+::
+
+ grep -i 00108 System.map
+
+this produces::
+
+ 00010800 T _stext
+
+so 8001085A is _stext+0x5a
+
+Congrats you've done your first backchain.
+
+
+
+s/390 & z/Architecture IO Overview
+==================================
+
+I am not going to give a course in 390 IO architecture as this would take me
+quite a while and I'm no expert. Instead I'll give a 390 IO architecture
+summary for Dummies. If you have the s/390 principles of operation available
+read this instead. If nothing else you may find a few useful keywords in here
+and be able to use them on a web search engine to find more useful information.
+
+Unlike other bus architectures modern 390 systems do their IO using mostly
+fibre optics and devices such as tapes and disks can be shared between several
+mainframes. Also S390 can support up to 65536 devices while a high end PC based
+system might be choking with around 64.
+
+Here is some of the common IO terminology:
+
+Subchannel:
+ This is the logical number most IO commands use to talk to an IO device. There
+ can be up to 0x10000 (65536) of these in a configuration, typically there are a
+ few hundred. Under VM for simplicity they are allocated contiguously, however
+ on the native hardware they are not. They typically stay consistent between
+ boots provided no new hardware is inserted or removed.
+
+ Under Linux for s390 we use these as IRQ's and also when issuing an IO command
+ (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
+ START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
+ of the device we wish to talk to. The most important of these instructions are
+ START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
+ completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
+ up to 8 channel paths to a device, this offers redundancy if one is not
+ available.
+
+Device Number:
+ This number remains static and is closely tied to the hardware. There are 65536
+ of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
+ another lsb 8 bits. These remain static even if more devices are inserted or
+ removed from the hardware. There is a 1 to 1 mapping between subchannels and
+ device numbers, provided devices aren't inserted or removed.
+
+Channel Control Words:
+ CCWs are linked lists of instructions initially pointed to by an operation
+ request block (ORB), which is initially given to Start Subchannel (SSCH)
+ command along with the subchannel number for the IO subsystem to process
+ while the CPU continues executing normal code.
+ CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
+ Format 1 (31 bit). These are typically used to issue read and write (and many
+ other) instructions. They consist of a length field and an absolute address
+ field.
+
+ Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
+ when the channel is idle, and the second for device end (secondary status).
+ Sometimes you get both concurrently. You check how the IO went on by issuing a
+ TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
+ response block (IRB). If you get channel and device end status in the IRB
+ without channel checks etc. your IO probably went okay. If you didn't you
+ probably need to examine the IRB, extended status word etc.
+ If an error occurs, more sophisticated control units have a facility known as
+ concurrent sense. This means that if an error occurs Extended sense information
+ will be presented in the Extended status word in the IRB. If not you have to
+ issue a subsequent SENSE CCW command after the test subchannel.
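+
+ As a rough illustration, a format 1 CCW can be described by a small C
+ structure along these lines ( a sketch modelled on the field description
+ above )::
+
+    #include <stdint.h>
+
+    /* a format 1 channel command word: 8 bytes in all */
+    struct ccw1 {
+        uint8_t  cmd_code; /* e.g. a read or write command */
+        uint8_t  flags;    /* chaining flags etc. */
+        uint16_t count;    /* the length field */
+        uint32_t cda;      /* absolute address of the data */
+    } __attribute__((packed, aligned(8)));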
+
+
+TPI (Test pending interrupt) can also be used for polled IO, but in
+multitasking multiprocessor systems it isn't recommended except for
+checking special cases (i.e. non looping checks for pending IO etc.).
+
+Store Subchannel and Modify Subchannel can be used to examine and modify
+operating characteristics of a subchannel (e.g. channel paths).
+
+Other IO related Terms:
+
+Sysplex:
+ S390's Clustering Technology
+QDIO:
+ S390's new high speed IO architecture to support devices such as gigabit
+ ethernet, this architecture is also designed to be forward compatible with
+ upcoming 64 bit machines.
+
+
+General Concepts
+----------------
+
+Input Output Processors (IOP's) are responsible for communicating between
+the mainframe CPU's & the channels & relieve the mainframe CPU's of the
+burden of communicating with IO devices directly; this allows the CPU's to
+concentrate on data processing.
+
+IOP's can use one or more links ( known as channel paths ) to talk to each
+IO device. They first check for path availability & choose an available one,
+then start ( & sometimes terminate ) IO.
+There are two types of channel path: ESCON & the Parallel IO interface.
+
+IO devices are attached to control units, control units provide the
+logic to interface the channel paths & channel path IO protocols to
+the IO devices, they can be integrated with the devices or housed separately
+& often talk to several similar devices ( typical examples would be raid
+controllers or a control unit which connects to 1000 3270 terminals )::
+
+
+ +---------------------------------------------------------------+
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
+ | | | | | | | | | | Memory | | Storage | |
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ |---------------------------------------------------------------+
+ | IOP | IOP | IOP |
+ |---------------------------------------------------------------
+ | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
+ ----------------------------------------------------------------
+ || ||
+ || Bus & Tag Channel Path || ESCON
+ || ====================== || Channel
+ || || || || Path
+ +----------+ +----------+ +----------+
+ | | | | | |
+ | CU | | CU | | CU |
+ | | | | | |
+ +----------+ +----------+ +----------+
+ | | | | |
+ +----------+ +----------+ +----------+ +----------+ +----------+
+ |I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
+ +----------+ +----------+ +----------+ +----------+ +----------+
+ CPU = Central Processing Unit
+ C = Channel
+ IOP = IO Processor
+ CU = Control Unit
+
+The 390 IO systems come in 2 flavours; the current 390 machines support both.
+
+The older 360 & 370 interface is sometimes called the Parallel I/O interface,
+sometimes called Bus-and-Tag & sometimes the Original Equipment Manufacturers
+Interface (OEMI).
+
+This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
+and control lines on the "Tag" cable. These can operate in byte multiplex mode
+for sharing between several slow devices or burst mode and monopolize the
+channel for the whole burst. Up to 256 devices can be addressed on one of these
+cables. These cables are about one inch in diameter. The maximum unextended
+length supported by these cables is 125 meters, but this can be extended up to
+2km with a fibre optic channel extender such as a 3044. The maximum burst speed
+supported is 4.5 megabytes per second. However, some really old processors
+support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
+One of these paths can be daisy chained to up to 8 control units.
+
+
+ESCON ( when fibre optic it is also called FICON )
+was introduced by IBM in 1990. It has 2 fibre optic cables and uses either leds
+or lasers for communication at a signaling rate of up to 200 megabits/sec. As
+10 bits are transferred for every 8 bits of info this drops to 160 megabits/sec
+and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
+operates in burst mode.
+
+ESCONs typical max cable length is 3km for the led version and 20km for the
+laser version known as XDF (extended distance facility). This can be further
+extended by using an ESCON director which triples the above mentioned ranges.
+Unlike Bus & Tag, as ESCON is serial it uses a packet switching architecture;
+the standard Bus & Tag control protocol is however present within the packets.
+Up to 256 devices can be attached to each control unit that uses one of these
+interfaces.
+
+Common 390 Devices include:
+
+- Network adapters, typically OSA2, 3172's, 2116's & OSA-E gigabit ethernet
+  adapters.
+- Consoles, 3270 & 3215 ( a teletype emulated under linux for a line mode
+  console ).
+- DASD's, direct access storage devices ( otherwise known as hard disks ).
+- Tape drives.
+- CTC ( channel to channel adapters ), ESCON or parallel cables used as a
+  very high speed serial link between 2 machines.
+
+
+Debugging IO on s/390 & z/Architecture under VM
+===============================================
+
+Now we are ready to go on with the IO tracing commands under VM.
+
+A few self explanatory queries::
+
+ Q OSA
+ Q CTC
+ Q DISK ( This command is CMS specific )
+ Q DASD
+
+Q OSA on my machine returns::
+
+ OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
+ OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
+ OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
+ OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
+
+If you have a guest with certain privileges you may be able to see devices
+which don't belong to you. To avoid this, add the option V.
+e.g.::
+
+ Q V OSA
+
+Now using the device numbers returned by this command we will
+trace the IO starting up on the first devices 7c08 & 7c09.
+In the simplest case we can trace the
+start subchannels
+like TR SSCH 7C08-7C09
+or the halt subchannels
+like TR HSCH 7C08-7C09
+MSCH's, STSCH's... I think you can guess the rest.
+
+A good trick is tracing all the IO's and CCWS and spooling them into the reader
+of another VM guest so he can ftp the logfile back to his own machine. I'll do
+a small bit of this and give you a look at the output.
+
+1) Spool stdout to VM reader::
+
+ SP PRT TO (another vm guest ) or * for the local vm guest
+
+2) Fill the reader with the trace::
+
+ TR IO 7c08-7c09 INST INT CCW PRT RUN
+
+3) Start up linux::
+
+ i 00c
+
+4) Finish the trace::
+
+ TR END
+
+5) close the reader::
+
+ C PRT
+
+6) list reader contents::
+
+ RDRLIST
+
+7) copy it to linux4's minidisk::
+
+ RECEIVE / LOG TXT A1 ( replace
+
+8) filel & press F11 to look at it.
+   You should see something like::
+
+ 00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
+ CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
+ CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
+ IDAL 43D8AFE8
+ IDAL 0FB76000
+ 00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
+ 00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
+ CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
+ KEY 0 FPI C0 CC 0 CTLS 4007
+ 00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
+
+If you don't like messing up your reader ( because you possibly booted from it )
+you can alternatively spool it to another guest's reader.
+
+
+Other common VM device related commands
+---------------------------------------------
+These commands are listed only because they have
+been of use to me in the past & may be of use to
+you too. For more complete info on each of the commands
+type HELP <command> from CMS.
+
+detaching devices::
+
+ DET <devno range>
+
+attaching devices ( use * as the guest to attach a device to your own guest )::
+
+ ATT <devno range> <guest>
+
+READY <devno>
+ causes VM to issue a fake interrupt.
+
+The VARY command is normally only available to VM administrators::
+
+ VARY ON PATH <path> TO <devno range>
+ VARY OFF PATH <PATH> FROM <devno range>
+
+This is used to switch on or off channel paths to devices.
+
+Q CHPID <channel path ID>
+ This displays state of devices using this channel path
+
+D SCHIB <subchannel>
+ This displays the subchannel information SCHIB block for the device.
+ this I believe is also only available to administrators.
+
+DEFINE CTC <devno>
+ defines a virtual CTC channel to channel connection
+ 2 need to be defined on each guest for the CTC driver to use.
+
+COUPLE devno userid remote devno
+ Joins a local virtual device to a remote virtual device
+ ( commonly used for the CTC driver ).
+
+Building a VM ramdisk under CMS which linux can use::
+
+ def vfb-<blocksize> <subchannel> <number blocks>
+
+blocksize is commonly 4096 for linux.
+
+Formatting it::
+
+ format <subchannel> <drive letter e.g. x> (blksize <blocksize>
+
+Sharing a disk between multiple guests::
+
+ LINK userid devno1 devno2 mode password
+
+
+
+GDB on S390
+===========
+N.B. if compiling for debugging, gdb works better without optimisation
+( see "Compiling programs for debugging" above ).
+
+invocation
+----------
+gdb <victim program> <optional corefile>
+
+Online help
+-----------
+help: gives help on commands
+
+e.g.::
+
+ help
+ help display
+
+Note gdb's online help is very good, use it.
+
+
+Assembly
+--------
+info registers:
+ displays registers other than floating point.
+
+info all-registers:
+ displays floating points as well.
+
+disassemble:
+ disassembles
+
+e.g.::
+
+ disassemble without parameters will disassemble the current function
+ disassemble $pc $pc+10
+
+Viewing & modifying variables
+-----------------------------
+print or p:
+ displays variable or register
+
+e.g. p/x $sp will display the stack pointer
+
+display:
+ prints variable or register each time program stops
+
+e.g.::
+
+ display/x $pc will display the program counter
+ display argc
+
+undisplay:
+ undoes display commands
+
+info breakpoints:
+ shows all current breakpoints
+
+info stack:
+ shows stack back trace (if this doesn't work too well, I'll show
+ you the stacktrace by hand below).
+
+info locals:
+ displays local variables.
+
+info args:
+ display current procedure arguments.
+
+set args:
+ will set argc & argv each time the victim program is invoked
+
+e.g.::
+
+ set <variable>=value
+ set argc=100
+ set $pc=0
+
+
+
+Modifying execution
+-------------------
+step:
+ steps n lines of sourcecode
+
+step
+ steps 1 line.
+
+step 100
+ steps 100 lines of code.
+
+next:
+ like step except this will not step into subroutines
+
+stepi:
+ steps a single machine code instruction.
+
+e.g.::
+
+ stepi 100
+
+nexti:
+ steps a single machine code instruction but will not step into
+ subroutines.
+
+finish:
+ will run until exit of the current routine
+
+run:
+ (re)starts a program
+
+cont:
+ continues a program
+
+quit:
+ exits gdb.
+
+
+breakpoints
+------------
+
+break
+ sets a breakpoint
+
+e.g.::
+
+ break main
+ break *$pc
+ break *0x400618
+
+Here's a really useful one for large programs
+
+rbr
+ Set a breakpoint for all functions matching REGEXP
+
+e.g.::
+
+ rbr 390
+
+will set a breakpoint with all functions with 390 in their name.
+
+info breakpoints
+ lists all breakpoints
+
+delete:
+ delete breakpoint by number or delete them all
+
+e.g.
+
+delete 1
+ will delete the first breakpoint
+
+
+delete
+ will delete them all
+
+watch:
+ This will set a watchpoint ( usually hardware assisted ),
+
+This will watch a variable till it changes
+
+e.g.
+
+watch cnt
+ will watch the variable cnt till it changes.
+
+As an aside, unfortunately gdb's architecture independent watchpoint code
+is inconsistent & not very good; watchpoints usually work but not always.
+
+info watchpoints:
+ Display currently active watchpoints
+
+condition: ( another useful one )
+ Specify breakpoint number N to break only if COND is true.
+
+Usage is `condition N COND`, where N is an integer and COND is an
+expression to be evaluated whenever breakpoint N is reached.
+
+
+
+User defined functions/macros
+-----------------------------
+define: ( Note this is very very useful, simple & powerful )
+
+usage: define <name> <list of commands> end
+
+examples which you should consider putting into .gdbinit in your home
+directory::
+
+ define d
+ stepi
+ disassemble $pc $pc+10
+ end
+ define e
+ nexti
+ disassemble $pc $pc+10
+ end
+
+
+Other hard to classify stuff
+----------------------------
+signal n:
+ sends the victim program a signal.
+
+e.g. `signal 3` will send a SIGQUIT.
+
+info signals:
+ what gdb does when the victim receives certain signals.
+
+list:
+
+e.g.:
+
+list
+ lists current function source
+list 1,10
+ list first 10 lines of current file.
+
+list test.c:1,10
+ list first 10 lines of test.c.
+
+
+directory:
+ Adds directories to be searched for source if gdb cannot find the source.
+ (note it is a bit sensitive about slashes)
+
+e.g. To add the root of the filesystem to the searchpath do::
+
+ directory //
+
+
+call <function>
+ This calls a function in the victim program; this is pretty powerful, e.g.::
+
+  (gdb) call printf("hello world")
+
+outputs::
+
+  $1 = 11
+
+You might now be thinking that the line above didn't work, something extra had
+to be done::
+
+  (gdb) call fflush(stdout)
+  hello world$2 = 0
+
+As an aside the debugger also calls malloc & free under the hood
+to make space for the "hello world" string.
+
+
+
+hints
+-----
+1) command completion works just like bash
+   ( if you are a bad typist like me this really helps ),
+   e.g. hit br <TAB> & cursor up & down :-).
+
+2) if you have a debugging problem that takes a few steps to recreate,
+   put the steps into a file called .gdbinit in your current working directory;
+   if you have defined a few extra useful user defined commands, put these in
+   your home directory & they will be read each time gdb is launched.
+
+A typical .gdbinit file might be::
+
+ break main
+ run
+ break runtime_exception
+ cont
+
+
+stack chaining in gdb by hand
+-----------------------------
+This is done using the same trick described for VM::
+
+ p/x (*($sp+56))&0x7fffffff
+
+to get the first backchain.
+
+For z/Architecture
+replace 56 with 112, ignore the &0x7fffffff
+in the macros below & do nasty casts to longs like the following,
+as gdb unfortunately deals with printed arguments as ints which
+messes up everything.
+
+i.e. here is a 3rd backchain dereference::
+
+ p/x *(long *)(***(long ***)$sp+112)
+
+
+this outputs::
+
+ $5 = 0x528f18
+
+on my machine.
+
+Now you can use::
+
+ info symbol (*($sp+56))&0x7fffffff
+
+you might see something like::
+
+ rl_getc + 36 in section .text
+
+telling you what is located at address 0x528f18.
+Now do::
+
+ p/x (*(*$sp+56))&0x7fffffff
+
+This outputs::
+
+ $6 = 0x528ed0
+
+Now do::
+
+ info symbol (*(*$sp+56))&0x7fffffff
+ rl_read_key + 180 in section .text
+
+now do::
+
+ p/x (*(**$sp+56))&0x7fffffff
+
+& so on.
+
+Disassembling instructions without debug info
+---------------------------------------------
+gdb typically complains if there is a lack of debugging
+symbols in the disassemble command with
+"No function contains specified address." To get around
+this do::
+
+ x/<number lines to disassemble>xi <address>
+
+e.g.::
+
+ x/20xi 0x400730
+
+
+
+Note:
+ Remember gdb has history just like bash; you don't need to retype the
+ whole line, just use the up & down arrows.
+
+
+
+For more info
+-------------
+From your linuxbox do::
+
+ man gdb
+
+or::
+
+ info gdb.
+
+core dumps
+----------
+
+What is a core dump?
+^^^^^^^^^^^^^^^^^^^^
+
+A core dump is a file generated by the kernel (if allowed) which contains the
+registers and all active pages of the program which has crashed.
+
+From this file gdb will allow you to look at the registers, stack trace and
+memory of the program as if it just crashed on your system. It is usually
+called core and created in the current working directory.
+
+This is very useful in that a customer can mail a core dump to a technical
+support department and the technical support department can reconstruct what
+happened, provided they have an identical copy of this program with debugging
+symbols compiled in and the source base of this build available.
+
+In short it is far more useful than something like a crash log could ever hope
+to be.
+
+Why have I never seen one?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Probably because you haven't used the bash command::
+
+ ulimit -c unlimited
+
+to allow core dumps, now do::
+
+ ulimit -a
+
+to verify that the limit was accepted.
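+
+If you'd rather practise on a trivial victim of your own, a minimal sketch of
+a program which is guaranteed to dump core ( assuming core dumps are enabled
+as above ) is::
+
+    /* crashme.c: dereference a NULL pointer to force a SIGSEGV
+       & ( with ulimit -c unlimited ) a core file */
+    int main(void)
+    {
+        int *p = 0;
+
+        return *p;
+    }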
+
+A sample core dump
+^^^^^^^^^^^^^^^^^^
+To create this I'm going to do::
+
+ ulimit -c unlimited
+ gdb
+
+to launch gdb ( my victim app. ); now be bad & do the following from another
+telnet/xterm session to the same machine::
+
+ ps -aux | grep gdb
+ kill -SIGSEGV <gdb's pid>
+
+or alternatively use `killall -SIGSEGV gdb` if you have the killall command.
+
+Now look at the core dump::
+
+ ./gdb core
+
+Displays the following::
+
+ GNU gdb 4.18
+ Copyright 1998 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "s390-ibm-linux"...
+ Core was generated by `./gdb'.
+ Program terminated with signal 11, Segmentation fault.
+ Reading symbols from /usr/lib/libncurses.so.4...done.
+ Reading symbols from /lib/libm.so.6...done.
+ Reading symbols from /lib/libc.so.6...done.
+ Reading symbols from /lib/ld-linux.so.2...done.
+ #0 0x40126d1a in read () from /lib/libc.so.6
+ Setting up the environment for debugging gdb.
+ Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
+ Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
+ (top-gdb) info stack
+ #0 0x40126d1a in read () from /lib/libc.so.6
+ #1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
+ #2 0x528ed0 in rl_read_key () at input.c:381
+ #3 0x5167e6 in readline_internal_char () at readline.c:454
+ #4 0x5168ee in readline_internal_charloop () at readline.c:507
+ #5 0x51692c in readline_internal () at readline.c:521
+ #6 0x5164fe in readline (prompt=0x7ffff810)
+ at readline.c:349
+ #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
+ annotation_suffix=0x4d6b44 "prompt") at top.c:2091
+ #8 0x4d6cf0 in command_loop () at top.c:1345
+ #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
+
+
+LDD
+===
+This is a program which lists the shared libraries which a program needs.
+Note you also get the relocations of the shared library text segments which
+help when using objdump --source.
+
+e.g.::
+
+ ldd ./gdb
+
+outputs::
+
+ libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
+ libm.so.6 => /lib/libm.so.6 (0x4005e000)
+ libc.so.6 => /lib/libc.so.6 (0x40084000)
+ /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
+
+
+Debugging shared libraries
+==========================
+Most programs use shared libraries, however it can be very painful
+when you single step instruction into a function like printf for the
+first time & you end up in functions like _dl_runtime_resolve; this is
+the ld.so doing lazy binding. Lazy binding is a concept in ELF where
+shared library functions are not loaded into memory unless they are
+actually used, great for saving memory but a pain to debug.
+
+To get around this either relink the program -static, or exit gdb, type
+export LD_BIND_NOW=true ( this will stop lazy binding ) & restart gdb'ing
+the program in question.
+
+
+
+Debugging modules
+=================
+As modules are dynamically loaded into the kernel their address can be
+anywhere; to get around this use the -m option with insmod to emit a load
+map, which can be piped into a file if required.
+
+The proc file system
+====================
+What is it?
+It is a filesystem created by the kernel with files which are created on demand
+by the kernel if read, or which can be used to modify kernel parameters;
+it is a powerful concept.
+
+e.g.::
+
+ cat /proc/sys/net/ipv4/ip_forward
+
+On my machine outputs::
+
+ 0
+
+telling me ip_forwarding is not on; to switch it on I can do::
+
+ echo 1 > /proc/sys/net/ipv4/ip_forward
+
+cat it again::
+
+ cat /proc/sys/net/ipv4/ip_forward
+
+On my machine now outputs::
+
+ 1
+
+IP forwarding is on.
+
+There is a lot of useful info in here best found by going in and having a look
+around, so I'll take you through some entries I consider important.
+
+All the processes running on the machine have their own entry defined by
+/proc/<pid>
+
+So let's have a look at the init process::
+
+ cd /proc/1
+ cat cmdline
+
+emits::
+
+ init [2]
+
+::
+
+ cd /proc/1/fd
+
+This contains numerical entries of all the open files;
+some of these you can cat, e.g. stdout ( fd 1 )::
+
+ cat /proc/29/maps
+
+on my machine emits::
+
+ 00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
+ 00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
+ 0047e000-00492000 rwxp 00000000 00:00 0
+ 40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
+ 40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
+ 40016000-40017000 rwxp 00000000 00:00 0
+ 40017000-40018000 rw-p 00000000 00:00 0
+ 40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
+ 4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
+ 4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
+ 4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
+ 40111000-40114000 rw-p 00000000 00:00 0
+ 40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
+ 4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
+ 7fffd000-80000000 rwxp ffffe000 00:00 0
+
+
+Showing us the shared libraries the process uses, where they are in memory
+& the memory access permissions for each virtual memory area.
+
+/proc/1/cwd is a softlink to the current working directory.
+
+/proc/1/root is the root of the filesystem for this process.
+
+/proc/1/mem is the current running processes memory which you
+can read & write to like a file.
+
+strace uses this sometimes as it is a bit faster than the
+rather inefficient ptrace interface for peeking at DATA.
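+
+A minimal sketch of peeking at memory this way from C ( note the kernel
+normally only lets you read the mem file of a process you are ptracing, so
+this is an illustration rather than something init would let you do )::
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    /* dump 16 bytes of a process's memory at a given address */
+    static int peek(int pid, long addr)
+    {
+        unsigned char buf[16];
+        char name[32];
+        int fd, i;
+
+        sprintf(name, "/proc/%d/mem", pid);
+        fd = open(name, O_RDONLY);
+        if (fd < 0)
+            return -1;
+        lseek(fd, addr, SEEK_SET);
+        if (read(fd, buf, sizeof(buf)) != sizeof(buf)) {
+            close(fd);
+            return -1;
+        }
+        for (i = 0; i < 16; i++)
+            printf("%02x ", buf[i]);
+        printf("\n");
+        close(fd);
+        return 0;
+    }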
+
+::
+
+ cat status
+
+ Name: init
+ State: S (sleeping)
+ Pid: 1
+ PPid: 0
+ Uid: 0 0 0 0
+ Gid: 0 0 0 0
+ Groups:
+ VmSize: 408 kB
+ VmLck: 0 kB
+ VmRSS: 208 kB
+ VmData: 24 kB
+ VmStk: 8 kB
+ VmExe: 368 kB
+ VmLib: 0 kB
+ SigPnd: 0000000000000000
+ SigBlk: 0000000000000000
+ SigIgn: 7fffffffd7f0d8fc
+ SigCgt: 00000000280b2603
+ CapInh: 00000000fffffeff
+ CapPrm: 00000000ffffffff
+ CapEff: 00000000fffffeff
+
+ User PSW: 070de000 80414146
+ task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
+ User GPRS:
+ 00000400 00000000 0000000b 7ffffa90
+ 00000000 00000000 00000000 0045d9f4
+ 0045cafc 7ffffa90 7fffff18 0045cb08
+ 00010400 804039e8 80403af8 7ffff8b0
+ User ACRS:
+ 00000000 00000000 00000000 00000000
+ 00000001 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000
+ Kernel BackChain CallChain BackChain CallChain
+ 004b7ca8 8002bd0c 004b7d18 8002b92c
+ 004b7db8 8005cd50 004b7e38 8005d12a
+ 004b7f08 80019114
+
+Showing among other things memory usage & status of some signals &
+the process's registers from the kernel task_struct,
+as well as a backchain which may be useful if a process crashes
+in the kernel for some unknown reason.
+
+Some driver debugging techniques
+================================
+debug feature
+-------------
+Some of our drivers now support a "debug feature" in
+/proc/s390dbf; see s390dbf.txt in the linux/Documentation directory
+for more info.
+
+e.g.
+to switch on the lcs "debug feature"::
+
+ echo 5 > /proc/s390dbf/lcs/level
+
+& then after the error occurred::
+
+ cat /proc/s390dbf/lcs/sprintf >/logfile
+
+the logfile now contains some information which may help
+tech support resolve a problem in the field.
+
+
+
+high level debugging network drivers
+------------------------------------
+ifconfig is quite a useful command;
+it gives the current state of network drivers.
+
+If you suspect your network device driver is dead,
+one way to check is to type::
+
+ ifconfig <network device>
+
+e.g. tr0
+
+You should see something like::
+
+ ifconfig tr0
+ tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
+ inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
+ UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
+ RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
+ collisions:0 txqueuelen:100
+
+If the device doesn't say UP,
+try::
+
+ /etc/rc.d/init.d/network start
+
+( this starts the network stack & hopefully calls ifconfig tr0 up ).
+ifconfig looks at the output of /proc/net/dev and presents it in a more
+readable form.
+
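+A minimal userspace sketch of what ifconfig starts from, simply dumping
+/proc/net/dev::
+
+ #include <stdio.h>
+
+ int main(void)
+ {
+     char line[512];
+     FILE *f = fopen("/proc/net/dev", "r");
+
+     if (!f)
+         return 1;
+     /* two header lines, then one line per interface carrying the
+        RX/TX byte, packet & error counters */
+     while (fgets(line, sizeof(line), f))
+         fputs(line, stdout);
+     fclose(f);
+     return 0;
+ }
+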
+Now ping the device from a machine in the same subnet.
+
+If the RX packets & TX packets counts don't increment, you probably
+have problems.
+
+next::
+
+ cat /proc/net/arp
+
+Do you see any hardware addresses in the cache? If not, you may have problems.
+Next try::
+
+ ping -c 5 <broadcast_addr>
+
+i.e. the Bcast field above in the output of
+ifconfig. Do you see any replies from machines other than the local machine?
+If not, you may have problems. Also, if the TX packets count in ifconfig
+hasn't incremented either, you have serious problems in your driver
+(e.g. the txbusy field of the network device being stuck on)
+or you may have multiple network devices connected.
+
+
+chandev
+-------
+There is a new device layer for channel devices; some
+drivers, e.g. lcs, are registered with this layer.
+
+If the device uses the channel device layer you'll be
+able to find what interrupts it uses & the current state
+of the device.
+
+See the manpage chandev.8 & type cat /proc/chandev for more info.
+
+
+SysRq
+=====
+This is now supported by linux for s/390 & z/Architecture.
+
+To enable it, compile the kernel with::
+
+ Kernel Hacking -> Magic SysRq Key Enabled
+
+Then::
+
+ echo "1" > /proc/sys/kernel/sysrq
+
+also type::
+
+ echo "8" >/proc/sys/kernel/printk
+
+to make printk output go to the console.
+
+On 390 all commands are prefixed with::
+
+ ^-
+
+e.g.::
+
+ ^-t will show tasks.
+ ^-? or some unknown command will display help.
+
+The sysrq key reading is very picky ( I have to type the keys in an
+xterm session & paste them into the x3270 console )
+& it may be wise to predefine the keys as described in the VM hints above.
+
+This is particularly useful for syncing disks, unmounting & rebooting
+if the machine gets partially hung.
+
+Read Documentation/admin-guide/sysrq.rst for more info.
+
+References:
+===========
+- Enterprise Systems Architecture Reference Summary
+- Enterprise Systems Architecture Principles of Operation
+- Hartmut Penners s390 stack frame sheet.
+- IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
+- Various bits of man & info pages of Linux.
+- Linux & GDB source.
+- Various info & man pages.
+- CMS Help on tracing commands.
+- Linux for s/390 Elf Application Binary Interface
+- Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
+- z/Architecture Principles of Operation SA22-7832-00
+- Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
+- Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
+
+Special Thanks
+==============
+Special thanks to Neale Ferguson who maintains a much
+prettier HTML version of this page at
+http://linuxvm.org/penguinvm/
+Bob Grainger, Stefan Bader & others for reporting bugs.
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.rst
index ed265cf54cde..ad4bc2dbea43 100644
--- a/Documentation/s390/driver-model.txt
+++ b/Documentation/s390/driver-model.rst
@@ -1,5 +1,6 @@
+=============================
S/390 driver model interfaces
------------------------------
+=============================
1. CCW devices
--------------
@@ -7,13 +8,13 @@ S/390 driver model interfaces
All devices which can be addressed by means of ccws are called 'CCW devices' -
even if they aren't actually driven by ccws.
-All ccw devices are accessed via a subchannel, this is reflected in the
-structures under devices/:
+All ccw devices are accessed via a subchannel, this is reflected in the
+structures under devices/::
-devices/
+ devices/
- system/
- css0/
- - 0.0.0000/0.0.0815/
+ - 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
- 0.0.0002/
- 0.1.0000/0.1.1234/
@@ -35,14 +36,18 @@ be found under bus/ccw/devices/.
All ccw devices export some data via sysfs.
-cutype: The control unit type / model.
+cutype:
+ The control unit type / model.
-devtype: The device type / model, if applicable.
+devtype:
+ The device type / model, if applicable.
-availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
+availability:
+ Can be 'good' or 'boxed'; 'no path' or 'no device' for
disconnected devices.
-online: An interface to set the device online and offline.
+online:
+ An interface to set the device online and offline.
In the special case of the device being disconnected (see the
notify function under 1.2), piping 0 to online will forcibly delete
the device.
@@ -52,9 +57,11 @@ The device drivers can add entries to export per-device data and interfaces.
There is also some data exported on a per-subchannel basis (see under
bus/css/devices/):
-chpids: Via which chpids the device is connected.
+chpids:
+ Via which chpids the device is connected.
-pimpampom: The path installed, path available and path operational masks.
+pimpampom:
+ The path installed, path available and path operational masks.
There also might be additional data, for example for block devices.
@@ -74,77 +81,93 @@ b. After a. has been performed, if necessary, the device is finally brought up
------------------------------------
The basic struct ccw_device and struct ccw_driver data structures can be found
-under include/asm/ccwdev.h.
+under include/asm/ccwdev.h::
-struct ccw_device {
- spinlock_t *ccwlock;
- struct ccw_device_private *private;
- struct ccw_device_id id;
+ struct ccw_device {
+ spinlock_t *ccwlock;
+ struct ccw_device_private *private;
+ struct ccw_device_id id;
- struct ccw_driver *drv;
- struct device dev;
+ struct ccw_driver *drv;
+ struct device dev;
int online;
void (*handler) (struct ccw_device *dev, unsigned long intparm,
- struct irb *irb);
-};
+ struct irb *irb);
+ };
-struct ccw_driver {
- struct module *owner;
- struct ccw_device_id *ids;
- int (*probe) (struct ccw_device *);
+ struct ccw_driver {
+ struct module *owner;
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
int (*remove) (struct ccw_device *);
int (*set_online) (struct ccw_device *);
int (*set_offline) (struct ccw_device *);
int (*notify) (struct ccw_device *, int);
struct device_driver driver;
char *name;
-};
+ };
The 'private' field contains data needed for internal i/o operation only, and
is not available to the device driver.
Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
and/or device types/models it is interested. This information can later be found
-in the struct ccw_device_id fields:
+in the struct ccw_device_id fields::
-struct ccw_device_id {
- __u16 match_flags;
+ struct ccw_device_id {
+ __u16 match_flags;
- __u16 cu_type;
- __u16 dev_type;
- __u8 cu_model;
- __u8 dev_model;
+ __u16 cu_type;
+ __u16 dev_type;
+ __u8 cu_model;
+ __u8 dev_model;
unsigned long driver_info;
-};
+ };
The functions in ccw_driver should be used in the following way:
-probe: This function is called by the device layer for each device the driver
+
+probe:
+ This function is called by the device layer for each device the driver
is interested in. The driver should only allocate private structures
to put in dev->driver_data and create attributes (if needed). Also,
the interrupt handler (see below) should be set here.
-int (*probe) (struct ccw_device *cdev);
+::
+
+ int (*probe) (struct ccw_device *cdev);
-Parameters: cdev - the device to be probed.
+Parameters:
+ cdev
+ - the device to be probed.
-remove: This function is called by the device layer upon removal of the driver,
+remove:
+ This function is called by the device layer upon removal of the driver,
the device or the module. The driver should perform cleanups here.
-int (*remove) (struct ccw_device *cdev);
+::
-Parameters: cdev - the device to be removed.
+ int (*remove) (struct ccw_device *cdev);
+Parameters:
+ cdev
+ - the device to be removed.
-set_online: This function is called by the common I/O layer when the device is
+
+set_online:
+ This function is called by the common I/O layer when the device is
activated via the 'online' attribute. The driver should finally
setup and activate the device here.
-int (*set_online) (struct ccw_device *);
+::
+
+ int (*set_online) (struct ccw_device *);
-Parameters: cdev - the device to be activated. The common layer has
+Parameters:
+ cdev
+ - the device to be activated. The common layer has
verified that the device is not already online.
@@ -152,15 +175,22 @@ set_offline: This function is called by the common I/O layer when the device is
de-activated via the 'online' attribute. The driver should shut
down the device, but not de-allocate its private data.
-int (*set_offline) (struct ccw_device *);
+::
-Parameters: cdev - the device to be deactivated. The common layer has
+ int (*set_offline) (struct ccw_device *);
+
+Parameters:
+ cdev
+ - the device to be deactivated. The common layer has
verified that the device is online.
-notify: This function is called by the common I/O layer for some state changes
+notify:
+ This function is called by the common I/O layer for some state changes
of the device.
+
Signalled to the driver are:
+
* In online state, device detached (CIO_GONE) or last path gone
(CIO_NO_PATH). The driver must return !0 to keep the device; for
return code 0, the device will be deleted as usual (also when no
@@ -173,32 +203,40 @@ notify: This function is called by the common I/O layer for some state changes
return code of the notify function the device driver signals if it
wants the device back: !0 for keeping, 0 to make the device being
removed and re-registered.
-
-int (*notify) (struct ccw_device *, int);
-Parameters: cdev - the device whose state changed.
- event - the event that happened. This can be one of CIO_GONE,
- CIO_NO_PATH or CIO_OPER.
+::
+
+ int (*notify) (struct ccw_device *, int);
+
+Parameters:
+ cdev
+ - the device whose state changed.
+
+ event
+ - the event that happened. This can be one of CIO_GONE,
+ CIO_NO_PATH or CIO_OPER.
The handler field of the struct ccw_device is meant to be set to the interrupt
-handler for the device. In order to accommodate drivers which use several
+handler for the device. In order to accommodate drivers which use several
distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
instead of ccw_driver.
The handler is registered with the common layer during set_online() processing
before the driver is called, and is deregistered during set_offline() after the
-driver has been called. Also, after registering / before deregistering, path
+driver has been called. Also, after registering / before deregistering, path
grouping resp. disbanding of the path group (if applicable) are performed.
-void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+::
-Parameters: dev - the device the handler is called for
+ void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+
+Parameters: dev - the device the handler is called for
intparm - the intparm which allows the device driver to identify
- the i/o the interrupt is associated with, or to recognize
- the interrupt as unsolicited.
- irb - interruption response block which contains the accumulated
- status.
+ the i/o the interrupt is associated with, or to recognize
+ the interrupt as unsolicited.
+ irb - interruption response block which contains the accumulated
+ status.
-The device driver is called from the common ccw_device layer and can retrieve
+The device driver is called from the common ccw_device layer and can retrieve
information about the interrupt from the irb parameter.
@@ -237,23 +275,27 @@ only the logical state and not the physical state, since we cannot track the
latter consistently due to lacking machine support (we don't need to be aware
of it anyway).
-status - Can be 'online' or 'offline'.
+status
+ - Can be 'online' or 'offline'.
Piping 'on' or 'off' sets the chpid logically online/offline.
Piping 'on' to an online chpid triggers path reprobing for all devices
the chpid connects to. This can be used to force the kernel to re-use
a channel path the user knows to be online, but the machine hasn't
created a machine check for.
-type - The physical type of the channel path.
+type
+ - The physical type of the channel path.
-shared - Whether the channel path is shared.
+shared
+ - Whether the channel path is shared.
-cmg - The channel measurement group.
+cmg
+ - The channel measurement group.
3. System devices
-----------------
-3.1 xpram
+3.1 xpram
---------
xpram shows up under devices/system/ as 'xpram'.
@@ -279,9 +321,8 @@ Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interfa
number is assigned sequentially to the connections defined via the 'connection'
attribute.
-user - shows the connection partner.
-
-buffer - maximum buffer size.
- Pipe to it to change buffer size.
-
+user
+ - shows the connection partner.
+buffer
+ - maximum buffer size. Pipe to it to change buffer size.
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
new file mode 100644
index 000000000000..1a914da2a07b
--- /dev/null
+++ b/Documentation/s390/index.rst
@@ -0,0 +1,30 @@
+:orphan:
+
+=================
+s390 Architecture
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ cds
+ 3270
+ debugging390
+ driver-model
+ monreader
+ qeth
+ s390dbf
+ vfio-ap
+ vfio-ccw
+ zfcpdump
+ dasd
+ common_io
+
+ text_files
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/s390/monreader.txt b/Documentation/s390/monreader.rst
index d3729585fdb0..1e857575c113 100644
--- a/Documentation/s390/monreader.txt
+++ b/Documentation/s390/monreader.rst
@@ -1,24 +1,26 @@
+=================================================
+Linux API for read access to z/VM Monitor Records
+=================================================
Date : 2004-Nov-26
+
Author: Gerald Schaefer (geraldsc@de.ibm.com)
- Linux API for read access to z/VM Monitor Records
- =================================================
Description
===========
This item delivers a new Linux API in the form of a misc char device that is
usable from user space and allows read access to the z/VM Monitor Records
-collected by the *MONITOR System Service of z/VM.
+collected by the `*MONITOR` System Service of z/VM.
User Requirements
=================
The z/VM guest on which you want to access this API needs to be configured in
-order to allow IUCV connections to the *MONITOR service, i.e. it needs the
-IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is
+order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
+IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
This item will use the IUCV device driver to access the z/VM services, so you
need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
@@ -50,7 +52,9 @@ Your guest virtual storage has to end below the starting address of the DCSS
and you have to specify the "mem=" kernel parameter in your parmfile with a
value greater than the ending address of the DCSS.
-Example: DEF STOR 140M
+Example::
+
+ DEF STOR 140M
This defines 140MB storage size for your guest, the parameter "mem=160M" is
added to the parmfile.
@@ -66,24 +70,27 @@ kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
in the parmfile.
The default name for the DCSS is "MONDCSS" if none is specified. In case that
-there are other users already connected to the *MONITOR service (e.g.
+there are other users already connected to the `*MONITOR` service (e.g.
Performance Toolkit), the monitor DCSS is already defined and you have to use
the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
of the monitor DCSS, if already defined, and the users connected to the
-*MONITOR service.
+`*MONITOR` service.
Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
DCSS if your z/VM doesn't have one already, you need Class E privileges to
define and save a DCSS.
Example:
--------
-modprobe monreader mondcss=MYDCSS
+
+::
+
+ modprobe monreader mondcss=MYDCSS
This loads the module and sets the DCSS name to "MYDCSS".
NOTE:
-----
-This API provides no interface to control the *MONITOR service, e.g. specify
+This API provides no interface to control the `*MONITOR` service, e.g. specify
which data should be collected. This can be done by the CP command MONITOR
(Class E privileged), see "CP Command and Utility Reference".
@@ -98,6 +105,7 @@ If your distribution does not support udev, a device node will not be created
automatically and you have to create it manually after loading the module.
Therefore you need to know the major and minor numbers of the device. These
numbers can be found in /sys/class/misc/monreader/dev.
+
Typing cat /sys/class/misc/monreader/dev will give an output of the form
<major>:<minor>. The device node can be created via the mknod command, enter
mknod <name> c <major> <minor>, where <name> is the name of the device node
@@ -105,10 +113,13 @@ to be created.
Example:
--------
-# modprobe monreader
-# cat /sys/class/misc/monreader/dev
-10:63
-# mknod /dev/monreader c 10 63
+
+::
+
+ # modprobe monreader
+ # cat /sys/class/misc/monreader/dev
+ 10:63
+ # mknod /dev/monreader c 10 63
This loads the module with the default monitor DCSS (MONDCSS) and creates a
device node.
@@ -133,20 +144,21 @@ last byte of data. The start address is needed to handle "end-of-frame" records
correctly (domain 1, record 13), i.e. it can be used to determine the record
start offset relative to a 4K page (frame) boundary.
-See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description
+See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
of the monitor control element layout. The layout of the monitor records can
be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
-The layout of the data stream provided by the monreader device is as follows:
-...
-<0 byte read>
-<first MCE> \
-<first set of records> |
-... |- data set
-<last MCE> |
-<last set of records> /
-<0 byte read>
-...
+The layout of the data stream provided by the monreader device is as follows::
+
+ ...
+ <0 byte read>
+ <first MCE> \
+ <first set of records> |
+ ... |- data set
+ <last MCE> |
+ <last set of records> /
+ <0 byte read>
+ ...
There may be more than one combination of MCE and corresponding record set
within one data set and the end of each data set is indicated by a successful
@@ -165,15 +177,19 @@ As with most char devices, error conditions are indicated by returning a
negative value for the number of bytes read. In this case, the errno variable
indicates the error condition:
-EIO: reply failed, read data is invalid and the application
+EIO:
+ reply failed, read data is invalid and the application
should discard the data read since the last successful read with 0 size.
-EFAULT: copy_to_user failed, read data is invalid and the application should
- discard the data read since the last successful read with 0 size.
-EAGAIN: occurs on a non-blocking read if there is no data available at the
- moment. There is no data missing or corrupted, just try again or rather
- use polling for non-blocking reads.
-EOVERFLOW: message limit reached, the data read since the last successful
- read with 0 size is valid but subsequent records may be missing.
+EFAULT:
+ copy_to_user failed, read data is invalid and the application should
+ discard the data read since the last successful read with 0 size.
+EAGAIN:
+ occurs on a non-blocking read if there is no data available at the
+ moment. There is no data missing or corrupted, just try again or rather
+ use polling for non-blocking reads.
+EOVERFLOW:
+ message limit reached, the data read since the last successful
+ read with 0 size is valid but subsequent records may be missing.
In the last case (EOVERFLOW) there may be missing data, in the first two cases
(EIO, EFAULT) there will be missing data. It's up to the application if it will
@@ -183,7 +199,7 @@ Open:
-----
Only one user is allowed to open the char device. If it is already in use, the
open function will fail (return a negative value) and set errno to EBUSY.
-The open function may also fail if an IUCV connection to the *MONITOR service
+The open function may also fail if an IUCV connection to the `*MONITOR` service
cannot be established. In this case errno will be set to EIO and an error
message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
codes are described in the "z/VM Performance" book, Appendix A.
@@ -194,4 +210,3 @@ As soon as the device is opened, incoming messages will be accepted and they
will account for the message limit, i.e. opening the device without reading
from it will provoke the "message limit reached" error (EOVERFLOW error code)
eventually.
-
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.rst
index aa06fcf5f8c2..f02fdaa68de0 100644
--- a/Documentation/s390/qeth.txt
+++ b/Documentation/s390/qeth.rst
@@ -1,8 +1,12 @@
+=============================
IBM s390 QDIO Ethernet Driver
+=============================
OSA and HiperSockets Bridge Port Support
+========================================
Uevents
+-------
To generate the events the device must be assigned a role of either
a primary or a secondary Bridge Port. For more information, see
@@ -13,12 +17,15 @@ of some configured Bridge Port device on the channel changes, a udev
event with ACTION=CHANGE is emitted on behalf of the corresponding
ccwgroup device. The event has the following attributes:
-BRIDGEPORT=statechange - indicates that the Bridge Port device changed
+BRIDGEPORT=statechange
+ indicates that the Bridge Port device changed
its state.
-ROLE={primary|secondary|none} - the role assigned to the port.
+ROLE={primary|secondary|none}
+ the role assigned to the port.
-STATE={active|standby|inactive} - the newly assumed state of the port.
+STATE={active|standby|inactive}
+ the newly assumed state of the port.
When run on HiperSockets Bridge Capable Port hardware with host address
notifications enabled, a udev event with ACTION=CHANGE is emitted.
@@ -26,25 +33,32 @@ It is emitted on behalf of the corresponding ccwgroup device when a host
or a VLAN is registered or unregistered on the network served by the device.
The event has the following attributes:
-BRIDGEDHOST={reset|register|deregister|abort} - host address
+BRIDGEDHOST={reset|register|deregister|abort}
+ host address
notifications are started afresh, a new host or VLAN is registered or
deregistered on the Bridge Port HiperSockets channel, or address
notifications are aborted.
-VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included
+VLAN=numeric-vlan-id
+ VLAN ID on which the event occurred. Not included
if no VLAN is involved in the event.
-MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered
+MAC=xx:xx:xx:xx:xx:xx
+ MAC address of the host that is being registered
or deregistered from the HiperSockets channel. Not reported if the
event reports the creation or destruction of a VLAN.
-NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number).
+NTOK_BUSID=x.y.zzzz
+ device bus ID (CSSID, SSID and device number).
-NTOK_IID=xx - device IID.
+NTOK_IID=xx
+ device IID.
-NTOK_CHPID=xx - device CHPID.
+NTOK_CHPID=xx
+ device CHPID.
-NTOK_CHID=xxxx - device channel ID.
+NTOK_CHID=xxxx
+ device channel ID.
-Note that the NTOK_* attributes refer to devices other than the one
+Note that the `NTOK_*` attributes refer to devices other than the one
connected to the system on which the OS is running.
diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst
new file mode 100644
index 000000000000..cdb36842b898
--- /dev/null
+++ b/Documentation/s390/s390dbf.rst
@@ -0,0 +1,487 @@
+==================
+S390 Debug Feature
+==================
+
+files:
+ - arch/s390/kernel/debug.c
+ - arch/s390/include/asm/debug.h
+
+Description:
+------------
+The goal of this feature is to provide a kernel debug logging API
+where log records can be stored efficiently in memory, where each component
+(e.g. device drivers) can have one separate debug log.
+One purpose of this is to inspect the debug logs after a production system crash
+in order to analyze the reason for the crash.
+
+If the system still runs but only a subcomponent which uses dbf fails,
+it is possible to look at the debug logs on a live system via the Linux
+debugfs filesystem.
+
+The debug feature may also be very useful for kernel and driver development.
+
+Design:
+-------
+Kernel components (e.g. device drivers) can register themselves with the debug
+feature through the function call :c:func:`debug_register()`.
+This function initializes a
+debug log for the caller. For each debug log there exists a number of debug
+areas, of which exactly one is active at a time. Each debug area consists of
+contiguous pages in memory. Debug entries (log records) are stored in the
+debug areas and are written by event- and exception-calls.
+
+An event-call writes the specified debug entry to the active debug
+area and updates the log pointer for the active area. If the end
+of the active debug area is reached, a wrap around is done (ring buffer)
+and the next debug entry will be written at the beginning of the active
+debug area.
+
+An exception-call writes the specified debug entry to the log and
+switches to the next debug area. This is done in order to be sure
+that the records which describe the origin of the exception are not
+overwritten when a wrap around for the current area occurs.
+
+The debug areas themselves are also ordered in form of a ring buffer.
+When an exception is thrown in the last debug area, the following debug
+entries are then written again in the very first area.
+
+There are four versions for the event- and exception-calls: One for
+logging raw data, one for text, one for numbers (unsigned int and long),
+and one for sprintf-like formatted strings.
+
+Each debug entry contains the following data:
+
+- Timestamp
+- Cpu-Number of calling task
+- Level of debug entry (0...6)
+- Return Address to caller
+- Flag, if entry is an exception or not
+
+The debug logs can be inspected in a live system through entries in
+the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
+a directory for each registered component, which is named like the
+corresponding component. The debugfs normally should be mounted to
+``/sys/kernel/debug`` therefore the debug feature can be accessed under
+``/sys/kernel/debug/s390dbf``.
+
+The content of the directories are files which represent different views
+to the debug log. Each component can decide which views should be
+used by registering them with the function :c:func:`debug_register_view()`.
+Predefined views for hex/ascii, sprintf and raw binary data are provided.
+It is also possible to define other views. The content of
+a view can be inspected simply by reading the corresponding debugfs file.
+
+All debug logs have an actual debug level (range from 0 to 6).
+The default level is 3. Event and Exception functions have a :c:data:`level`
+parameter. Only debug entries with a level that is lower or equal
+than the actual level are written to the log. This means, when
+writing events, high priority log entries should have a low level
+value whereas low priority entries should have a high one.
+The actual debug level can be changed via the debugfs filesystem
+by writing a number string "x" to the ``level`` debugfs file which is
+provided for every debug log. Debugging can be switched off completely
+by using "-" on the ``level`` debugfs file.
+
+Example::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/level
+
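+Inside the kernel, a driver can test the level before building an expensive
+log record. A minimal sketch (the log setup is assumed to have happened as
+in the examples below) using :c:func:`debug_level_enabled()`:
+
+.. code-block:: c
+
+ static void trace_state(debug_info_t *info, unsigned long state)
+ {
+     /* skip the sprintf formatting entirely when level 5
+      * entries would be discarded anyway */
+     if (debug_level_enabled(info, 5))
+         debug_sprintf_event(info, 5, "state=%lx\n", state);
+ }
+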
+It is also possible to deactivate the debug feature globally for every
+debug log. You can change the behavior using 2 sysctl parameters in
+``/proc/sys/s390dbf``:
+
+There are currently 2 possible triggers, which stop the debug feature
+globally. The first possibility is to use the ``debug_active`` sysctl. If
+set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
+debug feature is turned off.
+
+The second trigger which stops the debug feature is a kernel oops.
+That prevents the debug feature from overwriting debug information that
+happened before the oops. After an oops you can reactivate the debug feature
+by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it's not
+suggested to use an oopsed kernel in a production environment.
+
+If you want to disallow the deactivation of the debug feature, you can use
+the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
+feature cannot be stopped. If the debug feature is already stopped, it
+will stay deactivated.
+
+Kernel Interfaces:
+------------------
+
+.. kernel-doc:: arch/s390/kernel/debug.c
+.. kernel-doc:: arch/s390/include/asm/debug.h
+
+Predefined views:
+-----------------
+
+.. code-block:: c
+
+ extern struct debug_view debug_hex_ascii_view;
+
+ extern struct debug_view debug_raw_view;
+
+ extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+.. code-block:: c
+
+ /*
+ * hex_ascii- + raw-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and 4 byte data field */
+
+ debug_info = debug_register("test", 1, 4, 4 );
+ debug_register_view(debug_info, &debug_hex_ascii_view);
+ debug_register_view(debug_info, &debug_raw_view);
+
+ debug_text_event(debug_info, 4 , "one ");
+ debug_int_exception(debug_info, 4, 4711);
+ debug_event(debug_info, 3, &debug_info, 4);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+.. code-block:: c
+
+ /*
+ * sprintf-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and data field for */
+ /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
+
+ debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
+ debug_register_view(debug_info, &debug_sprintf_view);
+
+ debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+ debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+Debugfs Interface
+-----------------
+Views of the debug logs can be inspected by reading the corresponding
+debugfs files:
+
+Example::
+
+ > ls /sys/kernel/debug/s390dbf/dasd
+ flush hex_ascii level pages raw
+ > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
+ 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
+ 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
+ 00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
+ 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
+ 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
+ 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
+ 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
+ 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
+
+See the section about predefined views for an explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example::
+
+
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 3
+ > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed by piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0::
+
+ > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible to change the size of debug areas by piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd"::
+
+ > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stopping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed::
+
+ > cat /proc/sys/s390dbf/debug_stoppable
+
+2. Stop debug feature::
+
+ > echo 0 > /proc/sys/s390dbf/debug_active
+
+crash Interface
+----------------
+The ``crash`` tool since v5.1.0 has a built-in command
+``s390dbf`` to display all the debug logs or export them to the file system.
+With this tool it is possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs on a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anchor of the debug-logs through
+the ``debug_area_first`` symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in :c:func:`debug_register()` in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are three predefined views: hex_ascii, raw and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. ``45 43 4b 44 | ECKD``).
+The raw view returns a bytestream as the debug areas are stored in memory.
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT:
+ Using "%s" in sprintf event functions is dangerous. You can only
+ use "%s" in the sprintf event functions, if the memory for the passed string
+ is available as long as the debug feature exists. The reason behind this is
+ that due to performance considerations only a pointer to the string is stored
+ in the debug feature. If you log a string that is freed afterwards, you will
+ get an OOPS when inspecting the debug feature, because then the debug feature
+ will access the already freed memory.
+
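+A short sketch of the pitfall (``debug_info`` and ``nr`` stand in for
+whatever the surrounding driver code provides):
+
+.. code-block:: c
+
+ {
+     char tmp[16];
+
+     snprintf(tmp, sizeof(tmp), "dev%d", nr);
+     /* WRONG: only the pointer to tmp is stored in the debug
+      * entry; tmp is gone once this stack frame returns */
+     debug_sprintf_event(debug_info, 3, "%s\n", tmp);
+     /* OK: the integer value itself is stored in the entry */
+     debug_sprintf_event(debug_info, 3, "dev%d\n", nr);
+ }
+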
+NOTE:
+ If using the sprintf view do NOT use other event/exception functions
+ than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+ Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+The format of the raw view is:
+
+- Header as described in debug.h
+- datafield
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view)::
+
+ area time level exception cpu caller data (hex + ascii)
+ --------------------------------------------------------------------------
+ 00 00964419409:440690 1 - 00 88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. It defines
+callback functions which are used for reading and writing the debugfs files:
+
+.. code-block:: c
+
+ struct debug_view {
+ char name[DEBUG_MAX_PROCF_LEN];
+ debug_prolog_proc_t* prolog_proc;
+ debug_header_proc_t* header_proc;
+ debug_format_proc_t* format_proc;
+ debug_input_proc_t* input_proc;
+ void* private_data;
+ };
+
+where:
+
+.. code-block:: c
+
+ typedef int (debug_header_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ int area,
+ debug_entry_t* entry,
+ char* out_buf);
+
+ typedef int (debug_format_proc_t) (debug_info_t* id,
+ struct debug_view* view, char* out_buf,
+ const char* in_buf);
+ typedef int (debug_prolog_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ char* out_buf);
+ typedef int (debug_input_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ struct file* file, const char* user_buf,
+ size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this::
+
+ "prolog_proc output"
+
+ "header_proc output 1" "format_proc output 1"
+ "header_proc output 2" "format_proc output 2"
+ "header_proc output 3" "format_proc output 3"
+ ...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality that is triggered when
+data is written to the view (e.g. ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
+
+For header_proc the default function
+:c:func:`debug_dflt_header_fn()`, which is defined in debug.h, can be used;
+it produces the same header output as the predefined views.
+E.g.::
+
+ 00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example:
+
+.. code-block:: c
+
+ #include <asm/debug.h>
+
+ #define UNKNOWNSTR "data: %08x"
+
+ const char* messages[] =
+ {"This error...........\n",
+ "That error...........\n",
+ "Problem..............\n",
+ "Something went wrong.\n",
+ "Everything ok........\n",
+ NULL
+ };
+
+ static int debug_test_format_fn(
+ debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf
+ )
+ {
+ int i, rc = 0;
+
+ if (id->buf_size >= 4) {
+ int msg_nr = *((int*)in_buf);
+ if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
+ rc += sprintf(out_buf, "%s", messages[msg_nr]);
+ else
+ rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+ }
+ return rc;
+ }
+
+ struct debug_view debug_test_view = {
+ "myview", /* name of view */
+ NULL, /* no prolog */
+ &debug_dflt_header_fn, /* default header for each entry */
+ &debug_test_format_fn, /* our own format function */
+ NULL, /* no input function */
+ NULL /* no private data */
+ };
+
+test:
+=====
+
+.. code-block:: c
+
+ debug_info_t *debug_info;
+ int i;
+ ...
+ debug_info = debug_register("test", 0, 4, 4);
+ debug_register_view(debug_info, &debug_test_view);
+ for (i = 0; i < 10; i ++)
+ debug_int_event(debug_info, 1, i);
+
+::
+
+ > cat /sys/kernel/debug/s390dbf/test/myview
+ 00 00964419734:611402 1 - 00 88042ca This error...........
+ 00 00964419734:611405 1 - 00 88042ca That error...........
+ 00 00964419734:611408 1 - 00 88042ca Problem..............
+ 00 00964419734:611411 1 - 00 88042ca Something went wrong.
+ 00 00964419734:611414 1 - 00 88042ca Everything ok........
+ 00 00964419734:611417 1 - 00 88042ca data: 00000005
+ 00 00964419734:611419 1 - 00 88042ca data: 00000006
+ 00 00964419734:611422 1 - 00 88042ca data: 00000007
+ 00 00964419734:611425 1 - 00 88042ca data: 00000008
+ 00 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
deleted file mode 100644
index 61329fd62e89..000000000000
--- a/Documentation/s390/s390dbf.txt
+++ /dev/null
@@ -1,667 +0,0 @@
-S390 Debug Feature
-==================
-
-files: arch/s390/kernel/debug.c
- arch/s390/include/asm/debug.h
-
-Description:
-------------
-The goal of this feature is to provide a kernel debug logging API
-where log records can be stored efficiently in memory, where each component
-(e.g. device drivers) can have one separate debug log.
-One purpose of this is to inspect the debug logs after a production system crash
-in order to analyze the reason for the crash.
-If the system still runs but only a subcomponent which uses dbf fails,
-it is possible to look at the debug logs on a live system via the Linux
-debugfs filesystem.
-The debug feature may also very useful for kernel and driver development.
-
-Design:
--------
-Kernel components (e.g. device drivers) can register themselves at the debug
-feature with the function call debug_register(). This function initializes a
-debug log for the caller. For each debug log exists a number of debug areas
-where exactly one is active at one time. Each debug area consists of contiguous
-pages in memory. In the debug areas there are stored debug entries (log records)
-which are written by event- and exception-calls.
-
-An event-call writes the specified debug entry to the active debug
-area and updates the log pointer for the active area. If the end
-of the active debug area is reached, a wrap around is done (ring buffer)
-and the next debug entry will be written at the beginning of the active
-debug area.
-
-An exception-call writes the specified debug entry to the log and
-switches to the next debug area. This is done in order to be sure
-that the records which describe the origin of the exception are not
-overwritten when a wrap around for the current area occurs.
-
-The debug areas themselves are also ordered in form of a ring buffer.
-When an exception is thrown in the last debug area, the following debug
-entries are then written again in the very first area.
-
-There are three versions for the event- and exception-calls: One for
-logging raw data, one for text and one for numbers.
-
-Each debug entry contains the following data:
-
-- Timestamp
-- Cpu-Number of calling task
-- Level of debug entry (0...6)
-- Return Address to caller
-- Flag, if entry is an exception or not
-
-The debug logs can be inspected in a live system through entries in
-the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
-a directory for each registered component, which is named like the
-corresponding component. The debugfs normally should be mounted to
-/sys/kernel/debug therefore the debug feature can be accessed under
-/sys/kernel/debug/s390dbf.
-
-The content of the directories are files which represent different views
-to the debug log. Each component can decide which views should be
-used through registering them with the function debug_register_view().
-Predefined views for hex/ascii, sprintf and raw binary data are provided.
-It is also possible to define other views. The content of
-a view can be inspected simply by reading the corresponding debugfs file.
-
-All debug logs have an actual debug level (range from 0 to 6).
-The default level is 3. Event and Exception functions have a 'level'
-parameter. Only debug entries with a level that is lower or equal
-than the actual level are written to the log. This means, when
-writing events, high priority log entries should have a low level
-value whereas low priority entries should have a high one.
-The actual debug level can be changed with the help of the debugfs-filesystem
-through writing a number string "x" to the 'level' debugfs file which is
-provided for every debug log. Debugging can be switched off completely
-by using "-" on the 'level' debugfs file.
-
-Example:
-
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
-
-It is also possible to deactivate the debug feature globally for every
-debug log. You can change the behavior using 2 sysctl parameters in
-/proc/sys/s390dbf:
-There are currently 2 possible triggers, which stop the debug feature
-globally. The first possibility is to use the "debug_active" sysctl. If
-set to 1 the debug feature is running. If "debug_active" is set to 0 the
-debug feature is turned off.
-The second trigger which stops the debug feature is a kernel oops.
-That prevents the debug feature from overwriting debug information that
-happened before the oops. After an oops you can reactivate the debug feature
-by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
-suggested to use an oopsed kernel in a production environment.
-If you want to disallow the deactivation of the debug feature, you can use
-the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
-feature cannot be stopped. If the debug feature is already stopped, it
-will stay deactivated.
-
-Kernel Interfaces:
-------------------
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register(char *name, int pages, int nr_areas,
- int buf_size);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: number of pages, which will be allocated per area
- nr_areas: number of debug areas
- buf_size: size of data area in each debug entry
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
- int buf_size, mode_t mode, uid_t uid,
- gid_t gid);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: Number of pages, which will be allocated per area
- nr_areas: Number of debug areas
- buf_size: Size of data area in each debug entry
- mode: File mode for debugfs files. E.g. S_IRWXUGO
- uid: User ID for debugfs files. Currently only 0 is
- supported.
- gid: Group ID for debugfs files. Currently only 0 is
- supported.
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-void debug_unregister (debug_info_t * id);
-
-Parameter: id: handle for debug log
-
-Return Value: none
-
-Description: frees memory for a debug log and removes all registered debug
- views.
- Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-void debug_set_level (debug_info_t * id, int new_level);
-
-Parameter: id: handle for debug log
- new_level: new debug level
-
-Return Value: none
-
-Description: Sets new actual debug level if new_level is valid.
-
----------------------------------------------------------------------------
-bool debug_level_enabled (debug_info_t * id, int level);
-
-Parameter: id: handle for debug log
- level: debug level
-
-Return Value: True if level is less or equal to the current debug level.
-
-Description: Returns true if debug events for the specified level would be
- logged. Otherwise returns false.
----------------------------------------------------------------------------
-void debug_stop_all(void);
-
-Parameter: none
-
-Return Value: none
-
-Description: stops the debug feature if stopping is allowed. Currently
- used in case of a kernel oops.
-
----------------------------------------------------------------------------
-debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
- int length);
-
-Parameter: id: handle for debug log
- level: debug level
- data: pointer to data for debug entry
- length: length of data in bytes
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_event (debug_info_t * id, int level,
- unsigned int data);
-debug_entry_t* debug_long_event(debug_info_t * id, int level,
- unsigned long data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: integer value for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_event (debug_info_t * id, int level,
- const char* data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: string for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry in ascii format to active debug area
- (if level <= actual debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
- char* string,...);
-
-Parameter: id: handle for debug log
- level: debug level
- string: format string for debug entry
- ...: varargs used as in sprintf()
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry with format string and varargs (longs) to
- active debug area (if level $<=$ actual debug level).
- floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
- int length);
-
-Parameter: id: handle for debug log
- level: debug level
- data: pointer to data for debug entry
- length: length of data in bytes
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_exception (debug_info_t * id, int level,
- unsigned int data);
-debug_entry_t* debug_long_exception(debug_info_t * id, int level,
- unsigned long data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: integer value for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_exception (debug_info_t * id, int level,
- const char* data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: string for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry in ascii format to active debug area
- (if level <= actual debug level) and switches to next debug
- area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
- char* string,...);
-
-Parameter: id: handle for debug log
- level: debug level
- string: format string for debug entry
- ...: varargs used as in sprintf()
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry with format string and varargs (longs) to
- active debug area (if level $<=$ actual debug level) and
- switches to next debug area.
- floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-int debug_register_view (debug_info_t * id, struct debug_view *view);
-
-Parameter: id: handle for debug log
- view: pointer to debug view struct
-
-Return Value: 0 : ok
- < 0: Error
-
-Description: registers new debug view and creates debugfs dir entry
-
----------------------------------------------------------------------------
-int debug_unregister_view (debug_info_t * id, struct debug_view *view);
-
-Parameter: id: handle for debug log
- view: pointer to debug view struct
-
-Return Value: 0 : ok
- < 0: Error
-
-Description: unregisters debug view and removes debugfs dir entry
-
-
-
-Predefined views:
------------------
-
-extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
-extern struct debug_view debug_sprintf_view;
-
-Examples
---------
-
-/*
- * hex_ascii- + raw-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
- /* register 4 debug areas with one page each and 4 byte data field */
-
- debug_info = debug_register ("test", 1, 4, 4 );
- debug_register_view(debug_info,&debug_hex_ascii_view);
- debug_register_view(debug_info,&debug_raw_view);
-
- debug_text_event(debug_info, 4 , "one ");
- debug_int_exception(debug_info, 4, 4711);
- debug_event(debug_info, 3, &debug_info, 4);
-
- return 0;
-}
-
-static void cleanup(void)
-{
- debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
----------------------------------------------------------------------------
-
-/*
- * sprintf-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
- /* register 4 debug areas with one page each and data field for */
- /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
-
- debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
- debug_register_view(debug_info,&debug_sprintf_view);
-
- debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
- debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
-
- return 0;
-}
-
-static void cleanup(void)
-{
- debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
-
-
-Debugfs Interface
-----------------
-Views to the debug logs can be investigated through reading the corresponding
-debugfs-files:
-
-Example:
-
-> ls /sys/kernel/debug/s390dbf/dasd
-flush hex_ascii level pages raw
-> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
-00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
-00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
-00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
-00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
-01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
-01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
-01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
-01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
-01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
-01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
-
-See section about predefined views for explanation of the above output!
-
-Changing the debug level
-------------------------
-
-Example:
-
-
-> cat /sys/kernel/debug/s390dbf/dasd/level
-3
-> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
-> cat /sys/kernel/debug/s390dbf/dasd/level
-5
-
-Flushing debug areas
---------------------
-Debug areas can be flushed with piping the number of the desired
-area (0...n) to the debugfs file "flush". When using "-" all debug areas
-are flushed.
-
-Examples:
-
-1. Flush debug area 0:
-> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
-
-2. Flush all debug areas:
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
-
-Changing the size of debug areas
-------------------------------------
-It is possible the change the size of debug areas through piping
-the number of pages to the debugfs file "pages". The resize request will
-also flush the debug areas.
-
-Example:
-
-Define 4 pages for the debug areas of debug feature "dasd":
-> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
-
-Stopping the debug feature
---------------------------
-Example:
-
-1. Check if stopping is allowed
-> cat /proc/sys/s390dbf/debug_stoppable
-2. Stop debug feature
-> echo 0 > /proc/sys/s390dbf/debug_active
-
-lcrash Interface
-----------------
-It is planned that the dump analysis tool lcrash gets an additional command
-'s390dbf' to display all the debug logs. With this tool it will be possible
-to investigate the debug logs on a live system and with a memory dump after
-a system crash.
-
-Investigating raw memory
-------------------------
-One last possibility to investigate the debug logs at a live
-system and after a system crash is to look at the raw memory
-under VM or at the Service Element.
-It is possible to find the anchor of the debug-logs through
-the 'debug_area_first' symbol in the System map. Then one has
-to follow the correct pointers of the data-structures defined
-in debug.h and find the debug-areas in memory.
-Normally modules which use the debug feature will also have
-a global variable with the pointer to the debug-logs. Following
-this pointer it will also be possible to find the debug logs in
-memory.
-
-For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
-for the length of the data field in debug_register() in
-order to see the debug entries well formatted.
-
-
-Predefined Views
-----------------
-
-There are three predefined views: hex_ascii, raw and sprintf.
-The hex_ascii view shows the data field in hex and ascii representation
-(e.g. '45 43 4b 44 | ECKD').
-The raw view returns a bytestream as the debug areas are stored in memory.
-
-The sprintf view formats the debug entries in the same way as the sprintf
-function would do. The sprintf event/exception functions write to the
-debug entry a pointer to the format string (size = sizeof(long))
-and for each vararg a long value. So e.g. for a debug entry with a format
-string plus two varargs one would need to allocate a (3 * sizeof(long))
-byte data area in the debug_register() function.
-
-IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
-use "%s" in the sprintf event functions, if the memory for the passed string is
-available as long as the debug feature exists. The reason behind this is that
-due to performance considerations only a pointer to the string is stored in
-the debug feature. If you log a string that is freed afterwards, you will get
-an OOPS when inspecting the debug feature, because then the debug feature will
-access the already freed memory.
-
-NOTE: If using the sprintf view do NOT use other event/exception functions
-than the sprintf-event and -exception functions.
-
-The format of the hex_ascii and sprintf view is as follows:
-- Number of area
-- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
- Universal Time (UTC), January 1, 1970)
-- level of debug entry
-- Exception flag (* = Exception)
-- Cpu-Number of calling task
-- Return Address to caller
-- data field
-
-The format of the raw view is:
-- Header as described in debug.h
-- datafield
-
-A typical line of the hex_ascii view will look like the following (first line
-is only for explanation and will not be displayed when 'cating' the view):
-
-area time level exception cpu caller data (hex + ascii)
---------------------------------------------------------------------------
-00 00964419409:440690 1 - 00 88023fe
-
-
-Defining views
---------------
-
-Views are specified with the 'debug_view' structure. There are defined
-callback functions which are used for reading and writing the debugfs files:
-
-struct debug_view {
- char name[DEBUG_MAX_PROCF_LEN];
- debug_prolog_proc_t* prolog_proc;
- debug_header_proc_t* header_proc;
- debug_format_proc_t* format_proc;
- debug_input_proc_t* input_proc;
- void* private_data;
-};
-
-where
-
-typedef int (debug_header_proc_t) (debug_info_t* id,
- struct debug_view* view,
- int area,
- debug_entry_t* entry,
- char* out_buf);
-
-typedef int (debug_format_proc_t) (debug_info_t* id,
- struct debug_view* view, char* out_buf,
- const char* in_buf);
-typedef int (debug_prolog_proc_t) (debug_info_t* id,
- struct debug_view* view,
- char* out_buf);
-typedef int (debug_input_proc_t) (debug_info_t* id,
- struct debug_view* view,
- struct file* file, const char* user_buf,
- size_t in_buf_size, loff_t* offset);
-
-
-The "private_data" member can be used as pointer to view specific data.
-It is not used by the debug feature itself.
-
-The output when reading a debugfs file is structured like this:
-
-"prolog_proc output"
-
-"header_proc output 1" "format_proc output 1"
-"header_proc output 2" "format_proc output 2"
-"header_proc output 3" "format_proc output 3"
-...
-
-When a view is read from the debugfs, the Debug Feature calls the
-'prolog_proc' once for writing the prolog.
-Then 'header_proc' and 'format_proc' are called for each
-existing debug entry.
-
-The input_proc can be used to implement functionality when it is written to
-the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
-
-For header_proc there can be used the default function
-debug_dflt_header_fn() which is defined in debug.h.
-and which produces the same header output as the predefined views.
-E.g:
-00 00964419409:440761 2 - 00 88023ec
-
-In order to see how to use the callback functions check the implementation
-of the default views!
-
-Example
-
-#include <asm/debug.h>
-
-#define UNKNOWNSTR "data: %08x"
-
-const char* messages[] =
-{"This error...........\n",
- "That error...........\n",
- "Problem..............\n",
- "Something went wrong.\n",
- "Everything ok........\n",
- NULL
-};
-
-static int debug_test_format_fn(
- debug_info_t * id, struct debug_view *view,
- char *out_buf, const char *in_buf
-)
-{
- int i, rc = 0;
-
- if(id->buf_size >= 4) {
- int msg_nr = *((int*)in_buf);
- if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
- rc += sprintf(out_buf, "%s", messages[msg_nr]);
- else
- rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
- }
- out:
- return rc;
-}
-
-struct debug_view debug_test_view = {
- "myview", /* name of view */
- NULL, /* no prolog */
- &debug_dflt_header_fn, /* default header for each entry */
- &debug_test_format_fn, /* our own format function */
- NULL, /* no input function */
- NULL /* no private data */
-};
-
-=====
-test:
-=====
-debug_info_t *debug_info;
-...
-debug_info = debug_register("test", 0, 4, 4);
-debug_register_view(debug_info, &debug_test_view);
-for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
-
-> cat /sys/kernel/debug/s390dbf/test/myview
-00 00964419734:611402 1 - 00 88042ca This error...........
-00 00964419734:611405 1 - 00 88042ca That error...........
-00 00964419734:611408 1 - 00 88042ca Problem..............
-00 00964419734:611411 1 - 00 88042ca Something went wrong.
-00 00964419734:611414 1 - 00 88042ca Everything ok........
-00 00964419734:611417 1 - 00 88042ca data: 00000005
-00 00964419734:611419 1 - 00 88042ca data: 00000006
-00 00964419734:611422 1 - 00 88042ca data: 00000007
-00 00964419734:611425 1 - 00 88042ca data: 00000008
-00 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/s390/text_files.rst b/Documentation/s390/text_files.rst
new file mode 100644
index 000000000000..c94d05d4fa17
--- /dev/null
+++ b/Documentation/s390/text_files.rst
@@ -0,0 +1,11 @@
+ibm 3270 changelog
+------------------
+
+.. include:: 3270.ChangeLog
+ :literal:
+
+ibm 3270 config3270.sh
+----------------------
+
+.. literalinclude:: config3270.sh
+ :language: shell
diff --git a/Documentation/s390/vfio-ap.txt b/Documentation/s390/vfio-ap.rst
index 65167cfe4485..b5c51f7c748d 100644
--- a/Documentation/s390/vfio-ap.txt
+++ b/Documentation/s390/vfio-ap.rst
@@ -1,4 +1,9 @@
-Introduction:
+===============================
+Adjunct Processor (AP) facility
+===============================
+
+
+Introduction
============
The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
@@ -11,7 +16,7 @@ framework. This implementation relies considerably on the s390 virtualization
facilities which do most of the hard work of providing direct access to AP
devices.
-AP Architectural Overview:
+AP Architectural Overview
=========================
To facilitate the comprehension of the design, let's start with some
definitions:
@@ -31,13 +36,13 @@ definitions:
in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
creates a sysfs device for each assigned adapter. For example, if AP adapters
4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
- sysfs device entries:
+ sysfs device entries::
/sys/devices/ap/card04
/sys/devices/ap/card0a
Symbolic links to these devices will also be created in the AP bus devices
- sub-directory:
+ sub-directory::
/sys/bus/ap/devices/[card04]
/sys/bus/ap/devices/[card0a]
@@ -84,7 +89,7 @@ definitions:
the cross product of the AP adapter and usage domain numbers detected when the
AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
- following sysfs entries:
+ following sysfs entries::
/sys/devices/ap/card04/04.0006
/sys/devices/ap/card04/04.0047
@@ -92,7 +97,7 @@ definitions:
/sys/devices/ap/card0a/0a.0047
The following symbolic links to these devices will be created in the AP bus
- devices subdirectory:
+ devices subdirectory::
/sys/bus/ap/devices/[04.0006]
/sys/bus/ap/devices/[04.0047]
@@ -112,7 +117,7 @@ definitions:
domain that is not one of the usage domains, but the modified domain
must be one of the control domains.
-AP and SIE:
+AP and SIE
==========
Let's now take a look at how AP instructions executed on a guest are interpreted
by the hardware.
@@ -153,7 +158,7 @@ and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
The APQNs can provide secure key functionality - i.e., a private key is stored
on the adapter card for each of its domains - so each APQN must be assigned to
-at most one guest or to the linux host.
+at most one guest or to the linux host::
Example 1: Valid configuration:
------------------------------
@@ -181,8 +186,8 @@ at most one guest or to the linux host.
This is an invalid configuration because both guests have access to
APQN (1,6).
-The Design:
-===========
+The Design
+==========
The design introduces three new objects:
1. AP matrix device
@@ -205,43 +210,43 @@ The VFIO AP (vfio_ap) device driver serves the following purposes:
Reserve APQNs for exclusive use of KVM guests
---------------------------------------------
The following block diagram illustrates the mechanism by which APQNs are
-reserved:
-
- +------------------+
- 7 remove | |
- +--------------------> cex4queue driver |
- | | |
- | +------------------+
- |
- |
- | +------------------+ +-----------------+
- | 5 register driver | | 3 create | |
- | +----------------> Device core +----------> matrix device |
- | | | | | |
- | | +--------^---------+ +-----------------+
- | | |
- | | +-------------------+
- | | +-----------------------------------+ |
- | | | 4 register AP driver | | 2 register device
- | | | | |
-+--------+---+-v---+ +--------+-------+-+
-| | | |
-| ap_bus +--------------------- > vfio_ap driver |
-| | 8 probe | |
-+--------^---------+ +--^--^------------+
-6 edit | | |
- apmask | +-----------------------------+ | 9 mdev create
- aqmask | | 1 modprobe |
-+--------+-----+---+ +----------------+-+ +------------------+
-| | | |8 create | mediated |
-| admin | | VFIO device core |---------> matrix |
-| + | | | device |
-+------+-+---------+ +--------^---------+ +--------^---------+
- | | | |
- | | 9 create vfio_ap-passthrough | |
- | +------------------------------+ |
- +-------------------------------------------------------------+
- 10 assign adapter/domain/control domain
+reserved::
+
+ +------------------+
+ 7 remove | |
+ +--------------------> cex4queue driver |
+ | | |
+ | +------------------+
+ |
+ |
+ | +------------------+ +----------------+
+ | 5 register driver | | 3 create | |
+ | +----------------> Device core +----------> matrix device |
+ | | | | | |
+ | | +--------^---------+ +----------------+
+ | | |
+ | | +-------------------+
+ | | +-----------------------------------+ |
+ | | | 4 register AP driver | | 2 register device
+ | | | | |
+ +--------+---+-v---+ +--------+-------+-+
+ | | | |
+ | ap_bus +--------------------- > vfio_ap driver |
+ | | 8 probe | |
+ +--------^---------+ +--^--^------------+
+ 6 edit | | |
+ apmask | +-----------------------------+ | 9 mdev create
+ aqmask | | 1 modprobe |
+ +--------+-----+---+ +----------------+-+ +----------------+
+ | | | |8 create | mediated |
+ | admin | | VFIO device core |---------> matrix |
+ | + | | | device |
+ +------+-+---------+ +--------^---------+ +--------^-------+
+ | | | |
+ | | 9 create vfio_ap-passthrough | |
+ | +------------------------------+ |
+ +-------------------------------------------------------------+
+ 10 assign adapter/domain/control domain
The process for reserving an AP queue for use by a KVM guest is:
@@ -250,7 +255,7 @@ The process for reserving an AP queue for use by a KVM guest is:
device with the device core. This will serve as the parent device for
all mediated matrix devices used to configure an AP matrix for a guest.
3. The /sys/devices/vfio_ap/matrix device is created by the device core
-4 The vfio_ap device driver will register with the AP bus for AP queue devices
+4. The vfio_ap device driver will register with the AP bus for AP queue devices
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
driver's probe and remove callback interfaces. Devices older than CEX4 queues
are not supported to simplify the implementation by not needlessly
@@ -266,13 +271,14 @@ The process for reserving an AP queue for use by a KVM guest is:
it.
9. The administrator creates a passthrough type mediated matrix device to be
used by a guest
-10 The administrator assigns the adapters, usage domains and control domains
- to be exclusively used by a guest.
+10. The administrator assigns the adapters, usage domains and control domains
+ to be exclusively used by a guest.
Set up the VFIO mediated device interfaces
------------------------------------------
The VFIO AP device driver utilizes the common interface of the VFIO mediated
device core driver to:
+
* Register an AP mediated bus driver to add a mediated matrix device to and
remove it from a VFIO group.
* Create and destroy a mediated matrix device
@@ -280,25 +286,25 @@ device core driver to:
* Add a mediated matrix device to and remove it from an IOMMU group
The following high-level block diagram shows the main components and interfaces
-of the VFIO AP mediated matrix device driver:
-
- +-------------+
- | |
- | +---------+ | mdev_register_driver() +--------------+
- | | Mdev | +<-----------------------+ |
- | | bus | | | vfio_mdev.ko |
- | | driver | +----------------------->+ |<-> VFIO user
- | +---------+ | probe()/remove() +--------------+ APIs
- | |
- | MDEV CORE |
- | MODULE |
- | mdev.ko |
- | +---------+ | mdev_register_device() +--------------+
- | |Physical | +<-----------------------+ |
- | | device | | | vfio_ap.ko |<-> matrix
- | |interface| +----------------------->+ | device
- | +---------+ | callback +--------------+
- +-------------+
+of the VFIO AP mediated matrix device driver::
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ap.ko |<-> matrix
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
During initialization of the vfio_ap module, the matrix device is registered
with an 'mdev_parent_ops' structure that provides the sysfs attribute
@@ -306,7 +312,8 @@ structures, mdev functions and callback interfaces for managing the mediated
matrix device.
* sysfs attribute structures:
- * supported_type_groups
+
+ supported_type_groups
The VFIO mediated device framework supports creation of user-defined
mediated device types. These mediated device types are specified
via the 'supported_type_groups' structure when a device is registered
@@ -318,61 +325,72 @@ matrix device.
The VFIO AP device driver will register one mediated device type for
passthrough devices:
+
/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+
Only the read-only attributes required by the VFIO mdev framework will
- be provided:
- ... name
- ... device_api
- ... available_instances
- ... device_api
- Where:
- * name: specifies the name of the mediated device type
- * device_api: the mediated device type's API
- * available_instances: the number of mediated matrix passthrough devices
- that can be created
- * device_api: specifies the VFIO API
- * mdev_attr_groups
+ be provided::
+
+     ... name
+     ... device_api
+     ... available_instances
+
+ Where:
+
+   * name:
+     specifies the name of the mediated device type
+   * device_api:
+     the mediated device type's API (i.e., the VFIO device API)
+   * available_instances:
+     the number of mediated matrix passthrough devices
+     that can be created
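+
+   For instance, how many more mediated matrix passthrough devices may
+   still be created could be checked by reading the available_instances
+   attribute (an illustrative query)::
+
+     cat /sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/available_instances
+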
+ mdev_attr_groups
This attribute group identifies the user-defined sysfs attributes of the
mediated device. When a device is registered with the VFIO mediated device
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
structure will be created in the mediated matrix device's directory. The
sysfs attributes for a mediated matrix device are:
- * assign_adapter:
- * unassign_adapter:
+
+ assign_adapter / unassign_adapter:
Write-only attributes for assigning/unassigning an AP adapter to/from the
mediated matrix device. To assign/unassign an adapter, the APID of the
adapter is echoed to the respective attribute file.
- * assign_domain:
- * unassign_domain:
+ assign_domain / unassign_domain:
Write-only attributes for assigning/unassigning an AP usage domain to/from
the mediated matrix device. To assign/unassign a domain, the domain
number of the usage domain is echoed to the respective attribute
file.
- * matrix:
+ matrix:
A read-only file for displaying the APQNs derived from the cross product
of the adapter and domain numbers assigned to the mediated matrix device.
- * assign_control_domain:
- * unassign_control_domain:
+ assign_control_domain / unassign_control_domain:
Write-only attributes for assigning/unassigning an AP control domain
to/from the mediated matrix device. To assign/unassign a control domain,
the ID of the domain to be assigned/unassigned is echoed to the respective
attribute file.
- * control_domains:
+ control_domains:
A read-only file for displaying the control domain numbers assigned to the
mediated matrix device.
* functions:
- * create:
+
+ create:
allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+
* Store the reference to the KVM structure for the guest using the mdev
* Store the AP matrix configuration for the adapters, domains, and control
domains assigned via the corresponding sysfs attributes files
- * remove:
+
+ remove:
deallocates the mediated matrix device's ap_matrix_mdev structure. This will
be allowed only if a running guest is not using the mdev.
* callback interfaces
- * open:
+
+ open:
The vfio_ap driver uses this callback to register a
VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
device. The open is invoked when QEMU connects the VFIO iommu group
@@ -380,16 +398,17 @@ matrix device.
to configure the KVM guest is provided via this callback. The KVM structure,
is used to configure the guest's access to the AP matrix defined via the
mediated matrix device's sysfs attribute files.
- * release:
+ release:
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
mdev matrix device and deconfigures the guest's AP matrix.
-Configure the APM, AQM and ADM in the CRYCB:
+Configure the APM, AQM and ADM in the CRYCB
-------------------------------------------
Configuring the AP matrix for a KVM guest will be performed when the
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
function is called when QEMU connects to KVM. The guest's AP matrix is
configured via it's CRYCB by:
+
* Setting the bits in the APM corresponding to the APIDs assigned to the
mediated matrix device via its 'assign_adapter' interface.
* Setting the bits in the AQM corresponding to the domains assigned to the
@@ -418,12 +437,12 @@ available to a KVM guest via the following CPU model features:
Note: If the user chooses to specify a CPU model different than the 'host'
model to QEMU, the CPU model features and facilities need to be turned on
-explicitly; for example:
+explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
A guest can be precluded from using AP features/facilities by turning them off
-explicitly; for example:
+explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
@@ -435,7 +454,7 @@ the APFT facility is not installed on the guest, then the probe of device
drivers will fail since only type 10 and newer devices can be configured for
guest use.
-Example:
+Example
=======
Let's now provide an example to illustrate how KVM guests may be given
access to AP facilities. For this example, we will show how to configure
@@ -444,30 +463,36 @@ look like this:
Guest1
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
05 CEX5C CCA-Coproc
05.0004 CEX5C CCA-Coproc
05.00ab CEX5C CCA-Coproc
06 CEX5A Accelerator
06.0004 CEX5A Accelerator
06.00ab CEX5C CCA-Coproc
+=========== ===== ============
Guest2
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
05 CEX5A Accelerator
05.0047 CEX5A Accelerator
05.00ff CEX5A Accelerator
+=========== ===== ============
Guest3
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
06 CEX5A Accelerator
06.0047 CEX5A Accelerator
06.00ff CEX5A Accelerator
+=========== ===== ============
These are the steps:
@@ -492,25 +517,26 @@ These are the steps:
* VFIO_MDEV_DEVICE
* KVM
- If using make menuconfig select the following to build the vfio_ap module:
- -> Device Drivers
- -> IOMMU Hardware Support
- select S390 AP IOMMU Support
- -> VFIO Non-Privileged userspace driver framework
- -> Mediated device driver frramework
- -> VFIO driver for Mediated devices
- -> I/O subsystem
- -> VFIO support for AP devices
+ If using make menuconfig select the following to build the vfio_ap module::
+
+ -> Device Drivers
+ -> IOMMU Hardware Support
+ select S390 AP IOMMU Support
+ -> VFIO Non-Privileged userspace driver framework
+ -> Mediated device driver framework
+ -> VFIO driver for Mediated devices
+ -> I/O subsystem
+ -> VFIO support for AP devices
2. Secure the AP queues to be used by the three guests so that the host can not
access them. To secure them, there are two sysfs files that specify
bitmasks marking a subset of the APQN range as 'usable by the default AP
queue device drivers' or 'not usable by the default device drivers' and thus
available for use by the vfio_ap device driver. The location of the sysfs
- files containing the masks are:
+ files containing the masks are::
- /sys/bus/ap/apmask
- /sys/bus/ap/aqmask
+ /sys/bus/ap/apmask
+ /sys/bus/ap/aqmask
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
(APID). Each bit in the mask, from left to right (i.e., from most significant
@@ -526,7 +552,7 @@ These are the steps:
queue device drivers; otherwise, the APQI is usable by the vfio_ap device
driver.
- Take, for example, the following mask:
+ Take, for example, the following mask::
0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
@@ -548,68 +574,70 @@ These are the steps:
respective sysfs mask file in one of two formats:
* An absolute hex string starting with 0x - like "0x12345678" - sets
- the mask. If the given string is shorter than the mask, it is padded
- with 0s on the right; for example, specifying a mask value of 0x41 is
- the same as specifying:
+ the mask. If the given string is shorter than the mask, it is padded
+ with 0s on the right; for example, specifying a mask value of 0x41 is
+ the same as specifying::
- 0x4100000000000000000000000000000000000000000000000000000000000000
+ 0x4100000000000000000000000000000000000000000000000000000000000000
- Keep in mind that the mask reads from left to right (i.e., most
- significant to least significant bit in big endian order), so the mask
- above identifies device numbers 1 and 7 (01000001).
+ Keep in mind that the mask reads from left to right (i.e., most
+ significant to least significant bit in big endian order), so the mask
+ above identifies device numbers 1 and 7 (01000001).
- If the string is longer than the mask, the operation is terminated with
- an error (EINVAL).
+ If the string is longer than the mask, the operation is terminated with
+ an error (EINVAL).
* Individual bits in the mask can be switched on and off by specifying
- each bit number to be switched in a comma separated list. Each bit
- number string must be prepended with a ('+') or minus ('-') to indicate
- the corresponding bit is to be switched on ('+') or off ('-'). Some
- valid values are:
+ each bit number to be switched in a comma separated list. Each bit
+ number string must be prepended with a ('+') or minus ('-') to indicate
+ the corresponding bit is to be switched on ('+') or off ('-'). Some
+ valid values are:
- "+0" switches bit 0 on
- "-13" switches bit 13 off
- "+0x41" switches bit 65 on
- "-0xff" switches bit 255 off
+ - "+0" switches bit 0 on
+ - "-13" switches bit 13 off
+ - "+0x41" switches bit 65 on
+ - "-0xff" switches bit 255 off
- The following example:
- +0,-6,+0x47,-0xf0
+ The following example:
- Switches bits 0 and 71 (0x47) on
- Switches bits 6 and 240 (0xf0) off
+ +0,-6,+0x47,-0xf0
- Note that the bits not specified in the list remain as they were before
- the operation.
+ Switches bits 0 and 71 (0x47) on
+
+ Switches bits 6 and 240 (0xf0) off
+
+ Note that the bits not specified in the list remain as they were before
+ the operation.
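+
+      As an illustration, bits 0 and 65 could be switched on and bit 6
+      switched off in the adapter mask at run time with (hypothetical bit
+      numbers)::
+
+        echo +0,-6,+0x41 > /sys/bus/ap/apmask
+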
2. The masks can also be changed at boot time via parameters on the kernel
command line like this:
- ap.apmask=0xffff ap.aqmask=0x40
+ ap.apmask=0xffff ap.aqmask=0x40
- This would create the following masks:
+ This would create the following masks::
- apmask:
- 0xffff000000000000000000000000000000000000000000000000000000000000
+ apmask:
+ 0xffff000000000000000000000000000000000000000000000000000000000000
- aqmask:
- 0x4000000000000000000000000000000000000000000000000000000000000000
+ aqmask:
+ 0x4000000000000000000000000000000000000000000000000000000000000000
- Resulting in these two pools:
+ Resulting in these two pools::
- default drivers pool: adapter 0-15, domain 1
- alternate drivers pool: adapter 16-255, domains 0, 2-255
+ default drivers pool: adapter 0-15, domain 1
+ alternate drivers pool: adapter 16-255, domains 0, 2-255
- Securing the APQNs for our example:
- ----------------------------------
+Securing the APQNs for our example
+----------------------------------
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
- APQNs can either be removed from the default masks:
+ APQNs can either be removed from the default masks::
echo -5,-6 > /sys/bus/ap/apmask
echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
- Or the masks can be set as follows:
+ Or the masks can be set as follows::
echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
> apmask
@@ -620,19 +648,19 @@ These are the steps:
This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
sysfs directory for the vfio_ap device driver will now contain symbolic links
- to the AP queue devices bound to it:
-
- /sys/bus/ap
- ... [drivers]
- ...... [vfio_ap]
- ......... [05.0004]
- ......... [05.0047]
- ......... [05.00ab]
- ......... [05.00ff]
- ......... [06.0004]
- ......... [06.0047]
- ......... [06.00ab]
- ......... [06.00ff]
+ to the AP queue devices bound to it::
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [vfio_ap]
+ ......... [05.0004]
+ ......... [05.0047]
+ ......... [05.00ab]
+ ......... [05.00ff]
+ ......... [06.0004]
+ ......... [06.0047]
+ ......... [06.00ab]
+ ......... [06.00ff]
Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
can be bound to the vfio_ap device driver. The reason for this is to
@@ -645,96 +673,96 @@ These are the steps:
queue device can be read from the parent card's sysfs directory. For example,
to see the hardware type of the queue 05.0004:
- cat /sys/bus/ap/devices/card05/hwtype
+ cat /sys/bus/ap/devices/card05/hwtype
The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
vfio_ap device driver.
3. Create the mediated devices needed to configure the AP matrixes for the
three guests and to provide an interface to the vfio_ap driver for
- use by the guests:
+ use by the guests::
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
- --------- create
- --------- [devices]
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+ --------- create
+ --------- [devices]
- To create the mediated devices for the three guests:
+ To create the mediated devices for the three guests::
uuidgen > create
uuidgen > create
uuidgen > create
- or
+ or
- echo $uuid1 > create
- echo $uuid2 > create
- echo $uuid3 > create
+ echo $uuid1 > create
+ echo $uuid2 > create
+ echo $uuid3 > create
This will create three mediated devices in the [devices] subdirectory named
after the UUID written to the create attribute file. We call them $uuid1,
- $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
-
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough]
- --------- [devices]
- ------------ [$uuid1]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- --------------- unassign_control_domain
- --------------- unassign_domain
-
- ------------ [$uuid2]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
-
- ------------ [$uuid3]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ --------------- unassign_control_domain
+ --------------- unassign_domain
+
+ ------------ [$uuid2]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+ ------------ [$uuid3]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
4. The administrator now needs to configure the matrixes for the mediated
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
- This is how the matrix is configured for Guest1:
+ This is how the matrix is configured for Guest1::
echo 5 > assign_adapter
echo 6 > assign_adapter
echo 4 > assign_domain
echo 0xab > assign_domain
- Control domains can similarly be assigned using the assign_control_domain
- sysfs file.
+ Control domains can similarly be assigned using the assign_control_domain
+ sysfs file.
- If a mistake is made configuring an adapter, domain or control domain,
- you can use the unassign_xxx files to unassign the adapter, domain or
- control domain.
+ If a mistake is made configuring an adapter, domain or control domain,
+ you can use the unassign_xxx files to unassign the adapter, domain or
+ control domain.
- To display the matrix configuration for Guest1:
+ To display the matrix configuration for Guest1::
- cat matrix
+ cat matrix
- This is how the matrix is configured for Guest2:
+ This is how the matrix is configured for Guest2::
echo 5 > assign_adapter
echo 0x47 > assign_domain
echo 0xff > assign_domain
- This is how the matrix is configured for Guest3:
+ This is how the matrix is configured for Guest3::
echo 6 > assign_adapter
echo 0x47 > assign_domain
@@ -783,24 +811,24 @@ These are the steps:
configured for the system. If a control domain number higher than the maximum
is specified, the operation will terminate with an error (ENODEV).
-5. Start Guest1:
+5. Start Guest1::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
-7. Start Guest2:
+6. Start Guest2::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
-7. Start Guest3:
+7. Start Guest3::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
When the guest is shut down, the mediated matrix devices may be removed.
-Using our example again, to remove the mediated matrix device $uuid1:
+Using our example again, to remove the mediated matrix device $uuid1::
/sys/devices/vfio_ap/matrix/
--- [mdev_supported_types]
@@ -809,18 +837,19 @@ Using our example again, to remove the mediated matrix device $uuid1:
------------ [$uuid1]
--------------- remove
+::
echo 1 > remove
- This will remove all of the mdev matrix device's sysfs structures including
- the mdev device itself. To recreate and reconfigure the mdev matrix device,
- all of the steps starting with step 3 will have to be performed again. Note
- that the remove will fail if a guest using the mdev is still running.
+This will remove all of the mdev matrix device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the mdev matrix device,
+all of the steps starting with step 3 will have to be performed again. Note
+that the remove will fail if a guest using the mdev is still running.
- It is not necessary to remove an mdev matrix device, but one may want to
- remove it if no guest will use it during the remaining lifetime of the linux
- host. If the mdev matrix device is removed, one may want to also reconfigure
- the pool of adapters and queues reserved for use by the default drivers.
+It is not necessary to remove an mdev matrix device, but one may want to
+remove it if no guest will use it during the remaining lifetime of the linux
+host. If the mdev matrix device is removed, one may want to also reconfigure
+the pool of adapters and queues reserved for use by the default drivers.
Limitations
===========
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.rst
index 2be11ad864ff..1f6d0b56d53e 100644
--- a/Documentation/s390/vfio-ccw.txt
+++ b/Documentation/s390/vfio-ccw.rst
@@ -1,3 +1,4 @@
+==================================
vfio-ccw: the basic infrastructure
==================================
@@ -11,9 +12,11 @@ virtual machine, while vfio is the means.
Different than other hardware architectures, s390 has defined a unified
I/O access method, which is so called Channel I/O. It has its own access
patterns:
+
- Channel programs run asynchronously on a separate (co)processor.
- The channel subsystem will access any memory designated by the caller
in the channel program directly, i.e. there is no iommu involved.
+
Thus when we introduce vfio support for these devices, we realize it
with a mediated device (mdev) implementation. The vfio mdev will be
added to an iommu group, so as to make itself able to be managed by the
@@ -24,6 +27,7 @@ to perform I/O instructions.
This document does not intend to explain the s390 I/O architecture in
every detail. More information/reference could be found here:
+
- A good start to know Channel I/O in general:
https://en.wikipedia.org/wiki/Channel_I/O
- s390 architecture:
@@ -80,6 +84,7 @@ until interrupted. The I/O completion result is received by the
interrupt handler in the form of interrupt response block (IRB).
Back to vfio-ccw, in short:
+
- ORBs and channel programs are built in guest kernel (with guest
physical addresses).
- ORBs and channel programs are passed to the host kernel.
@@ -106,6 +111,7 @@ it gets sent to hardware.
Within this implementation, we have two drivers for two types of
devices:
+
- The vfio_ccw driver for the physical subchannel device.
This is an I/O subchannel driver for the real subchannel device. It
realizes a group of callbacks and registers to the mdev framework as a
@@ -137,7 +143,7 @@ devices:
vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
backend for the physical devices to pin and unpin pages by demand.
-Below is a high Level block diagram.
+Below is a high-level block diagram::
+-------------+
| |
@@ -158,6 +164,7 @@ Below is a high Level block diagram.
+-------------+
The process of how these work together.
+
1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
physical device (with callbacks) to mdev framework.
When vfio_ccw probing the subchannel device, it registers device
@@ -178,17 +185,17 @@ vfio-ccw I/O region
An I/O region is used to accept channel program request from user
space and store I/O interrupt result for user space to retrieve. The
-definition of the region is:
-
-struct ccw_io_region {
-#define ORB_AREA_SIZE 12
- __u8 orb_area[ORB_AREA_SIZE];
-#define SCSW_AREA_SIZE 12
- __u8 scsw_area[SCSW_AREA_SIZE];
-#define IRB_AREA_SIZE 96
- __u8 irb_area[IRB_AREA_SIZE];
- __u32 ret_code;
-} __packed;
+definition of the region is::
+
+ struct ccw_io_region {
+ #define ORB_AREA_SIZE 12
+ __u8 orb_area[ORB_AREA_SIZE];
+ #define SCSW_AREA_SIZE 12
+ __u8 scsw_area[SCSW_AREA_SIZE];
+ #define IRB_AREA_SIZE 96
+ __u8 irb_area[IRB_AREA_SIZE];
+ __u32 ret_code;
+ } __packed;
While starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the Virtual
@@ -205,7 +212,7 @@ vfio-ccw follows what vfio-pci did on the s390 platform and uses
vfio-iommu-type1 as the vfio iommu backend.
* CCW translation APIs
- A group of APIs (start with 'cp_') to do CCW translation. The CCWs
+ A group of APIs (start with `cp_`) to do CCW translation. The CCWs
passed in by a user space program are organized with their guest
physical memory addresses. These APIs will copy the CCWs into kernel
space, and assemble a runnable kernel channel program by updating the
@@ -217,12 +224,14 @@ vfio-iommu-type1 as the vfio iommu backend.
This driver utilizes the CCW translation APIs and introduces
vfio_ccw, which is the driver for the I/O subchannel devices you want
to pass through.
- vfio_ccw implements the following vfio ioctls:
+ vfio_ccw implements the following vfio ioctls::
+
VFIO_DEVICE_GET_INFO
VFIO_DEVICE_GET_IRQ_INFO
VFIO_DEVICE_GET_REGION_INFO
VFIO_DEVICE_RESET
VFIO_DEVICE_SET_IRQS
+
This provides an I/O region, so that the user space program can pass a
channel program to the kernel, to do further CCW translation before
issuing them to a real device.
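+
+  A minimal sketch of how user space might submit a request through that
+  region (illustrative only; orb, scsw, vfio_dev_fd and region_offset are
+  assumed to have been set up beforehand, e.g. the offset obtained via
+  VFIO_DEVICE_GET_REGION_INFO)::
+
+    struct ccw_io_region io_region = {};
+
+    /* Pass the guest ORB and SCSW to the kernel for translation. */
+    memcpy(io_region.orb_area, &orb, sizeof(orb));
+    memcpy(io_region.scsw_area, &scsw, sizeof(scsw));
+    if (pwrite(vfio_dev_fd, &io_region, sizeof(io_region),
+               region_offset) != sizeof(io_region))
+            /* io_region.ret_code can then be examined for the reason */
+            perror("ccw_io_region write");
+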
@@ -236,32 +245,49 @@ bit more detail how an I/O request triggered by the QEMU guest will be
handled (without error handling).
Explanation:
-Q1-Q7: QEMU side process.
-K1-K5: Kernel side process.
-Q1. Get I/O region info during initialization.
-Q2. Setup event notifier and handler to handle I/O completion.
+- Q1-Q7: QEMU side process.
+- K1-K6: Kernel side process.
+
+Q1.
+ Get I/O region info during initialization.
+
+Q2.
+ Setup event notifier and handler to handle I/O completion.
... ...
-Q3. Intercept a ssch instruction.
-Q4. Write the guest channel program and ORB to the I/O region.
- K1. Copy from guest to kernel.
- K2. Translate the guest channel program to a host kernel space
- channel program, which becomes runnable for a real device.
- K3. With the necessary information contained in the orb passed in
- by QEMU, issue the ccwchain to the device.
- K4. Return the ssch CC code.
-Q5. Return the CC code to the guest.
+Q3.
+ Intercept a ssch instruction.
+Q4.
+ Write the guest channel program and ORB to the I/O region.
+
+ K1.
+ Copy from guest to kernel.
+ K2.
+ Translate the guest channel program to a host kernel space
+ channel program, which becomes runnable for a real device.
+ K3.
+ With the necessary information contained in the orb passed in
+ by QEMU, issue the ccwchain to the device.
+ K4.
+ Return the ssch CC code.
+Q5.
+ Return the CC code to the guest.
... ...
- K5. Interrupt handler gets the I/O result and write the result to
- the I/O region.
- K6. Signal QEMU to retrieve the result.
-Q6. Get the signal and event handler reads out the result from the I/O
+ K5.
+ Interrupt handler gets the I/O result and write the result to
+ the I/O region.
+ K6.
+ Signal QEMU to retrieve the result.
+
+Q6.
+ Get the signal and event handler reads out the result from the I/O
region.
-Q7. Update the irb for the guest.
+Q7.
+ Update the irb for the guest.
Limitations
-----------
@@ -295,6 +321,6 @@ Reference
1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
-4. Documentation/s390/cds.txt
+4. Documentation/s390/cds.rst
5. Documentation/vfio.txt
6. Documentation/vfio-mediated-device.txt
diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.rst
index b064aa59714d..54e8e7caf7e7 100644
--- a/Documentation/s390/zfcpdump.txt
+++ b/Documentation/s390/zfcpdump.rst
@@ -1,4 +1,6 @@
+==================================
The s390 SCSI dump tool (zfcpdump)
+==================================
System z machines (z900 or higher) provide hardware support for creating system
dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index b14e03ff3528..a7514343b660 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -652,7 +652,7 @@ CONTENTS
-deadline tasks cannot have an affinity mask smaller than the entire
root_domain they are created on. However, affinities can be specified
- through the cpuset facility (Documentation/cgroup-v1/cpusets.txt).
+ through the cpuset facility (Documentation/cgroup-v1/cpusets.rst).
5.1 SCHED_DEADLINE and cpusets HOWTO
------------------------------------
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index edd861c94c1b..d1328890ef28 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -215,7 +215,7 @@ SCHED_BATCH) tasks.
These options need CONFIG_CGROUPS to be defined, and let the administrator
create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See
- Documentation/cgroup-v1/cgroups.txt for more information about this filesystem.
+ Documentation/cgroup-v1/cgroups.rst for more information about this filesystem.
When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
group created using the pseudo filesystem. See example steps below to create
diff --git a/Documentation/scheduler/sched-pelt.c b/Documentation/scheduler/sched-pelt.c
index e4219139386a..7238b355919c 100644
--- a/Documentation/scheduler/sched-pelt.c
+++ b/Documentation/scheduler/sched-pelt.c
@@ -20,7 +20,8 @@ void calc_runnable_avg_yN_inv(void)
int i;
unsigned int x;
- printf("static const u32 runnable_avg_yN_inv[] = {");
+ /* To silence -Wunused-but-set-variable warnings. */
+ printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
for (i = 0; i < HALFLIFE; i++) {
x = ((1UL<<32)-1)*pow(y, i);
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index d8fce3e78457..c09f7a3fee66 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -133,7 +133,7 @@ This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
to control the CPU time reserved for each control group.
For more information on working with control groups, you should read
-Documentation/cgroup-v1/cgroups.txt as well.
+Documentation/cgroup-v1/cgroups.rst as well.
Group settings are checked against the following limits in order to keep the
configuration schedulable:
diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 2cd0e273cc9a..3d1cca287aa4 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -69,15 +69,16 @@ descriptors by adding their identifier to the format string
algorithm (field format: [<hash algo>:]digest, where the digest
prefix is shown only if the hash algorithm is not SHA1 or MD5);
- 'n-ng': the name of the event, without size limitations;
- - 'sig': the file signature.
+ - 'sig': the file signature;
+ - 'buf': the buffer data that was used to generate the hash, without size limitations;
Below, there is the list of defined template descriptors:
- "ima": its format is ``d|n``;
- "ima-ng" (default): its format is ``d-ng|n-ng``;
- - "ima-sig": its format is ``d-ng|n-ng|sig``.
-
+ - "ima-sig": its format is ``d-ng|n-ng|sig``;
+ - "ima-buf": its format is ``d-ng|n-ng|buf``;
Use
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9521c4207f01..1b3c907980ad 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -57,9 +57,9 @@ Each key has a number of attributes:
type provides an operation to perform a match between the description on a
key and a criterion string.
- * Each key has an owner user ID, a group ID and a permissions mask. These
- are used to control what a process may do to a key from userspace, and
- whether a kernel service will be able to find the key.
+ * Each key has an owner user ID, a group ID and an ACL. These are used to
+ control what a process may do to a key from userspace, and whether a
+ kernel service will be able to find the key.
* Each key can be set to expire at a specific time by the key type's
instantiation function. Keys can also be immortal.
@@ -198,43 +198,110 @@ The key service provides a number of features besides keys:
Key Access Permissions
======================
-Keys have an owner user ID, a group access ID, and a permissions mask. The mask
-has up to eight bits each for possessor, user, group and other access. Only
-six of each set of eight bits are defined. These permissions granted are:
+Keys have an owner user ID, a group ID and an ACL. The ACL is made up of a
+sequence of ACEs that each contain three elements:
- * View
+ * The type of subject.
+ * The subject.
- This permits a key or keyring's attributes to be viewed - including key
- type and description.
+ These two together indicate the subject to whom the permits are granted.
+ The type can be one of:
- * Read
+ * ``KEY_ACE_SUBJ_STANDARD``
- This permits a key's payload to be viewed or a keyring's list of linked
- keys.
+ The subject is a standard 'macro' type. The subject can be one of:
+
+ * ``KEY_ACE_EVERYONE``
+
+ The permits are granted to everyone. It replaces the old 'other'
+ type on the assumption that you wouldn't grant a permission to
+ 'other' that you wouldn't grant to everyone else.
+
+ * ``KEY_ACE_OWNER``
+
+ The permits are granted to the owner of the key (key->uid).
+
+ * ``KEY_ACE_GROUP``
+
+ The permits are granted to the key's group (key->gid).
+
+ * ``KEY_ACE_POSSESSOR``
+
+ The permits are granted to anyone who possesses the key.
+
+ * The set of permits granted to the subject. These include:
+
+ * ``KEY_ACE_VIEW``
+
+ This permits a key or keyring's attributes to be viewed - including the
+ key type and description.
+
+ * ``KEY_ACE_READ``
+
+ This permits a key's payload to be viewed or a keyring's list of linked
+ keys.
+
+ * ``KEY_ACE_WRITE``
+
+ This permits a key's payload to be instantiated or updated, or it allows
+ a link to be added to or removed from a keyring.
+
+ * ``KEY_ACE_SEARCH``
+
+ This permits keyrings to be searched and keys to be found. Searches can
+ only recurse into nested keyrings that have search permission set.
+
+ * ``KEY_ACE_LINK``
+
+ This permits a key or keyring to be linked to. To create a link from a
+ keyring to a key, a process must have Write permission on the keyring
+ and Link permission on the key.
+
+ * ``KEY_ACE_SET_SECURITY``
+
+ This permits a key's UID, GID and permissions mask to be changed.
- * Write
+ * ``KEY_ACE_INVAL``
- This permits a key's payload to be instantiated or updated, or it allows a
- link to be added to or removed from a keyring.
+ This permits a key to be invalidated with KEYCTL_INVALIDATE.
- * Search
+ * ``KEY_ACE_REVOKE``
- This permits keyrings to be searched and keys to be found. Searches can
- only recurse into nested keyrings that have search permission set.
+ This permits a key to be revoked with KEYCTL_REVOKE.
- * Link
+ * ``KEY_ACE_JOIN``
- This permits a key or keyring to be linked to. To create a link from a
- keyring to a key, a process must have Write permission on the keyring and
- Link permission on the key.
+ This permits a keyring to be joined as a session by
+ KEYCTL_JOIN_SESSION_KEYRING or KEYCTL_SESSION_TO_PARENT.
- * Set Attribute
+ * ``KEY_ACE_CLEAR``
- This permits a key's UID, GID and permissions mask to be changed.
+ This permits a keyring to be cleared.
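+
+An ACL might thus be pictured as follows (a sketch; the initializer helpers
+shown are hypothetical and merely illustrate the subject-plus-permits
+structure described above)::
+
+	struct key_acl example_acl = {
+		.nr_ace	= 2,
+		.aces	= {
+			/* possessor: may view, read and search the key */
+			KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ |
+					  KEY_ACE_SEARCH),
+			/* owner: may view it and change its security details */
+			KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_SET_SECURITY),
+		}
+	};
+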
For changing the ownership, group ID or permissions mask, being the owner of
the key or having the sysadmin capability is sufficient.
+The legacy KEYCTL_SETPERM and KEYCTL_DESCRIBE functions can only see/generate
+View, Read, Write, Search, Link and SetAttr permits, and do this for each of
+possessor, user, group and other permission sets as a 32-bit flag mask. These
+will be approximated/inferred:
+
+ SETPERM Permit Implied ACE Permit
+ =============== =======================
+ Search Inval, Join
+ Write Revoke, Clear
+ Setattr Set Security, Revoke
+
+ ACE Permit Described as
+ =============== =======================
+ Inval Search
+ Join Search
+ Revoke Write (unless Setattr)
+ Clear Write
+ Set Security Setattr
+
+'Other' will be approximated as/inferred from the 'Everyone' subject.
+
SELinux Support
===============
@@ -433,6 +500,10 @@ The main syscalls are:
/sbin/request-key will be invoked in an attempt to obtain a key. The
callout_info string will be passed as an argument to the program.
+ To link a key into the destination keyring the key must grant link
+ permission on the key to the caller and the keyring must grant write
+ permission.
+
See also Documentation/security/keys/request-key.rst.
@@ -577,6 +648,27 @@ The keyctl syscall functions are:
added.
+ * Move a key from one keyring to another::
+
+ long keyctl(KEYCTL_MOVE,
+ key_serial_t id,
+ key_serial_t from_ring_id,
+ key_serial_t to_ring_id,
+ unsigned int flags);
+
+ Move the key specified by "id" from the keyring specified by
+ "from_ring_id" to the keyring specified by "to_ring_id". If the two
+ keyrings are the same, nothing is done.
+
+ "flags" can have KEYCTL_MOVE_EXCL set in it to cause the operation to fail
+ with EEXIST if a matching key exists in the destination keyring, otherwise
+ such a key will be replaced.
+
+ A process must have link permission on the key and write permission on
+ both keyrings for this function to succeed. Any errors that can occur
+ from KEYCTL_LINK also apply on the destination keyring here.
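+
+ A minimal sketch of the call (assuming the libkeyutils keyctl() wrapper;
+ the key and keyring IDs are placeholders)::
+
+	/* Move "key" into "dest_ring", failing if a match already exists. */
+	long ret = keyctl(KEYCTL_MOVE, key, KEY_SPEC_SESSION_KEYRING,
+			  dest_ring, KEYCTL_MOVE_EXCL);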
+
+
* Unlink a key or keyring from another keyring::
long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key);
@@ -1059,7 +1151,8 @@ payload contents" for more information.
struct key *request_key(const struct key_type *type,
const char *description,
- const char *callout_info);
+ const char *callout_info,
+ struct key_acl *acl);
This is used to request a key or keyring with a description that matches
the description specified according to the key type's match_preparse()
@@ -1074,52 +1167,50 @@ payload contents" for more information.
If successful, the key will have been attached to the default keyring for
implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
+ If a key is created, it will be given the specified ACL.
+
See also Documentation/security/keys/request-key.rst.
+ * To search for a key in a specific domain, call:
+
+ struct key *request_key_tag(const struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag,
+ const char *callout_info,
+ struct key_acl *acl);
+
+ This is identical to request_key(), except that a domain tag may be
+ specified that causes the search algorithm to match only keys bearing that
+ tag. The domain_tag may be NULL, specifying a global domain that is
+ separate from any nominated domain.
+
+
* To search for a key, passing auxiliary data to the upcaller, call::
struct key *request_key_with_auxdata(const struct key_type *type,
const char *description,
+ struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux);
-
- This is identical to request_key(), except that the auxiliary data is
- passed to the key_type->request_key() op if it exists, and the callout_info
- is a blob of length callout_len, if given (the length may be 0).
-
-
- * A key can be requested asynchronously by calling one of::
-
- struct key *request_key_async(const struct key_type *type,
- const char *description,
- const void *callout_info,
- size_t callout_len);
-
- or::
-
- struct key *request_key_async_with_auxdata(const struct key_type *type,
- const char *description,
- const char *callout_info,
- size_t callout_len,
- void *aux);
+ void *aux,
+ struct key_acl *acl);
- which are asynchronous equivalents of request_key() and
- request_key_with_auxdata() respectively.
+ This is identical to request_key_tag(), except that the auxiliary data is
+ passed to the key_type->request_key() op if it exists, and the
+ callout_info is a blob of length callout_len, if given (the length may be
+ 0).
- These two functions return with the key potentially still under
- construction. To wait for construction completion, the following should be
- called::
- int wait_for_key_construction(struct key *key, bool intr);
+ * To search for a key under RCU conditions, call::
- The function will wait for the key to finish being constructed and then
- invokes key_validate() to return an appropriate value to indicate the state
- of the key (0 indicates the key is usable).
+ struct key *request_key_rcu(const struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag);
- If intr is true, then the wait can be interrupted by a signal, in which
- case error ERESTARTSYS will be returned.
+ which is similar to request_key_tag() except that it does not check for
+ keys that are under construction and it will not call out to userspace to
+ construct a key if it can't find a match.
* When it is no longer required, the key should be released using::
@@ -1159,11 +1250,13 @@ payload contents" for more information.
key_ref_t keyring_search(key_ref_t keyring_ref,
const struct key_type *type,
- const char *description)
+ const char *description,
+ bool recurse)
- This searches the keyring tree specified for a matching key. Error ENOKEY
- is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful,
- the returned key will need to be released.
+ This searches the specified keyring alone (recurse == false) or the
+ keyring tree rooted at it (recurse == true) for a matching key. Error
+ ENOKEY is returned upon failure (use IS_ERR/PTR_ERR to determine). If
+ successful, the returned key will need to be released.
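For illustration, a non-recursive search might look like the following
sketch ('my_keyring' is a placeholder for a keyring the caller already
holds)::

	key_ref_t kref;

	/* Search 'my_keyring' itself, not any keyrings linked into it;
	 * 'true' marks the reference as possessed. */
	kref = keyring_search(make_key_ref(my_keyring, true),
			      &key_type_user, "foo:bar", false);
	if (IS_ERR(kref))
		return PTR_ERR(kref);
	key_ref_put(kref);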
The possession attribute from the keyring reference is used to control
access through the permissions mask and is propagated to the returned key
@@ -1174,7 +1267,7 @@ payload contents" for more information.
struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
const struct cred *cred,
- key_perm_t perm,
+ struct key_acl *acl,
struct key_restriction *restrict_link,
unsigned long flags,
struct key *dest);
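A minimal sketch of such an allocation, following the signature above
('my_acl' is a hypothetical ACL pointer; the helpers are standard kernel
credentials accessors)::

	struct key *keyring;

	/* Allocate an unrestricted, quota-exempt keyring owned by the
	 * current credentials and governed by 'my_acl'. */
	keyring = keyring_alloc("_my_ring", current_uid(), current_gid(),
				current_cred(), my_acl, NULL,
				KEY_ALLOC_NOT_IN_QUOTA, NULL);
	if (IS_ERR(keyring))
		return PTR_ERR(keyring);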
diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst
index 600ad67d1707..f356fd06c8d5 100644
--- a/Documentation/security/keys/request-key.rst
+++ b/Documentation/security/keys/request-key.rst
@@ -11,30 +11,32 @@ The process starts by either the kernel requesting a service by calling
struct key *request_key(const struct key_type *type,
const char *description,
- const char *callout_info);
+ const char *callout_info,
+ struct key_acl *acl);
+
+or::
+
+ struct key *request_key_tag(const struct key_type *type,
+ const char *description,
+ const struct key_tag *domain_tag,
+ const char *callout_info,
+ struct key_acl *acl);
or::
struct key *request_key_with_auxdata(const struct key_type *type,
const char *description,
+ const struct key_tag *domain_tag,
const char *callout_info,
size_t callout_len,
- void *aux);
+ void *aux,
+ struct key_acl *acl);
or::
- struct key *request_key_async(const struct key_type *type,
- const char *description,
- const char *callout_info,
- size_t callout_len);
-
-or::
-
- struct key *request_key_async_with_auxdata(const struct key_type *type,
- const char *description,
- const char *callout_info,
- size_t callout_len,
- void *aux);
+ struct key *request_key_rcu(const struct key_type *type,
+ const char *description,
+ const struct key_tag *domain_tag);
Or by userspace invoking the request_key system call::
@@ -48,14 +50,18 @@ does not need to link the key to a keyring to prevent it from being immediately
destroyed. The kernel interface returns a pointer directly to the key, and
it's up to the caller to destroy the key.
-The request_key*_with_auxdata() calls are like the in-kernel request_key*()
-calls, except that they permit auxiliary data to be passed to the upcaller (the
-default is NULL). This is only useful for those key types that define their
-own upcall mechanism rather than using /sbin/request-key.
+The request_key_tag() call is like the in-kernel request_key(), except that it
+also takes a domain tag that allows keys to be separated by namespace and
+killed off as a group.
+
+The request_key_with_auxdata() call is like the request_key_tag() call, except
+that it permits auxiliary data to be passed to the upcaller (the default is
+NULL). This is only useful for those key types that define their own upcall
+mechanism rather than using /sbin/request-key.
-The two async in-kernel calls may return keys that are still in the process of
-being constructed. The two non-async ones will wait for construction to
-complete first.
+The request_key_rcu() call is like the request_key_tag() call, except that it
+doesn't check for keys that are under construction and doesn't attempt to
+construct missing keys.
The userspace interface links the key to a keyring associated with the process
to prevent the key from going away, and returns the serial number of the key to
@@ -148,7 +154,7 @@ The Search Algorithm
A search of any particular keyring proceeds in the following fashion:
- 1) When the key management code searches for a key (keyring_search_aux) it
+ 1) When the key management code searches for a key (keyring_search_rcu) it
firstly calls key_permission(SEARCH) on the keyring it's starting with;
if this denies permission, it doesn't search further.
@@ -167,6 +173,9 @@ The process stops immediately a valid key is found with permission granted to
use it. Any error from a previous match attempt is discarded and the key is
returned.
+When request_key() is invoked, if CONFIG_KEYS_REQUEST_CACHE=y, a per-task
+one-key cache is first checked for a match.
+
When search_process_keyrings() is invoked, it performs the following searches
until one succeeds:
@@ -186,7 +195,9 @@ until one succeeds:
c) The calling process's session keyring is searched.
The moment one succeeds, all pending errors are discarded and the found key is
-returned.
+returned. If CONFIG_KEYS_REQUEST_CACHE=y, then that key is placed in the
+per-task cache, displacing the previous key. The cache is cleared on exit or
+just prior to resumption of userspace.
Only if all these fail does the whole thing fail with the highest priority
error. Note that several errors may have come from LSM.
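To see the whole sequence from the userspace side, a minimal example using
the libkeyutils request_key() wrapper (the type, description and callout
data are placeholders)::

	#include <keyutils.h>
	#include <stdio.h>

	int main(void)
	{
		/* Runs the search order above; on a complete miss the
		 * kernel upcalls (by default via /sbin/request-key) with
		 * the callout data to construct the key. */
		key_serial_t key = request_key("user", "foo:bar",
					       "callout-data",
					       KEY_SPEC_SESSION_KEYRING);
		if (key < 0) {
			perror("request_key");
			return 1;
		}
		printf("key serial: %d\n", key);
		return 0;
	}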
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index f0c86fbb3b48..5af8b131ccbc 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -23,7 +23,6 @@ show up in /proc/sys/kernel:
- auto_msgmni
- bootloader_type [ X86 only ]
- bootloader_version [ X86 only ]
-- callhome [ S390 only ]
- cap_last_cap
- core_pattern
- core_pipe_limit
@@ -171,21 +170,6 @@ Documentation/x86/boot.txt for additional information.
==============================================================
-callhome:
-
-Controls the kernel's callhome behavior in case of a kernel panic.
-
-The s390 hardware allows an operating system to send a notification
-to a service organization (callhome) in case of an operating system panic.
-
-When the value in this file is 0 (which is the default behavior)
-nothing happens in case of a kernel panic. If this value is set to "1"
-the complete kernel oops message is send to the IBM customer service
-organization in case the mainframe the Linux operating system is running
-on has a service contract with IBM.
-
-==============================================================
-
cap_last_cap
Highest valid capability of the running kernel. Exports
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index db0b9d8619f1..5f3c74dcad43 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -24,7 +24,7 @@ Documentation/memory-barriers.txt
=========================
저자: David Howells <dhowells@redhat.com>
- Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
index 5cae13e9a08b..0d830edae8fe 100644
--- a/Documentation/vm/numa.rst
+++ b/Documentation/vm/numa.rst
@@ -67,7 +67,7 @@ nodes. Each emulated node will manage a fraction of the underlying cells'
physical memory. NUMA emulation is useful for testing NUMA kernel and
application features on non-NUMA platforms, and as a sort of memory resource
management mechanism when used together with cpusets.
-[see Documentation/cgroup-v1/cpusets.txt]
+[see Documentation/cgroup-v1/cpusets.rst]
For each node with memory, Linux constructs an independent memory management
subsystem, complete with its own free page lists, in-use page lists, usage
@@ -114,7 +114,7 @@ allocation behavior using Linux NUMA memory policy. [see
System administrators can restrict the CPUs and nodes' memories that a non-
privileged user can specify in the scheduling or NUMA commands and functions
-using control groups and CPUsets. [see Documentation/cgroup-v1/cpusets.txt]
+using control groups and CPUsets. [see Documentation/cgroup-v1/cpusets.rst]
On architectures that do not hide memoryless nodes, Linux will include only
zones [nodes] with memory in the zonelists. This means that for a memoryless
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
index f68d61335abb..35bba27d5fff 100644
--- a/Documentation/vm/page_migration.rst
+++ b/Documentation/vm/page_migration.rst
@@ -41,7 +41,7 @@ locations.
Larger installations usually partition the system using cpusets into
sections of nodes. Paul Jackson has equipped cpusets with the ability to
move pages when a task is moved to another cpuset (See
-Documentation/cgroup-v1/cpusets.txt).
+Documentation/cgroup-v1/cpusets.rst).
Cpusets allows the automation of process locality. If a task is moved to
a new cpuset then also all its pages are moved with it so that the
performance of the process does not sink dramatically. Also the pages
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
index b8e29f977f2d..c6d94118fbcc 100644
--- a/Documentation/vm/unevictable-lru.rst
+++ b/Documentation/vm/unevictable-lru.rst
@@ -98,7 +98,7 @@ Memory Control Group Interaction
--------------------------------
The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroup-v1/memory.txt] by extending the
+memory controller; see Documentation/cgroup-v1/memory.rst] by extending the
lru_list enum.
The memory controller data structure automatically gets a per-zone unevictable
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index 24596c8210b5..ed6d4b0cf62c 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -35,7 +35,7 @@ page fault handler::
void do_page_fault(struct pt_regs *regs, unsigned long error_code)
in arch/x86/mm/fault.c. The parameters on the stack are set up by
-the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter
+the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
regs is a pointer to the saved registers on the stack, error_code
contains a reason code for the exception.
diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
index 6e28dbe818ab..8e9704f61017 100644
--- a/Documentation/x86/topology.rst
+++ b/Documentation/x86/topology.rst
@@ -49,6 +49,10 @@ Package-related topology information in the kernel:
The number of cores in a package. This information is retrieved via CPUID.
+ - cpuinfo_x86.x86_max_dies:
+
+ The number of dies in a package. This information is retrieved via CPUID.
+
- cpuinfo_x86.phys_proc_id:
The physical ID of the package. This information is retrieved via CPUID
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 74fbb78b3c67..a6926cd40f70 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -15,7 +15,7 @@ assign them to cpusets and their attached tasks. This is a way of limiting the
amount of system memory that is available to a certain class of tasks.
For more information on the features of cpusets, see
-Documentation/cgroup-v1/cpusets.txt.
+Documentation/cgroup-v1/cpusets.rst.
There are a number of different configurations you can use for your needs. For
more information on the numa=fake command line option and its various ways of
configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
@@ -40,7 +40,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
On node 3 totalpages: 131072
Now following the instructions for mounting the cpusets filesystem from
-Documentation/cgroup-v1/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
+Documentation/cgroup-v1/cpusets.rst, you can assign fake nodes (i.e. contiguous memory
address spaces) to individual cpusets::
[root@xroads /]# mkdir exampleset
diff --git a/MAINTAINERS b/MAINTAINERS
index 1080c7386678..369b2af4bd76 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1306,6 +1306,12 @@ S: Maintained
F: Documentation/devicetree/bindings/interrupt-controller/arm,vic.txt
F: drivers/irqchip/irq-vic.c
+AMAZON ANNAPURNA LABS FIC DRIVER
+M: Talel Shenhar <talel@amazon.com>
+S: Maintained
+F: Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
+F: drivers/irqchip/irq-al-fic.c
+
ARM SMMU DRIVERS
M: Will Deacon <will@kernel.org>
R: Robin Murphy <robin.murphy@arm.com>
@@ -4116,7 +4122,7 @@ W: http://www.bullopensource.org/cpuset/
W: http://oss.sgi.com/projects/cpusets/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git
S: Maintained
-F: Documentation/cgroup-v1/cpusets.txt
+F: Documentation/cgroup-v1/cpusets.rst
F: include/linux/cpuset.h
F: kernel/cgroup/cpuset.c
@@ -4251,6 +4257,7 @@ F: crypto/
F: drivers/crypto/
F: include/crypto/
F: include/linux/crypto*
+F: lib/crypto/
CRYPTOGRAPHIC RANDOM NUMBER GENERATOR
M: Neil Horman <nhorman@tuxdriver.com>
@@ -4707,6 +4714,7 @@ F: Documentation/devicetree/bindings/mfd/da90*.txt
F: Documentation/devicetree/bindings/input/da90??-onkey.txt
F: Documentation/devicetree/bindings/thermal/da90??-thermal.txt
F: Documentation/devicetree/bindings/regulator/da92*.txt
+F: Documentation/devicetree/bindings/regulator/slg51000.txt
F: Documentation/devicetree/bindings/watchdog/da90??-wdt.txt
F: Documentation/devicetree/bindings/sound/da[79]*.txt
F: drivers/gpio/gpio-da90??.c
@@ -4722,6 +4730,7 @@ F: drivers/power/supply/da9052-battery.c
F: drivers/power/supply/da91??-*.c
F: drivers/regulator/da903x.c
F: drivers/regulator/da9???-regulator.[ch]
+F: drivers/regulator/slg51000-regulator.[ch]
F: drivers/thermal/da90??-thermal.c
F: drivers/rtc/rtc-da90??.c
F: drivers/video/backlight/da90??_bl.c
@@ -6337,6 +6346,13 @@ L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-cpm.c
+FREESCALE IMX DDR PMU DRIVER
+M: Frank Li <Frank.li@nxp.com>
+L: linux-arm-kernel@lists.infradead.org
+S: Maintained
+F: drivers/perf/fsl_imx8_ddr_perf.c
+F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+
FREESCALE IMX LPI2C DRIVER
M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
@@ -6674,6 +6690,18 @@ L: kvm@vger.kernel.org
S: Supported
F: drivers/uio/uio_pci_generic.c
+GENERIC VDSO LIBRARY
+M: Andy Lutomirski <luto@kernel.org>
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Vincenzo Frascino <vincenzo.frascino@arm.com>
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
+S: Maintained
+F: lib/vdso/
+F: kernel/time/vsyscall.c
+F: include/vdso/
+F: include/asm-generic/vdso/vsyscall.h
+
GENWQE (IBM Generic Workqueue Card)
M: Frank Haverkamp <haver@linux.ibm.com>
S: Supported
@@ -7311,6 +7339,7 @@ F: arch/x86/include/asm/trace/hyperv.h
F: arch/x86/include/asm/hyperv-tlfs.h
F: arch/x86/kernel/cpu/mshyperv.c
F: arch/x86/hyperv
+F: drivers/clocksource/hyperv_timer.c
F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
@@ -7321,6 +7350,7 @@ F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
F: drivers/iommu/hyperv_iommu.c
F: net/vmw_vsock/hyperv_transport.c
+F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
F: include/uapi/linux/hyperv.h
F: tools/hv/
@@ -7810,7 +7840,7 @@ INGENIC JZ4780 NAND DRIVER
M: Harvey Hunt <harveyhuntnexus@gmail.com>
L: linux-mtd@lists.infradead.org
S: Maintained
-F: drivers/mtd/nand/raw/jz4780_*
+F: drivers/mtd/nand/raw/ingenic/
INOTIFY
M: Jan Kara <jack@suse.cz>
@@ -10137,7 +10167,7 @@ L: linux-leds@vger.kernel.org
S: Supported
F: drivers/leds/leds-mlxcpld.c
F: drivers/leds/leds-mlxreg.c
-F: Documentation/leds/leds-mlxcpld.txt
+F: Documentation/leds/leds-mlxcpld.rst
MELLANOX PLATFORM DRIVER
M: Vadim Pasternak <vadimp@mellanox.com>
@@ -13711,7 +13741,7 @@ L: linux-s390@vger.kernel.org
L: kvm@vger.kernel.org
S: Supported
F: drivers/s390/cio/vfio_ccw*
-F: Documentation/s390/vfio-ccw.txt
+F: Documentation/s390/vfio-ccw.rst
F: include/uapi/linux/vfio_ccw.h
S390 ZCRYPT DRIVER
@@ -13731,7 +13761,7 @@ S: Supported
F: drivers/s390/crypto/vfio_ap_drv.c
F: drivers/s390/crypto/vfio_ap_private.h
F: drivers/s390/crypto/vfio_ap_ops.c
-F: Documentation/s390/vfio-ap.txt
+F: Documentation/s390/vfio-ap.rst
S390 ZFCP DRIVER
M: Steffen Maier <maier@linux.ibm.com>
@@ -15511,6 +15541,7 @@ F: drivers/dma/tegra*
TEGRA I2C DRIVER
M: Laxman Dewangan <ldewangan@nvidia.com>
+R: Dmitry Osipenko <digetx@gmail.com>
S: Supported
F: drivers/i2c/busses/i2c-tegra.c
@@ -17503,6 +17534,12 @@ Q: https://patchwork.linuxtv.org/project/linux-media/list/
S: Maintained
F: drivers/media/dvb-frontends/zd1301_demod*
+ZHAOXIN PROCESSOR SUPPORT
+M: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: arch/x86/kernel/cpu/zhaoxin.c
+
ZPOOL COMPRESSED PAGE STORAGE API
M: Dan Streetman <ddstreet@ieee.org>
L: linux-mm@kvack.org
diff --git a/Makefile b/Makefile
index fabc127d127f..3e4868a6498b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 2
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Bobtail Squid
# *DOCUMENTATION*
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 150a1c5d6a2c..2144530d1428 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(long i, atomic64_t * v) \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
{ \
- unsigned long temp; \
+ s64 temp; \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%2,%0\n" \
@@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
} \
#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
{ \
- long temp, result; \
+ s64 temp, result; \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
}
#define ATOMIC64_FETCH_OP(op, asm_op) \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
{ \
- long temp, result; \
+ s64 temp, result; \
__asm__ __volatile__( \
"1: ldq_l %2,%1\n" \
" " #asm_op " %2,%3,%0\n" \
@@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long c, new, old;
+ s64 c, new, old;
smp_mb();
__asm__ __volatile__(
"1: ldq_l %[old],%[mem]\n"
@@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static inline long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long old, tmp;
+ s64 old, tmp;
smp_mb();
__asm__ __volatile__(
"1: ldq_l %[old],%[mem]\n"
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 33e904a05881..a813020d2f11 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -225,7 +225,7 @@ do_sigreturn(struct sigcontext __user *sc)
return;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void
@@ -253,7 +253,7 @@ do_rt_sigreturn(struct rt_sigframe __user *frame)
return;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index d0dccae53ba9..5f90df30be20 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -614,8 +614,7 @@ void
smp_imb(void)
{
/* Must wait for other processors to flush their icache before continuing. */
- if (on_each_cpu(ipi_imb, NULL, 1))
- printk(KERN_CRIT "smp_imb: timed out\n");
+ on_each_cpu(ipi_imb, NULL, 1);
}
EXPORT_SYMBOL(smp_imb);
@@ -630,9 +629,7 @@ flush_tlb_all(void)
{
/* Although we don't have any data to pass, we do want to
synchronize with the other processors. */
- if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) {
- printk(KERN_CRIT "flush_tlb_all: timed out\n");
- }
+ on_each_cpu(ipi_flush_tlb_all, NULL, 1);
}
#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
@@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm)
}
}
- if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) {
- printk(KERN_CRIT "flush_tlb_mm: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_mm, mm, 1);
preempt_enable();
}
@@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
data.mm = mm;
data.addr = addr;
- if (smp_call_function(ipi_flush_tlb_page, &data, 1)) {
- printk(KERN_CRIT "flush_tlb_page: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_page, &data, 1);
preempt_enable();
}
@@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
}
}
- if (smp_call_function(ipi_flush_icache_page, mm, 1)) {
- printk(KERN_CRIT "flush_icache_page: timed out\n");
- }
+ smp_call_function(ipi_flush_icache_page, mm, 1);
preempt_enable();
}
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index bc9627698796..f6b9664ac504 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -402,7 +402,7 @@ do_entDbg(struct pt_regs *regs)
{
die_if_kernel("Instruction fault", regs, 0, NULL);
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0);
}
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 188fc9256baf..741e61ef9d3f 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -221,13 +221,13 @@ retry:
up_read(&mm->mmap_sem);
/* Send a sigbus, regardless of whether we were in kernel
or user mode. */
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0);
if (!user_mode(regs))
goto no_context;
return;
do_sigsegv:
- force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0);
return;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index 310a4ce1dccc..1b1259c7d7d1 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -65,7 +65,7 @@ op_axp_setup(void)
model->reg_setup(&reg, ctr, &sys);
/* Configure the registers on all cpus. */
- (void)smp_call_function(model->cpu_setup, &reg, 1);
+ smp_call_function(model->cpu_setup, &reg, 1);
model->cpu_setup(&reg);
return 0;
}
@@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy)
static int
op_axp_start(void)
{
- (void)smp_call_function(op_axp_cpu_start, NULL, 1);
+ smp_call_function(op_axp_cpu_start, NULL, 1);
op_axp_cpu_start(NULL);
return 0;
}
@@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy)
static void
op_axp_stop(void)
{
- (void)smp_call_function(op_axp_cpu_stop, NULL, 1);
+ smp_call_function(op_axp_cpu_stop, NULL, 1);
op_axp_cpu_stop(NULL);
}
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 17cf1c657cb3..7298ce84762e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
*/
typedef struct {
- aligned_u64 counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(a) { (a) }
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- unsigned long long val;
+ s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
@@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v)
return val;
}
-static inline void atomic64_set(atomic64_t *v, long long a)
+static inline void atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
@@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a)
}
#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(long long a, atomic64_t *v) \
+static inline void atomic64_##op(s64 a, atomic64_t *v) \
{ \
- unsigned long long val; \
+ s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
@@ -372,13 +372,13 @@ static inline void atomic64_##op(long long a, atomic64_t *v) \
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
- : "cc"); \
+ : "cc"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
+static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
{ \
- unsigned long long val; \
+ s64 val; \
\
smp_mb(); \
\
@@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
{ \
- unsigned long long val, orig; \
+ s64 val, orig; \
\
smp_mb(); \
\
@@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline long long
-atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+static inline s64
+atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
- long long prev;
+ s64 prev;
smp_mb();
@@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
return prev;
}
-static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
{
- long long prev;
+ s64 prev;
smp_mb();
@@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
* the atomic variable, v, was not decremented.
*/
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long long val;
+ s64 val;
smp_mb();
@@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, if it was not @u.
* Returns the old value of @v
*/
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long long old, temp;
+ s64 old, temp;
smp_mb();
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index ff321f7df716..e1889ce3faf9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -97,7 +97,7 @@ fault:
goto again;
fail:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return ret;
}
@@ -310,7 +310,7 @@ int elf_check_arch(const struct elf32_hdr *x)
eflags = x->e_flags;
if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
pr_err("ABI mismatch - you need newer toolchain\n");
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
return 0;
}
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index b895f889602a..3d57ed0d8535 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -194,7 +194,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
return regs->r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index e9a5b259f405..57235e5c0cea 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -47,7 +47,7 @@ unhandled_exception(const char *str, struct pt_regs *regs,
tsk->thread.fault_address = (__force unsigned int)addr;
- force_sig_fault(signo, si_code, addr, tsk);
+ force_sig_fault(signo, si_code, addr);
} else {
/* If not due to copy_(to|from)_user, we are doomed */
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 8cca03480bb2..81e84426fe21 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -196,7 +196,7 @@ bad_area:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
tsk->thread.fault_address = address;
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -231,5 +231,5 @@ do_sigbus:
goto no_context;
tsk->thread.fault_address = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8869742a85df..c87cc9a6fb3c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1175,6 +1175,14 @@ config ARM_ERRATA_825619
DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable
and Device/Strongly-Ordered loads and stores might cause deadlock
+config ARM_ERRATA_857271
+ bool "ARM errata: A12: CPU might deadlock under some very rare internal conditions"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 857271 Cortex-A12
+ (all revs) erratum. Under very rare timing conditions, the CPU might
+ hang. The workaround is expected to have a < 1% performance impact.
+
config ARM_ERRATA_852421
bool "ARM errata: A17: DMB ST might fail to create order between stores"
depends on CPU_V7
@@ -1196,6 +1204,16 @@ config ARM_ERRATA_852423
config option from the A12 erratum due to the way errata are checked
for and handled.
+config ARM_ERRATA_857272
+ bool "ARM errata: A17: CPU might deadlock under some very rare internal conditions"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 857272 Cortex-A17 erratum.
+ This erratum is not known to be fixed in any A17 revision.
+ This is identical to Cortex-A12 erratum 857271. It is a separate
+ config option from the A12 erratum due to the way errata are checked
+ for and handled.
+
endmenu
source "arch/arm/common/Kconfig"
@@ -1232,6 +1250,18 @@ config PCI_HOST_ITE8152
default y
select DMABOUNCE
+config ARM_ERRATA_814220
+ bool "ARM errata: Cache maintenance by set/way operations can execute out of order"
+ depends on CPU_V7
+ help
+ The v7 ARM states that all cache and branch predictor maintenance
+ operations that do not specify an address execute, relative to
+ each other, in program order.
+ However, because of this erratum, an L2 set/way cache maintenance
+ operation can overtake an L1 set/way cache maintenance operation.
+ This erratum affects only the Cortex-A7 and is present in r0p2, r0p3,
+ r0p4 and r0p5.
+
endmenu
menu "Kernel Features"
diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
index 59753470cd34..267d0c178e55 100644
--- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
+++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi
@@ -336,3 +336,11 @@
status = "disabled";
};
+&uart0 {
+ compatible = "marvell,armada-38x-uart";
+};
+
+&uart1 {
+ compatible = "marvell,armada-38x-uart";
+};
+
diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
index d6b711011cba..e20483714be5 100644
--- a/arch/arm/boot/dts/imx7ulp.dtsi
+++ b/arch/arm/boot/dts/imx7ulp.dtsi
@@ -100,6 +100,29 @@
reg = <0x40000000 0x800000>;
ranges;
+ crypto: crypto@40240000 {
+ compatible = "fsl,sec-v4.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x40240000 0x10000>;
+ ranges = <0 0x40240000 0x10000>;
+ clocks = <&pcc2 IMX7ULP_CLK_CAAM>,
+ <&scg1 IMX7ULP_CLK_NIC1_BUS_DIV>;
+ clock-names = "aclk", "ipg";
+
+ sec_jr0: jr0@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ sec_jr1: jr1@2000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ };
+ };
+
lpuart4: serial@402d0000 {
compatible = "fsl,imx7ulp-lpuart";
reg = <0x402d0000 0x1000>;
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 13e561737ca8..746e1fce777e 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -539,16 +539,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
int bL_switcher_trace_trigger(void)
{
- int ret;
-
preempt_disable();
bL_switcher_trace_trigger_cpu(NULL);
- ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+ smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
preempt_enable();
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c95c54284da2..9b959afaaa12 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -9,6 +9,7 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_ARCH_EXYNOS=y
CONFIG_ARCH_EXYNOS3=y
+CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND=y
CONFIG_SMP=y
CONFIG_BIG_LITTLE=y
CONFIG_NR_CPUS=8
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
index 48a89537b828..a8e9b534c8da 100644
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -63,7 +63,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_neon_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
struct skcipher_walk walk;
u32 state[16];
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
index 232eeab1ec37..8775aa42bbbe 100644
--- a/arch/arm/crypto/sha512-glue.c
+++ b/arch/arm/crypto/sha512-glue.c
@@ -34,7 +34,7 @@ int sha512_arm_update(struct shash_desc *desc, const u8 *data,
(sha512_block_fn *)sha512_block_data_order);
}
-int sha512_arm_final(struct shash_desc *desc, u8 *out)
+static int sha512_arm_final(struct shash_desc *desc, u8 *out)
{
sha512_base_do_finalize(desc,
(sha512_block_fn *)sha512_block_data_order);
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 4b66ecd6be99..99175812d903 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -4,6 +4,7 @@
#include <asm/barrier.h>
#include <asm/errno.h>
+#include <asm/hwcap.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
isb();
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ elf_hwcap |= HWCAP_EVTSTRM;
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return elf_hwcap & HWCAP_EVTSTRM;
+}
#endif
#endif
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 50c3ac5f0809..75bb2c543e59 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor)
#ifndef CONFIG_GENERIC_ATOMIC64
typedef struct {
- long long counter;
+ s64 counter;
} atomic64_t;
#define ATOMIC64_INIT(i) { (i) }
#ifdef CONFIG_ARM_LPAE
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long long result;
+ s64 result;
__asm__ __volatile__("@ atomic64_read\n"
" ldrd %0, %H0, [%1]"
@@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v)
return result;
}
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("@ atomic64_set\n"
" strd %2, %H2, [%1]"
@@ -274,9 +274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
);
}
#else
-static inline long long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long long result;
+ s64 result;
__asm__ __volatile__("@ atomic64_read\n"
" ldrexd %0, %H0, [%1]"
@@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v)
return result;
}
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
- long long tmp;
+ s64 tmp;
prefetchw(&v->counter);
__asm__ __volatile__("@ atomic64_set\n"
@@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
#endif
#define ATOMIC64_OP(op, op1, op2) \
-static inline void atomic64_##op(long long i, atomic64_t *v) \
+static inline void atomic64_##op(s64 i, atomic64_t *v) \
{ \
- long long result; \
+ s64 result; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
-static inline long long \
-atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
+static inline s64 \
+atomic64_##op##_return_relaxed(s64 i, atomic64_t *v) \
{ \
- long long result; \
+ s64 result; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, op1, op2) \
-static inline long long \
-atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \
+static inline s64 \
+atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v) \
{ \
- long long result, val; \
+ s64 result, val; \
unsigned long tmp; \
\
prefetchw(&v->counter); \
@@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-static inline long long
-atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
+static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
{
- long long oldval;
+ s64 oldval;
unsigned long res;
prefetchw(&ptr->counter);
@@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
}
#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
-static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
+static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
{
- long long result;
+ s64 result;
unsigned long tmp;
prefetchw(&ptr->counter);
@@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
}
#define atomic64_xchg_relaxed atomic64_xchg_relaxed
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long long result;
+ s64 result;
unsigned long tmp;
smp_mb();
@@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
}
#define atomic64_dec_if_positive atomic64_dec_if_positive
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long long oldval, newval;
+ s64 oldval, newval;
unsigned long tmp;
smp_mb();
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 36c951dd23b8..deef4d0cb3b5 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -85,7 +85,7 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int,
extern asmlinkage void c_backtrace(unsigned long fp, int pmode);
struct mm_struct;
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr);
extern void __show_regs(struct pt_regs *);
#endif
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index d6667b8cfca5..7114b9aa46b8 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -476,4 +476,11 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
void *kaddr, unsigned long len);
+
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+void check_cpu_icache_size(int cpuid);
+#else
+static inline void check_cpu_icache_size(int cpuid) { }
+#endif
+
#endif
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index a00288d75ee6..172b08ff3760 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -30,7 +30,7 @@ static inline int __in_irqentry_text(unsigned long ptr)
extern void __init early_trap_init(void *);
extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
-extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs);
+extern void ptrace_break(struct pt_regs *regs);
extern void *vectors_page;
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index afcb4d3b14dc..324352787aea 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -198,15 +198,15 @@ void ptrace_disable(struct task_struct *child)
/*
* Handle hitting a breakpoint.
*/
-void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+void ptrace_break(struct pt_regs *regs)
{
force_sig_fault(SIGTRAP, TRAP_BRKPT,
- (void __user *)instruction_pointer(regs), tsk);
+ (void __user *)instruction_pointer(regs));
}
static int break_trap(struct pt_regs *regs, unsigned int instr)
{
- ptrace_break(current, regs);
+ ptrace_break(regs);
return 0;
}
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 3ca71d679aec..09f6fdd41974 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -247,7 +247,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs)
return regs->ARM_r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -280,7 +280,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
return regs->ARM_r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index a137608cd197..aab8ba40ce38 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -372,6 +372,7 @@ static void smp_store_cpu_info(unsigned int cpuid)
cpu_info->cpuid = read_cpuid_id();
store_cpu_topology(cpuid);
+ check_cpu_icache_size(cpuid);
}
/*
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 60e375ce1ab2..d17cb1e6d679 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu)
topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity);
pr_info("CPU%u: update cpu_capacity %lu\n",
- cpu, topology_get_cpu_scale(NULL, cpu));
+ cpu, topology_get_cpu_scale(cpu));
}
#else
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 7e2f1cba84e5..c053abd1fb53 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -369,7 +369,7 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
current->thread.error_code = err;
current->thread.trap_no = trap;
- force_sig_fault(signo, si_code, addr, current);
+ force_sig_fault(signo, si_code, addr);
} else {
die(str, regs, err);
}
@@ -603,7 +603,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
case NR(breakpoint): /* SWI BREAK_POINT */
regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
- ptrace_break(current, regs);
+ ptrace_break(regs);
return regs->ARM_r0;
/*
@@ -722,10 +722,11 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
#ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_BADABORT) {
+ pr_err("8<--- cut here ---\n");
pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
task_pid_nr(current), current->comm, code, instr);
dump_instr(KERN_ERR, regs);
- show_pte(current->mm, addr);
+ show_pte(KERN_ERR, current->mm, addr);
}
#endif
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 51a892702e27..a273ab25c668 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -61,6 +61,9 @@ static struct regulator_consumer_supply da830_evm_usb_supplies[] = {
static struct regulator_init_data da830_evm_usb_vbus_data = {
.consumer_supplies = da830_evm_usb_supplies,
.num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies),
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
};
static struct fixed_voltage_config da830_evm_usb_vbus = {
@@ -88,7 +91,7 @@ static struct gpiod_lookup_table da830_evm_usb_oc_gpio_lookup = {
static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = {
.dev_id = "reg-fixed-voltage.0",
.table = {
- GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0),
+ GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0),
{ }
},
};
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index db177a6a7e48..5390a8630cf0 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -306,6 +306,9 @@ static struct regulator_consumer_supply hawk_usb_supplies[] = {
static struct regulator_init_data hawk_usb_vbus_data = {
.consumer_supplies = hawk_usb_supplies,
.num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies),
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
};
static struct fixed_voltage_config hawk_usb_vbus = {
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 0af2bf6f9933..43899fa56674 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -11,6 +11,7 @@
* in the MontaVista 2.4 kernel (and the Amstrad changes therein)
*/
#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio/driver.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
@@ -99,7 +100,8 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip,
}
for (i = 0; i < ARRAY_SIZE(irq_data); i++) {
- gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], 0);
+ gpiod = gpiochip_request_own_desc(chip, i, pin_name[i],
+ GPIO_ACTIVE_HIGH, GPIOD_IN);
if (IS_ERR(gpiod)) {
pr_err("%s: failed to get GPIO pin %d (%ld)\n",
__func__, i, PTR_ERR(gpiod));
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 36498ea1b2f3..e47a6fbcfd6e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -10,6 +10,7 @@
*/
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -606,12 +607,12 @@ static void __init modem_assign_irq(struct gpio_chip *chip)
struct gpio_desc *gpiod;
gpiod = gpiochip_request_own_desc(chip, AMS_DELTA_GPIO_PIN_MODEM_IRQ,
- "modem_irq", 0);
+ "modem_irq", GPIO_ACTIVE_HIGH,
+ GPIOD_IN);
if (IS_ERR(gpiod)) {
pr_err("%s: modem IRQ GPIO request failed (%ld)\n", __func__,
PTR_ERR(gpiod));
} else {
- gpiod_direction_input(gpiod);
ams_delta_modem_ports[0].irq = gpiod_to_irq(gpiod);
}
}
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index 379424d72ae7..8ec6a4f5eb05 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/init.h>
#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
#include <linux/leds.h>
#include <linux/delay.h>
#include <linux/mmc/host.h>
@@ -398,7 +399,6 @@ static struct pca953x_platform_data crag6410_pca_data = {
/* VDDARM is controlled by DVS1 connected to GPK(0) */
static struct wm831x_buckv_pdata vddarm_pdata = {
.dvs_control_src = 1,
- .dvs_gpio = S3C64XX_GPK(0),
};
static struct regulator_consumer_supply vddarm_consumers[] = {
@@ -596,6 +596,24 @@ static struct wm831x_pdata crag_pmic_pdata = {
.touch = &touch_pdata,
};
+/*
+ * VDDARM eventually ends up as a regulator hanging off the MFD cell device
+ * "wm831x-buckv.1" spawned from drivers/mfd/wm831x-core.c.
+ *
+ * From the note on the platform data we can see that this is clearly DVS1,
+ * assigned as the dcdc1 resource to the MFD core, which sets the .id of the
+ * cell spawning the DVS1 platform device to 1; the cell platform device
+ * name is then calculated as 10 * instance + id, resulting in the device
+ * name "wm831x-buckv.11".
+ */
+static struct gpiod_lookup_table crag_pmic_gpiod_table = {
+ .dev_id = "wm831x-buckv.11",
+ .table = {
+ GPIO_LOOKUP("GPIOK", 0, "dvs", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
static struct i2c_board_info i2c_devs0[] = {
{ I2C_BOARD_INFO("24c08", 0x50), },
{ I2C_BOARD_INFO("tca6408", 0x20),
@@ -836,6 +854,7 @@ static void __init crag6410_machine_init(void)
s3c_fb_set_platdata(&crag6410_lcd_pdata);
dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata);
+ gpiod_add_lookup_table(&crag_pmic_gpiod_table);
i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0));
i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
diff --git a/arch/arm/mach-stm32/Kconfig b/arch/arm/mach-stm32/Kconfig
index 36e6c68c0b57..05d6b5aada80 100644
--- a/arch/arm/mach-stm32/Kconfig
+++ b/arch/arm/mach-stm32/Kconfig
@@ -44,6 +44,7 @@ if ARCH_MULTI_V7
config MACH_STM32MP157
bool "STMicroelectronics STM32MP157"
+ select ARM_ERRATA_814220
default y
endif # ARMv7-A
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b169e580bf82..cc798115aa9b 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -780,6 +780,14 @@ config CPU_ICACHE_DISABLE
Say Y here to disable the processor instruction cache. Unless
you have a reason not to or are unsure, say N.
+config CPU_ICACHE_MISMATCH_WORKAROUND
+ bool "Workaround for I-Cache line size mismatch between CPU cores"
+ depends on SMP && CPU_V7
+ help
+ Some big.LITTLE systems have I-Cache line size mismatch between
+ LITTLE and big cores. Say Y here to enable a workaround for
+ proper I-Cache support on such systems. If unsure, say N.
+
config CPU_DCACHE_DISABLE
bool "Disable D-Cache (C-bit)"
depends on (CPU_CP15 && !SMP) || CPU_V7M
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 6067fa4de22b..8cdb78642e93 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -945,7 +945,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto fixup;
if (ai_usermode & UM_SIGNAL) {
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
} else {
/*
* We're about to disable the alignment trap and return to
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 8c83b4586883..0ee8fc4b4672 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -16,6 +16,14 @@
#include "proc-macros.S"
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+.globl icache_size
+ .data
+ .align 2
+icache_size:
+ .long 64
+ .text
+#endif
/*
* The secondary kernel init calls v7_flush_dcache_all before it enables
* the L1; however, the L1 comes out of reset in an undefined state, so
@@ -160,6 +168,9 @@ loop2:
skip:
add r10, r10, #2 @ increment cache number
cmp r3, r10
+#ifdef CONFIG_ARM_ERRATA_814220
+ dsb
+#endif
bgt flush_levels
finished:
mov r10, #0 @ switch back to cache level 0
@@ -281,7 +292,12 @@ ENTRY(v7_coherent_user_range)
cmp r12, r1
blo 1b
dsb ishst
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+ ldr r3, =icache_size
+ ldr r2, [r3, #0]
+#else
icache_line_size r2, r3
+#endif
sub r3, r2, #1
bic r12, r0, r3
2:
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0048eadd0681..0e417233dad7 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -53,17 +53,16 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
if (!mm)
mm = &init_mm;
- pr_alert("pgd = %p\n", mm->pgd);
+ printk("%spgd = %p\n", lvl, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%08llx",
- addr, (long long)pgd_val(*pgd));
+ printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
do {
pud_t *pud;
@@ -118,7 +117,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
pr_cont("\n");
}
#else /* CONFIG_MMU */
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
{ }
#endif /* CONFIG_MMU */
@@ -139,11 +138,12 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
* No handler, we'll have to terminate things with extreme prejudice.
*/
bust_spinlocks(1);
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
(addr < PAGE_SIZE) ? "NULL pointer dereference" :
"paging request", addr);
- show_pte(mm, addr);
+ show_pte(KERN_ALERT, mm, addr);
die("Oops", regs, fsr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -154,19 +154,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
* User mode accesses just cause a SIGSEGV
*/
static void
-__do_user_fault(struct task_struct *tsk, unsigned long addr,
- unsigned int fsr, unsigned int sig, int code,
- struct pt_regs *regs)
+__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
+ int code, struct pt_regs *regs)
{
+ struct task_struct *tsk = current;
+
if (addr > TASK_SIZE)
harden_branch_predictor();
#ifdef CONFIG_DEBUG_USER
if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
((user_debug & UDBG_BUS) && (sig == SIGBUS))) {
- printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
+ pr_err("8<--- cut here ---\n");
+ pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
tsk->comm, sig, addr, fsr);
- show_pte(tsk->mm, addr);
+ show_pte(KERN_ERR, tsk->mm, addr);
show_regs(regs);
}
#endif
@@ -180,7 +182,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr,
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
- force_sig_fault(sig, code, (void __user *)addr, tsk);
+ force_sig_fault(sig, code, (void __user *)addr);
}
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
@@ -193,7 +195,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* have no context to handle this fault with.
*/
if (user_mode(regs))
- __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
@@ -389,7 +391,7 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, fsr, sig, code, regs);
+ __do_user_fault(addr, fsr, sig, code, regs);
return 0;
no_context:
@@ -553,9 +555,10 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
return;
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
- show_pte(current->mm, addr);
+ show_pte(KERN_ALERT, current->mm, addr);
arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
fsr, 0);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 749a5a6f6143..4920a206dce9 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -239,6 +239,22 @@ static void __init arm_initrd_init(void)
#endif
}
+#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
+void check_cpu_icache_size(int cpuid)
+{
+ u32 size, ctr;
+
+ asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
+
+ size = 1 << ((ctr & 0xf) + 2);
+ if (cpuid != 0 && icache_size != size)
+ pr_info("CPU%u: detected I-Cache line size mismatch, workaround enabled\n",
+ cpuid);
+ if (icache_size > size)
+ icache_size = size;
+}
+#endif
+
void __init arm_memblock_init(const struct machine_desc *mdesc)
{
/* Register the kernel text, kernel data and initrd with memblock. */
@@ -447,12 +463,6 @@ static void __init free_highpages(void)
*/
void __init mem_init(void)
{
-#ifdef CONFIG_HAVE_TCM
- /* These pointers are filled in on TCM detection */
- extern u32 dtcm_end;
- extern u32 itcm_end;
-#endif
-
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
/* this will put all unused low memory onto the freelists */
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 6b045c6653ea..941356d95a67 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -8,6 +8,8 @@
/* the upper-most page table pointer */
extern pmd_t *top_pmd;
+extern int icache_size;
+
/*
* 0xffff8000 to 0xffffffff is reserved for any ARM architecture
* specific hacks for copying pages efficiently, while 0xffff4000
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 83741c31757d..c4e8006a1a8c 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -389,6 +389,11 @@ __ca12_errata:
orr r10, r10, #1 << 24 @ set bit #24
mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
+#ifdef CONFIG_ARM_ERRATA_857271
+ mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register
+ orr r10, r10, #3 << 10 @ set bits #10 and #11
+ mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
+#endif
b __errata_finish
__ca17_errata:
@@ -404,6 +409,11 @@ __ca17_errata:
orrle r10, r10, #1 << 12 @ set bit #12
mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
+#ifdef CONFIG_ARM_ERRATA_857272
+ mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register
+ orr r10, r10, #3 << 10 @ set bits #10 and #11
+ mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register
+#endif
b __errata_finish
__v7_pj4b_setup:
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 1f5ec9741e6d..ca85df247775 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -12,8 +12,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- -z max-page-size=4096 -z common-page-size=4096 \
- -nostdlib -shared $(ldflags-y) \
+ -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
$(call ld-option, --hash-style=sysv) \
$(call ld-option, --build-id) \
-T
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea0510729..c1734e444fb8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -26,6 +26,7 @@ config ARM64
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
@@ -107,6 +108,8 @@ config ARM64
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -160,6 +163,7 @@ config ARM64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
@@ -260,7 +264,8 @@ config GENERIC_CALIBRATE_DELAY
def_bool y
config ZONE_DMA32
- def_bool y
+ bool "Support DMA32 zone" if EXPERT
+ default y
config HAVE_GENERIC_GUP
def_bool y
@@ -933,7 +938,6 @@ config PARAVIRT
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
- default n
help
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
@@ -1418,12 +1422,27 @@ config ARM64_SVE
KVM in the same kernel image.
config ARM64_MODULE_PLTS
- bool
+ bool "Use PLTs to allow module memory to spill over into vmalloc area"
+ depends on MODULES
select HAVE_MOD_ARCH_SPECIFIC
+ help
+ Allocate PLTs when loading modules so that jumps and calls whose
+ targets are too far away for their relative offsets to be encoded
+ in the instructions themselves can be bounced via veneers in the
+ module's PLT. This allows modules to be allocated in the generic
+ vmalloc area after the dedicated module memory area has been
+ exhausted.
+
+ When running with address space randomization (KASLR), the module
+ region itself may be too far away for ordinary relative jumps and
+ calls, and so in that case, module PLTs are required and cannot be
+ disabled.
+
+ Specific errata workaround(s) might also force module PLTs to be
+ enabled (ARM64_ERRATUM_843419).
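Editor's note: the reach limit that makes these veneers necessary is that AArch64 B/BL instructions encode a signed 26-bit word offset, i.e. +/-128 MiB of range. A hedged sketch of the range check (simplified; SZ_128M defined locally for illustration, not the kernel's implementation):

	#define SZ_128M	(128UL << 20)

	/* Returns nonzero when a direct B/BL cannot encode the branch and a
	 * PLT veneer must be interposed (simplified illustration). */
	static int needs_plt_veneer(unsigned long pc, unsigned long target)
	{
		long off = (long)(target - pc);

		return off < -(long)SZ_128M || off >= (long)SZ_128M;
	}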
config ARM64_PSEUDO_NMI
bool "Support for NMI-like interrupts"
- depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com
select ARM_GIC_V3
help
Adds support for mimicking Non-Maskable Interrupts through the use of
@@ -1436,6 +1455,17 @@ config ARM64_PSEUDO_NMI
If unsure, say N
+if ARM64_PSEUDO_NMI
+config ARM64_DEBUG_PRIORITY_MASKING
+ bool "Debug interrupt priority masking"
+ help
+ This adds runtime checks to functions enabling/disabling
+ interrupts when using priority masking. The additional checks verify
+ the validity of ICC_PMR_EL1 when calling concerned functions.
+
+ If unsure, say N
+endif
+
config RELOCATABLE
bool
help
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index e9d2e578cbe6..e3d3fd0a4268 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -49,10 +49,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
endif
-KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
+ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
+ CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
+
+ ifeq ($(CONFIG_CC_IS_CLANG), y)
+ $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
+ else ifeq ($(CROSS_COMPILE_COMPAT),)
+ $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
+ else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
+ $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
+ else
+ export CROSS_COMPILE_COMPAT
+ export CONFIG_COMPAT_VDSO := y
+ compat_vdso := -DCONFIG_COMPAT_VDSO=1
+ endif
+endif
+
+KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
-KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
+KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
@@ -164,6 +180,9 @@ ifeq ($(KBUILD_EXTMOD),)
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+ $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
+ $(build)=arch/arm64/kernel/vdso32 \
+ include/generated/vdso32-offsets.h)
endif
define archhelp
diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
index f09f3e03f708..108667ce4f31 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
@@ -27,6 +27,23 @@
status = "okay";
};
+&pm8005_lsid1 {
+ pm8005-regulators {
+ compatible = "qcom,pm8005-regulators";
+
+ vdd_s1-supply = <&vph_pwr>;
+
+ pm8005_s1: s1 { /* VDD_GFX supply */
+ regulator-min-microvolt = <524000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-enable-ramp-delay = <500>;
+
+ /* hack until we rig up the gpu consumer */
+ regulator-always-on;
+ };
+ };
+};
+
&qusb2phy {
status = "okay";
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 6bca5b082ea4..dd827e64e5fe 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -68,6 +68,7 @@ CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_XEN=y
CONFIG_COMPAT=y
+CONFIG_RANDOMIZE_BASE=y
CONFIG_HIBERNATION=y
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_ARM_CPUIDLE=y
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 3ebfaec97e27..00bd2885feaa 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -15,6 +15,8 @@
.arch armv8-a+crypto
xtsmask .req v16
+ cbciv .req v16
+ vctr .req v16
.macro xts_reload_mask, tmp
.endm
@@ -49,7 +51,7 @@
load_round_keys \rounds, \temp
.endm
- .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
@@ -60,27 +62,34 @@
aes\mc \i2\().16b, \i2\().16b
aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b
+ .ifnb \i4
+ aes\de \i4\().16b, \k\().16b
+ aes\mc \i4\().16b, \i4\().16b
+ .endif
.endif
.endif
.endm
- /* up to 4 interleaved encryption rounds with the same round key */
- .macro round_Nx, enc, k, i0, i1, i2, i3
+ /* up to 5 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3, i4
.ifc \enc, e
- do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
.else
- do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
.endif
.endm
- /* up to 4 interleaved final rounds */
- .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ /* up to 5 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
aes\de \i0\().16b, \k\().16b
.ifnb \i1
aes\de \i1\().16b, \k\().16b
.ifnb \i3
aes\de \i2\().16b, \k\().16b
aes\de \i3\().16b, \k\().16b
+ .ifnb \i4
+ aes\de \i4\().16b, \k\().16b
+ .endif
.endif
.endif
eor \i0\().16b, \i0\().16b, \k2\().16b
@@ -89,47 +98,52 @@
.ifnb \i3
eor \i2\().16b, \i2\().16b, \k2\().16b
eor \i3\().16b, \i3\().16b, \k2\().16b
+ .ifnb \i4
+ eor \i4\().16b, \i4\().16b, \k2\().16b
+ .endif
.endif
.endif
.endm
- /* up to 4 interleaved blocks */
- .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ /* up to 5 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
- round_Nx \enc, v17, \i0, \i1, \i2, \i3
- round_Nx \enc, v18, \i0, \i1, \i2, \i3
-1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
- round_Nx \enc, v20, \i0, \i1, \i2, \i3
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
- round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
.endr
- fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm
.macro encrypt_block, in, rounds, t0, t1, t2
do_block_Nx e, \rounds, \in
.endm
- .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
- do_block_Nx e, \rounds, \i0, \i1
- .endm
-
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
- .macro decrypt_block, in, rounds, t0, t1, t2
- do_block_Nx d, \rounds, \in
+ .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
- .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
- do_block_Nx d, \rounds, \i0, \i1
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
.endm
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
+ .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
+ .endm
+
+#define MAX_STRIDE 5
+
#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 2883def14be5..324039b72094 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -10,6 +10,18 @@
.text
.align 4
+#ifndef MAX_STRIDE
+#define MAX_STRIDE 4
+#endif
+
+#if MAX_STRIDE == 4
+#define ST4(x...) x
+#define ST5(x...)
+#else
+#define ST4(x...)
+#define ST5(x...) x
+#endif
+
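Editor's note: the ST4()/ST5() helpers let one body of assembly carry both the 4-way and 5-way variants: only the macro matching the configured stride expands its argument, the other expands to nothing, so shared code carries per-stride lines without runtime branches. A stand-alone C illustration of the same pattern (plain C, strings hypothetical):

	#include <stdio.h>

	#define MAX_STRIDE 5

	#if MAX_STRIDE == 4
	#define ST4(x...) x
	#define ST5(x...)
	#else
	#define ST4(x...)
	#define ST5(x...) x
	#endif

	int main(void)
	{
		ST4(puts("4-way interleave path");)
		ST5(puts("5-way interleave path");)
		return 0;
	}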
aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
@@ -20,6 +32,18 @@ aes_decrypt_block4x:
ret
ENDPROC(aes_decrypt_block4x)
+#if MAX_STRIDE == 5
+aes_encrypt_block5x:
+ encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
+ ret
+ENDPROC(aes_encrypt_block5x)
+
+aes_decrypt_block5x:
+ decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
+ ret
+ENDPROC(aes_decrypt_block5x)
+#endif
+
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks)
@@ -34,14 +58,17 @@ AES_ENTRY(aes_ecb_encrypt)
enc_prepare w3, x2, x5
.LecbencloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lecbenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
- bl aes_encrypt_block4x
+ST4( bl aes_encrypt_block4x )
+ST5( ld1 {v4.16b}, [x1], #16 )
+ST5( bl aes_encrypt_block5x )
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LecbencloopNx
.Lecbenc1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lecbencout
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
@@ -62,14 +89,17 @@ AES_ENTRY(aes_ecb_decrypt)
dec_prepare w3, x2, x5
.LecbdecloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lecbdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
- bl aes_decrypt_block4x
+ST4( bl aes_decrypt_block4x )
+ST5( ld1 {v4.16b}, [x1], #16 )
+ST5( bl aes_decrypt_block5x )
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LecbdecloopNx
.Lecbdec1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lecbdecout
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
@@ -129,39 +159,56 @@ AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
- ld1 {v7.16b}, [x5] /* get iv */
+ ld1 {cbciv.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
.LcbcdecloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lcbcdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+#if MAX_STRIDE == 5
+ ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
+ mov v5.16b, v0.16b
+ mov v6.16b, v1.16b
+ mov v7.16b, v2.16b
+ bl aes_decrypt_block5x
+ sub x1, x1, #32
+ eor v0.16b, v0.16b, cbciv.16b
+ eor v1.16b, v1.16b, v5.16b
+ ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v6.16b
+ eor v3.16b, v3.16b, v7.16b
+ eor v4.16b, v4.16b, v5.16b
+#else
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
sub x1, x1, #16
- eor v0.16b, v0.16b, v7.16b
+ eor v0.16b, v0.16b, cbciv.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
+#endif
st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lcbcdecout
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x6, w7
- eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
- mov v7.16b, v1.16b /* ct is next iv */
+ eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
+ mov cbciv.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x5] /* return iv */
+ st1 {cbciv.16b}, [x5] /* return iv */
ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
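Editor's note: the register shuffling and ciphertext reloads above exist because CBC decryption consumes each ciphertext block twice: once as decryption input and once as the XOR mask for the following block. A single-block reference model in plain C (the block cipher is abstracted as a callback; names are illustrative):

	#include <stdint.h>
	#include <string.h>

	void cbc_decrypt(void (*decrypt)(uint8_t out[16], const uint8_t in[16]),
			 uint8_t *dst, const uint8_t *src, size_t blocks,
			 uint8_t iv[16])
	{
		uint8_t buf[16];

		while (blocks--) {
			decrypt(buf, src);
			for (int i = 0; i < 16; i++)
				dst[i] = buf[i] ^ iv[i];	/* pt = D(ct) ^ prev ct */
			memcpy(iv, src, 16);		/* ciphertext is next IV */
			src += 16;
			dst += 16;
		}
	}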
@@ -255,51 +302,60 @@ AES_ENTRY(aes_ctr_encrypt)
mov x29, sp
enc_prepare w3, x2, x6
- ld1 {v4.16b}, [x5]
+ ld1 {vctr.16b}, [x5]
- umov x6, v4.d[1] /* keep swabbed ctr in reg */
+ umov x6, vctr.d[1] /* keep swabbed ctr in reg */
rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
bmi .Lctr1x
add w7, w6, #1
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
add w8, w6, #2
- mov v1.16b, v4.16b
+ mov v1.16b, vctr.16b
+ add w9, w6, #3
+ mov v2.16b, vctr.16b
add w9, w6, #3
- mov v2.16b, v4.16b
rev w7, w7
- mov v3.16b, v4.16b
+ mov v3.16b, vctr.16b
rev w8, w8
+ST5( mov v4.16b, vctr.16b )
mov v1.s[3], w7
rev w9, w9
+ST5( add w10, w6, #4 )
mov v2.s[3], w8
+ST5( rev w10, w10 )
mov v3.s[3], w9
+ST5( mov v4.s[3], w10 )
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
- bl aes_encrypt_block4x
+ST4( bl aes_encrypt_block4x )
+ST5( bl aes_encrypt_block5x )
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ST4( ld1 {v5.16b}, [x1], #16 )
eor v1.16b, v6.16b, v1.16b
+ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
+ST5( eor v4.16b, v6.16b, v4.16b )
st1 {v0.16b-v3.16b}, [x0], #64
- add x6, x6, #4
+ST5( st1 {v4.16b}, [x0], #16 )
+ add x6, x6, #MAX_STRIDE
rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
beq .Lctrout
.Lctrloop:
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
encrypt_block v0, w3, x2, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
@@ -311,7 +367,7 @@ AES_ENTRY(aes_ctr_encrypt)
bne .Lctrloop
.Lctrout:
- st1 {v4.16b}, [x5] /* return next CTR value */
+ st1 {vctr.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
ret
@@ -320,11 +376,11 @@ AES_ENTRY(aes_ctr_encrypt)
b .Lctrout
.Lctrcarry:
- umov x7, v4.d[0] /* load upper word of ctr */
+ umov x7, vctr.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
- ins v4.d[0], x7
+ ins vctr.d[0], x7
b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
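Editor's note: the .Lctrcarry path handles the rare 64-bit rollover of the low counter half. Since the counter lives big-endian in the vector register, each increment byte-swaps the low half, adds one, and swaps back; on overflow the same dance is applied to the upper half. A hedged C model (illustrative only; GCC/Clang byte-swap builtin assumed):

	#include <stdint.h>

	/* ctr[0] = upper 64 bits, ctr[1] = lower 64 bits, both stored big-endian */
	static void ctr_increment(uint64_t ctr[2])
	{
		uint64_t lo = __builtin_bswap64(ctr[1]) + 1;

		ctr[1] = __builtin_bswap64(lo);
		if (lo == 0)	/* low half wrapped: carry into the upper half */
			ctr[0] = __builtin_bswap64(__builtin_bswap64(ctr[0]) + 1);
	}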
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index d261331747f2..2bebccc73869 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -12,6 +12,8 @@
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
xtsmask .req v7
+ cbciv .req v7
+ vctr .req v4
.macro xts_reload_mask, tmp
xts_load_mask \tmp
@@ -114,26 +116,9 @@
/*
* Interleaved versions: functionally equivalent to the
- * ones above, but applied to 2 or 4 AES states in parallel.
+ * ones above, but applied to AES states in parallel.
*/
- .macro sub_bytes_2x, in0, in1
- sub v8.16b, \in0\().16b, v15.16b
- tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
- sub v9.16b, \in1\().16b, v15.16b
- tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, v8.16b, v15.16b
- tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
- sub v11.16b, v9.16b, v15.16b
- tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v10.16b, v15.16b
- tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
- sub v9.16b, v11.16b, v15.16b
- tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
- tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
- tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
- .endm
-
.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
@@ -212,25 +197,6 @@
eor \in1\().16b, \in1\().16b, v11.16b
.endm
- .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
- ld1 {v15.4s}, [\rk]
- add \rkp, \rk, #16
- mov \i, \rounds
-1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
- eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
- movi v15.16b, #0x40
- tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
- tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- sub_bytes_2x \in0, \in1
- subs \i, \i, #1
- ld1 {v15.4s}, [\rkp], #16
- beq 2222f
- mix_columns_2x \in0, \in1, \enc
- b 1111b
-2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
- eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
- .endm
-
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
@@ -257,14 +223,6 @@
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm
- .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
- do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
- .endm
-
- .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
- do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
- .endm
-
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 82029cda2e77..1495d2b18518 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -60,7 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_neon_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
struct skcipher_walk walk;
u32 state[16];
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index ecb0f67e5998..bdc1b6d7aff7 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -52,7 +52,7 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
+ bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len;
if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
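Editor's note: both this hunk and the SHA-256 one below append "&& len" for the same reason: for empty input the CE transform that performs the final padding round is never invoked, so letting finalize be true for len == 0 returned the initial hash state as the digest. The corrected predicate, isolated as a hypothetical helper:

	/* Finalize in hardware only when no data has been hashed yet
	 * (count == 0), the length is block-aligned, and it is non-zero. */
	static int ce_may_finalize(unsigned long long count, unsigned int len,
				   unsigned int block_size)
	{
		return !count && !(len % block_size) && len;
	}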
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 955c3c2d3f5a..604a01a4ede6 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -57,7 +57,7 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
- bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
+ bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len;
if (!crypto_simd_usable()) {
if (len)
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index ada0bc480a1b..b263e239cb59 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -38,6 +38,9 @@
(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
(unsigned long)(entry) + (entry)->header.length > (end))
+#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
+ spe_interrupt) + sizeof(u16))
+
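Editor's note: ACPI_MADT_GICC_SPE is the minimum entry length that can contain the spe_interrupt field; callers compare it against the entry's declared length before touching the field. A simplified stand-alone version of the pattern (the struct layout here is an assumption for illustration, not the ACPICA definition):

	#include <stddef.h>
	#include <stdint.h>

	struct madt_gicc {
		uint32_t flags;
		/* ... earlier fields elided ... */
		uint16_t spe_interrupt;
	};

	#define GICC_SPE_MIN_LEN \
		(offsetof(struct madt_gicc, spe_interrupt) + sizeof(uint16_t))

	/* An entry is only trusted to carry the field if its declared
	 * length covers offsetof(field) + sizeof(field). */
	static int gicc_has_spe(size_t entry_len)
	{
		return entry_len >= GICC_SPE_MIN_LEN;
	}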
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 2247908e55d6..79155a8cfe7c 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void)
static inline void gic_pmr_mask_irqs(void)
{
- BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
+ BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
+ GIC_PRIO_PSR_I_SET));
+ BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
gic_write_pmr(GIC_PRIO_IRQOFF);
}
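Editor's note: with GICv3 priorities, lower numbers mean higher priority, so the two BUILD_BUG_ON()s pin the default device priority strictly between the masked and unmasked PMR values. An equivalent stand-alone check (GICD_INT_DEF_PRI is 0xa0 in the kernel; treated as an assumption here):

	#define GIC_PRIO_IRQON		0xc0
	#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)
	#define GIC_PRIO_PSR_I_SET	(1 << 4)
	#define GICD_INT_DEF_PRI	0xa0	/* assumed kernel default */

	_Static_assert(GICD_INT_DEF_PRI >= (GIC_PRIO_IRQOFF | GIC_PRIO_PSR_I_SET),
		       "device IRQs must stay masked while PMR is at IRQOFF");
	_Static_assert(GICD_INT_DEF_PRI < GIC_PRIO_IRQON,
		       "device IRQs must be deliverable when PMR is at IRQON");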
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 6756178c27db..7ae54d7d333a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -9,6 +9,7 @@
#define __ASM_ARCH_TIMER_H
#include <asm/barrier.h>
+#include <asm/hwcap.h>
#include <asm/sysreg.h>
#include <linux/bug.h>
@@ -229,4 +230,16 @@ static inline int arch_timer_arch_init(void)
return 0;
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ cpu_set_named_feature(EVTSTRM);
+#ifdef CONFIG_COMPAT
+ compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
+#endif
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return cpu_have_named_feature(EVTSTRM);
+}
#endif
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 23c378606aed..c8c850bc3dfb 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor)
#define ATOMIC64_OP(op, asm_op) \
__LL_SC_INLINE void \
-__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \
+__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \
{ \
- long result; \
+ s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
@@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \
__LL_SC_EXPORT(arch_atomic64_##op);
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
-__LL_SC_INLINE long \
-__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
+__LL_SC_INLINE s64 \
+__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
{ \
- long result; \
+ s64 result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return" #name "\n" \
@@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\
__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
-__LL_SC_INLINE long \
-__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \
+__LL_SC_INLINE s64 \
+__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \
{ \
- long result, val; \
+ s64 result, val; \
unsigned long tmp; \
\
asm volatile("// atomic64_fetch_" #op #name "\n" \
@@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-__LL_SC_INLINE long
+__LL_SC_INLINE s64
__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
{
- long result;
+ s64 result;
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 45e030d54332..69acb1c19a15 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory")
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op)
#define ATOMIC64_OP(op, asm_op) \
-static inline void arch_atomic64_##op(long i, atomic64_t *v) \
+static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
@@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
-static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
-static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC64_OP_ADD_RETURN
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- register long x0 asm ("x0") = i;
+ register s64 x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
}
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
-static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \
+static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
- register long x0 asm ("x0") = i;
+ register s64 x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
@@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
}
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
-static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\
+static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN
#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
-static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \
+static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \
- register long x0 asm ("x0") = i; \
+ register s64 x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
@@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC64_FETCH_OP_SUB
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
register long x0 asm ("x0") = (long)v;
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a05db636981a..64eeaa41e7ca 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void)
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
-static inline int cache_line_size(void)
+static inline int cache_line_size_of_cpu(void)
{
u32 cwg = cache_type_cwg();
+
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
+int cache_line_size(void);
+
/*
* Read the effective value of CTR_EL0.
*
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 1fe4467442aa..665c78e0665a 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
int set_memory_valid(unsigned long addr, int numpages, int enable);
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
#endif
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 373799b7982f..3d8db50d9ae2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -614,6 +614,12 @@ static inline bool system_uses_irq_prio_masking(void)
cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
}
+static inline bool system_has_prio_mask_debugging(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
+ system_uses_irq_prio_masking();
+}
+
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 6dd8a8723525..987926ed535e 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -7,6 +7,7 @@
#include <linux/irqflags.h>
+#include <asm/arch_gicv3.h>
#include <asm/cpufeature.h>
#define DAIF_PROCCTX 0
@@ -16,11 +17,20 @@
/* mask/save/unmask/restore all exceptions, including interrupts. */
static inline void local_daif_mask(void)
{
+ WARN_ON(system_has_prio_mask_debugging() &&
+ (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
+ GIC_PRIO_PSR_I_SET)));
+
asm volatile(
"msr daifset, #0xf // local_daif_mask\n"
:
:
: "memory");
+
+ /* Don't really care for a dsb here; we don't intend to enable IRQs */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
trace_hardirqs_off();
}
@@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void)
if (system_uses_irq_prio_masking()) {
/* If IRQs are masked with PMR, reflect it in the flags */
- if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
+ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
flags |= PSR_I_BIT;
}
@@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags)
{
bool irq_disabled = flags & PSR_I_BIT;
+ WARN_ON(system_has_prio_mask_debugging() &&
+ !(read_sysreg(daif) & PSR_I_BIT));
+
if (!irq_disabled) {
trace_hardirqs_on();
- if (system_uses_irq_prio_masking())
- arch_local_irq_enable();
- } else if (!(flags & PSR_A_BIT)) {
- /*
- * If interrupts are disabled but we can take
- * asynchronous errors, we can take NMIs
- */
if (system_uses_irq_prio_masking()) {
- flags &= ~PSR_I_BIT;
+ gic_write_pmr(GIC_PRIO_IRQON);
+ dsb(sy);
+ }
+ } else if (system_uses_irq_prio_masking()) {
+ u64 pmr;
+
+ if (!(flags & PSR_A_BIT)) {
/*
- * There has been concern that the write to daif
- * might be reordered before this write to PMR.
- * From the ARM ARM DDI 0487D.a, section D1.7.1
- * "Accessing PSTATE fields":
- * Writes to the PSTATE fields have side-effects on
- * various aspects of the PE operation. All of these
- * side-effects are guaranteed:
- * - Not to be visible to earlier instructions in
- * the execution stream.
- * - To be visible to later instructions in the
- * execution stream
- *
- * Also, writes to PMR are self-synchronizing, so no
- * interrupts with a lower priority than PMR is signaled
- * to the PE after the write.
- *
- * So we don't need additional synchronization here.
+ * If interrupts are disabled but we can take
+ * asynchronous errors, we can take NMIs
*/
- arch_local_irq_disable();
+ flags &= ~PSR_I_BIT;
+ pmr = GIC_PRIO_IRQOFF;
+ } else {
+ pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
}
+
+ /*
+ * There has been concern that the write to daif
+ * might be reordered before this write to PMR.
+ * From the ARM ARM DDI 0487D.a, section D1.7.1
+ * "Accessing PSTATE fields":
+ * Writes to the PSTATE fields have side-effects on
+ * various aspects of the PE operation. All of these
+ * side-effects are guaranteed:
+ * - Not to be visible to earlier instructions in
+ * the execution stream.
+ * - To be visible to later instructions in the
+ * execution stream
+ *
+ * Also, writes to PMR are self-synchronizing, so no
+ * interrupts with a lower priority than PMR are signaled
+ * to the PE after the write.
+ *
+ * So we don't need additional synchronization here.
+ */
+ gic_write_pmr(pmr);
}
write_sysreg(flags, daif);
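Editor's note: condensed, the rework above picks one of three PMR values from the saved flags before writing DAIF back. A hedged C restatement (constants redefined locally so the sketch is self-contained; illustrative only, not the kernel code path):

	#define PSR_I_BIT		0x00000080
	#define PSR_A_BIT		0x00000100
	#define GIC_PRIO_IRQON		0xc0
	#define GIC_PRIO_IRQOFF		(GIC_PRIO_IRQON & ~0x80)
	#define GIC_PRIO_PSR_I_SET	(1 << 4)

	static unsigned long pmr_for_flags(unsigned long flags)
	{
		if (!(flags & PSR_I_BIT))
			return GIC_PRIO_IRQON;		/* IRQs fully enabled */
		if (!(flags & PSR_A_BIT))
			return GIC_PRIO_IRQOFF;		/* IRQs masked, NMIs allowed */
		return GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; /* masking via PSTATE.I */
	}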
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 325d9515c0f8..3c7037c6ba9b 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
({ \
set_thread_flag(TIF_32BIT); \
})
+#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#define COMPAT_ARCH_DLINFO \
+do { \
+ /* \
+ * Note that we use Elf64_Off instead of elf_addr_t because \
+ * elf_addr_t in compat is defined as Elf32_Addr and casting \
+ * current->mm->context.vdso to it triggers a cast warning of \
+ * cast from pointer to integer of different size. \
+ */ \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (Elf64_Off)current->mm->context.vdso); \
+} while (0)
+#else
#define COMPAT_ARCH_DLINFO
+#endif
extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages \
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 897029c8e9b5..b6a2c352f4c3 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,8 +37,6 @@ struct task_struct;
extern void fpsimd_save_state(struct user_fpsimd_state *state);
extern void fpsimd_load_state(struct user_fpsimd_state *state);
-extern void fpsimd_save(void);
-
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
@@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl);
extern void fpsimd_flush_task_state(struct task_struct *target);
-extern void fpsimd_flush_cpu_state(void);
-extern void sve_flush_cpu_state(void);
+extern void fpsimd_save_and_flush_cpu_state(void);
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index e5d9420cd258..3d2f2472a36c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -84,6 +84,8 @@
#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
+#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
+#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 66853fde60f9..7872f260c9ee 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -29,6 +29,12 @@
*/
static inline void arch_local_irq_enable(void)
{
+ if (system_has_prio_mask_debugging()) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
asm volatile(ALTERNATIVE(
"msr daifclr, #2 // arch_local_irq_enable\n"
"nop",
@@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void)
static inline void arch_local_irq_disable(void)
{
+ if (system_has_prio_mask_debugging()) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
asm volatile(ALTERNATIVE(
"msr daifset, #2 // arch_local_irq_disable",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
@@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void)
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long daif_bits;
unsigned long flags;
- daif_bits = read_sysreg(daif);
-
- /*
- * The asm is logically equivalent to:
- *
- * if (system_uses_irq_prio_masking())
- * flags = (daif_bits & PSR_I_BIT) ?
- * GIC_PRIO_IRQOFF :
- * read_sysreg_s(SYS_ICC_PMR_EL1);
- * else
- * flags = daif_bits;
- */
asm volatile(ALTERNATIVE(
- "mov %0, %1\n"
- "nop\n"
- "nop",
- __mrs_s("%0", SYS_ICC_PMR_EL1)
- "ands %1, %1, " __stringify(PSR_I_BIT) "\n"
- "csel %0, %0, %2, eq",
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (flags), "+r" (daif_bits)
- : "r" ((unsigned long) GIC_PRIO_IRQOFF)
+ "mrs %0, daif",
+ __mrs_s("%0", SYS_ICC_PMR_EL1),
+ ARM64_HAS_IRQ_PRIO_MASKING)
+ : "=&r" (flags)
+ :
: "memory");
return flags;
}
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ int res;
+
+ asm volatile(ALTERNATIVE(
+ "and %w0, %w1, #" __stringify(PSR_I_BIT),
+ "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
+ ARM64_HAS_IRQ_PRIO_MASKING)
+ : "=&r" (res)
+ : "r" ((int) flags)
+ : "memory");
+
+ return res;
+}
+
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
flags = arch_local_save_flags();
- arch_local_irq_disable();
+ /*
+ * There are too many states with IRQs disabled; just keep the current
+ * state if interrupts are already disabled/masked.
+ */
+ if (!arch_irqs_disabled_flags(flags))
+ arch_local_irq_disable();
return flags;
}
@@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags)
__msr_s(SYS_ICC_PMR_EL1, "%0")
"dsb sy",
ARM64_HAS_IRQ_PRIO_MASKING)
- : "+r" (flags)
:
+ : "r" (flags)
: "memory");
}
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
- int res;
-
- asm volatile(ALTERNATIVE(
- "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
- "nop",
- "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
- "cset %w0, ls",
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (res)
- : "r" ((int) flags)
- : "memory");
-
- return res;
-}
#endif
#endif
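Editor's note: the net effect of moving arch_irqs_disabled_flags() above arch_local_irq_save() is that the saved flags can now be tested before deciding whether to mask. The two ALTERNATIVE branches reduce to the following plain-C predicates (constants redefined locally; illustrative):

	#define PSR_I_BIT	0x00000080
	#define GIC_PRIO_IRQON	0xc0

	/* DAIF world: flags are the PSTATE bits; PSR.I set means masked. */
	static int irqs_disabled_daif(unsigned long flags)
	{
		return flags & PSR_I_BIT;
	}

	/* PMR world: flags hold the priority mask; anything other than
	 * GIC_PRIO_IRQON (the eor yielding non-zero) means masked. */
	static int irqs_disabled_pmr(unsigned long flags)
	{
		return (flags ^ GIC_PRIO_IRQON) != 0;
	}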
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index c328191aa202..9f19c354b165 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -597,11 +597,12 @@ static inline void kvm_arm_vhe_guest_enter(void)
* will not signal the CPU of interrupts of lower priority, and the
* only way to get out will be via guest exceptions.
* Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET; we just need a
+ * dsb to ensure the redistributor forwards EL2 IRQs to the CPU.
*/
- if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON);
+ if (system_uses_irq_prio_masking())
dsb(sy);
- }
}
static inline void kvm_arm_vhe_guest_exit(void)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 30e5e67749e5..db92950bb1a0 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -115,7 +115,6 @@
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
-#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
@@ -142,8 +141,8 @@
/*
* Level 3 descriptor (PTE).
*/
+#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
-#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index c81583be034b..f318258a14be 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -13,7 +13,6 @@
/*
* Software defined PTE bits definition.
*/
-#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index fca26759081a..3052381baaeb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval);
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+
+static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte)
{
pte_t old_pte;
- if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
- __sync_icache_dcache(pte);
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return;
+
+ old_pte = READ_ONCE(*ptep);
+
+ if (!pte_valid(old_pte) || !pte_valid(pte))
+ return;
+ if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
+ return;
/*
- * If the existing pte is valid, check for potential race with
- * hardware updates of the pte (ptep_set_access_flags safely changes
- * valid ptes without going through an invalid entry).
+ * Check for potential race with hardware updates of the pte
+ * (ptep_set_access_flags safely changes valid ptes without going
+ * through an invalid entry).
*/
- old_pte = READ_ONCE(*ptep);
- if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
- (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
- VM_WARN_ONCE(!pte_young(pte),
- "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(old_pte), pte_val(pte));
- VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
- "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(old_pte), pte_val(pte));
- }
+ VM_WARN_ONCE(!pte_young(pte),
+ "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
+ "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte);
+
+ __check_racy_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
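Editor's note: factored out, the early returns in __check_racy_pte_update() amount to: only warn when DEBUG_VM is on, both PTEs are valid, and another thread could plausibly race on the mm. As a stand-alone predicate (simplified parameters; illustrative):

	#include <stdbool.h>

	static bool should_check_pte_race(bool debug_vm, bool old_valid,
					  bool new_valid, bool mm_is_active,
					  int mm_users)
	{
		if (!debug_vm)
			return false;
		if (!old_valid || !new_valid)
			return false;
		/* a single-user mm that isn't currently active cannot race */
		return mm_is_active || mm_users > 1;
	}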
@@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte)
return __pmd(pte_val(pte));
}
-static inline pgprot_t mk_sect_prot(pgprot_t prot)
+static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+}
+
+static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}
#ifdef CONFIG_NUMA_BALANCING
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index dad858b6adc6..81693244f58d 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -24,9 +24,15 @@
* means masking more IRQs (or at least that the same IRQs remain masked).
*
* To mask interrupts, we clear the most significant bit of PMR.
+ *
+ * Some code sections either automatically switch back to PSR.I or explicitly
+ * require that priority masking not be used. If bit GIC_PRIO_PSR_I_SET is
+ * included in the priority mask, it indicates that PSR.I should be set and
+ * interrupt disabling temporarily does not rely on IRQ priorities.
*/
-#define GIC_PRIO_IRQON 0xf0
-#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_IRQON 0xc0
+#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_PSR_I_SET (1 << 4)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_IL_BIT (1 << 20)
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index 0418c67f2b8b..bd43d1cf724b 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -9,6 +9,52 @@
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
+struct compat_sigcontext {
+ /* We always set these two fields to 0 */
+ compat_ulong_t trap_no;
+ compat_ulong_t error_code;
+
+ compat_ulong_t oldmask;
+ compat_ulong_t arm_r0;
+ compat_ulong_t arm_r1;
+ compat_ulong_t arm_r2;
+ compat_ulong_t arm_r3;
+ compat_ulong_t arm_r4;
+ compat_ulong_t arm_r5;
+ compat_ulong_t arm_r6;
+ compat_ulong_t arm_r7;
+ compat_ulong_t arm_r8;
+ compat_ulong_t arm_r9;
+ compat_ulong_t arm_r10;
+ compat_ulong_t arm_fp;
+ compat_ulong_t arm_ip;
+ compat_ulong_t arm_sp;
+ compat_ulong_t arm_lr;
+ compat_ulong_t arm_pc;
+ compat_ulong_t arm_cpsr;
+ compat_ulong_t fault_address;
+};
+
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ compat_uptr_t uc_link;
+ compat_stack_t uc_stack;
+ struct compat_sigcontext uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))];
+ compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct compat_sigframe {
+ struct compat_ucontext uc;
+ compat_ulong_t retcode[2];
+};
+
+struct compat_rt_sigframe {
+ struct compat_siginfo info;
+ struct compat_sigframe sig;
+};
+
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 7e245b9e03a5..7434844036d3 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,9 +12,9 @@
#include <linux/preempt.h>
#include <linux/types.h>
-#ifdef CONFIG_KERNEL_MODE_NEON
+DECLARE_PER_CPU(bool, fpsimd_context_busy);
-DECLARE_PER_CPU(bool, kernel_neon_busy);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
@@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy);
static __must_check inline bool may_use_simd(void)
{
/*
- * kernel_neon_busy is only set while preemption is disabled,
+ * fpsimd_context_busy is only set while preemption is disabled,
* and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy
+ * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
* cannot change under our feet -- if it's set we cannot be
* migrated, and if it's clear we cannot be migrated to a CPU
* where it is set.
*/
return !in_irq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(kernel_neon_busy);
+ !this_cpu_read(fpsimd_context_busy);
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index cd7f7ce1a56a..d0bd4ffcf2c4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -549,6 +549,7 @@
/* id_aa64isar1 */
#define ID_AA64ISAR1_SB_SHIFT 36
+#define ID_AA64ISAR1_FRINTTS_SHIFT 32
#define ID_AA64ISAR1_GPI_SHIFT 28
#define ID_AA64ISAR1_GPA_SHIFT 24
#define ID_AA64ISAR1_LRCPC_SHIFT 20
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 2372e97db29c..180b34ec5965 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk);
* TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
* TIF_SYSCALL_AUDIT - syscall auditing
* TIF_SECCOMP - syscall secure computing
+ * TIF_SYSCALL_EMU - syscall emulation active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_SECCOMP 11
+#define TIF_SYSCALL_EMU 12
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
@@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_SYSCALL_EMU)
#define INIT_THREAD_INFO(tsk) \
{ \
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index c9f8dd421c5f..2a23614198f1 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -22,8 +22,13 @@
#define __NR_compat_exit 1
#define __NR_compat_read 3
#define __NR_compat_write 4
+#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
+#define __NR_compat_clock_getres 247
+#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_gettime64 403
+#define __NR_compat_clock_getres_time64 406
/*
* The following SVCs are ARM private.
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 1f94ec19903c..9c15e0a06301 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -17,6 +17,9 @@
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
+#ifdef CONFIG_COMPAT_VDSO
+#include <generated/vdso32-offsets.h>
+#endif
#define VDSO_SYMBOL(base, name) \
({ \
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
new file mode 100644
index 000000000000..fb60a88b5ed4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __COMPAT_BARRIER_H
+#define __COMPAT_BARRIER_H
+
+#ifndef __ASSEMBLY__
+/*
+ * Warning: This code is meant to be used with
+ * ENABLE_COMPAT_VDSO only.
+ */
+#ifndef ENABLE_COMPAT_VDSO
+#error This header is meant to be used with ENABLE_COMPAT_VDSO only
+#endif
+
+#ifdef dmb
+#undef dmb
+#endif
+
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+
+#if __LINUX_ARM_ARCH__ >= 8
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() dmb(ishld)
+#define aarch32_smp_wmb() dmb(ishst)
+#else
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() aarch32_smp_mb()
+#define aarch32_smp_wmb() dmb(ishst)
+#endif
+
+
+#undef smp_mb
+#undef smp_rmb
+#undef smp_wmb
+
+#define smp_mb() aarch32_smp_mb()
+#define smp_rmb() aarch32_smp_rmb()
+#define smp_wmb() aarch32_smp_wmb()
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __COMPAT_BARRIER_H */
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
new file mode 100644
index 000000000000..f4812777f5c5
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#include <asm/vdso/compat_barrier.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("r1") = _tz;
+ register struct __kernel_old_timeval *tv asm("r0") = _tv;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_gettimeofday;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_gettime64;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_getres_time64;
+
+ /* The checks below are required for ABI consistency with arm */
+ if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
+ return -EINVAL;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that the vDSO is enabled; otherwise,
+ * fall back on the syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent the counter value from
+ * being speculated.
+ */
+ isb();
+ asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent the seq lock from being
+ * speculated.
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ const struct vdso_data *ret;
+
+ /*
+ * This simply puts &_vdso_data into ret. The reason why we don't use
+ * `ret = _vdso_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_data in a register,
+ * it goes through a relocation almost every time _vdso_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __arch_get_vdso_data() is called, and then keep the result in
+ * a register.
+ */
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..b08f476b72b4
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that the vDSO is enabled; otherwise,
+ * fall back on the syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent the counter value from
+ * being speculated.
+ */
+ isb();
+ asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+ /*
+ * This isb() is required to prevent the seq lock from being
+ * speculated.
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline
+const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0c731bfc7c8c
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm64_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+
+static __always_inline
+int __arm64_get_clock_mode(struct timekeeper *tk)
+{
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
+
+ return use_syscall;
+}
+#define __arch_get_clock_mode __arm64_get_clock_mode
+
+static __always_inline
+int __arm64_use_vsyscall(struct vdso_data *vdata)
+{
+ return !vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm64_use_vsyscall
+
+static __always_inline
+void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
+{
+ vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vsyscall __arm64_update_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
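
For orientation: the asm-generic layer included above drives these hooks from
the timekeeping update. A simplified sketch of that control flow, paraphrasing
kernel/vdso/vsyscall.c with the copy step stubbed out as a hypothetical helper:

    #include <linux/timekeeper_internal.h>
    #include <vdso/datapage.h>
    #include <vdso/helpers.h>

    /* Hypothetical stand-in for the generic copy step (update_vdso_data()). */
    static void copy_tk_to_vdso(struct vdso_data *vdata, struct timekeeper *tk);

    static void update_vsyscall_sketch(struct timekeeper *tk)
    {
            struct vdso_data *vdata = __arch_get_k_vdso_data();

            vdso_write_begin(vdata);            /* seq -> odd: readers spin */

            vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
            if (__arch_use_vsyscall(vdata))
                    copy_tk_to_vdso(vdata, tk);

            __arch_update_vsyscall(vdata, tk);  /* arm64: apply precision masks */

            vdso_write_end(vdata);              /* seq -> even: data consistent */
    }
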
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 1a772b162191..a1e72886b30c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -63,5 +63,7 @@
#define HWCAP2_SVEBITPERM (1 << 4)
#define HWCAP2_SVESHA3 (1 << 5)
#define HWCAP2_SVESM4 (1 << 6)
+#define HWCAP2_FLAGM2 (1 << 7)
+#define HWCAP2_FRINT (1 << 8)
#endif /* _UAPI__ASM_HWCAP_H */
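
Userspace discovers these new bits through the auxiliary vector. A small probe,
with fallback defines in case the libc headers predate this patch:

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_FLAGM2
    #define HWCAP2_FLAGM2   (1 << 7)
    #endif
    #ifndef HWCAP2_FRINT
    #define HWCAP2_FRINT    (1 << 8)
    #endif

    int main(void)
    {
            unsigned long hwcap2 = getauxval(AT_HWCAP2);

            printf("flagm2: %s\n", (hwcap2 & HWCAP2_FLAGM2) ? "yes" : "no");
            printf("frint:  %s\n", (hwcap2 & HWCAP2_FRINT) ? "yes" : "no");
            return 0;
    }
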
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index e932284993d4..7ed9294e2004 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -62,6 +62,9 @@
#define PSR_x 0x0000ff00 /* Extension */
#define PSR_c 0x000000ff /* Control */
+/* syscall emulation path in ptrace */
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
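
With these requests wired up, an arm64 tracer can intercept and emulate
syscalls. A minimal sketch (the register inspection and actual emulation a real
tracer would do are elided):

    #include <signal.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #ifndef PTRACE_SYSEMU
    #define PTRACE_SYSEMU 31        /* matches the definition above */
    #endif

    int main(void)
    {
            pid_t pid = fork();

            if (pid == 0) {                         /* tracee */
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP);
                    getpid();                       /* skipped by the kernel */
                    _exit(0);
            }

            waitpid(pid, NULL, 0);                  /* initial SIGSTOP */
            ptrace(PTRACE_SYSEMU, pid, NULL, NULL); /* run to syscall entry */
            waitpid(pid, NULL, 0);
            /*
             * The getpid() above was not executed; a real tracer would now
             * read the regs (PTRACE_GETREGSET), emulate the syscall, and
             * write the result back before resuming.
             */
            ptrace(PTRACE_DETACH, pid, NULL, NULL);
            waitpid(pid, NULL, 0);
            return 0;
    }
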
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9e7dcb2c31c7..478491f07b4f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
- sigreturn32.o sys_compat.o
+ sys_compat.o
+ifneq ($(CONFIG_COMPAT_VDSO), y)
+obj-$(CONFIG_COMPAT) += sigreturn32.o
+endif
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
obj-$(CONFIG_MODULES) += module.o
@@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-y += vdso/ probes/
+obj-$(CONFIG_COMPAT_VDSO) += vdso32/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 2804330c95dc..3a58e9db5cfe 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void)
*/
if (table->revision < 5 ||
(table->revision == 5 && fadt->minor_revision < 1)) {
- pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+ pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n",
table->revision, fadt->minor_revision);
- ret = -EINVAL;
- goto out;
+
+ if (!fadt->arm_boot_flags) {
+ ret = -EINVAL;
+ goto out;
+ }
+ pr_err("FADT has ARM boot flags set, assuming 5.1\n");
}
if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 02f08768c298..214685760e1c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,9 +18,9 @@
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
+#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>
@@ -66,6 +66,11 @@ int main(void)
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
+#ifdef CONFIG_COMPAT
+ DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
+ DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
+ BLANK();
+#endif
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
@@ -80,33 +85,6 @@ int main(void)
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index 880d79904d36..7fa6828bb488 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -17,6 +17,15 @@
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+int cache_line_size(void)
+{
+ if (coherency_max_size != 0)
+ return coherency_max_size;
+
+ return cache_line_size_of_cpu();
+}
+EXPORT_SYMBOL_GPL(cache_line_size);
+
static inline enum cache_type get_cache_type(int level)
{
u64 clidr;
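
One existing consumer of this override is the slab allocator: SLAB_HWCACHE_ALIGN
picks object alignment via cache_line_size(), so a firmware-reported coherency
size now propagates there. A sketch with a hypothetical cache (not part of the
patch):

    #include <linux/init.h>
    #include <linux/slab.h>

    struct example_obj {
            int a, b;
    };

    static struct kmem_cache *example_cache;

    static int __init example_init(void)
    {
            /* Objects come out aligned to cache_line_size(). */
            example_cache = kmem_cache_create("example_obj",
                                              sizeof(struct example_obj), 0,
                                              SLAB_HWCACHE_ALIGN, NULL);
            return example_cache ? 0 : -ENOMEM;
    }
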
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aabdabf52fdb..f29f36a65175 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = {
static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
{
static bool undef_hook_registered = false;
- static DEFINE_SPINLOCK(hook_lock);
+ static DEFINE_RAW_SPINLOCK(hook_lock);
- spin_lock(&hook_lock);
+ raw_spin_lock(&hook_lock);
if (!undef_hook_registered) {
register_undef_hook(&ssbs_emulation_hook);
undef_hook_registered = true;
}
- spin_unlock(&hook_lock);
+ raw_spin_unlock(&hook_lock);
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
@@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
@@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 0593665fc7b4..876055e37352 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -82,6 +82,8 @@ static const char *const hwcap_str[] = {
"svebitperm",
"svesha3",
"svesm4",
+ "flagm2",
+ "frint",
NULL
};
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2df8d0a1d980..9cdc4592da3e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -247,6 +247,7 @@ alternative_else_nop_endif
/*
* Registers that may be useful after this macro is invoked:
*
+ * x20 - ICC_PMR_EL1
* x21 - aborted SP
* x22 - aborted PC
* x23 - aborted PSTATE
@@ -424,6 +425,38 @@ tsk .req x28 // current thread_info
irq_stack_exit
.endm
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ /*
+	 * Set res to 0 if irqs were unmasked in the interrupted context.
+	 * Otherwise set res to a non-zero value.
+ */
+ .macro test_irqs_unmasked res:req, pmr:req
+alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ sub \res, \pmr, #GIC_PRIO_IRQON
+alternative_else
+ mov \res, xzr
+alternative_endif
+ .endm
+#endif
+
+ .macro gic_prio_kentry_setup, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
+ msr_s SYS_ICC_PMR_EL1, \tmp
+ alternative_else_nop_endif
+#endif
+ .endm
+
+ .macro gic_prio_irq_setup, pmr:req, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, \tmp
+ alternative_else_nop_endif
+#endif
+ .endm
+
.text
/*
@@ -602,6 +635,7 @@ el1_dbg:
cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
cinc x24, x24, eq // set bit '0'
tbz x24, #0, el1_inv // EL1 only
+ gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x2, sp // struct pt_regs
bl do_debug_exception
@@ -619,20 +653,18 @@ ENDPROC(el1_sync)
.align 6
el1_irq:
kernel_entry 1
+ gic_prio_irq_setup pmr=x20, tmp=x1
enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- ldr x20, [sp, #S_PMR_SAVE]
-alternative_else
- mov x20, #GIC_PRIO_IRQON
-alternative_endif
- cmp x20, #GIC_PRIO_IRQOFF
- /* Irqs were disabled, don't trace */
- b.ls 1f
+ test_irqs_unmasked res=x0, pmr=x20
+ cbz x0, 1f
+ bl asm_nmi_enter
+1:
#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
-1:
#endif
irq_handler
@@ -651,14 +683,23 @@ alternative_else_nop_endif
bl preempt_schedule_irq // irq en/disable is done inside
1:
#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
/*
- * if IRQs were disabled when we received the interrupt, we have an NMI
- * and we are not re-enabling interrupt upon eret. Skip tracing.
+ * When using IRQ priority masking, we can get spurious interrupts while
+ * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
+ * section with interrupts disabled. Skip tracing in those cases.
*/
- cmp x20, #GIC_PRIO_IRQOFF
- b.ls 1f
+ test_irqs_unmasked res=x0, pmr=x20
+ cbz x0, 1f
+ bl asm_nmi_exit
+1:
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ test_irqs_unmasked res=x0, pmr=x20
+ cbnz x0, 1f
#endif
bl trace_hardirqs_on
1:
@@ -776,6 +817,7 @@ el0_ia:
* Instruction abort handling
*/
mrs x26, far_el1
+ gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
@@ -821,6 +863,7 @@ el0_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
+ gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
@@ -855,11 +898,12 @@ el0_dbg:
* Debug exception handling
*/
tbnz x24, #0, el0_inv // EL0 only
+ gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x1, x25
mov x2, sp
bl do_debug_exception
- enable_daif
+ enable_da_f
ct_user_exit
b ret_to_user
el0_inv:
@@ -876,7 +920,9 @@ ENDPROC(el0_sync)
el0_irq:
kernel_entry 0
el0_irq_naked:
+ gic_prio_irq_setup pmr=x20, tmp=x0
enable_da_f
+
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
@@ -898,6 +944,7 @@ ENDPROC(el0_irq)
el1_error:
kernel_entry 1
mrs x1, esr_el1
+ gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
@@ -908,10 +955,11 @@ el0_error:
kernel_entry 0
el0_error_naked:
mrs x1, esr_el1
+ gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
- enable_daif
+ enable_da_f
ct_user_exit
b ret_to_user
ENDPROC(el0_error)
@@ -932,6 +980,7 @@ work_pending:
*/
ret_to_user:
disable_daif
+ gic_prio_kentry_setup tmp=x3
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
@@ -948,6 +997,7 @@ ENDPROC(ret_to_user)
*/
.align 6
el0_svc:
+ gic_prio_kentry_setup tmp=x1
mov x0, sp
bl el0_svc_handler
b ret_to_user
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 0cfcf5c237c5..eec4776ae5f0 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -82,7 +82,8 @@
* To prevent this from racing with the manipulation of the task's FPSIMD state
* from task context and thereby corrupting the state, it is necessary to
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with local_bh_disable() unless softirqs are already masked.
+ * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
+ * run but prevent them from using FPSIMD.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
+DEFINE_PER_CPU(bool, fpsimd_context_busy);
+EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
+
+static void __get_cpu_fpsimd_context(void)
+{
+ bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
+
+ WARN_ON(busy);
+}
+
+/*
+ * Claim ownership of the CPU FPSIMD context for use by the calling context.
+ *
+ * The caller may freely manipulate the FPSIMD context metadata until
+ * put_cpu_fpsimd_context() is called.
+ *
+ * The double-underscore version must only be called if you know the task
+ * can't be preempted.
+ */
+static void get_cpu_fpsimd_context(void)
+{
+ preempt_disable();
+ __get_cpu_fpsimd_context();
+}
+
+static void __put_cpu_fpsimd_context(void)
+{
+ bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
+
+ WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
+}
+
+/*
+ * Release the CPU FPSIMD context.
+ *
+ * Must be called from a context in which get_cpu_fpsimd_context() was
+ * previously called, with no call to put_cpu_fpsimd_context() in the
+ * meantime.
+ */
+static void put_cpu_fpsimd_context(void)
+{
+ __put_cpu_fpsimd_context();
+ preempt_enable();
+}
+
+static bool have_cpu_fpsimd_context(void)
+{
+ return !preemptible() && __this_cpu_read(fpsimd_context_busy);
+}
+
/*
* Call __sve_free() directly only if you know task can't be scheduled
* or preempted.
@@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task)
* This function should be called only when the FPSIMD/SVE state in
* thread_struct is known to be up to date, when preparing to enter
* userspace.
- *
- * Softirqs (and preemption) must be disabled.
*/
static void task_fpsimd_load(void)
{
- WARN_ON(!in_softirq() && !irqs_disabled());
+ WARN_ON(!have_cpu_fpsimd_context());
if (system_supports_sve() && test_thread_flag(TIF_SVE))
sve_load_state(sve_pffr(&current->thread),
@@ -233,16 +282,14 @@ static void task_fpsimd_load(void)
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
* date with respect to the CPU registers.
- *
- * Softirqs (and preemption) must be disabled.
*/
-void fpsimd_save(void)
+static void fpsimd_save(void)
{
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
- WARN_ON(!in_softirq() && !irqs_disabled());
+ WARN_ON(!have_cpu_fpsimd_context());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
@@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
* task->thread.sve_state.
*
* Task can be a non-runnable task, or current. In the latter case,
- * softirqs (and preemption) must be disabled.
+ * the caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.uw.fpsimd_state must be up to date before calling this
@@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task)
* task->thread.uw.fpsimd_state.
*
* Task can be a non-runnable task, or current. In the latter case,
- * softirqs (and preemption) must be disabled.
+ * the caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
@@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task,
* non-SVE thread.
*/
if (task == current) {
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
}
@@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task,
sve_to_fpsimd(task);
if (task == current)
- local_bh_enable();
+ put_cpu_fpsimd_context();
/*
* Force reallocation of task SVE state to the correct size
@@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
sve_alloc(current);
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
@@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
@@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
+ __get_cpu_fpsimd_context();
+
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
@@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next)
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
wrong_task || wrong_cpu);
+
+ __put_cpu_fpsimd_context();
}
void fpsimd_flush_thread(void)
@@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void)
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
@@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void)
current->thread.sve_vl_onexec = 0;
}
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
@@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void)
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
@@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void)
/*
* Associate current's FPSIMD context with this cpu
- * Preemption must be disabled when calling this function.
+ * The caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
*/
void fpsimd_bind_task_to_cpu(void)
{
@@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void)
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
fpsimd_bind_task_to_cpu();
}
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
@@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
@@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
clear_thread_flag(TIF_FOREIGN_FPSTATE);
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
@@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t)
/*
* Invalidate any task's FPSIMD state that is present on this cpu.
- * This function must be called with softirqs disabled.
+ * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
+ * before calling this function.
*/
-void fpsimd_flush_cpu_state(void)
+static void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
-#ifdef CONFIG_KERNEL_MODE_NEON
+/*
+ * Save the FPSIMD state to memory and invalidate the cpu view.
+ * This function must be called with preemption disabled.
+ */
+void fpsimd_save_and_flush_cpu_state(void)
+{
+ WARN_ON(preemptible());
+ __get_cpu_fpsimd_context();
+ fpsimd_save();
+ fpsimd_flush_cpu_state();
+ __put_cpu_fpsimd_context();
+}
-DEFINE_PER_CPU(bool, kernel_neon_busy);
-EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Kernel-side NEON support functions
@@ -1170,19 +1235,13 @@ void kernel_neon_begin(void)
BUG_ON(!may_use_simd());
- local_bh_disable();
-
- __this_cpu_write(kernel_neon_busy, true);
+ get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
-
- preempt_disable();
-
- local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);
@@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin);
*/
void kernel_neon_end(void)
{
- bool busy;
-
if (!system_supports_fpsimd())
return;
- busy = __this_cpu_xchg(kernel_neon_busy, false);
- WARN_ON(!busy); /* No matching kernel_neon_begin()? */
-
- preempt_enable();
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL(kernel_neon_end);
@@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- fpsimd_save();
- fpsimd_flush_cpu_state();
+ fpsimd_save_and_flush_cpu_state();
break;
case CPU_PM_EXIT:
break;
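
The kernel-mode NEON API itself is unchanged by this rework; only its
implementation now claims and releases the per-cpu FPSIMD context instead of
toggling softirqs. A typical caller still looks like this sketch (helper names
hypothetical):

    #include <linux/types.h>
    #include <asm/neon.h>
    #include <asm/simd.h>

    void do_accelerated_op(void *dst, const void *src, size_t len);
    void do_scalar_op(void *dst, const void *src, size_t len);

    static void crypto_like_helper(void *dst, const void *src, size_t len)
    {
            if (may_use_simd()) {
                    kernel_neon_begin();    /* saves task state, claims ctx */
                    do_accelerated_op(dst, src, len);
                    kernel_neon_end();      /* releases ctx, preempt enabled */
            } else {
                    do_scalar_op(dst, src, len);
            }
    }
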
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 04ca08086d35..2b85c0d6fa3d 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -67,7 +67,11 @@
#ifdef CONFIG_EFI
-__efistub_stext_offset = stext - _text;
+/*
+ * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol:
+ * https://github.com/ClangBuiltLinux/linux/issues/561
+ */
+__efistub_stext_offset = ABSOLUTE(stext - _text);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index c70034fbd4ce..04a327ccf84d 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -16,8 +16,10 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
+#include <linux/kprobes.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
unsigned long irq_err_count;
@@ -64,4 +66,28 @@ void __init init_IRQ(void)
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
+
+ if (system_uses_irq_prio_masking()) {
+ /*
+ * Now that we have a stack for our IRQ handler, set
+ * the PMR/PSR pair to a consistent state.
+ */
+ WARN_ON(read_sysreg(daif) & PSR_A_BIT);
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ }
+}
+
+/*
+ * Stubs to make nmi_enter/exit() code callable from ASM
+ */
+asmlinkage void notrace asm_nmi_enter(void)
+{
+ nmi_enter();
+}
+NOKPROBE_SYMBOL(asm_nmi_enter);
+
+asmlinkage void notrace asm_nmi_exit(void)
+{
+ nmi_exit();
}
+NOKPROBE_SYMBOL(asm_nmi_exit);
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e23a68a5808f..46e643e30708 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -21,6 +21,7 @@
void *module_alloc(unsigned long size)
{
+ u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
gfp_t gfp_mask = GFP_KERNEL;
void *p;
@@ -28,9 +29,12 @@ void *module_alloc(unsigned long size)
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
gfp_mask |= __GFP_NOWARN;
+ if (IS_ENABLED(CONFIG_KASAN))
+ /* don't exceed the static module region - see below */
+ module_alloc_end = MODULES_END;
+
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_base + MODULES_VSIZE,
- gfp_mask, PAGE_KERNEL_EXEC, 0,
+ module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
@@ -46,7 +50,7 @@ void *module_alloc(unsigned long size)
*/
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
module_alloc_base + SZ_2G, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 88ce502c8e6f..bd5dfffca272 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -122,8 +122,10 @@ void *alloc_insn_page(void)
void *page;
page = vmalloc_exec(PAGE_SIZE);
- if (page)
+ if (page) {
set_memory_ro((unsigned long)page, 1);
+ set_vm_flush_reset_perms(page);
+ }
return page;
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 9856395ccdb7..6a869d9f304f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void)
* be raised.
*/
pmr = gic_read_pmr();
- gic_write_pmr(GIC_PRIO_IRQON);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
__cpu_do_idle();
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index da2441d7b066..3cf3b135027e 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs,
int syscall_trace_enter(struct pt_regs *regs)
{
- if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (test_thread_flag(TIF_SYSCALL_TRACE) ||
+ test_thread_flag(TIF_SYSCALL_EMU)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+ if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU))
+ return -1;
+ }
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 331d1e5acad4..12a585386c2f 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -18,42 +18,7 @@
#include <asm/traps.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
-
-struct compat_sigcontext {
- /* We always set these two fields to 0 */
- compat_ulong_t trap_no;
- compat_ulong_t error_code;
-
- compat_ulong_t oldmask;
- compat_ulong_t arm_r0;
- compat_ulong_t arm_r1;
- compat_ulong_t arm_r2;
- compat_ulong_t arm_r3;
- compat_ulong_t arm_r4;
- compat_ulong_t arm_r5;
- compat_ulong_t arm_r6;
- compat_ulong_t arm_r7;
- compat_ulong_t arm_r8;
- compat_ulong_t arm_r9;
- compat_ulong_t arm_r10;
- compat_ulong_t arm_fp;
- compat_ulong_t arm_ip;
- compat_ulong_t arm_sp;
- compat_ulong_t arm_lr;
- compat_ulong_t arm_pc;
- compat_ulong_t arm_cpsr;
- compat_ulong_t fault_address;
-};
-
-struct compat_ucontext {
- compat_ulong_t uc_flags;
- compat_uptr_t uc_link;
- compat_stack_t uc_stack;
- struct compat_sigcontext uc_mcontext;
- compat_sigset_t uc_sigmask;
- int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
- compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
-};
+#include <asm/vdso.h>
struct compat_vfp_sigframe {
compat_ulong_t magic;
@@ -81,16 +46,6 @@ struct compat_aux_sigframe {
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-struct compat_sigframe {
- struct compat_ucontext uc;
- compat_ulong_t retcode[2];
-};
-
-struct compat_rt_sigframe {
- struct compat_siginfo info;
- struct compat_sigframe sig;
-};
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
@@ -387,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
+#ifdef CONFIG_COMPAT_VDSO
+ void *vdso_base = current->mm->context.vdso;
+ void *vdso_trampoline;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_arm);
+ }
+ } else {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_arm);
+ }
+ }
+
+ retcode = ptr_to_compat(vdso_trampoline) + thumb;
+#else
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
@@ -394,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = (unsigned long)current->mm->context.vdso +
(idx << 2) + thumb;
+#endif
}
regs->regs[0] = usig;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 3e53ffa07994..f5b04dd8a710 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -27,7 +27,7 @@
* aff0 = mpidr_masked & 0xff;
* aff1 = mpidr_masked & 0xff00;
* aff2 = mpidr_masked & 0xff0000;
- * aff2 = mpidr_masked & 0xff00000000;
+ * aff3 = mpidr_masked & 0xff00000000;
* dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
*}
* Input registers: rs0, rs1, rs2, rs3, mpidr, mask
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6dcf9607d770..9286ee6749e8 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -181,11 +181,7 @@ static void init_gic_priority_masking(void)
WARN_ON(!(cpuflags & PSR_I_BIT));
- gic_write_pmr(GIC_PRIO_IRQOFF);
-
- /* We can only unmask PSR.I if we can take aborts */
- if (!(cpuflags & PSR_A_BIT))
- write_sysreg(cpuflags & ~PSR_I_BIT, daif);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}
/*
@@ -834,18 +830,23 @@ void arch_irq_work_raise(void)
}
#endif
-/*
- * ipi_cpu_stop - handle IPI from smp_send_stop()
- */
-static void ipi_cpu_stop(unsigned int cpu)
+static void local_cpu_stop(void)
{
- set_cpu_online(cpu, false);
+ set_cpu_online(smp_processor_id(), false);
local_daif_mask();
sdei_mask_local_cpu();
+ cpu_park_loop();
+}
- while (1)
- cpu_relax();
+/*
+ * We need to implement panic_smp_self_stop() for parallel panic() calls, so
+ * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
+ * CPUs that have already stopped themselves.
+ */
+void panic_smp_self_stop(void)
+{
+ local_cpu_stop();
}
#ifdef CONFIG_KEXEC_CORE
@@ -898,7 +899,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
case IPI_CPU_STOP:
irq_enter();
- ipi_cpu_stop(cpu);
+ local_cpu_stop();
irq_exit();
break;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 985721a1264c..678af745d881 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where)
printk(" %pS\n", (void *)where);
}
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
+ if (user_mode(regs))
+ return;
+
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = get_user(val, &((u32 *)addr)[i]);
+ bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
break;
}
}
- printk("%sCode: %s\n", lvl, str);
-}
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
- if (!user_mode(regs)) {
- mm_segment_t fs = get_fs();
- set_fs(KERNEL_DS);
- __dump_instr(lvl, regs);
- set_fs(fs);
- } else {
- __dump_instr(lvl, regs);
- }
+ printk("%sCode: %s\n", lvl, str);
}
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
@@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs)
print_modules();
show_regs(regs);
- if (!user_mode(regs))
- dump_instr(KERN_EMERG, regs);
+ dump_kernel_instr(KERN_EMERG, regs);
return ret;
}
@@ -242,16 +233,16 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr,
{
arm64_show_signal(signo, str);
if (signo == SIGKILL)
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
else
- force_sig_fault(signo, code, addr, current);
+ force_sig_fault(signo, code, addr);
}
void arm64_force_sig_mceerr(int code, void __user *addr, short lsb,
const char *str)
{
arm64_show_signal(SIGBUS, str);
- force_sig_mceerr(code, addr, lsb, current);
+ force_sig_mceerr(code, addr, lsb);
}
void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 663b166241d0..354b11e27c07 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -20,41 +20,212 @@
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages __ro_after_init;
+#ifdef CONFIG_COMPAT_VDSO
+extern char vdso32_start[], vdso32_end[];
+#endif /* CONFIG_COMPAT_VDSO */
+
+/* vdso_lookup arch_index */
+enum arch_vdso_type {
+ ARM64_VDSO = 0,
+#ifdef CONFIG_COMPAT_VDSO
+ ARM64_VDSO32 = 1,
+#endif /* CONFIG_COMPAT_VDSO */
+};
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_TYPES (ARM64_VDSO32 + 1)
+#else
+#define VDSO_TYPES (ARM64_VDSO + 1)
+#endif /* CONFIG_COMPAT_VDSO */
+
+struct __vdso_abi {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
+ {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+ },
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "vdso32",
+ .vdso_code_start = vdso32_start,
+ .vdso_code_end = vdso32_end,
+ },
+#endif /* CONFIG_COMPAT_VDSO */
+};
/*
* The vDSO data page.
*/
static union {
- struct vdso_data data;
+ struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+static int __vdso_remap(enum arch_vdso_type arch_index,
+ const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start;
+
+ if (vdso_size != new_size)
+ return -EINVAL;
+
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
+static int __vdso_init(enum arch_vdso_type arch_index)
+{
+ int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
+
+ if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
+ }
+
+ vdso_lookup[arch_index].vdso_pages = (
+ vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start) >>
+ PAGE_SHIFT;
+
+ /* Allocate the vDSO pagelist, plus a page for the data. */
+ vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ return -ENOMEM;
+
+ /* Grab the vDSO data page. */
+ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
+
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+
+ for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
+ vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+
+ vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
+ vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+
+ return 0;
+}
+
+static int __setup_additional_pages(enum arch_vdso_type arch_index,
+ struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp)
+{
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
+
+ vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
+ }
+
+ ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_MAYREAD,
+ vdso_lookup[arch_index].dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ vdso_base += PAGE_SIZE;
+ mm->context.vdso = (void *)vdso_base;
+ ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_lookup[arch_index].cm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ return 0;
+
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
+}
#ifdef CONFIG_COMPAT
/*
* Create and map the vectors page for AArch32 tasks.
*/
+#ifdef CONFIG_COMPAT_VDSO
+static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+}
+#endif /* CONFIG_COMPAT_VDSO */
+
+/*
+ * aarch32_vdso_pages:
+ * 0 - kuser helpers
+ * 1 - sigreturn code
+ * or (CONFIG_COMPAT_VDSO):
+ * 0 - kuser helpers
+ * 1 - vdso data
+ * 2 - vdso code
+ */
#define C_VECTORS 0
+#ifdef CONFIG_COMPAT_VDSO
+#define C_VVAR 1
+#define C_VDSO 2
+#define C_PAGES (C_VDSO + 1)
+#else
#define C_SIGPAGE 1
#define C_PAGES (C_SIGPAGE + 1)
+#endif /* CONFIG_COMPAT_VDSO */
static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
+static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
{
.name = "[vectors]", /* ABI */
.pages = &aarch32_vdso_pages[C_VECTORS],
},
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "[vvar]",
+ },
+ {
+ .name = "[vdso]",
+ .mremap = aarch32_vdso_mremap,
+ },
+#else
{
.name = "[sigpage]", /* ABI */
.pages = &aarch32_vdso_pages[C_SIGPAGE],
},
+#endif /* CONFIG_COMPAT_VDSO */
};
static int aarch32_alloc_kuser_vdso_page(void)
@@ -77,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void)
return 0;
}
-static int __init aarch32_alloc_vdso_pages(void)
+#ifdef CONFIG_COMPAT_VDSO
+static int __aarch32_alloc_vdso_pages(void)
+{
+ int ret;
+
+ vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
+ vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
+
+ ret = __vdso_init(ARM64_VDSO32);
+ if (ret)
+ return ret;
+
+ ret = aarch32_alloc_kuser_vdso_page();
+ if (ret) {
+ unsigned long c_vvar =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]);
+ unsigned long c_vdso =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]);
+
+ free_page(c_vvar);
+ free_page(c_vdso);
+ }
+
+ return ret;
+}
+#else
+static int __aarch32_alloc_vdso_pages(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
@@ -98,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void)
return ret;
}
+#endif /* CONFIG_COMPAT_VDSO */
+
+static int __init aarch32_alloc_vdso_pages(void)
+{
+ return __aarch32_alloc_vdso_pages();
+}
arch_initcall(aarch32_alloc_vdso_pages);
static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
@@ -119,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
return PTR_ERR_OR_ZERO(ret);
}
+#ifndef CONFIG_COMPAT_VDSO
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
unsigned long addr;
@@ -146,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm)
out:
return PTR_ERR_OR_ZERO(ret);
}
+#endif /* !CONFIG_COMPAT_VDSO */
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
@@ -159,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (ret)
goto out;
+#ifdef CONFIG_COMPAT_VDSO
+ ret = __setup_additional_pages(ARM64_VDSO32,
+ mm,
+ bprm,
+ uses_interp);
+#else
ret = aarch32_sigreturn_setup(mm);
+#endif /* CONFIG_COMPAT_VDSO */
out:
up_write(&mm->mmap_sem);
@@ -170,18 +382,18 @@ out:
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_end - vdso_start;
-
- if (vdso_size != new_size)
- return -EINVAL;
-
- current->mm->context.vdso = (void *)new_vma->vm_start;
-
- return 0;
+ return __vdso_remap(ARM64_VDSO, sm, new_vma);
}
-static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+/*
+ * aarch64_vdso_pages:
+ * 0 - vvar
+ * 1 - vdso
+ */
+#define A_VVAR 0
+#define A_VDSO 1
+#define A_PAGES (A_VDSO + 1)
+static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
{
.name = "[vvar]",
},
@@ -193,37 +405,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
static int __init vdso_init(void)
{
- int i;
- struct page **vdso_pagelist;
- unsigned long pfn;
-
- if (memcmp(vdso_start, "\177ELF", 4)) {
- pr_err("vDSO is not a valid ELF object!\n");
- return -EINVAL;
- }
-
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- if (vdso_pagelist == NULL)
- return -ENOMEM;
-
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
- /* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_start);
-
- for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
+ vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
- vdso_spec[0].pages = &vdso_pagelist[0];
- vdso_spec[1].pages = &vdso_pagelist[1];
-
- return 0;
+ return __vdso_init(ARM64_VDSO);
}
arch_initcall(vdso_init);
@@ -231,84 +416,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
- void *ret;
-
- vdso_text_len = vdso_pages << PAGE_SHIFT;
- /* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ int ret;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- ret = ERR_PTR(vdso_base);
- goto up_fail;
- }
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
- &vdso_spec[0]);
- if (IS_ERR(ret))
- goto up_fail;
-
- vdso_base += PAGE_SIZE;
- mm->context.vdso = (void *)vdso_base;
- ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_spec[1]);
- if (IS_ERR(ret))
- goto up_fail;
+ ret = __setup_additional_pages(ARM64_VDSO,
+ mm,
+ bprm,
+ uses_interp);
up_write(&mm->mmap_sem);
- return 0;
-
-up_fail:
- mm->context.vdso = NULL;
- up_write(&mm->mmap_sem);
- return PTR_ERR(ret);
-}
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- ++vdso_data->tb_seq_count;
- smp_wmb();
-
- vdso_data->use_syscall = use_syscall;
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift;
- vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-
- /* Read without the seqlock held by clock_getres() */
- WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
- if (!use_syscall) {
- /* tkr_mono.cycle_last == tkr_raw.cycle_last */
- vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_sec;
- vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mono_mult = tk->tkr_mono.mult;
- vdso_data->cs_raw_mult = tk->tkr_raw.mult;
- /* tkr_mono.shift == tkr_raw.shift */
- vdso_data->cs_shift = tk->tkr_mono.shift;
- }
-
- smp_wmb();
- ++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ return ret;
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index fa230ff09aa1..4ab863045188 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -6,7 +6,12 @@
# Heavily based on the vDSO Makefiles for other archs.
#
-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of the generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -15,6 +20,31 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
--build-id -n -T
+ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+KCOV_INSTRUMENT := n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+ ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+ CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+ endif
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
@@ -28,6 +58,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,ld)
+ $(call if_changed,vdso_check)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,13 +73,9 @@ quiet_cmd_vdsosym = VDSOSYM $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
- cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+ cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index 80f780f56e0d..e69de29bb2d1 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -1,323 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- /*
- * Fake address dependency from the value computed from the counter
- * register to subsequent data page accesses so that the sequence
- * locking also orders the read of the counter.
- */
- and x_tmp, \res, xzr
- add vdso_data, vdso_data, x_tmp
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=1b
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=realtime
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic_raw
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- adr vdso_data, _vdso_data
- ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 5f
-2:
- cbz x1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..747635501a14
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
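
The net effect of the rewrite is invisible to callers: the usual libc path
still works, now backed by the generic C implementation. A userspace smoke
test, where clock_gettime() is normally satisfied by __kernel_clock_gettime in
the vDSO without entering the kernel:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
                    perror("clock_gettime");
                    return 1;
            }
            printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }
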
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
new file mode 100644
index 000000000000..4fea950fa5ed
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+vdso.lds
+vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..288c14d30b45
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for vdso32
+#
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
+include $(srctree)/lib/vdso/Makefile
+
+COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+
+# Same as cc-*option, but using COMPATCC instead of CC
+cc32-option = $(call try-run,\
+ $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-disable-warning = $(call try-run,\
+ $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+cc32-ldoption = $(call try-run,\
+ $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# We cannot use the global flags to compile the vDSO files, the main reason
+# being that the 32-bit compiler may be older than the main (64-bit) compiler
+# and therefore may not understand flags set using $(cc-option ...). Besides,
+# arch-specific options should be taken from the arm Makefile instead of the
+# arm64 one.
+# As a result we set our own flags here.
+
+# From top-level Makefile
+# NOSTDINC_FLAGS
+VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+VDSO_CPPFLAGS += $(LINUXINCLUDE)
+VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
+
+# Common C and assembly flags
+# From top-level Makefile
+VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
+ifdef CONFIG_DEBUG_INFO
+VDSO_CAFLAGS += -g
+endif
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
+VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
+# From arm Makefile
+VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
+VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+VDSO_CAFLAGS += -mbig-endian
+else
+VDSO_CAFLAGS += -mlittle-endian
+endif
+
+# From arm vDSO Makefile
+VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
+VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
+# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
+# fall back to v7. There is no easy way to check which architecture the code
+# is being compiled for, so define a macro specifying it (see arch/arm/Makefile).
+VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
+ -march=armv7-a -D__LINUX_ARM_ARCH__=7)
+
+VDSO_CFLAGS := $(VDSO_CAFLAGS)
+VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
+# KBUILD_CFLAGS from top-level Makefile
+VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -fno-common \
+ -Werror-implicit-function-declaration \
+ -Wno-format-security \
+ -std=gnu89
+VDSO_CFLAGS += -O2
+# Some useful compiler-dependent flags from top-level Makefile
+VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
+VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
+VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
+VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
+
+# The 32-bit compiler does not provide 128-bit integers, which are used in
+# some headers that are indirectly included from the vDSO code.
+# This hack makes the compiler happy and should trigger a warning/error if
+# variables of such type are referenced.
+VDSO_CFLAGS += -D__uint128_t='void*'
+# Silence some warnings coming from headers that operate on longs
+# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
+VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
+VDSO_CFLAGS += -Wno-int-to-pointer-cast
+
+VDSO_AFLAGS := $(VDSO_CAFLAGS)
+VDSO_AFLAGS += -D__ASSEMBLY__
+
+VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
+# From arm vDSO Makefile
+VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+
+
+# Borrow vdsomunge.c from the arm vDSO
+# We have to use a relative path because scripts/Makefile.host prefixes
+# $(hostprogs-y) with $(obj)
+munge := ../../../arm/vdso/vdsomunge
+hostprogs-y := $(munge)
+
+c-obj-vdso := note.o
+c-obj-vdso-gettimeofday := vgettimeofday.o
+asm-obj-vdso := sigreturn.o
+
+ifneq ($(c-gettimeofday-y),)
+VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
+endif
+
+VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+
+# Build rules
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
+c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
+asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
+obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (vdso.s includes vdso.so through incbin)
+$(obj)/vdso.o: $(obj)/vdso.so
+
+include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+# Strip rule for vdso.so
+$(obj)/vdso.so: OBJCOPYFLAGS := -S
+$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+ $(call if_changed,vdsomunge)
+
+# Link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold)
+ $(call if_changed,vdso_check)
+
+# Compilation rules for the vDSO sources
+$(c-obj-vdso): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+$(c-obj-vdso-gettimeofday): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc_gettimeofday)
+$(asm-obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSOL $@
+ cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
+ cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+quiet_cmd_vdsoas = VDSOA $@
+ cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+
+quiet_cmd_vdsomunge = MUNGE $@
+ cmd_vdsomunge = $(obj)/$(munge) $< $@
+
+# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
+gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+# The AArch64 nm should be able to read an AArch32 binary
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+# Install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
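
One Makefile detail worth spelling out is the -D__uint128_t='void*' line: the 32-bit compiler has no 128-bit integer type, so the type name is rewritten to a pointer, which keeps declarations in indirectly-included headers compiling while turning any real use into a build error. A compile-time illustration of the effect (the macro here mimics what the command line injects):

#define __uint128_t void *	/* what -D__uint128_t='void*' injects */

__uint128_t opaque_slot;	/* a bare declaration still compiles */

/* Any arithmetic use now fails to build, e.g.:
 *	__uint128_t twice = opaque_slot * 2;	// error: invalid operands
 * which is exactly the tripwire the Makefile comment asks for.
 */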
diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c
new file mode 100644
index 000000000000..eff5bf9efb8b
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/note.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012-2018 ARM Limited
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+#include <linux/build-salt.h>
+
+ELFNOTE32("Linux", 0, LINUX_VERSION_CODE);
+BUILD_SALT;
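
ELFNOTE32() and BUILD_SALT expand to entries in the .note.* sections, which the linker script below gathers into a PT_NOTE segment. For reference, a small hedged sketch of how a consumer walks such a segment; the 4-byte name/descriptor alignment follows the ELF spec for 32-bit notes, and bounds checks are kept minimal for brevity:

#include <elf.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const void *find_note(const void *notes, size_t len,
			     const char *owner, uint32_t type)
{
	const uint8_t *p = notes, *end = p + len;

	while (p + sizeof(Elf32_Nhdr) <= end) {
		const Elf32_Nhdr *nh = (const Elf32_Nhdr *)p;
		const char *name = (const char *)(nh + 1);
		const uint8_t *desc = (const uint8_t *)name +
				      ((nh->n_namesz + 3) & ~3u);

		if (nh->n_type == type && !strcmp(name, owner))
			return desc;			/* payload found */
		p = desc + ((nh->n_descsz + 3) & ~3u);	/* next note */
	}
	return NULL;
}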
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
new file mode 100644
index 000000000000..1a81277c2d09
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/sigreturn.S
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file provides both A32 and T32 versions, in accordance with the
+ * arm sigreturn code.
+ *
+ * Copyright (C) 2018 ARM Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+#define ARM_ENTRY(name) \
+ ENTRY(name)
+
+#define ARM_ENDPROC(name) \
+ .type name, %function; \
+ END(name)
+
+ .text
+
+ .arm
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_arm)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_arm)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_arm)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_arm)
+
+ .thumb
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_thumb)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_thumb)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_thumb)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S
new file mode 100644
index 000000000000..e72ac7bc4c04
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+ .globl vdso32_start, vdso32_end
+ .section .rodata
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/arm64/kernel/vdso32/vdso.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
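
The .incbin directive embeds the fully linked 32-bit vdso.so into the 64-bit kernel image as read-only data, page-aligned at both ends. On the C side the two labels behave like an array, roughly as in this sketch (the PAGE_SHIFT value is assumed; the real mapping code lives in arch/arm64/kernel/vdso.c):

#define PAGE_SHIFT 12		/* assumed: 4K pages */

extern char vdso32_start[], vdso32_end[];	/* the .globl labels above */

static unsigned long vdso32_pages(void)
{
	return (unsigned long)(vdso32_end - vdso32_start) >> PAGE_SHIFT;
}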
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
new file mode 100644
index 000000000000..a3944927eaeb
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ * Heavily based on the vDSO linker scripts for other archs.
+ *
+ * Copyright (C) 2012-2018 ARM Limited
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+ PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+
+ .text : { *(.text*) } :text =0xe7f001f2
+
+ .got : { *(.got) }
+ .rel.plt : { *(.rel.plt) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+}
+
+VERSION
+{
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ __kernel_sigreturn_arm;
+ __kernel_sigreturn_thumb;
+ __kernel_rt_sigreturn_arm;
+ __kernel_rt_sigreturn_thumb;
+ __vdso_clock_gettime64;
+ local: *;
+ };
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
+VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
+VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
+VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
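
The linker script keeps .hash/.dynsym/.dynstr because that is how userspace resolves the __vdso_* entry points: the C library locates the vDSO image through the auxiliary vector and performs ordinary dynamic-symbol lookup inside it. A minimal, hedged userspace sketch of the first step (a 32-bit process is assumed, hence Elf32):

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* AT_SYSINFO_EHDR points at the in-memory ELF header of the vDSO */
	const Elf32_Ehdr *vdso =
		(const Elf32_Ehdr *)getauxval(AT_SYSINFO_EHDR);

	if (!vdso)
		return 1;	/* no vDSO mapped */

	printf("vDSO at %p, %u program headers\n",
	       (const void *)vdso, (unsigned int)vdso->e_phnum);
	/* symbol lookup would continue via .hash/.dynsym, as usual */
	return 0;
}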
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
new file mode 100644
index 000000000000..54fc1c2ce93f
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 compat userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock,
+ struct old_timespec32 *ts)
+{
+ /* The check below is required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ /* The check below is required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+ struct old_timespec32 *res)
+{
+ /* The check below is required for ABI consistency with arm */
+ if ((u32)res >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_getres_time32(clock_id, res);
+}
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
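
Two things in this file deserve a note. The TASK_SIZE_32 comparisons reject kernel-range pointers with -EFAULT before the generic code runs, matching what the arm syscall path would do, so the compat ABI stays bit-for-bit compatible. And __vdso_clock_gettime64() exists because the legacy 32-bit timespec overflows in 2038; a hedged sketch of the two layouts (the definitions here are illustrative, the names mirror the kernel's):

#include <stdint.h>

struct old_timespec32_sketch {
	int32_t tv_sec;		/* wraps on 2038-01-19 (the y2038 problem) */
	int32_t tv_nsec;
};

struct kernel_timespec_sketch {
	int64_t tv_sec;		/* 64 bits even for 32-bit userspace */
	long long tv_nsec;
};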
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 6e3c9c8b2df9..525010504f9d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
- /* Clean guest FP state to memory and invalidate cpu view */
- fpsimd_save();
- fpsimd_flush_cpu_state();
+ fpsimd_save_and_flush_cpu_state();
if (guest_has_sve)
*guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index c2afa7982047..dfd626447482 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -208,7 +208,7 @@ out:
#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
-#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
+#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
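
The !! added to vq_present() matters because the raw form evaluates to the mask bit itself (0, 2, 8, ...) rather than 0/1, while the caller compares the result against a boolean. A standalone demonstration of the miscompare and the fix:

#include <assert.h>
#include <stdint.h>

#define vq_mask_bit(vq)	((uint64_t)1 << (vq))

int main(void)
{
	uint64_t vqs = vq_mask_bit(3);	/* vector length 3 marked present */
	int present = 1;

	/* old macro shape: yields 8, which is != 1, a bogus mismatch */
	assert((vqs & vq_mask_bit(3)) != (uint64_t)present);

	/* fixed macro shape: !! normalises to 0/1, comparison now agrees */
	assert(!!(vqs & vq_mask_bit(3)) == present);
	return 0;
}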
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index b0041812bca9..58f281b6ca4a 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
* Naturally, we want to avoid this.
*/
if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
dsb(sy);
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 5992eb9a9a08..1d17dbeafe76 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -80,10 +80,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
static int __init arm64_dma_init(void)
{
- WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
- TAINT_CPU_OUT_OF_SPEC,
- "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
- ARCH_DMA_MINALIGN, cache_line_size());
return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
@@ -461,6 +457,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
+ int cls = cache_line_size_of_cpu();
+
+ WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
+ TAINT_CPU_OUT_OF_SPEC,
+ "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+ dev_driver_string(dev), dev_name(dev),
+ ARCH_DMA_MINALIGN, cls);
+
dev->dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2d115016feb4..c8c61b1eb479 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags,
- struct task_struct *tsk)
+ unsigned int mm_flags, unsigned long vm_flags)
{
- struct vm_area_struct *vma;
- vm_fault_t fault;
+ struct vm_area_struct *vma = find_vma(mm, addr);
- vma = find_vma(mm, addr);
- fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > addr))
- goto check_stack;
+ return VM_FAULT_BADMAP;
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
*/
-good_area:
+ if (unlikely(vma->vm_start > addr)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return VM_FAULT_BADMAP;
+ if (expand_stack(vma, addr))
+ return VM_FAULT_BADMAP;
+ }
+
/*
* Check that the permissions on the VMA allow for the fault which
* occurred.
*/
- if (!(vma->vm_flags & vm_flags)) {
- fault = VM_FAULT_BADACCESS;
- goto out;
- }
-
+ if (!(vma->vm_flags & vm_flags))
+ return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
- goto good_area;
-out:
- return fault;
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
+/*
+ * Note: this is not valid for EL1 DC IVAC, but we never use that
+ * instruction in a way that should fault. EL0 cannot issue DC IVAC (undef).
+ */
+static bool is_write_abort(unsigned int esr)
+{
+ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
- struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
vm_fault_t fault, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (notify_page_fault(regs, esr))
return 0;
- tsk = current;
- mm = tsk->mm;
-
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
@@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
- } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+ mm_flags |= FAULT_FLAG_INSTRUCTION;
+ } else if (is_write_abort(esr)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
@@ -492,12 +489,14 @@ retry:
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) && !search_exception_tables(regs->pc))
+ if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+ up_read(&mm->mmap_sem);
goto no_context;
+ }
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
major |= fault & VM_FAULT_MAJOR;
if (fault & VM_FAULT_RETRY) {
@@ -537,11 +536,11 @@ retry:
* that point.
*/
if (major) {
- tsk->maj_flt++;
+ current->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
} else {
- tsk->min_flt++;
+ current->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
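
Factoring out is_write_abort() also documents a subtlety: ESR_ELx.WnR reports cache-maintenance operations as writes, so the CM bit has to veto it. Restated as a standalone sketch with the ARMv8 bit positions written out:

#include <stdbool.h>
#include <stdint.h>

#define ESR_WNR	(1u << 6)	/* Write-not-Read, per the ESR_ELx layout */
#define ESR_CM	(1u << 8)	/* abort came from cache maintenance */

static bool is_write_abort_sketch(uint32_t esr)
{
	/* a "write" only counts if it is not a cache-maintenance op */
	return (esr & ESR_WNR) && !(esr & ESR_CM);
}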
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f475e54fbc43..bbeb6a5a6ba6 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (sz == PUD_SIZE) {
ptep = (pte_t *)pudp;
- } else if (sz == (PAGE_SIZE * CONT_PTES)) {
+ } else if (sz == (CONT_PTE_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
@@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
ptep = huge_pmd_share(mm, addr, pudp);
else
ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
- } else if (sz == (PMD_SIZE * CONT_PMDS)) {
+ } else if (sz == (CONT_PMD_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
return (pte_t *)pmdp;
@@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void)
#ifdef CONFIG_ARM64_4K_PAGES
add_huge_page_size(PUD_SIZE);
#endif
- add_huge_page_size(PMD_SIZE * CONT_PMDS);
+ add_huge_page_size(CONT_PMD_SIZE);
add_huge_page_size(PMD_SIZE);
- add_huge_page_size(PAGE_SIZE * CONT_PTES);
+ add_huge_page_size(CONT_PTE_SIZE);
return 0;
}
@@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt)
#ifdef CONFIG_ARM64_4K_PAGES
case PUD_SIZE:
#endif
- case PMD_SIZE * CONT_PMDS:
+ case CONT_PMD_SIZE:
case PMD_SIZE:
- case PAGE_SIZE * CONT_PTES:
+ case CONT_PTE_SIZE:
add_huge_page_size(ps);
return 1;
}
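
CONT_PTE_SIZE and CONT_PMD_SIZE are just the spelled-out products the old code computed inline; with 4K pages, 16 contiguous PTEs give 64K and 16 contiguous PMDs give 32M. The arithmetic, checked at compile time (values per the 4K-page configuration; 16K/64K pages use different constants):

#define PAGE_SIZE_4K	(4UL << 10)
#define PMD_SIZE_4K	(2UL << 20)
#define CONT_PTES	16
#define CONT_PMDS	16

_Static_assert(CONT_PTES * PAGE_SIZE_4K == (64UL << 10),
	       "CONT_PTE_SIZE is 64K with 4K pages");
_Static_assert(CONT_PMDS * PMD_SIZE_4K == (32UL << 20),
	       "CONT_PMD_SIZE is 32M with 4K pages");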
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 749c9b269f08..f3c795278def 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+#ifdef CONFIG_ZONE_DMA32
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+#endif
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init_nodes(max_zone_pfns);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e5ae8663f230..3645f29bd814 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
return 0;
}
-#endif /* CONFIG_ARM64_64K_PAGES */
+#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
@@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void)
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
- pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
- pgprot_val(mk_sect_prot(prot)));
- pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
+ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
pud_val(new_pud)))
return 0;
- BUG_ON(phys & ~PUD_MASK);
+ VM_BUG_ON(phys & ~PUD_MASK);
set_pud(pudp, new_pud);
return 1;
}
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
- pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
- pgprot_val(mk_sect_prot(prot)));
- pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
+ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
pmd_val(new_pmd)))
return 0;
- BUG_ON(phys & ~PMD_MASK);
+ VM_BUG_ON(phys & ~PMD_MASK);
set_pmd(pmdp, new_pmd);
return 1;
}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 47b057bfa803..fcdcf6cd7677 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -151,17 +151,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable)
__pgprot(PTE_VALID));
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ struct page_change_data data = {
+ .set_mask = __pgprot(0),
+ .clear_mask = __pgprot(PTE_VALID),
+ };
+
+ if (!rodata_full)
+ return 0;
+
+ return apply_to_page_range(&init_mm,
+ (unsigned long)page_address(page),
+ PAGE_SIZE, change_page_range, &data);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ struct page_change_data data = {
+ .set_mask = __pgprot(PTE_VALID | PTE_WRITE),
+ .clear_mask = __pgprot(PTE_RDONLY),
+ };
+
+ if (!rodata_full)
+ return 0;
+
+ return apply_to_page_range(&init_mm,
+ (unsigned long)page_address(page),
+ PAGE_SIZE, change_page_range, &data);
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
+ if (!debug_pagealloc_enabled() && !rodata_full)
+ return;
+
set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
-#ifdef CONFIG_HIBERNATION
+
/*
- * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function
- * is used to determine if a linear map page has been marked as not-valid by
- * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit.
- * This is based on kern_addr_valid(), which almost does what we need.
+ * This function is used to determine if a linear map page has been marked as
+ * not-valid. Walk the page table and check the PTE_VALID bit. This is based
+ * on kern_addr_valid(), which almost does what we need.
*
* Because this is only called on the kernel linear map, p?d_sect() implies
* p?d_present(). When debug_pagealloc is enabled, sections mappings are
@@ -175,6 +206,9 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
+ if (!debug_pagealloc_enabled() && !rodata_full)
+ return true;
+
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
@@ -196,5 +230,3 @@ bool kernel_page_present(struct page *page)
ptep = pte_offset_kernel(pmdp, addr);
return pte_valid(READ_ONCE(*ptep));
}
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
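
set_direct_map_{invalid,default}_noflush() deliberately leave TLB maintenance to the caller, which is what the _noflush suffix advertises. A hedged kernel-context usage sketch; the surrounding harness is invented, but the flush obligation it shows is real:

#include <linux/mm.h>
#include <linux/set_memory.h>

/* Invented debug harness: unmap one page's linear-map alias, restore it. */
static void toggle_linear_alias(struct page *page)
{
	unsigned long addr = (unsigned long)page_address(page);

	set_direct_map_invalid_noflush(page);
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

	/* ... any stray access through the linear map now faults ... */

	set_direct_map_default_noflush(page);
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}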
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 87c568807925..f5b437f8a22b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
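
Allocating the JIT region PAGE_KERNEL instead of PAGE_KERNEL_EXEC means it is never writable and executable at the same time; execute permission is granted only after the code is emitted. A hedged kernel-context sketch of that W^X sequence (emit_code() is an invented stand-in, and size is assumed page-aligned):

static void *jit_publish(unsigned int size)
{
	void *prog = bpf_jit_alloc_exec(size);	/* mapped RW, non-exec */

	if (!prog)
		return NULL;

	emit_code(prog, size);			/* invented: JIT output pass */

	/* flip to read-only + executable once the text is final */
	set_memory_ro((unsigned long)prog, size >> PAGE_SHIFT);
	set_memory_x((unsigned long)prog, size >> PAGE_SHIFT);
	return prog;
}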
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index e72d9b6bc234..e456652facce 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -90,7 +90,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs)
return regs->a4;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
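
This is the first of many hunks in this series applying the same treewide cleanup: force_sig() and friends lose their task argument because every caller passed current, and signalling any other task through this path was never safe. A standalone illustration of why dropping the redundant parameter cannot change behaviour:

#include <stdio.h>

struct task { const char *comm; };
static struct task me = { "current" };
#define current (&me)			/* stand-in for the kernel's current */

static void force_sig_old(int sig, struct task *t)	/* old shape */
{
	printf("sig %d -> %s\n", sig, t->comm);
}

static void force_sig_new(int sig)			/* new shape */
{
	force_sig_old(sig, current);	/* the only target ever used */
}

int main(void)
{
	force_sig_new(11);		/* SIGSEGV, always aimed at current */
	return 0;
}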
diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c
index c4785c9b67a2..ec61034fdf56 100644
--- a/arch/c6x/kernel/traps.c
+++ b/arch/c6x/kernel/traps.c
@@ -250,7 +250,7 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
die_if_kernel(except_info->kernel_str, regs, addr);
force_sig_fault(except_info->signo, except_info->code,
- (void __user *)addr, current);
+ (void __user *)addr);
}
/*
diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c
index d789be36eb4f..27ef5b2c43ab 100644
--- a/arch/csky/abiv1/alignment.c
+++ b/arch/csky/abiv1/alignment.c
@@ -283,7 +283,7 @@ bad_area:
do_exit(SIGKILL);
}
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr);
}
static struct ctl_table alignment_tbl[4] = {
diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c
index e7e11344005a..86d187d4e5af 100644
--- a/arch/csky/abiv2/fpu.c
+++ b/arch/csky/abiv2/fpu.c
@@ -124,7 +124,7 @@ void fpu_fpe(struct pt_regs *regs)
code = FPE_FLTRES;
}
- force_sig_fault(sig, code, (void __user *)regs->pc, current);
+ force_sig_fault(sig, code, (void __user *)regs->pc);
}
#define FMFVR_FPU_REGS(vrx, vry) \
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index d47a3381aad8..9b1b7c039ddf 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -66,7 +66,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
- struct task_struct *task;
sigset_t set;
/* Always make any pending restarted system calls return -EINTR */
@@ -91,8 +90,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
return regs->a0;
badframe:
- task = current;
- force_sig(SIGSEGV, task);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c
index f487a9b996ae..2792e9601ac5 100644
--- a/arch/csky/kernel/traps.c
+++ b/arch/csky/kernel/traps.c
@@ -106,7 +106,7 @@ void buserr(struct pt_regs *regs)
pr_err("User mode Bus Error\n");
show_regs(regs);
- force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc, current);
+ force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc);
}
#define USR_BKPT 0x1464
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 18041f46ded1..f76618b630f9 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -179,7 +179,7 @@ bad_area:
bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -212,5 +212,5 @@ do_sigbus:
if (!user_mode(regs))
goto no_context;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c
index f5ff3b794c85..15db45a03b04 100644
--- a/arch/h8300/kernel/ptrace_h.c
+++ b/arch/h8300/kernel/ptrace_h.c
@@ -250,7 +250,7 @@ asmlinkage void trace_trap(unsigned long bp)
{
if ((unsigned long)current->thread.breakinfo.addr == bp) {
user_disable_single_step(current);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
} else
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c
index c0af930052c0..ee21f37b7ed4 100644
--- a/arch/h8300/kernel/ptrace_s.c
+++ b/arch/h8300/kernel/ptrace_s.c
@@ -40,5 +40,5 @@ void user_enable_single_step(struct task_struct *child)
asmlinkage void trace_trap(unsigned long bp)
{
(void)bp;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index e0f2b708e5d9..ef7489b7c459 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -126,7 +126,7 @@ asmlinkage int sys_rt_sigreturn(void)
return er0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 5bc36db26475..d48864c48e5a 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -252,6 +252,6 @@ asmlinkage int sys_rt_sigreturn(void)
return regs->r00;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index a01da26dbfe1..69c623b14ddd 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -239,7 +239,7 @@ int die_if_kernel(char *str, struct pt_regs *regs, long err)
static void misaligned_instruction(struct pt_regs *regs)
{
die_if_kernel("Misaligned Instruction", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
/*
@@ -250,19 +250,19 @@ static void misaligned_instruction(struct pt_regs *regs)
static void misaligned_data_load(struct pt_regs *regs)
{
die_if_kernel("Misaligned Data Load", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static void misaligned_data_store(struct pt_regs *regs)
{
die_if_kernel("Misaligned Data Store", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static void illegal_instruction(struct pt_regs *regs)
{
die_if_kernel("Illegal Instruction", regs, 0);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/*
@@ -272,7 +272,7 @@ static void illegal_instruction(struct pt_regs *regs)
static void precise_bus_error(struct pt_regs *regs)
{
die_if_kernel("Precise Bus Error", regs, 0);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
/*
@@ -407,7 +407,7 @@ void do_trap0(struct pt_regs *regs)
* may want to use a different trap0 flavor.
*/
force_sig_fault(SIGTRAP, TRAP_BRKPT,
- (void __user *) pt_elr(regs), current);
+ (void __user *) pt_elr(regs));
} else {
#ifdef CONFIG_KGDB
kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index b7a99aa5b0ba..b3bc71680ae4 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -135,14 +135,14 @@ good_area:
si_signo = SIGSEGV;
si_code = SEGV_ACCERR;
}
- force_sig_fault(si_signo, si_code, (void __user *)address, current);
+ force_sig_fault(si_signo, si_code, (void __user *)address);
return;
bad_area:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
/* Kernel-mode fault falls through */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 206530d0751b..50440f3ddc43 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^)
#undef ATOMIC_OP
#define ATOMIC64_OP(op, c_op) \
-static __inline__ long \
-ia64_atomic64_##op (__s64 i, atomic64_t *v) \
+static __inline__ s64 \
+ia64_atomic64_##op (s64 i, atomic64_t *v) \
{ \
- __s64 old, new; \
+ s64 old, new; \
CMPXCHG_BUGCHECK_DECL \
\
do { \
@@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP(op, c_op) \
-static __inline__ long \
-ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \
+static __inline__ s64 \
+ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \
{ \
- __s64 old, new; \
+ s64 old, new; \
CMPXCHG_BUGCHECK_DECL \
\
do { \
@@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_add_return(i,v) \
({ \
- long __ia64_aar_i = (i); \
+ s64 __ia64_aar_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
: ia64_atomic64_add(__ia64_aar_i, v); \
@@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_sub_return(i,v) \
({ \
- long __ia64_asr_i = (i); \
+ s64 __ia64_asr_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
: ia64_atomic64_sub(__ia64_asr_i, v); \
@@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_add(i,v) \
({ \
- long __ia64_aar_i = (i); \
+ s64 __ia64_aar_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_add(__ia64_aar_i, v); \
@@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -)
#define atomic64_fetch_sub(i,v) \
({ \
- long __ia64_asr_i = (i); \
+ s64 __ia64_asr_i = (i); \
__ia64_atomic_const(i) \
? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
: ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
index c0239bf77a09..782c481d7052 100644
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@@ -197,21 +197,21 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
*/
printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
} else if (ia64_psr(regs)->tb) {
/*
* Branch Tracing is enabled.
* Force a taken branch signal.
*/
force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
} else if (ia64_psr(regs)->ss) {
/*
* Single Step is enabled.
* Force a trace signal.
*/
force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL,
- 0, 0, 0, current);
+ 0, 0, 0);
}
return rv;
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d761854b..79190d877fa7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->cpu = cpu;
p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
- cpumask_set_cpu(cpu, &p->cpus_allowed);
+ cpumask_set_cpu(cpu, &p->cpus_mask);
INIT_LIST_HEAD(&p->tasks);
p->parent = p->real_parent = p->group_leader = p;
INIT_LIST_HEAD(&p->children);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 58a6337c0690..7c52bd2695a2 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
}
/* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- goto cleanup_reserve;
- }
+ on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
/* officially change to the alternate interrupt handler */
pfm_alt_intr_handler = hdl;
@@ -6421,7 +6417,6 @@ int
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
int i;
- int ret;
if (hdl == NULL) return -EINVAL;
@@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
pfm_alt_intr_handler = NULL;
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- }
+ on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
for_each_online_cpu(i) {
pfm_unreserve_session(NULL, 1, i);
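
on_each_cpu() (and smp_call_function(), see the uncached.c hunks further down) now returns void, so per-CPU failure has to be reported through shared state instead of a return code; perfmon already kept such state, which is why these checks could simply go. A hedged kernel-context sketch of the pattern (do_work_on_this_cpu() is an invented stand-in, as is the -EIO choice):

static atomic_t status;

static void per_cpu_work(void *info)
{
	if (do_work_on_this_cpu(info))		/* invented helper */
		atomic_inc(&status);
}

static int run_everywhere(void *info)
{
	atomic_set(&status, 0);
	on_each_cpu(per_cpu_work, info, 1);	/* void: cannot fail */
	return atomic_read(&status) ? -EIO : 0;
}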
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 6062fd14e34e..e5044aed9452 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -152,7 +152,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
return retval;
give_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return retval;
}
@@ -257,7 +257,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
*/
check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
if (!likely(on_sig_stack(check_sp))) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
}
@@ -265,7 +265,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
if (!access_ok(frame, sizeof(*frame))) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
@@ -282,7 +282,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr)
err |= setup_sigcontext(&frame->sc, set, scr);
if (unlikely(err)) {
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return 1;
}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 85d8616ac4f6..e13cb905930f 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -176,7 +176,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
}
force_sig_fault(sig, code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- break_num, 0 /* clear __ISR_VALID */, 0, current);
+ break_num, 0 /* clear __ISR_VALID */, 0);
}
/*
@@ -353,7 +353,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
}
force_sig_fault(SIGFPE, si_code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
} else {
if (exception == -1) {
@@ -373,7 +373,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
}
force_sig_fault(SIGFPE, si_code,
(void __user *) (regs->cr_iip + ia64_psr(regs)->ri),
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
}
return 0;
@@ -408,7 +408,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
force_sig_fault(SIGILL, ILL_ILLOPC,
(void __user *) (regs.cr_iip + ia64_psr(&regs)->ri),
- 0, 0, 0, current);
+ 0, 0, 0);
return rv;
}
@@ -483,7 +483,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ ia64_psr(&regs)->ri);
}
force_sig_fault(sig, code, addr,
- vector, __ISR_VALID, isr, current);
+ vector, __ISR_VALID, isr);
return;
} else if (ia64_done_with_exception(&regs))
return;
@@ -493,7 +493,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 31: /* Unsupported Data Reference */
if (user_mode(&regs)) {
force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip,
- vector, __ISR_VALID, isr, current);
+ vector, __ISR_VALID, isr);
return;
}
sprintf(buf, "Unsupported data reference");
@@ -542,7 +542,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
== NOTIFY_STOP)
return;
force_sig_fault(SIGTRAP, si_code, (void __user *) ifa,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
return;
case 32: /* fp fault */
@@ -550,7 +550,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
}
return;
@@ -578,7 +578,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
if (user_mode(&regs)) {
force_sig_fault(SIGILL, ILL_BADIADDR,
(void __user *) iip,
- 0, 0, 0, current);
+ 0, 0, 0);
return;
}
sprintf(buf, "Unimplemented Instruction Address fault");
@@ -589,14 +589,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
iip, ifa, isr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
case 46:
printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
iip, ifa, isr, iim);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
case 47:
@@ -608,5 +608,5 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
break;
}
if (!die_if_kernel(buf, &regs, error))
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index a167a3824b35..eb7d5df59fa3 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1537,6 +1537,6 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
}
force_sigbus:
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa,
- 0, 0, 0, current);
+ 0, 0, 0);
goto done;
}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index edcdfc149311..16c6d377c502 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -121,8 +121,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
} else if (status != PAL_VISIBILITY_OK)
goto failed;
@@ -143,8 +143,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
if (status != PAL_STATUS_SUCCESS)
goto failed;
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
/*
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 5baeb022f474..3c3a283d3172 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -249,7 +249,7 @@ retry:
}
if (user_mode(regs)) {
force_sig_fault(signal, code, (void __user *) address,
- 0, __ISR_VALID, isr, current);
+ 0, __ISR_VALID, isr);
return;
}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 218e037ef901..00f5c98a5e05 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,10 +3,13 @@ config M68K
bool
default y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_NO_PREEMPT if !COLDFIRE
+ select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
select HAVE_IDE
select HAVE_AOUT if MMU
select HAVE_DEBUG_BUGVERBOSE
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index fea392cfcf1b..04e0f211afb3 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -71,9 +71,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -205,7 +202,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -231,7 +227,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -308,7 +303,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -436,6 +430,8 @@ CONFIG_FB_AMIGA_OCS=y
CONFIG_FB_AMIGA_ECS=y
CONFIG_FB_AMIGA_AGA=y
CONFIG_FB_FM2=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -553,13 +549,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -583,7 +580,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -626,6 +622,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 2474d267460e..c6abbb535878 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -67,9 +67,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -201,7 +198,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -227,7 +223,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -304,7 +299,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -397,6 +391,8 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_VGA16 is not set
@@ -513,13 +509,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -543,7 +540,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -586,6 +582,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0fc7d2992fe0..06ae65bad177 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -74,9 +74,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -208,7 +205,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -234,7 +230,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -311,7 +306,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -421,6 +415,8 @@ CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FB_ATARI=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -535,13 +531,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -565,7 +562,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -608,6 +604,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 699df9fdf866..5616b94053b6 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -64,9 +64,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -301,7 +296,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index b50802255324..1106521f3b56 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -66,9 +66,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -200,7 +197,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -226,7 +222,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -399,6 +393,8 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
@@ -515,13 +511,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -545,7 +542,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -588,6 +584,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 04e7d70f6030..226c6c063cd4 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -65,9 +65,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -305,7 +300,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -423,6 +417,8 @@ CONFIG_PTP_1588_CLOCK=m
CONFIG_FB=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -537,13 +533,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -567,7 +564,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -610,6 +606,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 5e1cc4c17852..39f603417928 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -85,9 +85,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -219,7 +216,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -245,7 +241,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -325,7 +320,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -499,6 +493,8 @@ CONFIG_FB_FM2=y
CONFIG_FB_ATARI=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -619,13 +615,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -649,7 +646,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -692,6 +688,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 170ac8792c2d..175a607f576c 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -63,9 +63,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -197,7 +194,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -223,7 +219,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -300,7 +295,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -393,6 +387,8 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -505,13 +501,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -535,7 +532,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -578,6 +574,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index d865592a423e..f41c34d3cdd0 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -64,9 +64,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -301,7 +296,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
@@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 034a9de90484..c9d2cb0a1cf4 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -65,9 +65,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -302,7 +297,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -408,6 +402,8 @@ CONFIG_PPS_CLIENT_PARPORT=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
@@ -524,13 +520,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -554,7 +551,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -597,6 +593,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 49be0f9fcd8d..79a64fdd6bf0 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -61,9 +61,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -298,7 +293,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -394,6 +388,8 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -508,13 +504,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -538,7 +535,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -581,6 +577,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index a71acf4a6004..e3402a5d165b 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -61,9 +61,6 @@ CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
@@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
@@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -298,7 +293,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
@@ -393,6 +387,8 @@ CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
@@ -507,13 +503,14 @@ CONFIG_NLS_MAC_TURKISH=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
@@ -537,7 +534,6 @@ CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -580,6 +576,7 @@ CONFIG_ATOMIC64_SELFTEST=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index b4aa853051bd..30cd59caf037 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -18,57 +18,22 @@
#include <asm/pgalloc.h>
#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t flag, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- struct page *page, **map;
- pgprot_t pgprot;
- void *addr;
- int i, order;
-
- pr_debug("dma_alloc_coherent: %d,%x\n", size, flag);
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- page = alloc_pages(flag | __GFP_ZERO, order);
- if (!page)
- return NULL;
-
- *handle = page_to_phys(page);
- map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA);
- if (!map) {
- __free_pages(page, order);
- return NULL;
- }
- split_page(page, order);
-
- order = 1 << order;
- size >>= PAGE_SHIFT;
- map[0] = page;
- for (i = 1; i < size; i++)
- map[i] = page + i;
- for (; i < order; i++)
- __free_page(page + i);
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
- if (CPU_IS_040_OR_060)
- pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
- else
- pgprot_val(pgprot) |= _PAGE_NOCACHE030;
- addr = vmap(map, size, VM_MAP, pgprot);
- kfree(map);
-
- return addr;
+ cache_push(page_to_phys(page), size);
}
-void arch_dma_free(struct device *dev, size_t size, void *addr,
- dma_addr_t handle, unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
{
- pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
- vfree(addr);
+ if (CPU_IS_040_OR_060) {
+ pgprot_val(prot) &= ~_PAGE_CACHE040;
+ pgprot_val(prot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
+ } else {
+ pgprot_val(prot) |= _PAGE_NOCACHE030;
+ }
+ return prot;
}
-
#else
#include <asm/cacheflush.h>
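
The m68k/dma change above is a move to the generic DMA remapping code: the
hand-rolled arch_dma_alloc()/arch_dma_free() pair is gone, and the arch now
supplies only the two hooks the common code needs, arch_dma_prep_coherent()
to push dirty cache lines out of a freshly allocated page and
arch_dma_mmap_pgprot() to pick the 030 vs 040/060 nocache pgprot. A minimal
driver-side sketch of what now flows through those hooks; "mydev" and the
buffer size are placeholders, dma_alloc_coherent() itself is the real API:

    #include <linux/dma-mapping.h>
    #include <linux/sizes.h>

    static void *buf;
    static dma_addr_t buf_dma;

    static int example_alloc(struct device *mydev)
    {
            /* On 030/040/060 this is expected to reach the generic
             * remap path, which preps the pages via
             * arch_dma_prep_coherent() and maps them with the nocache
             * pgprot from arch_dma_mmap_pgprot(). */
            buf = dma_alloc_coherent(mydev, SZ_4K, &buf_dma, GFP_KERNEL);
            return buf ? 0 : -ENOMEM;
    }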
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 87e7f3639839..05610e6924c1 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -803,7 +803,7 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
return regs->d0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -825,7 +825,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
return regs->d0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
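
The signal-API churn that dominates the rest of this section is mechanical:
the helpers lost their task argument because every converted call site
targeted current anyway. The shapes, paraphrased from the call sites in the
hunks (an explicit-target variant survives for the few places, such as the
parisc ptrace and mips force_fcr31_sig() hunks below, that signal a task
other than current):

    void force_sig(int sig);                 /* always acts on current */
    void force_sigsegv(int sig);             /* likewise */
    int  force_sig_fault(int sig, int code, void __user *addr);
    int  force_sig_fault_to_task(int sig, int code, void __user *addr,
                                 struct task_struct *t);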
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index b2fd000b9285..344f93d36a9a 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -431,7 +431,7 @@ static inline void bus_error030 (struct frame *fp)
pr_err("BAD KERNEL BUSERR\n");
die_if_kernel("Oops", &fp->ptregs,0);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return;
}
} else {
@@ -463,7 +463,7 @@ static inline void bus_error030 (struct frame *fp)
!(ssw & RW) ? "write" : "read", addr,
fp->ptregs.pc);
die_if_kernel ("Oops", &fp->ptregs, buserr_type);
- force_sig (SIGBUS, current);
+ force_sig (SIGBUS);
return;
}
@@ -493,7 +493,7 @@ static inline void bus_error030 (struct frame *fp)
do_page_fault (&fp->ptregs, addr, 0);
} else {
pr_debug("protection fault on insn access (segv).\n");
- force_sig (SIGSEGV, current);
+ force_sig (SIGSEGV);
}
}
#else
@@ -571,7 +571,7 @@ static inline void bus_error030 (struct frame *fp)
!(ssw & RW) ? "write" : "read", addr,
fp->ptregs.pc);
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
} else {
#if 0
@@ -598,7 +598,7 @@ static inline void bus_error030 (struct frame *fp)
#endif
pr_debug("Unknown SIGSEGV - 1\n");
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -621,7 +621,7 @@ static inline void bus_error030 (struct frame *fp)
buserr:
pr_err("BAD KERNEL BUSERR\n");
die_if_kernel("Oops",&fp->ptregs,0);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return;
}
@@ -660,7 +660,7 @@ static inline void bus_error030 (struct frame *fp)
addr, fp->ptregs.pc);
pr_debug("Unknown SIGSEGV - 2\n");
die_if_kernel("Oops",&fp->ptregs,mmusr);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -804,7 +804,7 @@ asmlinkage void buserr_c(struct frame *fp)
default:
die_if_kernel("bad frame format",&fp->ptregs,0);
pr_debug("Unknown SIGSEGV - 4\n");
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
}
@@ -1127,7 +1127,7 @@ asmlinkage void trap_c(struct frame *fp)
addr = (void __user*) fp->un.fmtb.daddr;
break;
}
- force_sig_fault(sig, si_code, addr, current);
+ force_sig_fault(sig, si_code, addr);
}
void die_if_kernel (char *str, struct pt_regs *fp, int nr)
@@ -1159,6 +1159,6 @@ asmlinkage void fpsp040_die(void)
#ifdef CONFIG_M68KFPU_EMU
asmlinkage void fpemu_signal(int signal, int code, void *addr)
{
- force_sig_fault(signal, code, addr, current);
+ force_sig_fault(signal, code, addr);
}
#endif
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 9b6163c05a75..e9b1d7585b43 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -30,13 +30,13 @@ int send_fault_sig(struct pt_regs *regs)
pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code);
if (user_mode(regs)) {
- force_sig_fault(signo, si_code, addr, current);
+ force_sig_fault(signo, si_code, addr);
} else {
if (fixup_exception(regs))
return -1;
//if (signo == SIGBUS)
- // force_sig_fault(si_signo, si_code, addr, current);
+ // force_sig_fault(si_signo, si_code, addr);
/*
* Oops. The kernel tried to access some bad page. We'll have to
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index eafff21fcb0e..cf99c411503e 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -63,7 +63,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
if (kernel_mode(regs))
die("Exception in kernel mode", regs, signr);
- force_sig_fault(signr, code, (void __user *)addr, current);
+ force_sig_fault(signr, code, (void __user *)addr);
}
asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 0685696349bb..cdd4feb279c5 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -108,7 +108,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return rval;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 202ad6a494f5..e6a810b0c7ad 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -289,7 +289,7 @@ out_of_memory:
do_sigbus:
up_read(&mm->mmap_sem);
if (user_mode(regs)) {
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return;
}
bad_page_fault(regs, address, SIGBUS);
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 8f4486c4415b..eceff9b75b22 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -17,6 +17,7 @@ archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
KBUILD_DEFCONFIG := 32r2el_defconfig
+KBUILD_DTBS := dtbs
#
# Select the object file format to substitute into the linker script.
@@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@
vmlinux.64: vmlinux
$(call cmd,64)
-all: $(all-y)
+all: $(all-y) $(KBUILD_DTBS)
# boot
$(boot-y): $(vmlinux-32) FORCE
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 3c453a1f1ff1..172801ed35b8 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
$(call if_changed,objcopy)
+HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE)
+
# Calculate the load address of the compressed kernel image
hostprogs-y := calc_vmlinuz_load_addr
diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
index 240f1d12df75..080b926d2623 100644
--- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
+++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
@@ -9,7 +9,7 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include "../../../../include/linux/sizes.h"
+#include <linux/sizes.h>
int main(int argc, char *argv[])
{
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 94096299fc56..9a82dd11c0e9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC64_OP(op, c_op, asm_op) \
-static __inline__ void atomic64_##op(long i, atomic64_t * v) \
+static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \
{ \
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
}
#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
-static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \
{ \
- long result; \
+ s64 result; \
\
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
}
#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
-static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \
{ \
- long result; \
+ s64 result; \
\
if (kernel_uses_llsc) { \
- long temp; \
+ s64 temp; \
\
loongson_llsc_mb(); \
__asm__ __volatile__( \
@@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor)
* Atomically test @v and subtract @i if @v is greater or equal than @i.
* The function returns the old value of @v minus @i.
*/
-static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
+static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v)
{
- long result;
+ s64 result;
smp_mb__before_llsc();
if (kernel_uses_llsc) {
- long temp;
+ s64 temp;
__asm__ __volatile__(
" .set push \n"
diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h
index b8f8af7dc47c..cacf3545e018 100644
--- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h
+++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h
@@ -24,8 +24,8 @@
#define AR933X_UART_CS_PARITY_S 0
#define AR933X_UART_CS_PARITY_M 0x3
#define AR933X_UART_CS_PARITY_NONE 0
-#define AR933X_UART_CS_PARITY_ODD 1
-#define AR933X_UART_CS_PARITY_EVEN 2
+#define AR933X_UART_CS_PARITY_ODD 2
+#define AR933X_UART_CS_PARITY_EVEN 3
#define AR933X_UART_CS_IF_MODE_S 2
#define AR933X_UART_CS_IF_MODE_M 0x3
#define AR933X_UART_CS_IF_MODE_NONE 0
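
Going by the corrected defines, the 2-bit parity field in the AR933x CS
register encodes none=0, odd=2, even=3 (plausibly bit 1 as parity enable
and bit 0 as even select; that reading is inferred, not quoted from a
datasheet), so the old 1/2 values programmed the wrong modes. Illustrative
composition using only names from this header:

    u32 cs = (AR933X_UART_CS_PARITY_EVEN & AR933X_UART_CS_PARITY_M)
                     << AR933X_UART_CS_PARITY_S;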
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 0f813bb753c6..09cbe9042828 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -42,7 +42,7 @@ extern struct task_struct *ll_task;
* inline to try to keep the overhead down. If we have been forced to run on
* a "CPU" with an FPU because of a previous high level of FP computation,
* but did not actually use the FPU during the most recent time-slice (CU1
- * isn't set), we undo the restriction on cpus_allowed.
+ * isn't set), we undo the restriction on cpus_mask.
*
* We're not calling set_cpus_allowed() here, because we have no need to
* force prompt migration - we're already switching the current CPU to a
@@ -57,7 +57,7 @@ do { \
test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \
(!(KSTK_STATUS(prev) & ST0_CU1))) { \
clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \
- prev->cpus_allowed = prev->thread.user_cpus_allowed; \
+ prev->cpus_mask = prev->thread.user_cpus_allowed; \
} \
next->thread.emulated_fp = 0; \
} while(0)
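
The cpus_allowed -> cpus_mask rename here (with cpus_ptr appearing in
mips-mt-fpaff.c and traps.c below) comes from the scheduler splitting task
affinity into the storage itself and a pointer that readers dereference.
Sketch of the relevant task_struct fields, paraphrased from the converted
call sites rather than quoted:

    struct task_struct {
            /* ... */
            const cpumask_t *cpus_ptr;  /* readers use this; normally
                                         * points at cpus_mask below */
            cpumask_t        cpus_mask; /* the storage; was cpus_allowed */
            /* ... */
    };

Read-only users switch to p->cpus_ptr, as mipsmt_sys_sched_getaffinity()
does below, while the FPU-bound hack above keeps writing p->cpus_mask
directly because it genuinely edits the task's own mask.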
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 180ad081afcf..1db29957a931 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -32,7 +32,7 @@ int __isa_exception_epc(struct pt_regs *regs)
/* Calculate exception PC in branch delay slot. */
if (__get_user(inst, (u16 __user *) msk_isa16_mode(epc))) {
/* This should never happen because delay slot was checked. */
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return epc;
}
if (cpu_has_mips16) {
@@ -305,7 +305,7 @@ int __microMIPS_compute_return_epc(struct pt_regs *regs)
return 0;
sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -328,7 +328,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
/* Read the instruction. */
addr = (u16 __user *)msk_isa16_mode(epc);
if (__get_user(inst.full, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -343,7 +343,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
case MIPS16e_jal_op:
addr += 1;
if (__get_user(inst2, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
fullinst = ((unsigned)inst.full << 16) | inst2;
@@ -829,17 +829,17 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
sigill_dsp:
pr_debug("%s: DSP branch but not DSP ASE - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
sigill_r2r6:
pr_debug("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
sigill_r6:
pr_debug("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n",
current->comm);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return -EFAULT;
}
EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn);
@@ -859,7 +859,7 @@ int __compute_return_epc(struct pt_regs *regs)
*/
addr = (unsigned int __user *) epc;
if (__get_user(insn.word, addr)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return -EFAULT;
}
@@ -867,7 +867,7 @@ int __compute_return_epc(struct pt_regs *regs)
unaligned:
printk("%s: unaligned epc - sending SIGBUS.\n", current->comm);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return -EFAULT;
}
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 07c941c99e92..81ba1d3c367c 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -220,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs,
unaligned:
pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return -EFAULT;
}
diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c
index a7c0f97e4b0d..1a08428eedcf 100644
--- a/arch/mips/kernel/mips-mt-fpaff.c
+++ b/arch/mips/kernel/mips-mt-fpaff.c
@@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
if (retval)
goto out_unlock;
- cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed);
+ cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr);
cpumask_and(&mask, &allowed, cpu_active_mask);
out_unlock:
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index d75337974ee9..f6efabcb4e92 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -641,7 +641,7 @@ asmlinkage void sys_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
/*
* Don't let your children do this ...
@@ -654,7 +654,7 @@ asmlinkage void sys_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
#endif /* CONFIG_TRAD_SIGNALS */
@@ -678,7 +678,7 @@ asmlinkage void sys_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -694,7 +694,7 @@ asmlinkage void sys_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
#ifdef CONFIG_TRAD_SIGNALS
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 9a6e58b48bb6..7bd00fad61af 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -71,7 +71,7 @@ asmlinkage void sysn32_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (compat_restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -87,7 +87,7 @@ asmlinkage void sysn32_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static int setup_rt_frame_n32(void *sig_return, struct ksignal *ksig,
diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c
index df259618e834..299a7a28ca33 100644
--- a/arch/mips/kernel/signal_o32.c
+++ b/arch/mips/kernel/signal_o32.c
@@ -171,7 +171,7 @@ asmlinkage void sys32_rt_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
if (compat_restore_altstack(&frame->rs_uc.uc_stack))
goto badframe;
@@ -187,7 +187,7 @@ asmlinkage void sys32_rt_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig,
@@ -273,7 +273,7 @@ asmlinkage void sys32_sigreturn(void)
if (sig < 0)
goto badframe;
else if (sig)
- force_sig(sig, current);
+ force_sig(sig);
/*
* Don't let your children do this ...
@@ -286,5 +286,5 @@ asmlinkage void sys32_sigreturn(void)
/* Unreached */
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c52766a5b85f..342e41de9d64 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -482,7 +482,7 @@ asmlinkage void do_be(struct pt_regs *regs)
goto out;
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
out:
exception_exit(prev_state);
@@ -705,7 +705,7 @@ asmlinkage void do_ov(struct pt_regs *regs)
prev_state = exception_enter();
die_if_kernel("Integer overflow", regs);
- force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current);
+ force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc);
exception_exit(prev_state);
}
@@ -733,7 +733,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
else if (fcr31 & FPU_CSR_INE_X)
si_code = FPE_FLTRES;
- force_sig_fault(SIGFPE, si_code, fault_addr, tsk);
+ force_sig_fault_to_task(SIGFPE, si_code, fault_addr, tsk);
}
int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
@@ -750,7 +750,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
return 1;
case SIGBUS:
- force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr);
return 1;
case SIGSEGV:
@@ -761,11 +761,11 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
else
si_code = SEGV_MAPERR;
up_read(&current->mm->mmap_sem);
- force_sig_fault(SIGSEGV, si_code, fault_addr, current);
+ force_sig_fault(SIGSEGV, si_code, fault_addr);
return 1;
default:
- force_sig(sig, current);
+ force_sig(sig);
return 1;
}
}
@@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void)
* restricted the allowed set to exclude any CPUs with FPUs,
* we'll skip the procedure.
*/
- if (cpumask_intersects(&current->cpus_allowed, &mt_fpu_cpumask)) {
+ if (cpumask_intersects(&current->cpus_mask, &mt_fpu_cpumask)) {
cpumask_t tmask;
current->thread.user_cpus_allowed
- = current->cpus_allowed;
- cpumask_and(&tmask, &current->cpus_allowed,
+ = current->cpus_mask;
+ cpumask_and(&tmask, &current->cpus_mask,
&mt_fpu_cpumask);
set_cpus_allowed_ptr(current, &tmask);
set_thread_flag(TIF_FPUBOUND);
@@ -943,11 +943,11 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
die_if_kernel(b, regs);
force_sig_fault(SIGFPE,
code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF,
- (void __user *) regs->cp0_epc, current);
+ (void __user *) regs->cp0_epc);
break;
case BRK_BUG:
die_if_kernel("Kernel bug detected", regs);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
break;
case BRK_MEMU:
/*
@@ -962,15 +962,15 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
return;
die_if_kernel("Math emu break/trap", regs);
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
break;
default:
scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
die_if_kernel(b, regs);
if (si_code) {
- force_sig_fault(SIGTRAP, si_code, NULL, current);
+ force_sig_fault(SIGTRAP, si_code, NULL);
} else {
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
}
}
@@ -1063,7 +1063,7 @@ out:
return;
out_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1105,7 +1105,7 @@ out:
return;
out_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1191,7 +1191,7 @@ no_r2_instr:
if (unlikely(status > 0)) {
regs->cp0_epc = old_epc; /* Undo skip-over. */
regs->regs[31] = old31;
- force_sig(status, current);
+ force_sig(status);
}
out:
@@ -1220,7 +1220,7 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
die_if_kernel("COP2: Unhandled kernel unaligned access or invalid "
"instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
return NOTIFY_OK;
}
@@ -1383,7 +1383,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
if (unlikely(status > 0)) {
regs->cp0_epc = old_epc; /* Undo skip-over. */
regs->regs[31] = old31;
- force_sig(status, current);
+ force_sig(status);
}
break;
@@ -1403,7 +1403,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
* emulator too.
*/
if (raw_cpu_has_fpu || !cpu_has_mips_4_5_64_r2_r6) {
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
break;
}
/* Fall through. */
@@ -1437,7 +1437,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
#else /* CONFIG_MIPS_FP_SUPPORT */
case 1:
case 3:
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
break;
#endif /* CONFIG_MIPS_FP_SUPPORT */
@@ -1464,7 +1464,7 @@ asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
local_irq_enable();
die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
- force_sig(SIGFPE, current);
+ force_sig(SIGFPE);
out:
exception_exit(prev_state);
}
@@ -1477,7 +1477,7 @@ asmlinkage void do_msa(struct pt_regs *regs)
prev_state = exception_enter();
if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) {
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
goto out;
}
@@ -1485,7 +1485,7 @@ asmlinkage void do_msa(struct pt_regs *regs)
err = enable_restore_fp_context(1);
if (err)
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
out:
exception_exit(prev_state);
}
@@ -1495,7 +1495,7 @@ asmlinkage void do_mdmx(struct pt_regs *regs)
enum ctx_state prev_state;
prev_state = exception_enter();
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
exception_exit(prev_state);
}
@@ -1521,7 +1521,7 @@ asmlinkage void do_watch(struct pt_regs *regs)
if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
mips_read_watch_registers();
local_irq_enable();
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL);
} else {
mips_clear_watch_registers();
local_irq_enable();
@@ -1592,7 +1592,7 @@ asmlinkage void do_mt(struct pt_regs *regs)
}
die_if_kernel("MIPS MT Thread exception in kernel", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -1601,7 +1601,7 @@ asmlinkage void do_dsp(struct pt_regs *regs)
if (cpu_has_dsp)
panic("Unexpected DSP exception");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
asmlinkage void do_reserved(struct pt_regs *regs)
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index 76e33f940971..92bd2b0f0548 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -1365,20 +1365,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/* Recode table from 16-bit register notation to 32-bit GPR. */
@@ -1991,20 +1991,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr)
@@ -2271,20 +2271,20 @@ fault:
return;
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
sigbus:
die_if_kernel("Unhandled kernel unaligned access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
return;
sigill:
die_if_kernel
("Unhandled kernel unaligned access or invalid instruction", regs);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
asmlinkage void do_ade(struct pt_regs *regs)
@@ -2364,7 +2364,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
sigbus:
die_if_kernel("Kernel unaligned instruction access", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
/*
* XXX On return from the signal handler we should advance the epc
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 73d8a0f0b810..f589aa8f47d9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -223,7 +223,7 @@ bad_area_nosemaphore:
pr_cont("\n");
}
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -279,7 +279,7 @@ do_sigbus:
#endif
current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
tsk->thread.cp0_badvaddr = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return;
#ifndef CONFIG_64BIT
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 50ee7213b432..d79f2b432318 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
bool __virt_addr_valid(const volatile void *kaddr)
{
- unsigned long vaddr = (unsigned long)vaddr;
+ unsigned long vaddr = (unsigned long)kaddr;
if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE))
return false;
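
The one-liner above fixes a classic self-initialization bug: vaddr was
initialized from its own indeterminate value instead of from the kaddr
parameter, so __virt_addr_valid() tested garbage. Compilers at best warn
about this shape (gcc wants -Winit-self, clang flags it under
-Wuninitialized); none reject it. The bug class in isolation:

    /* broken on purpose, do not copy */
    unsigned long broken(const volatile void *kaddr)
    {
            unsigned long vaddr = (unsigned long)vaddr; /* self-init */
            return vaddr;                               /* garbage */
    }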
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 65b6e85447b1..144ceb0fba88 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p)
static void build_restore_work_registers(u32 **p)
{
if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
return;
}
@@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
uasm_i_mtc0(p, 0, C0_PAGEMASK);
uasm_il_b(p, r, lid);
}
- if (scratch_reg >= 0)
+ if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- else
+ } else {
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+ }
} else {
/* Reset default page size */
if (PM_DEFAULT_MASK >> 16) {
@@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
uasm_i_jr(p, ptr);
if (mode == refill_scratch) {
- if (scratch_reg >= 0)
+ if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- else
+ } else {
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+ }
} else {
uasm_i_nop(p);
}
@@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
if (c0_scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg);
build_tlb_write_entry(p, l, r, tlb_random);
uasm_l_leave(l, *p);
@@ -1603,15 +1609,17 @@ static void build_setup_pgd(void)
uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
uasm_l_tlbl_goaround1(&l, p);
UASM_i_SLL(&p, a0, a0, 11);
- uasm_i_jr(&p, 31);
UASM_i_MTC0(&p, a0, C0_CONTEXT);
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
} else {
/* PGD in c0_KScratch */
- uasm_i_jr(&p, 31);
if (cpu_has_ldpte)
UASM_i_MTC0(&p, a0, C0_PWBASE);
else
UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
}
#else
#ifdef CONFIG_SMP
@@ -1625,13 +1633,16 @@ static void build_setup_pgd(void)
UASM_i_LA_mostly(&p, a2, pgdc);
UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
#endif /* SMP */
- uasm_i_jr(&p, 31);
/* if pgd_reg is allocated, save PGD also to scratch register */
- if (pgd_reg != -1)
+ if (pgd_reg != -1) {
UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
- else
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
+ } else {
+ uasm_i_jr(&p, 31);
uasm_i_nop(&p);
+ }
#endif
if (p >= (u32 *)tlbmiss_handler_setup_pgd_end)
panic("tlbmiss_handler_setup_pgd space exceeded");
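
Every uasm_i_ehb() added above closes a MIPS execution hazard: a cop0
KScratch register written by mtc0 is not architecturally guaranteed to be
visible to a subsequent read without a hazard barrier. The pattern, sketched
with the same uasm emitters this file uses (p is the u32 ** code cursor;
treat it as a shape, not a verbatim handler):

    UASM_i_MTC0(p, a0, c0_kscratch(), pgd_reg);     /* write scratch */
    uasm_i_ehb(p);                                  /* clear the hazard */
    /* ... a later path may now safely read it back: */
    UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);

The build_setup_pgd() hunk also reorders the jr so the mtc0 no longer sits
in the branch delay slot, with the ehb taking that slot instead.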
diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c
index 34bb9801d5ff..dc0110a607a5 100644
--- a/arch/mips/sgi-ip22/ip22-berr.c
+++ b/arch/mips/sgi-ip22/ip22-berr.c
@@ -98,7 +98,7 @@ void ip22_be_interrupt(int irq)
field, regs->cp0_epc, field, regs->regs[31]);
/* Assume it would be too dangerous to continue ... */
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
static int ip22_be_handler(struct pt_regs *regs, int is_fixup)
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c
index 082541d33161..c0cf7baee36d 100644
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -462,7 +462,7 @@ void ip22_be_interrupt(int irq)
if (ip28_be_interrupt(regs) != MIPS_BE_DISCARD) {
/* Assume it would be too dangerous to continue ... */
die_if_kernel("Oops", regs);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
} else if (debug_be_interrupt)
show_regs(regs);
}
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 83efe03d5c60..73ad29b180fb 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -74,7 +74,7 @@ int ip27_be_handler(struct pt_regs *regs, int is_fixup)
show_regs(regs);
dump_tlb_all();
while(1);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
void __init ip27_be_init(void)
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index c1f12a9cf305..c860f95ab7ed 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -29,7 +29,7 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
show_regs(regs);
dump_tlb_all();
while(1);
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
}
void __init ip32_be_init(void)
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
index cf0b8760f261..62bdafbc53f4 100644
--- a/arch/nds32/kernel/fpu.c
+++ b/arch/nds32/kernel/fpu.c
@@ -243,7 +243,7 @@ inline void handle_fpu_exception(struct pt_regs *regs)
}
force_sig_fault(si_signo, si_code,
- (void __user *)instruction_pointer(regs), current);
+ (void __user *)instruction_pointer(regs));
done:
own_fpu();
}
diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
index 5f7660aa2d68..fe61513982b4 100644
--- a/arch/nds32/kernel/signal.c
+++ b/arch/nds32/kernel/signal.c
@@ -163,7 +163,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return regs->uregs[0];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 5aa7c17da27a..f4d386b52622 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -205,7 +205,7 @@ int bad_syscall(int n, struct pt_regs *regs)
}
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)instruction_pointer(regs) - 4, current);
+ (void __user *)instruction_pointer(regs) - 4);
die_if_kernel("Oops - bad syscall", regs, n);
return regs->uregs[0];
}
@@ -255,14 +255,15 @@ void __init early_trap_init(void)
cpu_cache_wbinval_page(base, true);
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
tsk->thread.error_code = error_code;
force_sig_fault(SIGTRAP, si_code,
- (void __user *)instruction_pointer(regs), tsk);
+ (void __user *)instruction_pointer(regs));
}
void do_debug_trap(unsigned long entry, unsigned long addr,
@@ -274,7 +275,7 @@ void do_debug_trap(unsigned long entry, unsigned long addr,
if (user_mode(regs)) {
/* trap_signal */
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ send_sigtrap(regs, 0, TRAP_BRKPT);
} else {
/* kernel_trap */
if (!fixup_exception(regs))
@@ -288,7 +289,7 @@ void unhandled_interruption(struct pt_regs *regs)
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
void unhandled_exceptions(unsigned long entry, unsigned long addr,
@@ -299,7 +300,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr,
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGKILL);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
extern int do_page_fault(unsigned long entry, unsigned long addr,
@@ -326,7 +327,7 @@ void do_revinsn(struct pt_regs *regs)
show_regs(regs);
if (!user_mode(regs))
do_exit(SIGILL);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
#ifdef CONFIG_ALIGNMENT_TRAP
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 68d5f2a27f38..064ae5d2159d 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -271,7 +271,7 @@ bad_area_nosemaphore:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
return;
}
@@ -340,7 +340,7 @@ do_sigbus:
tsk->thread.address = addr;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = entry;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
return;
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index 4a81876b6086..a42dd09c6578 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -120,7 +120,7 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw)
return rval;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -211,7 +211,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return 0;
give_sigsegv:
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EFAULT;
}
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 3bc3cd22b750..486db793923c 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(die_lock);
static void _send_sig(int signo, int code, unsigned long addr)
{
- force_sig_fault(signo, code, (void __user *) addr, current);
+ force_sig_fault(signo, code, (void __user *) addr);
}
void die(const char *str, struct pt_regs *regs, long err)
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 801cad03a4c7..4f0754874d78 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -95,7 +95,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
return regs->gpr[11];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index e859bfb118a6..932a8ec2b520 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -244,7 +244,7 @@ void __init trap_init(void)
asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
{
- force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address);
regs->pc += 4;
}
@@ -253,7 +253,7 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
{
if (user_mode(regs)) {
/* Send a SIGBUS */
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address);
} else {
printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
show_registers(regs);
@@ -266,7 +266,7 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
{
if (user_mode(regs)) {
/* Send a SIGBUS */
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
} else { /* Kernel mode */
printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address);
show_registers(regs);
@@ -371,7 +371,7 @@ static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
if (get_user(value, lwa_addr)) {
if (user_mode(regs)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -418,7 +418,7 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
if (put_user(regs->gpr[rb], vaddr)) {
if (user_mode(regs)) {
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
@@ -461,7 +461,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs,
if (user_mode(regs)) {
/* Send a SIGILL */
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address);
} else { /* Kernel mode */
printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n",
address);
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 9eee5bf3db27..5d4d3a9691d0 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -209,7 +209,7 @@ bad_area_nosemaphore:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -274,7 +274,7 @@ do_sigbus:
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index a3d2fb4e6dd2..f642ba378ffa 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -88,9 +88,9 @@ void user_enable_single_step(struct task_struct *task)
ptrace_disable(task);
/* Don't wake up the task, but let the
parent know something happened. */
- force_sig_fault(SIGTRAP, TRAP_TRACE,
- (void __user *) (task_regs(task)->iaoq[0] & ~3),
- task);
+ force_sig_fault_to_task(SIGTRAP, TRAP_TRACE,
+ (void __user *) (task_regs(task)->iaoq[0] & ~3),
+ task);
/* notify_parent(task, SIGCHLD); */
return;
}
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 848c1934680b..02895a8f2c55 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -164,7 +164,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
give_sigsegv:
DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n");
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return;
}
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 096e319adeb3..58dcf445e32f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -275,7 +275,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
static void handle_gdb_break(struct pt_regs *regs, int wot)
{
force_sig_fault(SIGTRAP, wot,
- (void __user *) (regs->iaoq[0] & ~3), current);
+ (void __user *) (regs->iaoq[0] & ~3));
}
static void handle_break(struct pt_regs *regs)
@@ -609,13 +609,13 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
si_code = ILL_PRVREG;
give_sigill:
force_sig_fault(SIGILL, si_code,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
case 12:
/* Overflow Trap, let the userland signal handler do the cleanup */
force_sig_fault(SIGFPE, FPE_INTOVF,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
case 13:
@@ -627,7 +627,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
* to by si_addr.
*/
force_sig_fault(SIGFPE, FPE_CONDTRAP,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return;
}
/* The kernel doesn't want to handle condition codes */
@@ -739,7 +739,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
force_sig_fault(SIGSEGV, SEGV_MAPERR,
(code == 7)?
((void __user *) regs->iaoq[0]) :
- ((void __user *) regs->ior), current);
+ ((void __user *) regs->ior));
return;
case 28:
@@ -754,7 +754,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
task_pid_nr(current), current->comm);
/* SIGBUS, for lack of a better one. */
force_sig_fault(SIGBUS, BUS_OBJERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
return;
}
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
@@ -770,7 +770,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
code, fault_space,
task_pid_nr(current), current->comm);
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
return;
}
}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 30161b7c9ac2..237d20dd5622 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -676,14 +676,14 @@ void handle_unaligned(struct pt_regs *regs)
if (ret == ERR_PAGEFAULT)
{
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
}
else
{
force_sigbus:
/* couldn't handle it ... */
force_sig_fault(SIGBUS, BUS_ADRALN,
- (void __user *)regs->ior, current);
+ (void __user *)regs->ior);
}
return;
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index c83237c0cbc1..6ce427b58836 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -104,7 +104,7 @@ handle_fpe(struct pt_regs *regs)
memcpy(regs->fr, frcopy, sizeof regs->fr);
if (signalcode != 0) {
force_sig_fault(signalcode >> 24, signalcode & 0xffffff,
- (void __user *) regs->iaoq[0], current);
+ (void __user *) regs->iaoq[0]);
return -1;
}
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index c8e8b7c05558..6dd4669ce7a5 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -403,13 +403,13 @@ bad_area:
lsb = PAGE_SHIFT;
force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address,
- lsb, current);
+ lsb);
return;
}
#endif
show_signal_msg(regs, code, address, tsk, vma);
- force_sig_fault(signo, si_code, (void __user *) address, current);
+ force_sig_fault(signo, si_code, (void __user *) address);
return;
}
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 52eafaf74054..31c231ea56b7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ long atomic64_read(const atomic64_t *v)
+static __inline__ s64 atomic64_read(const atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, long i)
+static __inline__ void atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(long a, atomic64_t *v) \
+static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
@@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \
}
#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
-static inline long \
-atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
- long t; \
+ s64 t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
@@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
}
#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
-static inline long \
-atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+static inline s64 \
+atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
- long res, t; \
+ s64 res, t; \
\
__asm__ __volatile__( \
"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
@@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor)
static __inline__ void atomic64_inc(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc\n\
@@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
}
#define atomic64_inc atomic64_inc
-static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
@@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
static __inline__ void atomic64_dec(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec\n\
@@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
}
#define atomic64_dec atomic64_dec
-static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
@@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
PPC_ATOMIC_ENTRY_BARRIER
@@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long t;
+ s64 t;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
@@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
*/
static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
{
- long t1, t2;
+ s64 t1, t2;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
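
The long -> s64 churn above is part of a cross-architecture cleanup that redeclares the whole atomic64_*() API in terms of s64, so the prototypes read identically on every port. On 64-bit powerpc this changes nothing at the ABI level, since long and s64 are both 64 bits there; the explicit type matters on ILP32 configurations, where long is only 32 bits wide. A minimal sketch of the distinction, assuming the generic kernel typedefs:

    #include <linux/atomic.h>

    /*
     * Sketch: with s64 the 64-bit width is explicit.  The same
     * declaration written with 'long' would silently shrink to
     * 32 bits on an ILP32 build, even though the counter being
     * read is always 64 bits wide.
     */
    static inline s64 atomic64_read_sketch(const atomic64_t *v)
    {
            return READ_ONCE(v->counter);
    }
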
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ef573fe9873e..a9993e7a443b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define spin_cpu_relax() barrier()
-#define spin_cpu_yield() spin_cpu_relax()
-
#define spin_end() HMT_medium()
#define spin_until_cond(cond) \
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f0fbbf6a6a1f..b448b0938299 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -639,7 +639,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
- force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 684b0b315c32..8c92febf5f44 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child)
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b824f4c69622..0ab4c72515c4 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -990,8 +990,7 @@ int rtas_ibm_suspend_me(u64 handle)
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
+ on_each_cpu(rtas_percpu_suspend_me, &data, 0);
wait_for_completion(&done);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a2b74e057904..f50b708d6d77 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1245,7 +1245,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
current->comm, current->pid,
rt_sf, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -1334,7 +1334,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
current->comm, current->pid,
ctx, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -1512,6 +1512,6 @@ badframe:
current->comm, current->pid,
addr, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4292ea39baa4..2f80e270c7b0 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -808,7 +808,7 @@ badframe:
current->comm, current->pid, "rt_sigreturn",
(long)uc, regs->nip, regs->link);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 47df30982de1..11caa0291254 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -297,7 +297,7 @@ NOKPROBE_SYMBOL(die);
void user_single_step_report(struct pt_regs *regs)
{
- force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}
static void show_signal_msg(int signr, struct pt_regs *regs, int code,
@@ -363,7 +363,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
if (!exception_common(signr, regs, code, addr))
return;
- force_sig_fault(signr, code, (void __user *)addr, current);
+ force_sig_fault(signr, code, (void __user *)addr);
}
/*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ec6b7ad70659..d989592b6fc8 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -178,13 +178,12 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address,
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb,
- current);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return 0;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
return 0;
}
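
The do_sigbus() hunk above touches powerpc's memory-failure path. force_sig_mceerr() carries the poisoned address plus lsb, the log2 of the corrupted region's size, so the handler can work out how much memory to treat as lost. A sketch of the surrounding logic as it stands after this change (hstate_index_to_shift() and VM_FAULT_GET_HINDEX() are the existing helpers this function already uses):

    unsigned int lsb = 0;

    if (fault & VM_FAULT_HWPOISON_LARGE)
            lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
    if (fault & VM_FAULT_HWPOISON)
            lsb = PAGE_SHIFT;       /* whole base page is poisoned */
    force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
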
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 6dfd2cb1bce7..24adbe3c605c 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -31,22 +31,21 @@ static void spufs_handle_event(struct spu_context *ctx,
switch (type) {
case SPE_EVENT_INVALID_DMA:
- force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, NULL);
break;
case SPE_EVENT_SPE_DATA_STORAGE:
ctx->ops->restart_dma(ctx);
- force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea,
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea);
break;
case SPE_EVENT_DMA_ALIGNMENT:
/* DAR isn't set for an alignment fault :( */
- force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
break;
case SPE_EVENT_SPE_ERROR:
force_sig_fault(
SIGILL, ILL_ILLOPC,
(void __user *)(unsigned long)
- ctx->ops->npc_read(ctx) - 4, current);
+ ctx->ops->npc_read(ctx) - 4);
break;
}
}
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 07f82d7395ff..3f2380f40f99 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -443,7 +443,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
&& (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
ret = -ERESTARTSYS;
}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index e56b553de27b..f18d5067cd0f 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
* runqueue. The context will be rescheduled on the proper node
* if it is timesliced or preempted.
*/
- cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed);
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
/* Save the current cpu id for spu interrupt routing. */
ctx->last_ran = raw_smp_processor_id();
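
The cpus_allowed -> cpus_ptr switch above follows a scheduler-side change: a task's affinity mask is now read through a pointer rather than by naming the embedded field. The relevant task_struct fields, as assumed by this call site (5.3-era layout):

    struct task_struct {
            /* ... */
            const cpumask_t *cpus_ptr;  /* what readers dereference      */
            cpumask_t        cpus_mask; /* storage it normally points at */
            /* ... */
    };

The indirection lets the core temporarily repoint cpus_ptr (for example while a task is pinned for migration) without rewriting the saved mask, which is why direct reads of the old field had to be converted.
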
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 9038aeb900a6..96f95c9ebd97 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i)
#ifndef CONFIG_GENERIC_ATOMIC64
#define ATOMIC64_INIT(i) { (i) }
-static __always_inline long atomic64_read(const atomic64_t *v)
+static __always_inline s64 atomic64_read(const atomic64_t *v)
{
return READ_ONCE(v->counter);
}
-static __always_inline void atomic64_set(atomic64_t *v, long i)
+static __always_inline void atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_OP (op, asm_op, I, w, int, )
+ ATOMIC_OP (op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_OP (op, asm_op, I, w, int, ) \
- ATOMIC_OP (op, asm_op, I, d, long, 64)
+ ATOMIC_OP (op, asm_op, I, w, int, ) \
+ ATOMIC_OP (op, asm_op, I, d, s64, 64)
#endif
ATOMIC_OPS(add, add, i)
@@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
+ ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
- ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \
- ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
+ ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \
+ ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \
+ ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64)
#endif
ATOMIC_OPS(add, add, +, i)
@@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i)
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
+ ATOMIC_FETCH_OP(op, asm_op, I, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, I) \
- ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
- ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
+ ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \
+ ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64)
#endif
ATOMIC_OPS(and, and, i)
@@ -219,9 +219,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
#define atomic_fetch_add_unless atomic_fetch_add_unless
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
+static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long prev, rc;
+ s64 prev;
+ long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
@@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS() \
- ATOMIC_OP( int, , 4)
+ ATOMIC_OP(int, , 4)
#else
#define ATOMIC_OPS() \
- ATOMIC_OP( int, , 4) \
- ATOMIC_OP(long, 64, 8)
+ ATOMIC_OP(int, , 4) \
+ ATOMIC_OP(s64, 64, 8)
#endif
ATOMIC_OPS()
@@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
#define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1)
#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
+static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
{
- long prev, rc;
+ s64 prev;
+ long rc;
__asm__ __volatile__ (
"0: lr.d %[p], %[c]\n"
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index f653bfc8a83b..07ceee8b1747 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -86,7 +86,7 @@ struct task_struct;
extern void die(struct pt_regs *regs, const char *str);
extern void do_trap(struct pt_regs *regs, int signo, int code,
- unsigned long addr, struct task_struct *tsk);
+ unsigned long addr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 1fe1b02e44d0..b14d7647d800 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -126,7 +126,7 @@ badframe:
task->comm, task_pid_nr(task), __func__,
frame, (void *)regs->sepc, (void *)regs->sp);
}
- force_sig(SIGSEGV, task);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 6b32190ba73c..424eb72d56b1 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -55,9 +55,10 @@ void die(struct pt_regs *regs, const char *str)
do_exit(SIGSEGV);
}
-void do_trap(struct pt_regs *regs, int signo, int code,
- unsigned long addr, struct task_struct *tsk)
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
{
+ struct task_struct *tsk = current;
+
if (show_unhandled_signals && unhandled_signal(tsk, signo)
&& printk_ratelimit()) {
pr_info("%s[%d]: unhandled signal %d code 0x%x at 0x" REG_FMT,
@@ -67,14 +68,14 @@ void do_trap(struct pt_regs *regs, int signo, int code,
show_regs(regs);
}
- force_sig_fault(signo, code, (void __user *)addr, tsk);
+ force_sig_fault(signo, code, (void __user *)addr);
}
static void do_trap_error(struct pt_regs *regs, int signo, int code,
unsigned long addr, const char *str)
{
if (user_mode(regs)) {
- do_trap(regs, signo, code, addr, current);
+ do_trap(regs, signo, code, addr);
} else {
if (!fixup_exception(regs))
die(regs, str);
@@ -140,7 +141,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
}
#endif /* CONFIG_GENERIC_BUG */
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc));
}
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index f960c3f4ce47..96add1427a75 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -169,7 +169,7 @@ bad_area:
up_read(&mm->mmap_sem);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
- do_trap(regs, SIGSEGV, code, addr, tsk);
+ do_trap(regs, SIGSEGV, code, addr);
return;
}
@@ -205,7 +205,7 @@ do_sigbus:
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
goto no_context;
- do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
vmalloc_fault:
@@ -219,7 +219,7 @@ vmalloc_fault:
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
- return do_trap(regs, SIGSEGV, code, addr, tsk);
+ return do_trap(regs, SIGSEGV, code, addr);
/*
* Synchronize this task's top level page-table
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 109243fdb6ec..fdb4246265a5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+config ARCH_HAS_MEM_ENCRYPT
+ def_bool y
+
config MMU
def_bool y
@@ -30,7 +33,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
def_bool y
config GENERIC_LOCKBREAK
- def_bool y if SMP && PREEMPT
+ def_bool y if PREEMPT
config PGSTE
def_bool y if KVM
@@ -113,7 +116,6 @@ config S390
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
- select GENERIC_CPU_DEVICES if !SMP
select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
@@ -187,6 +189,8 @@ config S390
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
+ select SWIOTLB
+ select GENERIC_ALLOCATOR
config SCHED_OMIT_FRAME_POINTER
@@ -399,27 +403,10 @@ config SYSVIPC_COMPAT
config SMP
def_bool y
- prompt "Symmetric multi-processing support"
- ---help---
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on uni- and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- uniprocessor machines. On a uniprocessor machine, the kernel
- will run faster if you say N here.
-
- See also the SMP-HOWTO available at
- <http://www.tldp.org/docs.html#howto>.
-
- Even if you don't know what to do here, say Y.
config NR_CPUS
int "Maximum number of CPUs (2-512)"
range 2 512
- depends on SMP
default "64"
help
This allows you to specify the maximum number of CPUs which this
@@ -431,12 +418,6 @@ config NR_CPUS
config HOTPLUG_CPU
def_bool y
- prompt "Support for hot-pluggable CPUs"
- depends on SMP
- help
- Say Y here to be able to turn CPUs off and on. CPUs
- can be controlled through /sys/devices/system/cpu/cpu#.
- Say N if you want to disable CPU hotplug.
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
@@ -448,7 +429,7 @@ config NODES_SPAN_OTHER_NODES
config NUMA
bool "NUMA support"
- depends on SMP && SCHED_TOPOLOGY
+ depends on SCHED_TOPOLOGY
default n
help
Enable NUMA support
@@ -523,7 +504,6 @@ config SCHED_DRAWER
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
- depends on SMP
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
@@ -763,7 +743,7 @@ config PCI_NR_FUNCTIONS
This allows you to specify the maximum number of PCI functions which
this kernel will support.
-endif # PCI
+endif # PCI
config HAS_IOMEM
def_bool PCI
@@ -829,16 +809,15 @@ menu "Dump support"
config CRASH_DUMP
bool "kernel crash dumps"
- depends on SMP
select KEXEC
help
Generate crash dump after being started by kexec.
Crash dump kernels are loaded in the main kernel with kexec-tools
into a specially reserved region and then later executed after
a crash by kdump/kexec.
- Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+ Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
This option also enables s390 zfcpdump.
- See also <file:Documentation/s390/zfcpdump.txt>
+ See also <file:Documentation/s390/zfcpdump.rst>
endmenu
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index b0920b35f87b..a6dc01a22048 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -88,6 +88,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_AP=m
+CONFIG_VFIO_CCW=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -498,6 +499,7 @@ CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_S390_AP_IOMMU=y
+CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index c59b922cb6c5..e4bc40073003 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -1,21 +1,22 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_USELIB=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-# CONFIG_CPU_ISOLATION is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-CONFIG_CGROUPS=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
-CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_FREEZER=y
@@ -26,98 +27,402 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
-CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_LIVEPATCH=y
-CONFIG_NR_CPUS=256
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
-CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
-CONFIG_CMM=m
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
-CONFIG_STATIC_KEYS_SELFTEST=y
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_BINFMT_MISC=m
+CONFIG_LIVEPATCH=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
+CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
+CONFIG_VFIO_CCW=m
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
-CONFIG_NET_KEY=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=y
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
-CONFIG_VLAN_8021Q=y
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_NET_PKTGEN=m
CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
-CONFIG_SCSI_VIRTIO=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
CONFIG_MD_MULTIPATH=m
-CONFIG_BLK_DEV_DM=y
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_MIRROR=m
CONFIG_DM_LOG_USERSPACE=m
CONFIG_DM_RAID=m
@@ -125,71 +430,216 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_SWITCH=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
CONFIG_TUN=m
-CONFIG_VIRTIO_NET=y
-# CONFIG_NET_VENDOR_ALACRITECH is not set
-# CONFIG_NET_VENDOR_AURORA is not set
-# CONFIG_NET_VENDOR_CORTINA is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
-# CONFIG_NET_VENDOR_SOCIONEXT is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
-# CONFIG_INPUT is not set
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_DEVKMEM=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
-CONFIG_VIRTIO_BALLOON=y
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
+CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
-# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_S390_PTDUMP=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_FIPS=y
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_XTS=m
-CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
@@ -199,16 +649,16 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -217,38 +667,14 @@ CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
-# CONFIG_XZ_DEC_X86 is not set
-# CONFIG_XZ_DEC_POWERPC is not set
-# CONFIG_XZ_DEC_IA64 is not set
-# CONFIG_XZ_DEC_ARM is not set
-# CONFIG_XZ_DEC_ARMTHUMB is not set
-# CONFIG_XZ_DEC_SPARC is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_SG=y
-CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_S390_PTDUMP=y
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
deleted file mode 100644
index 09aa5cb14873..000000000000
--- a/arch/s390/configs/performance_defconfig
+++ /dev/null
@@ -1,678 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_BLK_CGROUP=y
-CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_HUGETLB=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_PERF=y
-CONFIG_NAMESPACES=y
-CONFIG_USER_NS=y
-CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_CHECKPOINT_RESTORE=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG=y
-CONFIG_MODULE_SIG_SHA256=y
-CONFIG_BLK_DEV_INTEGRITY=y
-CONFIG_BLK_DEV_THROTTLING=y
-CONFIG_BLK_WBT=y
-CONFIG_BLK_WBT_SQ=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_IBM_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_CFQ_GROUP_IOSCHED=y
-CONFIG_DEFAULT_DEADLINE=y
-CONFIG_LIVEPATCH=y
-CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=512
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
-CONFIG_EXPOLINE=y
-CONFIG_EXPOLINE_AUTO=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
-CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
-CONFIG_ZSMALLOC_STAT=y
-CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
-CONFIG_IDLE_PAGE_TRACKING=y
-CONFIG_PCI=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_S390=y
-CONFIG_CHSC_SCH=y
-CONFIG_VFIO_AP=m
-CONFIG_CRASH_DUMP=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_PACKET_DIAG=m
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=m
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_SMC=m
-CONFIG_SMC_DIAG=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE_DEMUX=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_NET_IPVTI=m
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_INET_DIAG=m
-CONFIG_INET_UDP_DIAG=m
-CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_HSTCP=m
-CONFIG_TCP_CONG_HYBLA=m
-CONFIG_TCP_CONG_SCALABLE=m
-CONFIG_TCP_CONG_LP=m
-CONFIG_TCP_CONG_VENO=m
-CONFIG_TCP_CONG_YEAH=m
-CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_IPV6_ROUTER_PREF=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
-CONFIG_IPV6_VTI=m
-CONFIG_IPV6_SIT=m
-CONFIG_IPV6_GRE=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CONNTRACK_TIMEOUT=y
-CONFIG_NF_CONNTRACK_TIMESTAMP=y
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_SNMP=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
-CONFIG_NFT_LOG=m
-CONFIG_NFT_LIMIT=m
-CONFIG_NFT_NAT=m
-CONFIG_NFT_COMPAT=m
-CONFIG_NFT_HASH=m
-CONFIG_NETFILTER_XT_SET=m
-CONFIG_NETFILTER_XT_TARGET_AUDIT=m
-CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_HMARK=m
-CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
-CONFIG_NETFILTER_XT_TARGET_LOG=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TEE=m
-CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
-CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_CPU=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_IPVS=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_NFACCT=m
-CONFIG_NETFILTER_XT_MATCH_OSF=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_SET=m
-CONFIG_IP_SET_BITMAP_IP=m
-CONFIG_IP_SET_BITMAP_IPMAC=m
-CONFIG_IP_SET_BITMAP_PORT=m
-CONFIG_IP_SET_HASH_IP=m
-CONFIG_IP_SET_HASH_IPPORT=m
-CONFIG_IP_SET_HASH_IPPORTIP=m
-CONFIG_IP_SET_HASH_IPPORTNET=m
-CONFIG_IP_SET_HASH_NETPORTNET=m
-CONFIG_IP_SET_HASH_NET=m
-CONFIG_IP_SET_HASH_NETNET=m
-CONFIG_IP_SET_HASH_NETPORT=m
-CONFIG_IP_SET_HASH_NETIFACE=m
-CONFIG_IP_SET_LIST_SET=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_DH=m
-CONFIG_IP_VS_SH=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-CONFIG_IP_VS_FTP=m
-CONFIG_IP_VS_PE_SIP=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=y
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_RDS=m
-CONFIG_RDS_RDMA=m
-CONFIG_RDS_TCP=m
-CONFIG_L2TP=m
-CONFIG_L2TP_DEBUGFS=m
-CONFIG_L2TP_V3=y
-CONFIG_L2TP_IP=m
-CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_VLAN_8021Q_GVRP=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_MULTIQ=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFB=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_MQPRIO=m
-CONFIG_NET_SCH_CHOKE=m
-CONFIG_NET_SCH_QFQ=m
-CONFIG_NET_SCH_CODEL=m
-CONFIG_NET_SCH_FQ_CODEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_SCH_PLUG=m
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_FLOW=m
-CONFIG_NET_CLS_CGROUP=y
-CONFIG_NET_CLS_BPF=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=m
-CONFIG_NET_ACT_GACT=m
-CONFIG_GACT_PROB=y
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
-CONFIG_NET_ACT_NAT=m
-CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_ACT_SIMP=m
-CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_ACT_CSUM=m
-CONFIG_DNS_RESOLVER=y
-CONFIG_OPENVSWITCH=m
-CONFIG_VSOCKETS=m
-CONFIG_VIRTIO_VSOCKETS=m
-CONFIG_NETLINK_DIAG=m
-CONFIG_CGROUP_NET_PRIO=y
-CONFIG_BPF_JIT=y
-CONFIG_NET_PKTGEN=m
-CONFIG_DEVTMPFS=y
-CONFIG_DMA_CMA=y
-CONFIG_CMA_SIZE_MBYTES=0
-CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_DRBD=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_VIRTIO_BLK=y
-CONFIG_BLK_DEV_RBD=m
-CONFIG_BLK_DEV_NVME=m
-CONFIG_ENCLOSURE_SERVICES=m
-CONFIG_GENWQE=m
-CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=y
-CONFIG_CHR_DEV_SCH=m
-CONFIG_SCSI_ENCLOSURE=m
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SPI_ATTRS=m
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_SAS_LIBSAS=m
-CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_ISCSI_TCP=m
-CONFIG_SCSI_DEBUG=m
-CONFIG_ZFCP=y
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=y
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_HP_SW=m
-CONFIG_SCSI_DH_EMC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_SCSI_OSD_INITIATOR=m
-CONFIG_SCSI_OSD_ULD=m
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_LOG_USERSPACE=m
-CONFIG_DM_RAID=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_DELAY=m
-CONFIG_DM_UEVENT=y
-CONFIG_DM_FLAKEY=m
-CONFIG_DM_VERITY=m
-CONFIG_DM_SWITCH=m
-CONFIG_NETDEVICES=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_EQUALIZER=m
-CONFIG_IFB=m
-CONFIG_MACVLAN=m
-CONFIG_MACVTAP=m
-CONFIG_VXLAN=m
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
-CONFIG_NLMON=m
-# CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-CONFIG_MLX4_EN=m
-CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
-# CONFIG_NET_VENDOR_NATSEMI is not set
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_MPPE=m
-CONFIG_PPPOE=m
-CONFIG_PPTP=m
-CONFIG_PPPOL2TP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_ISM=m
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_HW_RANDOM_VIRTIO=m
-CONFIG_RAW_DRIVER=m
-CONFIG_HANGCHECK_TIMER=m
-CONFIG_TN3270_FS=y
-# CONFIG_HWMON is not set
-CONFIG_WATCHDOG=y
-CONFIG_WATCHDOG_NOWAYOUT=y
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_DIAG288_WATCHDOG=m
-CONFIG_DRM=y
-CONFIG_DRM_VIRTIO_GPU=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_MLX5_INFINIBAND=m
-CONFIG_VFIO=m
-CONFIG_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
-CONFIG_VFIO_MDEV_DEVICE=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-CONFIG_VIRTIO_INPUT=y
-CONFIG_S390_AP_IOMMU=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_JBD2_DEBUG=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_JFS_STATISTICS=y
-CONFIG_XFS_FS=y
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_XFS_RT=y
-CONFIG_GFS2_FS=m
-CONFIG_GFS2_FS_LOCKING_DLM=y
-CONFIG_OCFS2_FS=m
-CONFIG_BTRFS_FS=y
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_FS_DAX=y
-CONFIG_EXPORTFS_BLOCK_OPS=y
-CONFIG_FS_ENCRYPTION=y
-CONFIG_FANOTIFY=y
-CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
-CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=m
-CONFIG_OVERLAY_FS=m
-CONFIG_FSCACHE=m
-CONFIG_CACHEFILES=m
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
-CONFIG_ECRYPT_FS=m
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_SQUASHFS_XATTR=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_ROMFS_FS=m
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=m
-CONFIG_NFS_SWAP=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_NFSD_V4_SECURITY_LABEL=y
-CONFIG_CIFS=m
-CONFIG_CIFS_STATS=y
-CONFIG_CIFS_STATS2=y
-CONFIG_CIFS_WEAK_PW_HASH=y
-CONFIG_CIFS_UPCALL=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-# CONFIG_CIFS_DEBUG is not set
-CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_UTF8=m
-CONFIG_DLM=m
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_FRAME_WARN=1024
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_RCU_TORTURE_TEST=m
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-CONFIG_HIST_TRIGGERS=y
-CONFIG_LKDTM=m
-CONFIG_PERCPU_TEST=m
-CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
-CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
-CONFIG_ENCRYPTED_KEYS=m
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_INTEGRITY_SIGNATURE=y
-CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
-CONFIG_IMA=y
-CONFIG_IMA_DEFAULT_HASH_SHA256=y
-CONFIG_IMA_WRITE_POLICY=y
-CONFIG_IMA_APPRAISE=y
-CONFIG_CRYPTO_FIPS=y
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
-CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
-CONFIG_CRYPTO_PCRYPT=m
-CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_842=m
-CONFIG_CRYPTO_LZ4=m
-CONFIG_CRYPTO_LZ4HC=m
-CONFIG_CRYPTO_ANSI_CPRNG=m
-CONFIG_CRYPTO_USER_API_HASH=m
-CONFIG_CRYPTO_USER_API_SKCIPHER=m
-CONFIG_CRYPTO_USER_API_RNG=m
-CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRC7=m
-CONFIG_CRC8=m
-CONFIG_CORDIC=m
-CONFIG_CMM=m
-CONFIG_APPLDATA_BASE=y
-CONFIG_KVM=m
-CONFIG_KVM_S390_UCONTROL=y
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 7dc7f58c4287..d92bab844b73 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -24,7 +24,6 @@ CONFIG_CRASH_DUMP=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_BLK_DEV_XPRAM is not set
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 86aed30fad3a..eeeb6a7737a4 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -137,7 +137,7 @@ static struct shash_alg ghash_alg = {
static int __init ghash_mod_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
- return -EOPNOTSUPP;
+ return -ENODEV;
return crypto_register_shash(&ghash_alg);
}
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 12cca467af7d..d977643fa627 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -824,7 +824,7 @@ static int __init prng_init(void)
/* check if the CPU has a PRNG */
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
- return -EOPNOTSUPP;
+ return -ENODEV;
/* check if TRNG subfunction is available */
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@@ -837,7 +837,7 @@ static int __init prng_init(void)
if (prng_mode == PRNG_MODE_SHA512) {
pr_err("The prng module cannot "
"start in SHA-512 mode\n");
- return -EOPNOTSUPP;
+ return -ENODEV;
}
prng_mode = PRNG_MODE_TDES;
} else
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 009572e8276d..7c15542d3685 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -86,7 +86,7 @@ static struct shash_alg alg = {
static int __init sha1_s390_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
- return -EOPNOTSUPP;
+ return -ENODEV;
return crypto_register_shash(&alg);
}
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 62833a1d8724..af7505148f80 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -117,7 +117,7 @@ static int __init sha256_s390_init(void)
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
- return -EOPNOTSUPP;
+ return -ENODEV;
ret = crypto_register_shash(&sha256_alg);
if (ret < 0)
goto out;
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index be589c340d15..ad29db085a18 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -127,7 +127,7 @@ static int __init init(void)
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
- return -EOPNOTSUPP;
+ return -ENODEV;
if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
if ((ret = crypto_register_shash(&sha384_alg)) < 0)
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index c10d2ee2dfda..01936fdfaddb 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -11,6 +11,7 @@
#define _ASM_S390_AIRQ_H
#include <linux/bit_spinlock.h>
+#include <linux/dma-mapping.h>
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
@@ -29,6 +30,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq);
/* Adapter interrupt bit vector */
struct airq_iv {
unsigned long *vector; /* Adapter interrupt bit vector */
+ dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */
unsigned long *avail; /* Allocation bit mask for the bit vector */
unsigned long *bitlock; /* Lock bit mask for the bit vector */
unsigned long *ptr; /* Pointer associated with each bit */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index fd20ab5d4cf7..491ad53a0d4e 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline long atomic64_read(const atomic64_t *v)
+static inline s64 atomic64_read(const atomic64_t *v)
{
- long c;
+ s64 c;
asm volatile(
" lg %0,%1\n"
@@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v)
return c;
}
-static inline void atomic64_set(atomic64_t *v, long i)
+static inline void atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
: "=Q" (v->counter) : "d" (i));
}
-static inline long atomic64_add_return(long i, atomic64_t *v)
+static inline s64 atomic64_add_return(s64 i, atomic64_t *v)
{
- return __atomic64_add_barrier(i, &v->counter) + i;
+ return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
-static inline long atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
{
- return __atomic64_add_barrier(i, &v->counter);
+ return __atomic64_add_barrier(i, (long *)&v->counter);
}
-static inline void atomic64_add(long i, atomic64_t *v)
+static inline void atomic64_add(s64 i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
- __atomic64_add_const(i, &v->counter);
+ __atomic64_add_const(i, (long *)&v->counter);
return;
}
#endif
- __atomic64_add(i, &v->counter);
+ __atomic64_add(i, (long *)&v->counter);
}
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return __atomic64_cmpxchg(&v->counter, old, new);
+ return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define ATOMIC64_OPS(op) \
-static inline void atomic64_##op(long i, atomic64_t *v) \
+static inline void atomic64_##op(s64 i, atomic64_t *v) \
{ \
- __atomic64_##op(i, &v->counter); \
+ __atomic64_##op(i, (long *)&v->counter); \
} \
-static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+static inline long atomic64_fetch_##op(s64 i, atomic64_t *v) \
{ \
- return __atomic64_##op##_barrier(i, &v->counter); \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
@@ -145,8 +145,8 @@ ATOMIC64_OPS(xor)
#undef ATOMIC64_OPS
-#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
-#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
-#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(s64)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(s64)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(s64)(_i), _v)
#endif /* __ARCH_S390_ATOMIC__ */
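
/*
 * Illustrative sketch, not part of the patch: with the s64 prototypes
 * above, a negative delta no longer depends on the width of `long`.
 * The counter and function names are hypothetical.
 */
static atomic64_t example_counter = ATOMIC64_INIT(0);

static void example_update(void)
{
	atomic64_add(-42, &example_counter);	/* delta carried as s64 */
	if (atomic64_read(&example_counter) < 0)
		atomic64_set(&example_counter, 0);
}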
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index a29dd430fb40..865ce1cb86d5 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *);
extern void ccw_device_wait_idle(struct ccw_device *);
extern int ccw_device_force_console(struct ccw_device *);
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void ccw_device_dma_free(struct ccw_device *cdev,
+ void *cpu_addr, size_t size);
+
int ccw_device_siosl(struct ccw_device *);
extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
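
/*
 * Illustrative sketch, not part of the patch: allocating and freeing
 * device-addressable memory with the helpers declared above. The
 * function name and buffer size are hypothetical.
 */
static int example_ccw_dma(struct ccw_device *cdev)
{
	void *buf;

	buf = ccw_device_dma_zalloc(cdev, 32);	/* zeroed, DMA-capable */
	if (!buf)
		return -ENOMEM;
	/* ... use buf, e.g. for a channel program ... */
	ccw_device_dma_free(cdev, buf, 32);
	return 0;
}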
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1727180e8ca1..b5bfb3123cb1 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
+#include <linux/genalloc.h>
#include <asm/types.h>
#define LPM_ANYPATH 0xff
@@ -264,6 +265,36 @@ struct ciw {
#define CIW_TYPE_RNI 0x2 /* read node identifier */
/*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID 0
+#define ND_VALIDITY_OUTDATED 1
+#define ND_VALIDITY_INVALID 2
+
+struct node_descriptor {
+ /* Flags. */
+ union {
+ struct {
+ u32 validity:3;
+ u32 reserved:5;
+ } __packed;
+ u8 byte0;
+ } __packed;
+
+ /* Node parameters. */
+ u32 params:24;
+
+ /* Node ID. */
+ char type[6];
+ char model[3];
+ char manufacturer[3];
+ char plant[2];
+ char seq[12];
+ u16 tag;
+} __packed;
+
+/*
* Flags used as input parameters for do_IO()
*/
#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */
@@ -328,6 +359,16 @@ static inline u8 pathmask_to_pos(u8 mask)
void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
+extern void *cio_dma_zalloc(size_t size);
+extern void cio_dma_free(void *cpu_addr, size_t size);
+extern struct device *cio_get_dma_css_dev(void);
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+ size_t size);
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
+
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
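
/*
 * Illustrative sketch, not part of the patch: typical use of the new
 * gen_pool based CIO DMA helpers declared above. The function name,
 * device pointer and sizes are hypothetical.
 */
static void *example_cio_dma_pool(struct device *dma_dev)
{
	struct gen_pool *pool;
	void *cpu_addr;

	pool = cio_gp_dma_create(dma_dev, 1);	/* pool backed by one page */
	if (!pool)
		return NULL;
	cpu_addr = cio_gp_dma_zalloc(pool, dma_dev, 64);
	if (!cpu_addr)
		cio_gp_dma_destroy(pool, dma_dev);
	return cpu_addr;
}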
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 3bda757317cf..0cf6b53587db 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -112,13 +112,8 @@ union ctlreg2 {
};
};
-#ifdef CONFIG_SMP
-# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
-#else
-# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
-#endif
+#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index c305d39f5016..310134015541 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -107,13 +107,37 @@ void debug_unregister(debug_info_t *id);
void debug_set_level(debug_info_t *id, int new_level);
void debug_set_critical(void);
+
void debug_stop_all(void);
+/**
+ * debug_level_enabled() - Returns true if debug events for the specified
+ * level would be logged. Otherwise returns false.
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ *
+ * Return:
+ * - %true if level is less or equal to the current debug level.
+ */
static inline bool debug_level_enabled(debug_info_t *id, int level)
{
return level <= id->level;
}
+/**
+ * debug_event() - writes binary debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @data: pointer to data for debug entry
+ * @length: length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_event(debug_info_t *id, int level,
void *data, int length)
{
@@ -122,6 +146,18 @@ static inline debug_entry_t *debug_event(debug_info_t *id, int level,
return debug_event_common(id, level, data, length);
}
+/**
+ * debug_int_event() - writes unsigned integer debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
unsigned int tag)
{
@@ -132,6 +168,18 @@ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
return debug_event_common(id, level, &t, sizeof(unsigned int));
}
+/**
+ * debug_long_event() - writes unsigned long debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
unsigned long tag)
{
@@ -142,6 +190,18 @@ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
return debug_event_common(id, level, &t, sizeof(unsigned long));
}
+/**
+ * debug_text_event() - writes string debug entry in ascii format to active
+ * debug area (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @txt: string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
const char *txt)
{
@@ -152,12 +212,28 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
+/**
+ * debug_sprintf_event() - writes debug entry with format string
+ * and varargs (longs) to active debug area
+ * (if level <= actual debug level).
+ *
+ * @_id: handle for debug log
+ * @_level: debug level
+ * @_fmt: format string for debug entry
+ * @...: varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * Floats and long long data types cannot be used as varargs.
+ */
#define debug_sprintf_event(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
@@ -172,6 +248,20 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__ret; \
})
+/**
+ * debug_exception() - writes binary debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @data: pointer to data for debug entry
+ * @length: length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
void *data, int length)
{
@@ -180,6 +270,19 @@ static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
return debug_exception_common(id, level, data, length);
}
+/**
+ * debug_int_exception() - writes unsigned int debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
unsigned int tag)
{
@@ -190,6 +293,19 @@ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
return debug_exception_common(id, level, &t, sizeof(unsigned int));
}
+/**
+ * debug_long_exception() - writes long debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
unsigned long tag)
{
@@ -200,6 +316,20 @@ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
return debug_exception_common(id, level, &t, sizeof(unsigned long));
}
+/**
+ * debug_text_exception() - writes string debug entry in ascii format to active
+ * debug area (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @txt: string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
const char *txt)
{
@@ -210,12 +340,30 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
+
+/**
+ * debug_sprintf_exception() - writes debug entry with format string and
+ * varargs (longs) to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area.
+ *
+ * @_id: handle for debug log
+ * @_level: debug level
+ * @_fmt: format string for debug entry
+ * @...: varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * Floats and long long data types cannot be used as varargs.
+ */
#define debug_sprintf_exception(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
@@ -231,6 +379,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
})
int debug_register_view(debug_info_t *id, struct debug_view *view);
+
int debug_unregister_view(debug_info_t *id, struct debug_view *view);
/*
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index e78cda94456b..68c476b20b57 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -59,6 +59,18 @@ static inline int test_facility(unsigned long nr)
return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
+static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+{
+ register unsigned long reg0 asm("0") = size - 1;
+
+ asm volatile(
+ ".insn s,0xb2b00000,0(%1)" /* stfle */
+ : "+d" (reg0)
+ : "a" (stfle_fac_list)
+ : "memory", "cc");
+ return reg0;
+}
+
/**
* stfle - Store facility list extended
* @stfle_fac_list: array where facility list can be stored
@@ -75,13 +87,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size)
memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
if (S390_lowcore.stfl_fac_list & 0x01000000) {
/* More facility bits available with stfle */
- register unsigned long reg0 asm("0") = size - 1;
-
- asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
- : "+d" (reg0)
- : "a" (stfle_fac_list)
- : "memory", "cc");
- nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
+ nr = __stfle_asm(stfle_fac_list, size);
+ nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
}
memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
}
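
/*
 * Illustrative sketch, not part of the patch: the clamping above matters
 * when a caller passes a short buffer; __stfle() then never reports more
 * bytes than the buffer holds. The buffer size here is hypothetical.
 */
static void example_query_facilities(void)
{
	u64 fac_list[4];

	stfle(fac_list, ARRAY_SIZE(fac_list));	/* fills at most 4 double words */
	if (fac_list[0] & (1UL << 63))		/* facility bit 0 (big-endian) */
		pr_info("facility 0 installed\n");
}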
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 15578fd762f6..6fb7aced104a 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -122,8 +122,7 @@ idal_buffer_alloc(size_t size, int page_order)
nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
- ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
- GFP_DMA | GFP_KERNEL);
+ ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
if (ib == NULL)
return ERR_PTR(-ENOMEM);
ib->size = size;
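
/*
 * Illustrative sketch, not part of the patch: struct_size() computes
 * sizeof(*eb) plus nr_ptrs trailing elements with overflow checking,
 * which is what replaces the open-coded multiplication above. The
 * struct and function names are hypothetical.
 */
struct example_buf {
	size_t size;
	void *data[];			/* flexible array member */
};

static struct example_buf *example_alloc(size_t nr_ptrs)
{
	struct example_buf *eb;

	eb = kmalloc(struct_size(eb, data, nr_ptrs), GFP_KERNEL);
	return eb;
}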
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2b00a3ebee08..4a928e2c667b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
+#include <linux/module.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -720,8 +721,14 @@ struct kvm_s390_cpu_model {
unsigned short ibc;
};
+struct kvm_s390_module_hook {
+ int (*hook)(struct kvm_vcpu *vcpu);
+ struct module *owner;
+};
+
struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
+ struct kvm_s390_module_hook *pqap_hook;
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..3eb018508190
--- /dev/null
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_MEM_ENCRYPT_H__
+#define S390_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#define sme_me_mask 0ULL
+
+static inline bool sme_active(void) { return false; }
+extern bool sev_active(void);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* S390_MEM_ENCRYPT_H__ */
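
/*
 * Illustrative sketch, not part of the patch: generic code keyed on
 * sev_active() can use the interface above to share one page with the
 * hypervisor. The function name and address are hypothetical.
 */
static int example_share_page(unsigned long page_addr)
{
	if (!sev_active())
		return 0;	/* nothing to do for normal guests */
	return set_memory_decrypted(page_addr, 1);	/* make one page shared */
}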
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd55326..a2399eff84ca 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -194,6 +194,11 @@ int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
#ifdef CONFIG_PCI
+static inline bool zpci_use_mio(struct zpci_dev *zdev)
+{
+ return static_branch_likely(&have_mio) && zdev->mio_capable;
+}
+
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 0095ddb58ff6..50b4ce8cddfd 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -16,7 +16,7 @@
* per cpu area, use weak definitions to force the compiler to
* generate external references.
*/
-#if defined(CONFIG_SMP) && defined(MODULE)
+#if defined(MODULE)
#define ARCH_NEEDS_WEAK_PER_CPU
#endif
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0fcbc37b637..14883b1562e0 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -36,6 +36,7 @@
#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
@@ -221,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void)
return cpu_address;
}
-/*
- * Give up the time slice of the virtual PU.
- */
-#define cpu_relax_yield cpu_relax_yield
-void cpu_relax_yield(void);
-
#define cpu_relax() barrier()
#define ECAG_CACHE_ATTRIBUTE 0
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 3907ead27ffa..b157a81fb977 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -9,9 +9,6 @@
#define __ASM_SMP_H
#include <asm/sigp.h>
-
-#ifdef CONFIG_SMP
-
#include <asm/lowcore.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
@@ -40,33 +37,6 @@ extern int smp_cpu_get_polarization(int cpu);
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
-#else /* CONFIG_SMP */
-
-#define smp_cpu_mtid 0
-
-static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
-{
- func(data);
-}
-
-static inline void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- func(data);
-}
-
-static inline void smp_emergency_stop(void)
-{
-}
-
-static inline int smp_find_processor_id(u16 address) { return 0; }
-static inline int smp_store_status(int cpu) { return 0; }
-static inline int smp_vcpu_scheduled(int cpu) { return 1; }
-static inline void smp_yield_cpu(int cpu) { }
-static inline void smp_fill_possible_mask(void) { }
-static inline void smp_detect_cpus(void) { }
-
-#endif /* CONFIG_SMP */
-
static inline void smp_stop_cpu(void)
{
u16 pcpu = stap();
@@ -83,14 +53,9 @@ static inline int smp_get_base_cpu(int cpu)
return cpu - (cpu % (smp_cpu_mtid + 1));
}
-#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
-#else
-static inline int smp_rescan_cpus(void) { return 0; }
-static inline void cpu_die(void) { }
-#endif
#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 0a29588aa00b..c02bff33f6c7 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -20,11 +20,7 @@
extern int spin_retry;
-#ifndef CONFIG_SMP
-static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
-#else
bool arch_vcpu_is_preempted(int cpu);
-#endif
#define vcpu_is_preempted arch_vcpu_is_preempted
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 8c840f0904f3..82703e03f35d 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -32,7 +32,6 @@ static inline void __tlb_flush_idte(unsigned long asce)
: : "a" (opt), "a" (asce) : "cc");
}
-#ifdef CONFIG_SMP
void smp_ptlb_all(void);
/*
@@ -83,22 +82,6 @@ static inline void __tlb_flush_kernel(void)
else
__tlb_flush_global();
}
-#else
-#define __tlb_flush_global() __tlb_flush_local()
-
-/*
- * Flush TLB entries for a specific ASCE on all CPUs.
- */
-static inline void __tlb_flush_mm(struct mm_struct *mm)
-{
- __tlb_flush_local();
-}
-
-static inline void __tlb_flush_kernel(void)
-{
- __tlb_flush_local();
-}
-#endif
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 6eb2ef105d87..d827b5b9a32c 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -79,23 +79,4 @@ static inline void unwind_module_init(struct module *mod, void *orc_ip,
size_t orc_ip_size, void *orc,
size_t orc_size) {}
-#ifdef CONFIG_KASAN
-/*
- * This disables KASAN checking when reading a value from another task's stack,
- * since the other task could be running on another CPU and could have poisoned
- * the stack in the meantime.
- */
-#define READ_ONCE_TASK_STACK(task, x) \
-({ \
- unsigned long val; \
- if (task == current) \
- val = READ_ONCE(x); \
- else \
- val = READ_ONCE_NOCHECK(x); \
- val; \
-})
-#else
-#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
-#endif
-
#endif /* _ASM_S390_UNWIND_H */
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
index 45c9ec984e6b..455da46e3193 100644
--- a/arch/s390/include/uapi/asm/runtime_instr.h
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -57,7 +57,7 @@ struct runtime_instr_cb {
__u64 sf;
__u64 rsic;
__u64 reserved8;
-} __packed __aligned(8);
+} __attribute__((__packed__, __aligned__(8)));
static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
{
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index b0478d01a0c5..0f255b54b051 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -53,6 +53,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
+obj-y += smp.o
extra-y += head64.o vmlinux.lds
@@ -60,7 +61,6 @@ obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
obj-$(CONFIG_AUDIT) += audit.o
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 6f2a193ccccc..38d4bdbc34b9 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -194,7 +194,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -217,7 +217,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 0ebf08c3b35e..6d321f5f101d 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -647,11 +647,23 @@ static int debug_close(struct inode *inode, struct file *file)
return 0; /* success */
}
-/*
- * debug_register_mode:
- * - Creates and initializes debug area for the caller
- * The mode parameter allows to specify access rights for the s390dbf files
- * - Returns handle for debug area
+/**
+ * debug_register_mode() - creates and initializes debug area.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ * @mode: File mode for debugfs files. E.g. S_IRWXUGO
+ * @uid: User ID for debugfs files. Currently only 0 is supported.
+ * @gid: Group ID for debugfs files. Currently only 0 is supported.
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * Must not be called within an interrupt handler.
*/
debug_info_t *debug_register_mode(const char *name, int pages_per_area,
int nr_areas, int buf_size, umode_t mode,
@@ -681,10 +693,21 @@ out:
}
EXPORT_SYMBOL(debug_register_mode);
-/*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+/**
+ * debug_register() - creates and initializes debug area with default file mode.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * The debugfs file mode access permissions are read and write for user.
+ * Must not be called within an interrupt handler.
*/
debug_info_t *debug_register(const char *name, int pages_per_area,
int nr_areas, int buf_size)
@@ -694,9 +717,13 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
}
EXPORT_SYMBOL(debug_register);
-/*
- * debug_unregister:
- * - give back debug area
+/**
+ * debug_unregister() - give back debug area.
+ *
+ * @id: handle for debug log
+ *
+ * Return:
+ * none
*/
void debug_unregister(debug_info_t *id)
{
@@ -745,9 +772,14 @@ out:
return rc;
}
-/*
- * debug_set_level:
- * - set actual debug level
+/**
+ * debug_set_level() - Sets new actual debug level if new_level is valid.
+ *
+ * @id: handle for debug log
+ * @new_level: new debug level
+ *
+ * Return:
+ * none
*/
void debug_set_level(debug_info_t *id, int new_level)
{
@@ -873,6 +905,14 @@ static struct ctl_table s390dbf_dir_table[] = {
static struct ctl_table_header *s390dbf_sysctl_header;
+/**
+ * debug_stop_all() - stops the debug feature if stopping is allowed.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of a kernel oops.
+ */
void debug_stop_all(void)
{
if (debug_stoppable)
@@ -880,6 +920,17 @@ void debug_stop_all(void)
}
EXPORT_SYMBOL(debug_stop_all);
+/**
+ * debug_set_critical() - event/exception functions try lock instead of spin.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of stopping all CPUs but the current one.
+ * Once in this state, functions to write a debug entry for an
+ * event or exception no longer spin on the debug area lock,
+ * but only try to get it and fail if they do not get the lock.
+ */
void debug_set_critical(void)
{
debug_critical = 1;
@@ -1036,8 +1087,16 @@ debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *stri
}
EXPORT_SYMBOL(__debug_sprintf_exception);
-/*
- * debug_register_view:
+/**
+ * debug_register_view() - registers new debug view and creates debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
int debug_register_view(debug_info_t *id, struct debug_view *view)
{
@@ -1077,8 +1136,16 @@ out:
}
EXPORT_SYMBOL(debug_register_view);
-/*
- * debug_unregister_view:
+/**
+ * debug_unregister_view() - unregisters debug view and removes debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
int debug_unregister_view(debug_info_t *id, struct debug_view *view)
{
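
/*
 * Illustrative sketch, not part of the patch: registering one of the
 * predefined views with a previously created log. The log handle is
 * hypothetical; debug_hex_ascii_view is exported by the debug feature.
 */
static int example_add_view(debug_info_t *dbf)
{
	return debug_register_view(dbf, &debug_hex_ascii_view);
}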
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index b2c68fbf2634..7abe6ae261b4 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -242,6 +242,7 @@ static const unsigned char formats[][6] = {
[INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
+ [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 },
[INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
[INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
[INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
@@ -306,7 +307,7 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
[INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
- [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 },
+ [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
@@ -326,10 +327,8 @@ static const unsigned char formats[][6] = {
[INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
- [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
- [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 },
[INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
[INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 9e87b68be21c..ac06c3949ab3 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -199,9 +199,7 @@ void die(struct pt_regs *regs, const char *str)
#ifdef CONFIG_PREEMPT
pr_cont("PREEMPT ");
#endif
-#ifdef CONFIG_SMP
pr_cont("SMP ");
-#endif
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
pr_cont("\n");
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3f4d272577d3..270d1d145761 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -986,14 +986,12 @@ ENTRY(psw_idle)
stg %r3,__SF_EMPTY(%r15)
larl %r1,.Lpsw_idle_lpsw+4
stg %r1,__SF_EMPTY+8(%r15)
-#ifdef CONFIG_SMP
larl %r1,smp_cpu_mtid
llgf %r1,0(%r1)
ltgr %r1,%r1
jz .Lpsw_idle_stcctm
.insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
.Lpsw_idle_stcctm:
-#endif
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
BPON
STCK __CLOCK_IDLE_ENTER(%r2)
@@ -1468,7 +1466,6 @@ ENDPROC(cleanup_critical)
mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
1: # calculate idle cycles
-#ifdef CONFIG_SMP
clg %r9,BASED(.Lcleanup_idle_insn)
jl 3f
larl %r1,smp_cpu_mtid
@@ -1486,7 +1483,6 @@ ENDPROC(cleanup_critical)
la %r3,8(%r3)
la %r4,8(%r4)
brct %r1,2b
-#endif
3: # account system time going idle
lg %r9,__LC_STEAL_TIMER
alg %r9,__CLOCK_IDLE_ENTER(%r2)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index 3f10b56bd5a3..ab584e8e3527 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -15,16 +15,11 @@ struct insn {
s32 offset;
} __packed;
-struct insn_args {
- struct jump_entry *entry;
- enum jump_label_type type;
-};
-
static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
{
- /* brcl 0,0 */
+ /* brcl 0,offset */
insn->opcode = 0xc004;
- insn->offset = 0;
+ insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
}
static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
@@ -77,23 +72,15 @@ static void __jump_label_transform(struct jump_entry *entry,
s390_kernel_write(code, &new, sizeof(new));
}
-static int __sm_arch_jump_label_transform(void *data)
+static void __jump_label_sync(void *dummy)
{
- struct insn_args *args = data;
-
- __jump_label_transform(args->entry, args->type, 0);
- return 0;
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct insn_args args;
-
- args.entry = entry;
- args.type = type;
-
- stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
+ __jump_label_transform(entry, type, 0);
+ smp_call_function(__jump_label_sync, NULL, 1);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
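
/*
 * Illustrative sketch, not part of the patch: the pattern used above,
 * patch the text and then run an empty function once on every CPU, acts
 * as a serializing event without stop_machine(). Names are hypothetical.
 */
static void example_sync(void *unused)
{
	/* empty; the interrupt itself serializes instruction fetch */
}

static void example_patch_and_sync(void *code, const void *new_insn, size_t len)
{
	s390_kernel_write(code, new_insn, len);
	smp_call_function(example_sync, NULL, 1);	/* wait for completion */
}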
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 8a1ae140c5e2..444a19125a81 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -141,7 +141,6 @@ static noinline void __machine_kdump(void *image)
*/
store_status(__do_machine_kdump, image);
}
-#endif
static unsigned long do_start_kdump(unsigned long addr)
{
@@ -155,6 +154,8 @@ static unsigned long do_start_kdump(unsigned long addr)
return rc;
}
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* Check if kdump checksums are valid: We call purgatory with parameter "0"
*/
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5de13307b703..6ebc2117c66c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -7,6 +7,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/stop_machine.h>
#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
@@ -31,6 +32,7 @@ struct cpu_info {
};
static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry);
static bool machine_has_cpu_mhz;
@@ -58,15 +60,20 @@ void s390_update_cpu_mhz(void)
on_each_cpu(update_cpu_mhz, NULL, 0);
}
-void notrace cpu_relax_yield(void)
+void notrace stop_machine_yield(const struct cpumask *cpumask)
{
- if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
- diag_stat_inc(DIAG_STAT_X044);
- asm volatile("diag 0,0,0x44");
+ int cpu, this_cpu;
+
+ this_cpu = smp_processor_id();
+ if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
+ __this_cpu_write(cpu_relax_retry, 0);
+ cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+ if (cpu >= nr_cpu_ids)
+ return;
+ if (arch_vcpu_is_preempted(cpu))
+ smp_yield_cpu(cpu);
}
- barrier();
}
-EXPORT_SYMBOL(cpu_relax_yield);
/*
* cpu_init - initializes state that is per-CPU.
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f8544d517430..2b94b0ad3588 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -461,11 +461,9 @@ static void __init setup_lowcore_dat_off(void)
mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
-#ifdef CONFIG_SMP
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
-#endif
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
set_prefix((u32)(unsigned long) lc);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 22f08245aa5d..e6fca5498e1f 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -232,7 +232,7 @@ SYSCALL_DEFINE0(sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -256,7 +256,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
load_sigregs();
return regs->gprs[2];
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 35fafa2b91a8..44974654cbd0 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -232,8 +232,6 @@ out:
return -ENOMEM;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
unsigned long async_stack, nodat_stack, lowcore;
@@ -253,8 +251,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
free_pages(lowcore, LC_ORDER);
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct lowcore *lc = pcpu->lowcore;
@@ -418,7 +414,7 @@ void smp_yield_cpu(int cpu)
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
: : "d" (pcpu_devices[cpu].address));
- } else if (MACHINE_HAS_DIAG44) {
+ } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
diag_stat_inc_norecursion(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
}
@@ -895,8 +891,6 @@ static int __init _setup_possible_cpus(char *s)
}
early_param("possible_cpus", _setup_possible_cpus);
-#ifdef CONFIG_HOTPLUG_CPU
-
int __cpu_disable(void)
{
unsigned long cregs[16];
@@ -937,8 +931,6 @@ void __noreturn cpu_die(void)
for (;;) ;
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
void __init smp_fill_possible_mask(void)
{
unsigned int possible, sclp_max, cpu;
@@ -996,7 +988,6 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1073,7 +1064,6 @@ out:
return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
-#endif /* CONFIG_HOTPLUG_CPU */
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1083,9 +1073,7 @@ static ssize_t show_cpu_address(struct device *dev,
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
static struct attribute *cpu_common_attrs[] = {
-#ifdef CONFIG_HOTPLUG_CPU
&dev_attr_configure.attr,
-#endif
&dev_attr_address.attr,
NULL,
};
@@ -1144,15 +1132,11 @@ static int smp_add_present_cpu(int cpu)
out_topology:
sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
out_cpu:
-#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu(c);
-#endif
out:
return rc;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
int __ref smp_rescan_cpus(void)
{
struct sclp_core_info *info;
@@ -1188,17 +1172,14 @@ static ssize_t __ref rescan_store(struct device *dev,
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);
-#endif /* CONFIG_HOTPLUG_CPU */
static int __init s390_smp_init(void)
{
int cpu, rc = 0;
-#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
-#endif
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 19a3c427801a..a7baf0b5f818 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -162,7 +162,6 @@ ENTRY(swsusp_arch_resume)
larl %r1,__swsusp_reset_dma
lg %r1,0(%r1)
BASR_EX %r14,%r1
-#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
jz smt_done
@@ -172,7 +171,6 @@ smt_loop:
brc 8,smt_done /* accepted */
brc 2,smt_loop /* busy, try again */
smt_done:
-#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 82e81a9f7112..164c0282b41a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -45,7 +45,7 @@ int is_valid_bugaddr(unsigned long addr)
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
if (user_mode(regs)) {
- force_sig_fault(si_signo, si_code, get_trap_ip(regs), current);
+ force_sig_fault(si_signo, si_code, get_trap_ip(regs));
report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
@@ -79,7 +79,7 @@ void do_per_trap(struct pt_regs *regs)
if (!current->ptrace)
return;
force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __force __user *) current->thread.per_event.address, current);
+ (void __force __user *) current->thread.per_event.address);
}
NOKPROBE_SYMBOL(do_per_trap);
@@ -165,7 +165,7 @@ void illegal_op(struct pt_regs *regs)
return;
if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
if (current->ptrace)
- force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, location);
else
signal = SIGILL;
#ifdef CONFIG_UPROBES
@@ -229,17 +229,11 @@ void vector_exception(struct pt_regs *regs)
void data_exception(struct pt_regs *regs)
{
- int signal = 0;
-
save_fpu_regs();
if (current->thread.fpu.fpc & FPC_DXC_MASK)
- signal = SIGFPE;
- else
- signal = SIGILL;
- if (signal == SIGFPE)
do_fp_trap(regs, current->thread.fpu.fpc);
- else if (signal)
- do_trap(regs, signal, ILL_ILLOPN, "data exception");
+ else
+ do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
void space_switch_exception(struct pt_regs *regs)
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 57fd4e902f1f..3ce8a0808059 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -46,18 +46,18 @@ bool unwind_next_frame(struct unwind_state *state)
regs = state->regs;
if (unlikely(regs)) {
- sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
if (unlikely(outside_of_stack(state, sp))) {
if (!update_stack_info(state, sp))
goto out_err;
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
} else {
sf = (struct stack_frame *) state->sp;
- sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
+ sp = READ_ONCE_NOCHECK(sf->back_chain);
if (likely(sp)) {
/* Non-zero back-chain points to the previous frame */
if (unlikely(outside_of_stack(state, sp))) {
@@ -65,7 +65,7 @@ bool unwind_next_frame(struct unwind_state *state)
goto out_err;
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = true;
} else {
/* No back-chain, look for a pt_regs structure */
@@ -73,9 +73,9 @@ bool unwind_next_frame(struct unwind_state *state)
if (!on_stack(info, sp, sizeof(struct pt_regs)))
goto out_stop;
regs = (struct pt_regs *) sp;
- if (user_mode(regs))
+ if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
goto out_stop;
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
}
}
@@ -132,11 +132,11 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Get the instruction pointer from pt_regs or the stack frame */
if (regs) {
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
} else {
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 28ebd647784c..1c4113f0f2a8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2461,6 +2461,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
+ if (css_general_characteristics.aiv && test_facility(65))
+ set_kvm_facility(kvm->arch.model.fac_mask, 65);
+
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8679bd74d337..ed52ffa8d5d4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -27,6 +27,7 @@
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
+#include <asm/ap.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
@@ -592,6 +593,89 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
}
}
+/*
+ * handle_pqap: Handling pqap interception
+ * @vcpu: the vcpu that issued the pqap instruction
+ *
+ * We now support PQAP/AQIC instructions and we need to correctly
+ * answer the guest even if no dedicated driver's hook is available.
+ *
+ * The intercepting code calls a dedicated callback for this instruction
+ * if a driver has registered one in the CRYPTO satellite of the
+ * SIE block.
+ *
+ * If no callback is available, the queues are not available; return
+ * response code 0x01 to the guest and set CC to 3.
+ * Otherwise, return the response code returned by the callback.
+ */
+static int handle_pqap(struct kvm_vcpu *vcpu)
+{
+ struct ap_queue_status status = {};
+ unsigned long reg0;
+ int ret;
+ uint8_t fc;
+
+ /* Verify that the AP instructions are available */
+ if (!ap_instructions_available())
+ return -EOPNOTSUPP;
+ /* Verify that the guest is allowed to use AP instructions */
+ if (!(vcpu->arch.sie_block->eca & ECA_APIE))
+ return -EOPNOTSUPP;
+ /*
+ * The only functions that can be intercepted while AP instructions
+ * are available for the guest are AQIC and TAPQ with the t bit set.
+ * Since we do not set IC.3 (FIII), we currently intercept only
+ * the AQIC function code.
+ */
+ reg0 = vcpu->run->s.regs.gprs[0];
+ fc = (reg0 >> 24) & 0xff;
+ if (WARN_ON_ONCE(fc != 0x03))
+ return -EOPNOTSUPP;
+
+ /* PQAP instruction is allowed for guest kernel only */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ /* Common PQAP instruction specification exceptions */
+ /* bits 41-47 must all be zeros */
+ if (reg0 & 0x007f0000UL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* APFT not installed and T bit set */
+ if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* APXA not installed and APID greater than 64 or APQI greater than 16 */
+ if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* AQIC function code specific exception */
+ /* facility 65 not present for AQIC function code */
+ if (!test_kvm_facility(vcpu->kvm, 65))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /*
+ * Verify that the hook callback is registered, lock the owner
+ * and call the hook.
+ */
+ if (vcpu->kvm->arch.crypto.pqap_hook) {
+ if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
+ return -EOPNOTSUPP;
+ ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
+ module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
+ if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return ret;
+ }
+ /*
+ * A vfio_driver must register a hook.
+ * No hook means no driver to enable the SIE CRYCB and no queues.
+ * We send this response to the guest.
+ */
+ status.response_code = 0x01;
+ memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status));
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+}
+
static int handle_stfl(struct kvm_vcpu *vcpu)
{
int rc;
@@ -878,6 +962,8 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
return handle_sthyi(vcpu);
case 0x7d:
return handle_stsi(vcpu);
+ case 0xaf:
+ return handle_pqap(vcpu);
case 0xb1:
return handle_stfl(vcpu);
case 0xb2:
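
/*
 * Illustrative sketch, not part of the patch: how a driver (e.g. vfio_ap)
 * could publish the PQAP/AQIC hook consumed by handle_pqap() above.
 * Function and variable names are hypothetical.
 */
static int example_pqap_handler(struct kvm_vcpu *vcpu)
{
	/* emulate AQIC on behalf of the guest here */
	return 0;
}

static struct kvm_s390_module_hook example_pqap_hook = {
	.hook = example_pqap_handler,
	.owner = THIS_MODULE,
};

static void example_register_pqap(struct kvm *kvm)
{
	kvm->arch.crypto.pqap_hook = &example_pqap_hook;
}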
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 5418d10dc2a8..a1ec63abfb95 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,9 +3,8 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess.o find.o
+lib-y += delay.o string.o uaccess.o find.o spinlock.o
obj-y += mem.o xor.o
-lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index df75d574246d..0ba174f779da 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -248,8 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
report_user_fault(regs, SIGSEGV, 1);
force_sig_fault(SIGSEGV, si_code,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
- current);
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
const struct exception_table_entry *s390_search_extables(unsigned long addr)
@@ -310,8 +309,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
* or user mode.
*/
force_sig_fault(SIGBUS, BUS_ADRERR,
- (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
- current);
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}
static noinline int signal_return(struct pt_regs *regs)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 14d1eae9fe43..f0bee6af3960 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -18,6 +18,7 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
@@ -29,6 +30,7 @@
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -42,6 +44,8 @@
#include <asm/sclp.h>
#include <asm/set_memory.h>
#include <asm/kasan.h>
+#include <asm/dma-mapping.h>
+#include <asm/uv.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@@ -128,6 +132,47 @@ void mark_rodata_ro(void)
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
}
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ int i;
+
+ /* make specified pages unshared (swiotlb, dma_free) */
+ for (i = 0; i < numpages; ++i) {
+ uv_remove_shared(addr);
+ addr += PAGE_SIZE;
+ }
+ return 0;
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ int i;
+ /* make specified pages shared (swiotlb, dma_alloc) */
+ for (i = 0; i < numpages; ++i) {
+ uv_set_shared(addr);
+ addr += PAGE_SIZE;
+ }
+ return 0;
+}
+
+/* are we a protected virtualization guest? */
+bool sev_active(void)
+{
+ return is_prot_virt_guest();
+}
+
+/* protected virtualization */
+static void pv_init(void)
+{
+ if (!is_prot_virt_guest())
+ return;
+
+ /* make sure bounce buffers are shared */
+ swiotlb_init(1);
+ swiotlb_update_mem_attributes();
+ swiotlb_force = SWIOTLB_FORCE;
+}
+
void __init mem_init(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
@@ -136,6 +181,8 @@ void __init mem_init(void)
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ pv_init();
+
/* Setup guest page hinting */
cmma_init();
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 818deeb1ebc3..1864a8bb9622 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -52,21 +52,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
* Therefore we have a read-modify-write sequence: the function reads eight
* bytes from destination at an eight byte boundary, modifies the bytes
* requested and writes the result back in a loop.
- *
- * Note: this means that this function may not be called concurrently on
- * several cpus with overlapping words, since this may potentially
- * cause data corruption.
*/
+static DEFINE_SPINLOCK(s390_kernel_write_lock);
+
void notrace s390_kernel_write(void *dst, const void *src, size_t size)
{
+ unsigned long flags;
long copied;
+ spin_lock_irqsave(&s390_kernel_write_lock, flags);
while (size) {
copied = s390_kernel_write_odd(dst, src, size);
dst += copied;
src += copied;
size -= copied;
}
+ spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
}
static int __memcpy_real(void *dest, void *src, size_t count)
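
/*
 * Illustrative sketch, not part of the patch: with the spinlock above,
 * callers may patch overlapping words concurrently. The function name,
 * address and instruction are hypothetical.
 */
static void example_patch_nop(void *insn_addr)
{
	u16 nop = 0x0700;	/* bcr 0,%r0, a two-byte no-op */

	s390_kernel_write(insn_addr, &nop, sizeof(nop));
}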
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 687f2a4d3459..cbc718ba6d78 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -24,8 +24,6 @@ static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
return 0;
- if (current->personality & ADDR_NO_RANDOMIZE)
- return 0;
return STACK_RND_MASK << PAGE_SHIFT;
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 86ca7f88fb22..b8a64cbb5dea 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -421,12 +421,12 @@ static void zpci_map_resources(struct pci_dev *pdev)
if (!len)
continue;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
pdev->resource[i].start =
(resource_size_t __force) zdev->bars[i].mio_wb;
else
- pdev->resource[i].start =
- (resource_size_t __force) pci_iomap(pdev, i, 0);
+ pdev->resource[i].start = (resource_size_t __force)
+ pci_iomap_range_fh(pdev, i, 0, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
@@ -444,18 +444,19 @@ static void zpci_map_resources(struct pci_dev *pdev)
static void zpci_unmap_resources(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
return;
for (i = 0; i < PCI_BAR_COUNT; i++) {
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pci_iounmap(pdev, (void __iomem __force *)
- pdev->resource[i].start);
+ pci_iounmap_fh(pdev, (void __iomem __force *)
+ pdev->resource[i].start);
}
}
@@ -528,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
if (zdev->bars[i].val & 4)
flags |= IORESOURCE_MEM_64;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
addr = (unsigned long) zdev->bars[i].mio_wb;
else
addr = ZPCI_ADDR(entry);
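
/*
 * Illustrative sketch, not part of the patch: zpci_use_mio() combines the
 * global MIO static key with the per-function capability, keeping the
 * call sites above consistent. The function name is hypothetical.
 */
static void example_report_mio(struct zpci_dev *zdev)
{
	if (zpci_use_mio(zdev))
		pr_info("function %x uses MIO instructions\n", zdev->fid);
	else
		pr_info("function %x uses classic PCI load/store\n", zdev->fid);
}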
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d03631dba7c2..9bdff4defef1 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -291,7 +291,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
goto out;
zdev->fh = fh;
- if (zdev->mio_capable) {
+ if (zpci_use_mio(zdev)) {
rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
if (rc)
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 6b48ca7760a7..3408c0df3ebf 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m)
int i;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
- seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i],
+ seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
}
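The pci_debug.c change is a printf-format fix: atomic64_read() returns s64, which the kernel defines as long long on every architecture, so %llu matches where %lu does not (even on 64-bit targets, long and long long are distinct types and -Wformat complains). A tiny userspace illustration:

#include <stdio.h>

int main(void)
{
	/* s64 maps to 'long long' kernel-wide (asm-generic/int-ll64.h). */
	long long counter = 42;

	printf("counter:\t%llu\n", (unsigned long long)counter); /* matches */
	/* printf("counter:\t%lu\n", counter);  would warn under -Wformat */
	return 0;
}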
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index e9e66f178a6d..04a03433c720 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,2 +1,3 @@
-kexec-purgatory.c
+purgatory
+purgatory.lds
purgatory.ro
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index 2342b84b3386..b5e35e8f999a 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -6,7 +6,6 @@
kapi := arch/$(ARCH)/include/generated/asm
kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
-targets += $(addprefix ../../../,$(kapi-hdrs-y))
PHONY += kapi
kapi: $(kapi-hdrs-y)
@@ -14,11 +13,7 @@ kapi: $(kapi-hdrs-y)
hostprogs-y += gen_facilities
hostprogs-y += gen_opcode_table
-HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE)
-HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE)
-
-# Ensure output directory exists
-_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)
filechk_facility-defs.h = $(obj)/gen_facilities
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 64638b764d1c..46d8ed96cf06 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -520,6 +520,9 @@ b92e km RRE_RR
b92f kmc RRE_RR
b930 cgfr RRE_RR
b931 clgfr RRE_RR
+b938 sortl RRE_RR
+b939 dfltcc RRF_R0RR2
+b93a kdsa RRE_RR
b93c ppno RRE_RR
b93e kimd RRE_RR
b93f klmd RRE_RR
@@ -538,8 +541,16 @@ b95a cxlgtr RRF_UUFR
b95b cxlftr RRF_UUFR
b960 cgrt RRF_U0RR
b961 clgrt RRF_U0RR
+b964 nngrk RRF_R0RR2
+b965 ocgrk RRF_R0RR2
+b966 nogrk RRF_R0RR2
+b967 nxgrk RRF_R0RR2
b972 crt RRF_U0RR
b973 clrt RRF_U0RR
+b974 nnrk RRF_R0RR2
+b975 ocrk RRF_R0RR2
+b976 nork RRF_R0RR2
+b977 nxrk RRF_R0RR2
b980 ngr RRE_RR
b981 ogr RRE_RR
b982 xgr RRE_RR
@@ -573,6 +584,7 @@ b99f ssair RRE_R0
b9a0 clp RRF_U0RR
b9a1 tpei RRE_RR
b9a2 ptf RRE_R0
+b9a4 uvc RRF_URR
b9aa lptea RRF_RURR2
b9ab essa RRF_U0RR
b9ac irbm RRE_RR
@@ -585,6 +597,7 @@ b9b3 cu42 RRE_RR
b9bd trtre RRF_U0RR
b9be srstu RRE_RR
b9bf trte RRF_U0RR
+b9c0 selhhhr RRF_RURR
b9c8 ahhhr RRF_R0RR2
b9c9 shhhr RRF_R0RR2
b9ca alhhhr RRF_R0RR2
@@ -594,6 +607,9 @@ b9cf clhhr RRE_RR
b9d0 pcistg RRE_RR
b9d2 pcilg RRE_RR
b9d3 rpcit RRE_RR
+b9d4 pcistgi RRE_RR
+b9d5 pciwb RRE_00
+b9d6 pcilgi RRE_RR
b9d8 ahhlr RRF_R0RR2
b9d9 shhlr RRF_R0RR2
b9da alhhlr RRF_R0RR2
@@ -601,9 +617,11 @@ b9db slhhlr RRF_R0RR2
b9dd chlr RRE_RR
b9df clhlr RRE_RR
b9e0 locfhr RRF_U0RR
-b9e1 popcnt RRE_RR
+b9e1 popcnt RRF_U0RR
b9e2 locgr RRF_U0RR
+b9e3 selgr RRF_RURR
b9e4 ngrk RRF_R0RR2
+b9e5 ncgrk RRF_R0RR2
b9e6 ogrk RRF_R0RR2
b9e7 xgrk RRF_R0RR2
b9e8 agrk RRF_R0RR2
@@ -612,8 +630,10 @@ b9ea algrk RRF_R0RR2
b9eb slgrk RRF_R0RR2
b9ec mgrk RRF_R0RR2
b9ed msgrkc RRF_R0RR2
+b9f0 selr RRF_RURR
b9f2 locr RRF_U0RR
b9f4 nrk RRF_R0RR2
+b9f5 ncrk RRF_R0RR2
b9f6 ork RRF_R0RR2
b9f7 xrk RRF_R0RR2
b9f8 ark RRF_R0RR2
@@ -822,6 +842,7 @@ e3d4 stpcifc RXY_RRRD
e500 lasp SSE_RDRD
e501 tprot SSE_RDRD
e502 strag SSE_RDRD
+e50a mvcrl SSE_RDRD
e50e mvcsk SSE_RDRD
e50f mvcdk SSE_RDRD
e544 mvhhi SIL_RDI
@@ -835,6 +856,18 @@ e55c chsi SIL_RDI
e55d clfhsi SIL_RDU
e560 tbegin SIL_RDU
e561 tbeginc SIL_RDU
+e601 vlebrh VRX_VRRDU
+e602 vlebrg VRX_VRRDU
+e603 vlebrf VRX_VRRDU
+e604 vllebrz VRX_VRRDU
+e605 vlbrrep VRX_VRRDU
+e606 vlbr VRX_VRRDU
+e607 vler VRX_VRRDU
+e609 vstebrh VRX_VRRDU
+e60a vstebrg VRX_VRRDU
+e60b vstebrf VRX_VRRDU
+e60e vstbr VRX_VRRDU
+e60f vster VRX_VRRDU
e634 vpkz VSI_URDV
e635 vlrl VSI_URDV
e637 vlrlr VRS_RRDV
@@ -842,8 +875,8 @@ e63c vupkz VSI_URDV
e63d vstrl VSI_URDV
e63f vstrlr VRS_RRDV
e649 vlip VRI_V0UU2
-e650 vcvb VRR_RV0U
-e652 vcvbg VRR_RV0U
+e650 vcvb VRR_RV0UU
+e652 vcvbg VRR_RV0UU
e658 vcvd VRI_VR0UU
e659 vsrp VRI_VVUUU2
e65a vcvdg VRI_VR0UU
@@ -863,13 +896,13 @@ e702 vleg VRX_VRRDU
e703 vlef VRX_VRRDU
e704 vllez VRX_VRRDU
e705 vlrep VRX_VRRDU
-e706 vl VRX_VRRD
+e706 vl VRX_VRRDU
e707 vlbb VRX_VRRDU
e708 vsteb VRX_VRRDU
e709 vsteh VRX_VRRDU
e70a vsteg VRX_VRRDU
e70b vstef VRX_VRRDU
-e70e vst VRX_VRRD
+e70e vst VRX_VRRDU
e712 vgeg VRV_VVXRDU
e713 vgef VRV_VVXRDU
e71a vsceg VRV_VVXRDU
@@ -879,11 +912,11 @@ e722 vlvg VRS_VRRDU
e727 lcbb RXE_RRRDU
e730 vesl VRS_VVRDU
e733 verll VRS_VVRDU
-e736 vlm VRS_VVRD
+e736 vlm VRS_VVRDU
e737 vll VRS_VRRD
e738 vesrl VRS_VVRDU
e73a vesra VRS_VVRDU
-e73e vstm VRS_VVRD
+e73e vstm VRS_VVRDU
e73f vstl VRS_VRRD
e740 vleib VRI_V0IU
e741 vleih VRI_V0IU
@@ -932,7 +965,10 @@ e781 vfene VRR_VVV0U0U
e782 vfae VRR_VVV0U0U
e784 vpdi VRR_VVV0U
e785 vbperm VRR_VVV
+e786 vsld VRI_VVV0U
+e787 vsrd VRI_VVV0U
e78a vstrc VRR_VVVUU0V
+e78b vstrs VRR_VVVUU0V
e78c vperm VRR_VVV0V
e78d vsel VRR_VVV0V
e78e vfms VRR_VVVU0UV
@@ -1060,6 +1096,7 @@ eb9b stamy RSY_AARD
ebc0 tp RSL_R0RD
ebd0 pcistb RSY_RRRD
ebd1 sic RSY_RRRD
+ebd4 pcistbi RSY_RRRD
ebdc srak RSY_RRRD
ebdd slak RSY_RRRD
ebde srlk RSY_RRRD
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index 74b48db86dd7..0bcff11a4843 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -568,5 +568,5 @@ BUILD_TRAP_HANDLER(fpu_error)
return;
}
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
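Most hunks from here on are one mechanical conversion: force_sig(), force_sigsegv() and force_sig_fault() lose their task argument because every caller passed current anyway, so the helpers now pick it up internally. Sketch of the call-shape change (kernel context assumed):

/*
 * Before and after:
 *
 *	force_sig(SIGFPE, current);             -> force_sig(SIGFPE);
 *	force_sigsegv(sig, current);            -> force_sigsegv(sig);
 *	force_sig_fault(sig, code, addr, tsk);  -> force_sig_fault(sig, code, addr);
 */
#include <linux/sched/signal.h>

static void deliver_fpe(void __user *addr)
{
	force_sig_fault(SIGFPE, FPE_FLTINV, addr);	/* acts on current */
}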
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index 1ff56e5ba990..03ffd8cdf542 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -421,5 +421,5 @@ BUILD_TRAP_HANDLER(fpu_error)
}
}
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c
index 9218d9ed787e..3966b5ee8e93 100644
--- a/arch/sh/kernel/cpu/sh5/fpu.c
+++ b/arch/sh/kernel/cpu/sh5/fpu.c
@@ -100,9 +100,7 @@ void restore_fpu(struct task_struct *tsk)
asmlinkage void do_fpu_error(unsigned long ex, struct pt_regs *regs)
{
- struct task_struct *tsk = current;
-
regs->pc += 4;
- force_sig(SIGFPE, tsk);
+ force_sig(SIGFPE);
}
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index bc96b16288c1..3bd010b4c55f 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -338,7 +338,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
/* Deliver the signal to userspace */
if (!arch_check_bp_in_kernelspace(&bp->hw.info)) {
force_sig_fault(SIGTRAP, TRAP_HWBKPT,
- (void __user *)NULL, current);
+ (void __user *)NULL);
}
rcu_read_unlock();
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 3390349ff976..11085e48eaa6 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -550,7 +550,7 @@ asmlinkage void do_single_step(unsigned long long vec, struct pt_regs *regs)
continually stepping. */
local_irq_enable();
regs->sr &= ~SR_SSTEP;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
/* Called with interrupts disabled */
@@ -561,7 +561,7 @@ BUILD_TRAP_HANDLER(breakpoint)
/* We need to forward step the PC, to counteract the backstep done
in signal.c. */
local_irq_enable();
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
regs->pc += 4;
}
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 2a2121ba8ebe..24473fa6c3b6 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -176,7 +176,7 @@ asmlinkage int sys_sigreturn(void)
return r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -207,7 +207,7 @@ asmlinkage int sys_rt_sigreturn(void)
return r0;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index f1f1598879c2..b9aaa9266b34 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -277,7 +277,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
return (int) ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -311,7 +311,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
return (int) ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 8b49cced663d..63cf17bc760d 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -141,7 +141,7 @@ BUILD_TRAP_HANDLER(debug)
SIGTRAP) == NOTIFY_STOP)
return;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
/*
@@ -167,7 +167,7 @@ BUILD_TRAP_HANDLER(bug)
}
#endif
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
BUILD_TRAP_HANDLER(nmi)
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index f2a18b5fafd8..058c6181bb30 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -533,7 +533,7 @@ uspace_segv:
"access (PC %lx PR %lx)\n", current->comm, regs->pc,
regs->pr);
- force_sig_fault(SIGBUS, si_code, (void __user *)address, current);
+ force_sig_fault(SIGBUS, si_code, (void __user *)address);
} else {
inc_unaligned_kernel_access();
@@ -603,7 +603,7 @@ asmlinkage void do_divide_error(unsigned long r4)
/* Let gcc know unhandled cases don't make it past here */
return;
}
- force_sig_fault(SIGFPE, code, NULL, current);
+ force_sig_fault(SIGFPE, code, NULL);
}
#endif
@@ -611,7 +611,6 @@ asmlinkage void do_reserved_inst(void)
{
struct pt_regs *regs = current_pt_regs();
unsigned long error_code;
- struct task_struct *tsk = current;
#ifdef CONFIG_SH_FPU_EMU
unsigned short inst = 0;
@@ -633,7 +632,7 @@ asmlinkage void do_reserved_inst(void)
/* Enable DSP mode, and restart instruction. */
regs->sr |= SR_DSP;
/* Save DSP mode */
- tsk->thread.dsp_status.status |= SR_DSP;
+ current->thread.dsp_status.status |= SR_DSP;
return;
}
#endif
@@ -641,7 +640,7 @@ asmlinkage void do_reserved_inst(void)
error_code = lookup_exception_vector();
local_irq_enable();
- force_sig(SIGILL, tsk);
+ force_sig(SIGILL);
die_if_no_fixup("reserved instruction", regs, error_code);
}
@@ -697,7 +696,6 @@ asmlinkage void do_illegal_slot_inst(void)
{
struct pt_regs *regs = current_pt_regs();
unsigned long inst;
- struct task_struct *tsk = current;
if (kprobe_handle_illslot(regs->pc) == 0)
return;
@@ -716,7 +714,7 @@ asmlinkage void do_illegal_slot_inst(void)
inst = lookup_exception_vector();
local_irq_enable();
- force_sig(SIGILL, tsk);
+ force_sig(SIGILL);
die_if_no_fixup("illegal slot instruction", regs, inst);
}
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 8ce90a7da67d..37046f3a26d3 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -599,7 +599,7 @@ static void do_unhandled_exception(int signr, char *str, unsigned long error,
struct pt_regs *regs)
{
if (user_mode(regs))
- force_sig(signr, current);
+ force_sig(signr);
die_if_no_fixup(str, regs, error);
}
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index a0fa8fc88739..e8be0eca0444 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -560,7 +560,7 @@ static int ieee_fpe_handler(struct pt_regs *regs)
task_thread_info(tsk)->status |= TS_USEDFPU;
} else {
force_sig_fault(SIGFPE, FPE_FLTINV,
- (void __user *)regs->pc, tsk);
+ (void __user *)regs->pc);
}
regs->pc = nextpc;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6defd2c6d9b1..3093bc372138 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -39,10 +39,9 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap)
}
static void
-force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk)
+force_sig_info_fault(int si_signo, int si_code, unsigned long address)
{
- force_sig_fault(si_signo, si_code, (void __user *)address, tsk);
+ force_sig_fault(si_signo, si_code, (void __user *)address);
}
/*
@@ -244,8 +243,6 @@ static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, int si_code)
{
- struct task_struct *tsk = current;
-
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
/*
@@ -253,7 +250,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
*/
local_irq_enable();
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address);
return;
}
@@ -308,7 +305,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
if (!user_mode(regs))
no_context(regs, error_code, address);
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address);
}
static noinline int
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 6963482c81d8..b60448397d4f 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -23,15 +23,15 @@
#define ATOMIC_OP(op) \
void atomic_##op(int, atomic_t *); \
-void atomic64_##op(long, atomic64_t *);
+void atomic64_##op(s64, atomic64_t *);
#define ATOMIC_OP_RETURN(op) \
int atomic_##op##_return(int, atomic_t *); \
-long atomic64_##op##_return(long, atomic64_t *);
+s64 atomic64_##op##_return(s64, atomic64_t *);
#define ATOMIC_FETCH_OP(op) \
int atomic_fetch_##op(int, atomic_t *); \
-long atomic64_fetch_##op(long, atomic64_t *);
+s64 atomic64_fetch_##op(s64, atomic64_t *);
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
@@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-long atomic64_dec_if_positive(atomic64_t *v);
+s64 atomic64_dec_if_positive(atomic64_t *v);
#define atomic64_dec_if_positive atomic64_dec_if_positive
#endif /* !(__ARCH_SPARC64_ATOMIC__) */
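The atomic_64.h hunk swaps bare long for s64 in the 64-bit atomic prototypes so the arch declarations line up with the generic atomic64 API, which is specified in terms of the fixed-width type. The idea in isolation (illustrative names):

#include <linux/compiler.h>
#include <linux/types.h>

typedef struct { s64 counter; } demo_atomic64_t;

/* s64 keeps the prototype identical on every architecture, whereas
 * 'long' is only guaranteed to be 64 bits on LP64 targets. */
static inline s64 demo_atomic64_read(const demo_atomic64_t *v)
{
	return READ_ONCE(v->counter);
}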
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 59eaf6227af1..4282116e28e7 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -519,7 +519,7 @@ void synchronize_user_stack(void)
static void stack_unaligned(unsigned long sp)
{
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0);
}
static const char uwfault32[] = KERN_INFO \
@@ -570,7 +570,7 @@ void fault_in_user_windows(struct pt_regs *regs)
barf:
set_thread_wsaved(window + 1);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage long sparc_do_fork(unsigned long clone_flags,
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index e800ce13cc6e..a237810aa9f4 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -170,7 +170,7 @@ void do_sigreturn32(struct pt_regs *regs)
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
@@ -256,7 +256,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
@@ -375,7 +375,7 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs,
pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
@@ -509,7 +509,7 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs,
pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 83953780ca01..42c3de313fd6 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -137,7 +137,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
return;
segv_and_exit:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
@@ -196,7 +196,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index ca70787efd8e..69ae814b7e90 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -134,7 +134,7 @@ out:
exception_exit(prev_state);
return;
do_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -228,7 +228,7 @@ out:
exception_exit(prev_state);
return;
do_sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
goto out;
}
@@ -320,7 +320,7 @@ void do_rt_sigreturn(struct pt_regs *regs)
set_current_blocked(&set);
return;
segv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
@@ -374,7 +374,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n",
current->comm, current->pid, (unsigned long)sf,
regs->tpc, regs->u_regs[UREG_I7]);
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
return -EINVAL;
}
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 452e4d080855..be77538bc038 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -151,7 +151,7 @@ sparc_breakpoint (struct pt_regs *regs)
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc);
#endif
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9825ca6a6020..ccc88926bc00 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -511,7 +511,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0);
#ifdef DEBUG_SPARC_BREAKPOINT
printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
#endif
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index bcdfc6168dd5..4ceecad556a9 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -103,7 +103,7 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
die_if_kernel("Kernel bad trap", regs);
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)regs->pc, type - 0x80, current);
+ (void __user *)regs->pc, type - 0x80);
}
void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
@@ -327,7 +327,7 @@ void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc
printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n",
pc, npc, psr);
#endif
- force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0);
}
void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc,
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 04aa588d5dd1..27778b65a965 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -108,7 +108,7 @@ void bad_trap(struct pt_regs *regs, long lvl)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGILL, ILL_ILLTRP,
- (void __user *)regs->tpc, lvl, current);
+ (void __user *)regs->tpc, lvl);
}
void bad_trap_tl1(struct pt_regs *regs, long lvl)
@@ -202,7 +202,7 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGSEGV, SEGV_MAPERR,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -237,7 +237,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0);
}
void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
@@ -322,7 +322,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0);
out:
exception_exit(prev_state);
}
@@ -386,16 +386,13 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig
*/
switch (type) {
case HV_FAULT_TYPE_INV_ASI:
- force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0);
break;
case HV_FAULT_TYPE_MCD_DIS:
- force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0);
break;
default:
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0,
- current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0);
break;
}
}
@@ -572,7 +569,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current);
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0);
}
void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
@@ -2074,7 +2071,7 @@ void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent)
* code
*/
force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr,
- 0, current);
+ 0);
}
/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
@@ -2182,13 +2179,13 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
addr += PAGE_SIZE;
}
}
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return true;
}
if (attrs & SUN4V_ERR_ATTRS_PIO) {
force_sig_fault(SIGBUS, BUS_ADRERR,
- (void __user *)sun4v_get_vaddr(regs), 0, current);
+ (void __user *)sun4v_get_vaddr(regs), 0);
return true;
}
@@ -2345,7 +2342,7 @@ static void do_fpe_common(struct pt_regs *regs)
code = FPE_FLTRES;
}
force_sig_fault(SIGFPE, code,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
}
}
@@ -2400,7 +2397,7 @@ void do_tof(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGEMT, EMT_TAGOVF,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -2420,7 +2417,7 @@ void do_div0(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGFPE, FPE_INTDIV,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
@@ -2616,7 +2613,7 @@ void do_illegal_instruction(struct pt_regs *regs)
}
}
}
- force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
+ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0);
out:
exception_exit(prev_state);
}
@@ -2636,7 +2633,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0);
out:
exception_exit(prev_state);
}
@@ -2654,7 +2651,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c
if (is_no_fault_exception(regs))
return;
- force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0);
}
/* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI
@@ -2701,7 +2698,7 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr,
regs->tpc &= 0xffffffff;
regs->tnpc &= 0xffffffff;
}
- force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current);
+ force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0);
}
void do_privop(struct pt_regs *regs)
@@ -2717,7 +2714,7 @@ void do_privop(struct pt_regs *regs)
regs->tnpc &= 0xffffffff;
}
force_sig_fault(SIGILL, ILL_PRVOPC,
- (void __user *)regs->tpc, 0, current);
+ (void __user *)regs->tpc, 0);
out:
exception_exit(prev_state);
}
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index b0440b0edd97..8d69de111470 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -131,7 +131,7 @@ static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
show_signal_msg(regs, sig, code,
addr, current);
- force_sig_fault(sig, code, (void __user *) addr, 0, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0);
}
static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
@@ -425,7 +425,7 @@ do_sigbus:
static void check_stack_aligned(unsigned long sp)
{
if (sp & 0x7UL)
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
void window_overflow_fault(void)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 8f8a604c1300..83fda4d9c3b2 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -187,7 +187,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, sig, code, addr, current);
- force_sig_fault(sig, code, (void __user *) addr, 0, current);
+ force_sig_fault(sig, code, (void __user *) addr, 0);
}
static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index a43d42bf0a86..783b9247161f 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -32,7 +32,7 @@ void flush_thread(void)
if (ret) {
printk(KERN_ERR "flush_thread - clearing address space failed, "
"err = %d\n", ret);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp);
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 5f47422401e1..da1e96b1ec3e 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -112,13 +112,12 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
- int error_code)
+static void send_sigtrap(struct uml_pt_regs *regs, int error_code)
{
/* Send us the fake SIGTRAP */
force_sig_fault(SIGTRAP, TRAP_BRKPT,
/* User-mode eip? */
- UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk);
+ UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL);
}
/*
@@ -147,7 +146,7 @@ void syscall_trace_leave(struct pt_regs *regs)
/* Fake a debug trap */
if (ptraced & PT_DTRACE)
- send_sigtrap(current, &regs->regs, 0);
+ send_sigtrap(&regs->regs, 0);
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return;
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 7a1f2a936fd1..29e7f5f9f188 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -119,7 +119,7 @@ void uml_setup_stubs(struct mm_struct *mm)
return;
out:
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
}
void arch_exit_mmap(struct mm_struct *mm)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 8347161c2ae0..45f739bf302f 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -329,7 +329,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
"process: %d\n", task_tgid_vnr(current));
/* We are under mmap_sem, release it such that current can terminate */
up_write(&current->mm->mmap_sem);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
do_signal(&current->thread.regs);
}
}
@@ -487,7 +487,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
kill:
printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0e8b6158f224..58fe36856182 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -163,13 +163,12 @@ static void show_segv_info(struct uml_pt_regs *regs)
static void bad_segv(struct faultinfo fi, unsigned long ip)
{
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi),
- current);
+ force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi));
}
void fatal_sigsegv(void)
{
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
do_signal(&current->thread.regs);
/*
* This is to tell gcc that we're not returning - do_signal
@@ -268,13 +267,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (err == -EACCES) {
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address,
- current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
} else {
BUG_ON(err != -EFAULT);
current->thread.arch.faultinfo = fi;
- force_sig_fault(SIGSEGV, si_code, (void __user *) address,
- current);
+ force_sig_fault(SIGSEGV, si_code, (void __user *) address);
}
out:
@@ -304,12 +301,11 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
struct faultinfo *fi = UPT_FAULTINFO(regs);
current->thread.arch.faultinfo = *fi;
- force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi),
- current);
+ force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi));
} else {
printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
sig, code, err);
- force_sig(sig, current);
+ force_sig(sig);
}
}
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index e62f82bd1339..3946182a835d 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -126,7 +126,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs)
return regs->UCreg_00;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -383,7 +383,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
regs->UCreg_pc = KERN_RESTART_CODE;
} else {
regs->UCreg_sp += 4;
- force_sigsegv(0, current);
+ force_sigsegv(0);
}
}
if (regs->UCreg_00 == -ERESTARTNOHAND ||
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 1c1f0ce20e19..e24f67283864 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -245,7 +245,7 @@ void uc32_notify_die(const char *str, struct pt_regs *regs,
current->thread.error_code = err;
current->thread.trap_no = trap;
- force_sig_fault(sig, code, addr, current);
+ force_sig_fault(sig, code, addr);
} else
die(str, regs, err);
}
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index 33e0d8a267e8..76342de9cf8c 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -113,14 +113,15 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
* Something tried to access memory that isn't in our memory map..
* User mode accesses just cause a SIGSEGV
*/
-static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
- unsigned int fsr, unsigned int sig, int code,
- struct pt_regs *regs)
+static void __do_user_fault(unsigned long addr, unsigned int fsr,
+ unsigned int sig, int code, struct pt_regs *regs)
{
+ struct task_struct *tsk = current;
+
tsk->thread.address = addr;
tsk->thread.error_code = fsr;
tsk->thread.trap_no = 14;
- force_sig_fault(sig, code, (void __user *)addr, tsk);
+ force_sig_fault(sig, code, (void __user *)addr);
}
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
@@ -133,7 +134,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* have no context to handle this fault with.
*/
if (user_mode(regs))
- __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+ __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
else
__do_kernel_fault(mm, addr, fsr, regs);
}
@@ -307,7 +308,7 @@ retry:
code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, fsr, sig, code, regs);
+ __do_user_fault(addr, fsr, sig, code, regs);
return 0;
no_context:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..5a72c98e60bc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -17,6 +17,7 @@ config X86_32
select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+ select GENERIC_VDSO_32
config X86_64
def_bool y
@@ -121,6 +122,7 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
@@ -202,6 +204,7 @@ config X86
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
+ select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
@@ -217,6 +220,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
+ select PROC_PID_ARCH_STATUS if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -781,6 +785,9 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
+config X86_HV_CALLBACK_VECTOR
+ def_bool n
+
source "arch/x86/xen/Kconfig"
config KVM_GUEST
@@ -832,6 +839,17 @@ config JAILHOUSE_GUEST
cell. You can leave this option disabled if you only want to start
Jailhouse and run Linux afterwards in the root cell.
+config ACRN_GUEST
+ bool "ACRN Guest support"
+ depends on X86_64
+ select X86_HV_CALLBACK_VECTOR
+ help
+	  This option allows running Linux as a guest in the ACRN hypervisor.
+	  ACRN is a flexible, lightweight, open-source reference hypervisor
+	  built with real-time and safety-criticality in mind. It is built
+	  for embedded IoT with a small footprint and real-time features.
+	  More details can be found at https://projectacrn.org/.
+
endif #HYPERVISOR_GUEST
source "arch/x86/Kconfig.cpu"
@@ -2285,7 +2303,7 @@ config COMPAT_VDSO
choice
prompt "vsyscall table for legacy applications"
depends on X86_64
- default LEGACY_VSYSCALL_EMULATE
+ default LEGACY_VSYSCALL_XONLY
help
Legacy user code that does not know how to find the vDSO expects
to be able to issue three syscalls by calling fixed addresses in
@@ -2293,23 +2311,38 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[emulate|none].
+ line parameter vsyscall=[emulate|xonly|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
to improve security.
- If unsure, select "Emulate".
+ If unsure, select "Emulate execution only".
config LEGACY_VSYSCALL_EMULATE
- bool "Emulate"
+ bool "Full emulation"
+ help
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping. This makes the mapping non-executable, but
+ it still contains readable known contents, which could be
+ used in certain rare security vulnerability exploits. This
+ configuration is recommended when using legacy userspace
+ that still uses vsyscalls along with legacy binary
+ instrumentation tools that require code to be readable.
+
+ An example of this type of legacy userspace is running
+ Pin on an old binary that still uses vsyscalls.
+
+ config LEGACY_VSYSCALL_XONLY
+ bool "Emulate execution only"
help
- The kernel traps and emulates calls into the fixed
- vsyscall address mapping. This makes the mapping
- non-executable, but it still contains known contents,
- which could be used in certain rare security vulnerability
- exploits. This configuration is recommended when userspace
- still uses the vsyscall area.
+ The kernel traps and emulates calls into the fixed vsyscall
+ address mapping and does not allow reads. This
+ configuration is recommended when userspace might use the
+ legacy vsyscall area but support for legacy binary
+ instrumentation of legacy code is not needed. It mitigates
+ certain uses of the vsyscall area as an ASLR-bypassing
+ buffer.
config LEGACY_VSYSCALL_NONE
bool "None"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6adce15268bd..8e29c991ba3e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -480,3 +480,16 @@ config CPU_SUP_UMC_32
CPU might render the kernel unbootable.
If unsure, say N.
+
+config CPU_SUP_ZHAOXIN
+ default y
+ bool "Support Zhaoxin processors" if PROCESSOR_SELECT
+ help
+	  This enables detection, tunings and quirks for Zhaoxin processors.
+
+ You need this enabled if you want your kernel to run on a
+ Zhaoxin CPU. Disabling this option on other types of CPUs
+ makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin
+ CPU might render the kernel unbootable.
+
+ If unsure, say N.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f730680dc818..6791a3c97589 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -179,26 +179,6 @@ config X86_DECODER_SELFTEST
decoder code.
If unsure, say "N".
-#
-# IO delay types:
-#
-
-config IO_DELAY_TYPE_0X80
- int
- default "0"
-
-config IO_DELAY_TYPE_0XED
- int
- default "1"
-
-config IO_DELAY_TYPE_UDELAY
- int
- default "2"
-
-config IO_DELAY_TYPE_NONE
- int
- default "3"
-
choice
prompt "IO delay type"
default IO_DELAY_0X80
@@ -229,30 +209,6 @@ config IO_DELAY_NONE
endchoice
-if IO_DELAY_0X80
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0X80
-endif
-
-if IO_DELAY_0XED
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_0XED
-endif
-
-if IO_DELAY_UDELAY
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_UDELAY
-endif
-
-if IO_DELAY_NONE
-config DEFAULT_IO_DELAY_TYPE
- int
- default IO_DELAY_TYPE_NONE
-endif
-
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2b2481acc661..59ce9ed58430 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -130,7 +130,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index e8829abf063a..d0a5ffeae8df 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -129,7 +129,6 @@ CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_MAC80211_LEDS=y
CONFIG_RFKILL=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEBUG_DEVRES=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e9b866e87d48..73c0ccb009a0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -371,20 +371,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
}
}
-static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_enc(ctx, dst, src);
-}
-
-static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_dec(ctx, dst, src);
-}
-
static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int len)
{
@@ -920,7 +906,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
}
#endif
-static struct crypto_alg aesni_algs[] = { {
+static struct crypto_alg aesni_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
.cra_priority = 300,
@@ -937,24 +923,7 @@ static struct crypto_alg aesni_algs[] = { {
.cia_decrypt = aes_decrypt
}
}
-}, {
- .cra_name = "__aes",
- .cra_driver_name = "__aes-aesni",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = __aes_encrypt,
- .cia_decrypt = __aes_decrypt
- }
- }
-} };
+};
static struct skcipher_alg aesni_skciphers[] = {
{
@@ -1150,7 +1119,7 @@ static int __init aesni_init(void)
#endif
#endif
- err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ err = crypto_register_alg(&aesni_cipher_alg);
if (err)
return err;
@@ -1158,7 +1127,7 @@ static int __init aesni_init(void)
ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
if (err)
- goto unregister_algs;
+ goto unregister_cipher;
err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
aesni_simd_aeads);
@@ -1170,8 +1139,8 @@ static int __init aesni_init(void)
unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
-unregister_algs:
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+unregister_cipher:
+ crypto_unregister_alg(&aesni_cipher_alg);
return err;
}
@@ -1181,7 +1150,7 @@ static void __exit aesni_exit(void)
aesni_simd_aeads);
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
- crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+ crypto_unregister_alg(&aesni_cipher_alg);
}
late_initcall(aesni_init);
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 1ce0019c059c..388f95a4ec24 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -124,7 +124,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_simd_stream_xor(struct skcipher_walk *walk,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
int next_yield = 4096; /* bytes until next FPU yield */
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..9f1f9e3b8230 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
- * the original rbp.
- */
-.macro ENCODE_FRAME_POINTER ptregs_offset=0
-#ifdef CONFIG_FRAME_POINTER
- leaq 1+\ptregs_offset(%rsp), %rbp
-#endif
-.endm
-
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 2418804e66b4..536b574b6161 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs)
struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
- bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ work = READ_ONCE(ti->flags);
- if (unlikely(work & _TIF_SYSCALL_EMU))
- emulated = true;
-
- if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- return -1L;
-
- if (emulated)
- return -1L;
+ if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
+ ret = tracehook_report_syscall_entry(regs);
+ if (ret || (work & _TIF_SYSCALL_EMU))
+ return -1L;
+ }
#ifdef CONFIG_SECCOMP
/*
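The common.c hunk folds the separate "emulated" bookkeeping into a single test: one read of ti->flags now drives both the ptrace report and the _TIF_SYSCALL_EMU early exit. Standalone sketch of the resulting control flow (flag values and helper result are made up for illustration):

#include <stdio.h>

#define TIF_SYSCALL_TRACE	(1UL << 0)	/* hypothetical bit values */
#define TIF_SYSCALL_EMU		(1UL << 1)

/* trace_rc stands in for tracehook_report_syscall_entry()'s result. */
static long syscall_enter_work(unsigned long work, int trace_rc)
{
	if (work & (TIF_SYSCALL_TRACE | TIF_SYSCALL_EMU)) {
		if (trace_rc || (work & TIF_SYSCALL_EMU))
			return -1L;	/* skip the real syscall */
	}
	return 0;
}

int main(void)
{
	printf("%ld\n", syscall_enter_work(TIF_SYSCALL_EMU, 0));	/* -1 */
	printf("%ld\n", syscall_enter_work(TIF_SYSCALL_TRACE, 0));	/*  0 */
	return 0;
}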
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..1285e5abf669 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
.macro TRACE_IRQS_IRET
@@ -203,9 +202,102 @@
.Lend_\@:
.endm
+#define CS_FROM_ENTRY_STACK (1 << 31)
+#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_KERNEL (1 << 29)
+
+.macro FIXUP_FRAME
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 3*4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 4*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+#endif
+ testl $SEGMENT_RPL_MASK, 3*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+
+ orl $CS_FROM_KERNEL, 3*4(%esp)
+
+ /*
+	 * When we're here from kernel mode, the (exception) stack looks like:
+ *
+ * 5*4(%esp) - <previous context>
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ *
+	 * Let's build a 5-entry IRET frame after that, such that struct pt_regs
+	 * is complete and in particular regs->sp is correct. This gives us
+	 * the original 5 entries as gap:
+ *
+ * 12*4(%esp) - <previous context>
+ * 11*4(%esp) - gap / flags
+ * 10*4(%esp) - gap / cs
+ * 9*4(%esp) - gap / ip
+ * 8*4(%esp) - gap / orig_eax
+ * 7*4(%esp) - gap / gs / function
+ * 6*4(%esp) - ss
+ * 5*4(%esp) - sp
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ */
+
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ addl $6*4, (%esp) # point sp back at the previous context
+ pushl 6*4(%esp) # flags
+ pushl 6*4(%esp) # cs
+ pushl 6*4(%esp) # ip
+ pushl 6*4(%esp) # orig_eax
+ pushl 6*4(%esp) # gs / function
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+ testl $CS_FROM_KERNEL, 1*4(%esp)
+ jz .Lfinished_frame_\@
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+.Lfinished_frame_\@:
+.endm
+
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
cld
PUSH_GS
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
@@ -247,22 +339,6 @@
.Lend_\@:
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
- mov %esp, %ebp
- andl $0x7fffffff, %ebp
-#endif
-.endm
-
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -375,9 +451,6 @@
* switch to it before we do any copying.
*/
-#define CS_FROM_ENTRY_STACK (1 << 31)
-#define CS_FROM_USER_CR3 (1 << 30)
-
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +464,6 @@
* that register for the time this macro runs
*/
- /*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +821,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -765,18 +831,6 @@ ENTRY(resume_userspace)
jmp restore_all
END(ret_from_exception)
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz restore_all_kernel
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_kernel
- call preempt_schedule_irq
- jmp restore_all_kernel
-END(resume_kernel)
-#endif
-
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1019,6 +1073,7 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
+ IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1027,6 +1082,15 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lno_preempt
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lno_preempt
+ call preempt_schedule_irq
+.Lno_preempt:
+#endif
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1104,6 +1168,30 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ pushl $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_spurious
+ .align 8
+ .endr
+END(spurious_entries_start)
+
+common_spurious:
+ ASM_CLAC
+ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ call smp_spurious_interrupt
+ jmp ret_from_intr
+ENDPROC(common_spurious)
+#endif
+
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
@@ -1360,6 +1448,7 @@ END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 11aa3b2afa4d..a829dd3117d0 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -375,6 +375,18 @@ ENTRY(irq_entries_start)
.endr
END(irq_entries_start)
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ pushq $(~vector+0x80) /* Note: always in signed byte range */
+ jmp common_spurious
+ .align 8
+ vector=vector+1
+ .endr
+END(spurious_entries_start)
+
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
@@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry)
/* Interrupt entry/exit. */
- /*
- * The interrupt stubs push (~vector+0x80) onto the stack and
- * then jump to common_interrupt.
- */
+/*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_spurious/interrupt.
+ */
+common_spurious:
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call smp_spurious_interrupt /* rdi points to pt_regs */
+ jmp ret_from_intr
+END(common_spurious)
+_ASM_NOKPROBE(common_spurious)
+
+/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
@@ -1142,6 +1164,11 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_ACRN_GUEST)
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
+ acrn_hv_callback_vector acrn_hv_vector_handler
+#endif
+
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0 create_gap=1
idtentry stack_segment do_stack_segment has_error_code=1
@@ -1670,11 +1697,17 @@ nmi_restore:
iretq
END(nmi)
+#ifndef CONFIG_IA32_EMULATION
+/*
+ * This handles SYSCALL from 32-bit code. There is no way to program
+ * MSRs to fully disable 32-bit SYSCALL.
+ */
ENTRY(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
END(ignore_sysret)
+#endif
ENTRY(rewind_stack_do_exit)
UNWIND_HINT_FUNC
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 42fe42e82baf..39106111be86 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -3,6 +3,12 @@
# Building vDSO images for x86.
#
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
+ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
+include $(srctree)/lib/vdso/Makefile
+
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
@@ -51,6 +57,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
hostprogs-y += vdso2c
@@ -121,6 +128,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
@@ -160,6 +168,7 @@ $(obj)/vdso32.so.dbg: FORCE \
$(obj)/vdso32/system_call.o \
$(obj)/vdso32/sigreturn.o
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
#
# The DSO images are built using a special linker script.
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 4aed41f638bb..d9ff616bb0f6 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -1,251 +1,85 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- *
* Fast user context implementation of clock_gettime, gettimeofday, and time.
*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Copyright 2019 ARM Limited
+ *
* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
*/
-
-#include <uapi/linux/time.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-#include <asm/unistd.h>
-#include <asm/msr.h>
-#include <asm/pvclock.h>
-#include <asm/mshyperv.h>
-#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
+#include <linux/types.h>
-#define gtod (&VVAR(vsyscall_gtod_data))
+#include "../../../../lib/vdso/gettimeofday.c"
-extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
-extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifndef BUILD_VDSO32
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- long ret;
- asm ("syscall" : "=a" (ret), "=m" (*ts) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
- "rcx", "r11");
- return ret;
+ return __cvdso_gettimeofday(tv, tz);
}
-#else
+int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+time_t __vdso_time(time_t *t)
{
- long ret;
-
- asm (
- "mov %%ebx, %%edx \n"
- "mov %[clock], %%ebx \n"
- "call __kernel_vsyscall \n"
- "mov %%edx, %%ebx \n"
- : "=a" (ret), "=m" (*ts)
- : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
- : "edx");
- return ret;
+ return __cvdso_time(t);
}
-#endif
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
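After the rewrite, each exported vDSO entry point is a thin wrapper around the generic lib/vdso implementation, with the unprefixed name provided as a weak alias. The alias pattern in isolation (illustrative names; an ordinary libc call stands in for the __cvdso_* helper):

#include <stdio.h>
#include <sys/time.h>

static int do_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	return gettimeofday(tv, tz);	/* stand-in for __cvdso_gettimeofday() */
}

/* Strong prefixed entry point plus a weak unprefixed alias, mirroring
 * the __vdso_gettimeofday()/gettimeofday() pair above. */
int my_vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
	__attribute__((alias("do_gettimeofday")));
int my_gettimeofday(struct timeval *tv, struct timezone *tz)
	__attribute__((weak, alias("do_gettimeofday")));

int main(void)
{
	struct timeval tv;

	my_gettimeofday(&tv, NULL);
	printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}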
-#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
-{
- return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
-}
-static notrace u64 vread_pvclock(void)
-{
- const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
- u32 version;
- u64 ret;
-
- /*
- * Note: The kernel and hypervisor must guarantee that cpu ID
- * number maps 1:1 to per-CPU pvclock time info.
- *
- * Because the hypervisor is entirely unaware of guest userspace
- * preemption, it cannot guarantee that per-CPU pvclock time
- * info is updated if the underlying CPU changes or that that
- * version is increased whenever underlying CPU changes.
- *
- * On KVM, we are guaranteed that pvti updates for any vCPU are
- * atomic as seen by *all* vCPUs. This is an even stronger
- * guarantee than we get with a normal seqlock.
- *
- * On Xen, we don't appear to have that guarantee, but Xen still
- * supplies a valid seqlock using the version field.
- *
- * We only do pvclock vdso timing at all if
- * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
- * mean that all vCPUs have matching pvti and that the TSC is
- * synced, so we can just look at vCPU 0's pvti.
- */
-
- do {
- version = pvclock_read_begin(pvti);
-
- if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
- return U64_MAX;
-
- ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
- } while (pvclock_read_retry(pvti, version));
-
- return ret;
-}
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(void)
-{
- const struct ms_hyperv_tsc_page *tsc_pg =
- (const struct ms_hyperv_tsc_page *)&hvclock_page;
+#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
+/* both 64-bit and x32 use these */
+extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
- return hv_read_tsc_page(tsc_pg);
-}
-#endif
-
-notrace static inline u64 vgetcyc(int mode)
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- if (mode == VCLOCK_TSC)
- return (u64)rdtsc_ordered();
-
- /*
- * For any memory-mapped vclock type, we need to make sure that gcc
- * doesn't cleverly hoist a load before the mode check. Otherwise we
- * might end up touching the memory-mapped page even if the vclock in
- * question isn't enabled, which will segfault. Hence the barriers.
- */
-#ifdef CONFIG_PARAVIRT_CLOCK
- if (mode == VCLOCK_PVCLOCK) {
- barrier();
- return vread_pvclock();
- }
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (mode == VCLOCK_HVCLOCK) {
- barrier();
- return vread_hvclock();
- }
-#endif
- return U64_MAX;
+ return __cvdso_clock_gettime(clock, ts);
}
-notrace static int do_hres(clockid_t clk, struct timespec *ts)
-{
- struct vgtod_ts *base = &gtod->basetime[clk];
- u64 cycles, last, sec, ns;
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- cycles = vgetcyc(gtod->vclock_mode);
- ns = base->nsec;
- last = gtod->cycle_last;
- if (unlikely((s64)cycles < 0))
- return vdso_fallback_gettime(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * gtod->mult;
- ns >>= gtod->shift;
- sec = base->sec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return 0;
-}
+int clock_gettime(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
-notrace static void do_coarse(clockid_t clk, struct timespec *ts)
+int __vdso_clock_getres(clockid_t clock,
+ struct __kernel_timespec *res)
{
- struct vgtod_ts *base = &gtod->basetime[clk];
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = base->sec;
- ts->tv_nsec = base->nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
+ return __cvdso_clock_getres(clock, res);
}
+int clock_getres(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+#else
+/* i386 only */
+extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
+
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- unsigned int msk;
-
- /* Sort out negative (CPU/FD) and invalid clocks */
- if (unlikely((unsigned int) clock >= MAX_CLOCKS))
- return vdso_fallback_gettime(clock, ts);
-
- /*
- * Convert the clockid to a bitmask and use it to check which
- * clocks are handled in the VDSO directly.
- */
- msk = 1U << clock;
- if (likely(msk & VGTOD_HRES)) {
- return do_hres(clock, ts);
- } else if (msk & VGTOD_COARSE) {
- do_coarse(clock, ts);
- return 0;
- }
- return vdso_fallback_gettime(clock, ts);
+ return __cvdso_clock_gettime32(clock, ts);
}
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct old_timespec32 *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- if (likely(tv != NULL)) {
- struct timespec *ts = (struct timespec *) tv;
-
- do_hres(CLOCK_REALTIME, ts);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- tz->tz_minuteswest = gtod->tz_minuteswest;
- tz->tz_dsttime = gtod->tz_dsttime;
- }
-
- return 0;
+ return __cvdso_clock_gettime(clock, ts);
}
-int gettimeofday(struct timeval *, struct timezone *)
- __attribute__((weak, alias("__vdso_gettimeofday")));
-/*
- * This will break when the xtime seconds get inaccurate, but that is
- * unlikely
- */
-notrace time_t __vdso_time(time_t *t)
-{
- /* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
+int clock_gettime64(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime64")));
- if (t)
- *t = result;
- return result;
+int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
+{
+ return __cvdso_clock_getres_time32(clock, res);
}
-time_t time(time_t *t)
- __attribute__((weak, alias("__vdso_time")));
+
+int clock_getres(clockid_t, struct old_timespec32 *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
+#endif
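The rewritten file reduces every entry point to a thin wrapper around the generic lib/vdso implementation and then publishes a second linker-visible name through a weak alias. A minimal userspace sketch of that alias pattern, assuming GCC/Clang on an ELF target; impl_answer and answer are illustrative names, not kernel symbols:

#include <stdio.h>

int impl_answer(void)
{
	return 42;
}

/* One body, two names: callers may link against either, and a strong
 * definition of answer() elsewhere would silently win over this one. */
int answer(void) __attribute__((weak, alias("impl_answer")));

int main(void)
{
	printf("%d\n", answer());
	return 0;
}
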
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index d3a2dce4cfa9..36b644e16272 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -25,6 +25,8 @@ VERSION {
__vdso_getcpu;
time;
__vdso_time;
+ clock_getres;
+ __vdso_clock_getres;
local: *;
};
}
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 422764a81d32..c7720995ab1a 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -26,6 +26,8 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
+ __vdso_clock_getres;
+ __vdso_clock_gettime64;
};
LINUX_2.5 {
diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S
index 05cd1c5c4a15..16a8050a4fb6 100644
--- a/arch/x86/entry/vdso/vdsox32.lds.S
+++ b/arch/x86/entry/vdso/vdsox32.lds.S
@@ -21,6 +21,7 @@ VERSION {
__vdso_gettimeofday;
__vdso_getcpu;
__vdso_time;
+ __vdso_clock_getres;
local: *;
};
}
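All three variants gate their exports through a version script, which is why each new entry point (__vdso_clock_getres everywhere, plus __vdso_clock_gettime64 for the 32-bit vDSO) has to be listed per script. A toy stand-alone version script showing the mechanism; TOY_1.0 and toy_clock_gettime are made-up names, and inside a full linker script the same node sits under a VERSION { } block as above:

TOY_1.0 {
global:
	toy_clock_gettime;
local:
	*;
};

Linked with gcc -shared -Wl,--version-script=toy.lds, any symbol not named under global: is hidden from the DSO's dynamic symbol table.
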
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8db1f594e8b1..349a61d8bf34 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -22,7 +22,7 @@
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
-#include <asm/mshyperv.h>
+#include <clocksource/hyperv_timer.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile
index 1ac4dd116c26..93c1b3e949a7 100644
--- a/arch/x86/entry/vsyscall/Makefile
+++ b/arch/x86/entry/vsyscall/Makefile
@@ -2,7 +2,5 @@
#
# Makefile for the x86 low level vsyscall code
#
-obj-y := vsyscall_gtod.o
-
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index d9d81ad7a400..e7c596dea947 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,9 +42,11 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NONE } vsyscall_mode =
+static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
+#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
+ XONLY;
#else
EMULATE;
#endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
+ else if (!strcmp("xonly", str))
+ vsyscall_mode = XONLY;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -106,14 +110,15 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr);
return false;
} else {
return true;
}
}
-bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
@@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
long ret;
unsigned long orig_dx;
+ /* Write faults or kernel-privilege faults never get fixed up. */
+ if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
+ return false;
+
+ if (!(error_code & X86_PF_INSTR)) {
+ /* Failed vsyscall read */
+ if (vsyscall_mode == EMULATE)
+ return false;
+
+ /*
+ * User code tried and failed to read the vsyscall page.
+ */
+ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
+ return false;
+ }
+
/*
* No point in checking CS -- the only way to get here is a user mode
* trap to a high address, which means that we're in 64-bit user code.
@@ -268,7 +289,7 @@ do_ret:
return true;
sigsegv:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return true;
}
@@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
static const struct vm_operations_struct gate_vma_ops = {
.name = gate_vma_name,
};
-static struct vm_area_struct gate_vma = {
+static struct vm_area_struct gate_vma __ro_after_init = {
.vm_start = VSYSCALL_ADDR,
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
.vm_page_prot = PAGE_READONLY_EXEC,
@@ -357,12 +378,20 @@ void __init map_vsyscall(void)
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- if (vsyscall_mode != NONE) {
+ /*
+ * For full emulation, the page needs to exist for real. In
+ * execute-only mode, there is no PTE at all backing the vsyscall
+ * page.
+ */
+ if (vsyscall_mode == EMULATE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
+ if (vsyscall_mode == XONLY)
+ gate_vma.vm_flags = VM_EXEC;
+
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
}
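In the new XONLY mode the gate VMA stays executable but no PTE backs the page, so instruction fetches are still trapped and emulated while plain reads take the warning path added above. A hedged userspace probe, assuming the long-standing vsyscall ABI (page at 0xffffffffff600000, time() at offset 0x400); run it only in a disposable VM, since the read at the end faults under both xonly and none:

#include <stdio.h>
#include <time.h>

#define VSYSCALL_ADDR 0xffffffffff600000UL

int main(void)
{
	/* Executing the page: emulated in both EMULATE and XONLY modes */
	time_t (*vtime)(time_t *) = (time_t (*)(time_t *))(VSYSCALL_ADDR + 0x400);
	printf("time() via vsyscall: %ld\n", (long)vtime(NULL));

	/* Reading the page: only works in full EMULATE mode */
	unsigned char first = *(volatile unsigned char *)VSYSCALL_ADDR;
	printf("first byte: %#x\n", first);
	return 0;
}
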
diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c
deleted file mode 100644
index cfcdba082feb..000000000000
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Modified for x86 32 bit architecture by
- * Stefani Seibold <stefani@seibold.net>
- * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include <linux/timekeeper_internal.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-int vclocks_used __read_mostly;
-
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
-
-void update_vsyscall_tz(void)
-{
- vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
- vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- struct vgtod_ts *base;
- u64 nsec;
-
- /* Mark the new vclock used. */
- BUILD_BUG_ON(VCLOCK_MAX >= 32);
- WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
-
- gtod_write_begin(vdata);
-
- /* copy vsyscall data */
- vdata->vclock_mode = vclock_mode;
- vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mask = tk->tkr_mono.mask;
- vdata->mult = tk->tkr_mono.mult;
- vdata->shift = tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_REALTIME];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_TAI];
- base->sec = tk->xtime_sec + (s64)tk->tai_offset;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_MONOTONIC];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
- nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
- base->sec++;
- }
- base->nsec = nsec;
-
- base = &vdata->basetime[CLOCK_REALTIME_COARSE];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
- nsec += tk->wall_to_monotonic.tv_nsec;
- while (nsec >= NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- base->sec++;
- }
- base->nsec = nsec;
-
- gtod_write_end(vdata);
-}
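The deleted update_vsyscall() precomputed a (mult, shift) pair so that readers can convert a cycle delta to nanoseconds with one multiply and one shift; the generic vDSO code replacing it keeps the same fixed-point scheme. A self-contained sketch of the arithmetic, where the 3 GHz frequency and shift of 32 are illustrative (the kernel picks smaller shifts to keep the product inside 64 bits; this sketch uses the GCC __uint128_t extension instead):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t freq = 3000000000ULL;		/* 3 GHz counter, made up */
	uint32_t shift = 32;
	/* mult ~= (NSEC_PER_SEC << shift) / freq, rounded to nearest */
	uint32_t mult = (uint32_t)(((1000000000ULL << shift) + freq / 2) / freq);

	uint64_t cycles = 6000000000ULL;	/* two seconds worth of ticks */
	uint64_t ns = (uint64_t)(((__uint128_t)cycles * mult) >> shift);

	/* Expect roughly 2000000000 ns */
	printf("mult=%u shift=%u -> %llu ns\n", mult, shift,
	       (unsigned long long)ns);
	return 0;
}
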
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 3cd94a21bd53..ceb712b0a1c6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2179,7 +2179,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
* For now, this can't happen because all callers hold mmap_sem
* for write. If this changes, we'll need a different solution.
*/
- lockdep_assert_held_exclusive(&mm->mmap_sem);
+ lockdep_assert_held_write(&mm->mmap_sem);
if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1)
on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 6072f92cb8ea..267d7f8e12ab 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -302,7 +302,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(event->cpu));
+ topology_die_cpumask(event->cpu));
} else {
return -ENOENT;
}
@@ -385,7 +385,7 @@ static int cstate_cpu_exit(unsigned int cpu)
if (has_cstate_pkg &&
cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
@@ -414,7 +414,7 @@ static int cstate_cpu_init(unsigned int cpu)
* in the package cpu mask as the designated reader.
*/
target = cpumask_any_and(&cstate_pkg_cpu_mask,
- topology_core_cpumask(cpu));
+ topology_die_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
@@ -663,7 +663,13 @@ static int __init cstate_init(void)
}
if (has_cstate_pkg) {
- err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+ if (topology_max_die_per_package() > 1) {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ "cstate_die", -1);
+ } else {
+ err = perf_pmu_register(&cstate_pkg_pmu,
+ cstate_pkg_pmu.name, -1);
+ }
if (err) {
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 505c73dc6a73..2c8db2c19328 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -337,7 +337,7 @@ static int alloc_pebs_buffer(int cpu)
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
- void *buffer, *ibuffer, *cea;
+ void *buffer, *insn_buff, *cea;
if (!x86_pmu.pebs)
return 0;
@@ -351,12 +351,12 @@ static int alloc_pebs_buffer(int cpu)
* buffer then.
*/
if (x86_pmu.intel_cap.pebs_format < 2) {
- ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
- if (!ibuffer) {
+ insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!insn_buff) {
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
- per_cpu(insn_buffer, cpu) = ibuffer;
+ per_cpu(insn_buffer, cpu) = insn_buff;
}
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 26c03f5adfb9..8c7ecde3ba70 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -149,7 +149,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
- unsigned int maxpkg;
+ unsigned int maxdie;
struct rapl_pmu *pmus[];
};
@@ -162,13 +162,13 @@ static u64 rapl_timer_ms;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for
* non-existent mappings in the topology map.
*/
- return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
+ return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -572,7 +572,7 @@ static int rapl_cpu_offline(unsigned int cpu)
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate rapl events to the new target */
if (target < nr_cpu_ids) {
@@ -599,14 +599,14 @@ static int rapl_cpu_online(unsigned int cpu)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
}
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
*/
- target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
@@ -669,22 +669,22 @@ static void cleanup_rapl_pmus(void)
{
int i;
- for (i = 0; i < rapl_pmus->maxpkg; i++)
+ for (i = 0; i < rapl_pmus->maxdie; i++)
kfree(rapl_pmus->pmus[i]);
kfree(rapl_pmus);
}
static int __init init_rapl_pmus(void)
{
- int maxpkg = topology_max_packages();
+ int maxdie = topology_max_packages() * topology_max_die_per_package();
size_t size;
- size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+ size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
rapl_pmus = kzalloc(size, GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
- rapl_pmus->maxpkg = maxpkg;
+ rapl_pmus->maxdie = maxdie;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
rapl_pmus->pmu.event_init = rapl_pmu_event_init;
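init_rapl_pmus() sizes one allocation as the header plus maxdie pointer slots, the standard C99 flexible-array-member idiom. The same pattern in userspace, with illustrative names (struct table and table_alloc are not kernel API, and calloc stands in for kzalloc):

#include <stdio.h>
#include <stdlib.h>

struct slot { int id; };

struct table {
	unsigned int nr;
	struct slot *slots[];	/* flexible array member, sized at alloc time */
};

static struct table *table_alloc(unsigned int n)
{
	struct table *t = calloc(1, sizeof(*t) + n * sizeof(struct slot *));

	if (t)
		t->nr = n;
	return t;
}

int main(void)
{
	struct table *t = table_alloc(4);

	if (!t)
		return 1;
	printf("allocated %u pointer slots\n", t->nr);
	free(t);
	return 0;
}
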
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9e3fbd47cb56..6094c8db949d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -15,7 +15,7 @@ struct pci_driver *uncore_pci_driver;
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
-static int max_packages;
+static int max_dies;
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
@@ -101,13 +101,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
- unsigned int pkgid = topology_logical_package_id(cpu);
+ unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for
* non-existent mappings in the topology map.
*/
- return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
+ return dieid < max_dies ? pmu->boxes[dieid] : NULL;
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -312,7 +312,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
uncore_pmu_init_hrtimer(box);
box->cpu = -1;
box->pci_phys_id = -1;
- box->pkgid = -1;
+ box->dieid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -827,10 +827,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
- int pkg;
+ int die;
- for (pkg = 0; pkg < max_packages; pkg++)
- kfree(pmu->boxes[pkg]);
+ for (die = 0; die < max_dies; die++)
+ kfree(pmu->boxes[die]);
kfree(pmu->boxes);
}
@@ -867,7 +867,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
if (!pmus)
return -ENOMEM;
- size = max_packages * sizeof(struct intel_uncore_box *);
+ size = max_dies * sizeof(struct intel_uncore_box *);
for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = setid ? i : -1;
@@ -937,20 +937,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
- int phys_id, pkg, ret;
+ int phys_id, die, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
- pkg = topology_phys_to_logical_pkg(phys_id);
- if (pkg < 0)
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
+ if (die < 0)
return -EINVAL;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
- uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+ uncore_extra_pci_dev[die].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
@@ -989,7 +990,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
- if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+ if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL;
box = uncore_alloc_box(type, NUMA_NO_NODE);
@@ -1003,13 +1004,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
- box->pkgid = pkg;
+ box->dieid = die;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
- pmu->boxes[pkg] = box;
+ pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1)
return 0;
@@ -1017,7 +1018,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
ret = uncore_pmu_register(pmu);
if (ret) {
pci_set_drvdata(pdev, NULL);
- pmu->boxes[pkg] = NULL;
+ pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
}
@@ -1028,16 +1029,17 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
- int i, phys_id, pkg;
+ int i, phys_id, die;
phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
- pkg = topology_phys_to_logical_pkg(phys_id);
+ die = (topology_max_die_per_package() > 1) ? phys_id :
+ topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
- if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
- uncore_extra_pci_dev[pkg].dev[i] = NULL;
+ if (uncore_extra_pci_dev[die].dev[i] == pdev) {
+ uncore_extra_pci_dev[die].dev[i] = NULL;
break;
}
}
@@ -1050,7 +1052,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[box->pkgid] = NULL;
+ pmu->boxes[box->dieid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
@@ -1062,7 +1064,7 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;
- size = max_packages * sizeof(struct pci_extra_dev);
+ size = max_dies * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
ret = -ENOMEM;
@@ -1109,11 +1111,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
- int i, pkg;
+ int i, die;
- pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+ die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[die];
if (!box)
continue;
@@ -1146,13 +1148,13 @@ static int uncore_event_cpu_offline(unsigned int cpu)
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, pkg, target;
+ int i, die, target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
goto unref;
/* Find a new cpu to collect uncore events */
- target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate uncore events to the new target */
if (target < nr_cpu_ids)
@@ -1165,12 +1167,12 @@ static int uncore_event_cpu_offline(unsigned int cpu)
unref:
/* Clear the references */
- pkg = topology_logical_package_id(cpu);
+ die = topology_logical_die_id(cpu);
for (; *types; types++) {
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[die];
if (box && atomic_dec_return(&box->refcnt) == 0)
uncore_box_exit(box);
}
@@ -1179,7 +1181,7 @@ unref:
}
static int allocate_boxes(struct intel_uncore_type **types,
- unsigned int pkg, unsigned int cpu)
+ unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
struct intel_uncore_type *type;
@@ -1192,20 +1194,20 @@ static int allocate_boxes(struct intel_uncore_type **types,
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- if (pmu->boxes[pkg])
+ if (pmu->boxes[die])
continue;
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
goto cleanup;
box->pmu = pmu;
- box->pkgid = pkg;
+ box->dieid = die;
list_add(&box->active_list, &allocated);
}
}
/* Install them in the pmus */
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
- box->pmu->boxes[pkg] = box;
+ box->pmu->boxes[die] = box;
}
return 0;
@@ -1222,10 +1224,10 @@ static int uncore_event_cpu_online(unsigned int cpu)
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, ret, pkg, target;
+ int i, ret, die, target;
- pkg = topology_logical_package_id(cpu);
- ret = allocate_boxes(types, pkg, cpu);
+ die = topology_logical_die_id(cpu);
+ ret = allocate_boxes(types, die, cpu);
if (ret)
return ret;
@@ -1233,7 +1235,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
- box = pmu->boxes[pkg];
+ box = pmu->boxes[die];
if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
@@ -1243,7 +1245,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
- target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
@@ -1400,6 +1402,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
{},
};
@@ -1418,7 +1421,7 @@ static int __init intel_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
- max_packages = topology_max_packages();
+ max_dies = topology_max_packages() * topology_max_die_per_package();
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_init->pci_init) {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79eb2e21e4f0..33aba2504cb1 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -108,7 +108,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
- int pkgid; /* Logical package ID */
+ int dieid; /* Logical die ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
@@ -467,7 +467,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box)
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
- return (box->pkgid < 0);
+ return (box->dieid < 0);
}
static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10e04387f38..bbe89bc589f9 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1058,8 +1058,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
- int pkg = box->pkgid;
- struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+ int die = box->dieid;
+ struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx];
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1608050e9df9..0e033ef11a9f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -17,64 +17,13 @@
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/clockchips.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
-
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return tsc_pg;
-}
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-
-static u64 read_hv_clock_tsc(struct clocksource *arg)
-{
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick == U64_MAX)
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_tsc = {
- .name = "hyperv_clocksource_tsc_page",
- .rating = 400,
- .read = read_hv_clock_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-#endif
-
-static u64 read_hv_clock_msr(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_msr = {
- .name = "hyperv_clocksource_msr",
- .rating = 400,
- .read = read_hv_clock_msr,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
+#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
@@ -343,42 +292,8 @@ void __init hyperv_init(void)
x86_init.pci.arch_init = hv_pci_init;
- /*
- * Register Hyper-V specific clocksource.
- */
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
- union hv_x64_msr_hypercall_contents tsc_msr;
-
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
- if (!tsc_pg)
- goto register_msr_cs;
-
- hyperv_cs = &hyperv_cs_tsc;
-
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- tsc_msr.enable = 1;
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
-
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
-
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- return;
- }
-register_msr_cs:
-#endif
- /*
- * For 32 bit guests just use the MSR based mechanism for reading
- * the partition counter.
- */
-
- hyperv_cs = &hyperv_cs_msr;
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-
+ /* Register Hyper-V specific clocksource */
+ hv_init_clocksource();
return;
remove_cpuhp_state:
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
new file mode 100644
index 000000000000..4adb13f08af7
--- /dev/null
+++ b/arch/x86/include/asm/acrn.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ACRN_H
+#define _ASM_X86_ACRN_H
+
+extern void acrn_hv_callback_vector(void);
+#ifdef CONFIG_TRACING
+#define trace_acrn_hv_callback_vector acrn_hv_callback_vector
+#endif
+
+extern void acrn_hv_vector_handler(struct pt_regs *regs);
+#endif /* _ASM_X86_ACRN_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1340fa53b575..050e5f9ebf81 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -53,7 +53,7 @@ extern unsigned int apic_verbosity;
extern int local_apic_timer_c2_ok;
extern int disable_apic;
-extern unsigned int lapic_timer_frequency;
+extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
@@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr)
extern int apic_force_enable(unsigned long addr);
#endif
-extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
@@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
extern void lapic_online(void);
extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
@@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ea3d95275b43..115127c7ad28 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
- : "ir" (i));
+ : "ir" (i) : "memory");
}
/**
@@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
@@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
+ : "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 6a5b0ec460da..52cfaecb13f9 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -9,7 +9,7 @@
/* An 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
@@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless);
* the old value.
*/
-static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
- long long n)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
@@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
* Atomically xchgs the value of @v to @n and returns
* the old value.
*/
-static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
- long long o;
+ s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
alternative_atomic64(xchg, "=&A" (o),
@@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
*
* Atomically sets the value of @v to @n.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
@@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i)
*
* Atomically reads the value of @v and returns it.
*/
-static inline long long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
}
@@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + *@v
*/
-static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
/*
* Other variants with different arithmetic operators:
*/
-static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
return i;
}
-static inline long long arch_atomic64_inc_return(atomic64_t *v)
+static inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(inc_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
-static inline long long arch_atomic64_dec_return(atomic64_t *v)
+static inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
- long long a;
+ s64 a;
alternative_atomic64(dec_return, "=&A" (a),
"S" (v) : "memory", "ecx");
return a;
@@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
*
* Atomically adds @i to @v.
*/
-static inline long long arch_atomic64_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns non-zero if the add was done, zero otherwise.
*/
-static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
- long long u)
+static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
@@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long long r;
+ s64 r;
alternative_atomic64(dec_if_positive, "=&A" (r),
"S" (v) : "ecx", "memory");
return r;
@@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void arch_atomic64_and(long long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
@@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_or(long long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
@@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
return old;
}
-static inline void arch_atomic64_xor(long long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
}
-static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
@@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
return old;
}
-static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- long long old, c = 0;
+ s64 old, c = 0;
while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
c = old;
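On 32-bit x86 every fetch_and/or/xor/add variant above is the same compare-and-swap loop: pick a candidate old value, compute old OP i, and retry until cmpxchg confirms no one raced in between. The same shape in portable userspace C via the GCC __atomic builtins; note the kernel seeds its loop with c = 0 and lets the first failed cmpxchg fetch the live value, while this sketch seeds from an explicit load:

#include <stdio.h>
#include <stdint.h>

static int64_t fetch_or64(int64_t *v, int64_t i)
{
	int64_t old = __atomic_load_n(v, __ATOMIC_RELAXED);

	/* On failure, old is refreshed with the current value and we retry */
	while (!__atomic_compare_exchange_n(v, &old, old | i, 0,
					    __ATOMIC_SEQ_CST, __ATOMIC_RELAXED))
		;
	return old;
}

int main(void)
{
	int64_t v = 0x00f0;
	int64_t prev = fetch_or64(&v, 0x000f);

	printf("prev=%#llx now=%#llx\n",
	       (unsigned long long)prev, (unsigned long long)v);
	return 0;
}
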
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index dadc20adba21..95c6ceac66b9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -17,7 +17,7 @@
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
-static inline long arch_atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE((v)->counter);
}
@@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v)
*
* Atomically sets the value of @v to @i.
*/
-static inline void arch_atomic64_set(atomic64_t *v, long i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
WRITE_ONCE(v->counter, i);
}
@@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i)
*
* Atomically adds @i to @v.
*/
-static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
*
* Atomically subtracts @i from @v.
*/
-static inline void arch_atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter)
- : "er" (i), "m" (v->counter));
+ : "er" (i), "m" (v->counter) : "memory");
}
/**
@@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
@@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_inc arch_atomic64_inc
@@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
- : "m" (v->counter));
+ : "m" (v->counter) : "memory");
}
#define arch_atomic64_dec arch_atomic64_dec
@@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
+static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
@@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
-static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
+static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
return arch_atomic64_add_return(-i, v);
}
-static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
-static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
-static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
-static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
return try_cmpxchg(&v->counter, old, new);
}
-static inline long arch_atomic64_xchg(atomic64_t *v, long new)
+static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
return arch_xchg(&v->counter, new);
}
-static inline void arch_atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "andq %1,%0"
: "+m" (v->counter)
@@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_or(long i, atomic64_t *v)
+static inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "orq %1,%0"
: "+m" (v->counter)
@@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
@@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
return val;
}
-static inline void arch_atomic64_xor(long i, atomic64_t *v)
+static inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "xorq %1,%0"
: "+m" (v->counter)
@@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v)
: "memory");
}
-static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
+static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
s64 val = arch_atomic64_read(v);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 14de0432d288..84f848c2541a 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -80,8 +80,8 @@ do { \
})
/* Atomic operations are already serializing on x86 */
-#define __smp_mb__before_atomic() barrier()
-#define __smp_mb__after_atomic() barrier()
+#define __smp_mb__before_atomic() do { } while (0)
+#define __smp_mb__after_atomic() do { } while (0)
#include <asm-generic/barrier.h>
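These two files form one logical change: once the LOCK-prefixed atomics carry an explicit "memory" clobber, each of them is a compiler barrier in its own right, so __smp_mb__before_atomic()/__smp_mb__after_atomic() can become empty (the CPU already orders LOCKed operations). A userspace illustration of the clobber doing its job as a compiler barrier; x86-64 only, and lock addl $0,-4(%rsp) is the classic full-fence idiom:

#include <stdio.h>

static int data;
static int flag;

int main(void)
{
	data = 1;
	/* The "memory" clobber forbids the compiler from moving the
	 * stores to data/flag across this instruction; the LOCK prefix
	 * makes it a full hardware fence as well. */
	asm volatile("lock addl $0, -4(%%rsp)" ::: "memory", "cc");
	flag = 1;
	printf("%d %d\n", data, flag);
	return 0;
}
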
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1d337c51f7e6..58acda503817 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -22,8 +22,8 @@ enum cpuid_leafs
CPUID_LNX_3,
CPUID_7_0_EBX,
CPUID_D_1_EAX,
- CPUID_F_0_EDX,
- CPUID_F_1_EDX,
+ CPUID_LNX_4,
+ CPUID_7_1_EAX,
CPUID_8000_0008_EBX,
CPUID_6_EAX,
CPUID_8000_000A_EDX,
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 75f27ee2c263..998c2cc08363 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -239,12 +239,14 @@
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
@@ -269,13 +271,19 @@
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0xf, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -322,6 +330,7 @@
#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
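The renumbering works because each X86_FEATURE_* constant is simply word*32 + bit into the kernel's capability bitmap: word 11 turns into a Linux-defined scatter word collecting the old CPUID 0xF bits, which frees word 12 for the new CPUID 0x7:1 (EAX) leaf. A tiny sketch of the packing, with values copied from the hunk above:

#include <stdio.h>

#define X86_FEATURE_CQM_LLC	(11*32 + 0)
#define X86_FEATURE_AVX512_BF16	(12*32 + 5)

static void show(const char *name, unsigned int feature)
{
	printf("%-24s word %2u bit %2u\n", name, feature / 32, feature % 32);
}

int main(void)
{
	show("X86_FEATURE_CQM_LLC", X86_FEATURE_CQM_LLC);
	show("X86_FEATURE_AVX512_BF16", X86_FEATURE_AVX512_BF16);
	return 0;
}
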
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 7e42b285c856..c6136d79f8c0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
-void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..296b346184b2 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -22,6 +22,35 @@
pop %_ASM_BP
.endm
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+ leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just clearing the MSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original ebp.
+ */
+.macro ENCODE_FRAME_POINTER
+ mov %esp, %ebp
+ andl $0x7fffffff, %ebp
+.endm
+#endif /* CONFIG_X86_64 */
+
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
@@ -30,12 +59,32 @@
#define FRAME_END "pop %" _ASM_BP "\n"
+#ifdef CONFIG_X86_64
+#define ENCODE_FRAME_POINTER \
+ "lea 1(%rsp), %rbp\n\t"
+#else /* !CONFIG_X86_64 */
+#define ENCODE_FRAME_POINTER \
+ "movl %esp, %ebp\n\t" \
+ "andl $0x7fffffff, %ebp\n\t"
+#endif /* CONFIG_X86_64 */
+
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
+#ifdef __ASSEMBLY__
+
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+.endm
+
+#else /* !__ASSEMBLY */
+
+#define ENCODE_FRAME_POINTER
+
+#endif
+
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
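The encoding relies on tagging: a genuine 64-bit kernel stack address is even, so setting the low bit produces a value the unwinder can recognize as a disguised pt_regs pointer and strip again. A userspace sketch of the decode side (the sample address is invented; only the bit manipulation mirrors the 64-bit macro):

#include <stdio.h>
#include <stdint.h>

static int is_encoded_ptregs(uintptr_t bp)
{
	return bp & 1;			/* odd => not a real frame pointer */
}

static uintptr_t decode_ptregs(uintptr_t bp)
{
	return bp & ~(uintptr_t)1;	/* strip the tag bit */
}

int main(void)
{
	uintptr_t regs = 0xffffc90000123450UL;	/* made-up stack address */
	uintptr_t bp = regs + 1;	/* what ENCODE_FRAME_POINTER stores */

	printf("encoded: %d, decoded: %#lx\n",
	       is_encoded_ptregs(bp), (unsigned long)decode_ptregs(bp));
	return 0;
}
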
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d9069bb26c7f..07533795b8d2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -37,7 +37,7 @@ typedef struct {
#ifdef CONFIG_X86_MCE_AMD
unsigned int irq_deferred_error_count;
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
unsigned int irq_hv_callback_count;
#endif
#if IS_ENABLED(CONFIG_HYPERV)
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..6352dee37cda 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
struct irq_data;
-struct hpet_dev;
+struct hpet_channel;
struct irq_domain;
extern void hpet_msi_unmask(struct irq_data *data);
extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
-extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
+extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
extern int hpet_assign_irq(struct irq_domain *domain,
- struct hpet_dev *dev, int dev_num);
+ struct hpet_channel *hc, int dev_num);
#ifdef CONFIG_HPET_EMULATE_RTC
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..cbd97e22d2f3 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -150,8 +150,11 @@ extern char irq_entries_start[];
#define trace_irq_entries_start irq_entries_start
#endif
+extern char spurious_entries_start[];
+
#define VECTOR_UNUSED NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN ((void *)~0UL)
+#define VECTOR_RETRIGGERED ((void *)~1UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
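VECTOR_SHUTDOWN and VECTOR_RETRIGGERED extend the existing sentinel-pointer scheme: ~0UL and ~1UL can never be valid irq_desc addresses, so the per-CPU vector table can encode states in-band without a side array. A sketch of the idiom using the same values; the table and state strings are illustrative:

#include <stdio.h>

#define VECTOR_UNUSED		NULL
#define VECTOR_SHUTDOWN		((void *)~0UL)
#define VECTOR_RETRIGGERED	((void *)~1UL)

static const char *vector_state(void *entry)
{
	if (entry == VECTOR_UNUSED)
		return "unused";
	if (entry == VECTOR_SHUTDOWN)
		return "shut down";
	if (entry == VECTOR_RETRIGGERED)
		return "retriggered while shut down";
	return "live irq_desc";
}

int main(void)
{
	void *table[] = { VECTOR_UNUSED, VECTOR_SHUTDOWN,
			  VECTOR_RETRIGGERED, &table };

	for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		printf("vector %u: %s\n", i, vector_state(table[i]));
	return 0;
}
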
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cdf44aa9a501..af78cd72b8f3 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+/*
+ * The Hyper-V TimeRefCount register and the TSC
+ * page provide a guest VM clock with a 100ns tick rate.

+ */
+#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
+
typedef struct _HV_REFERENCE_TSC_PAGE {
__u32 tsc_sequence;
__u32 res1;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 8c5aaba6633f..50a30f6c668b 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -29,6 +29,7 @@ enum x86_hypervisor_type {
X86_HYPER_XEN_HVM,
X86_HYPER_KVM,
X86_HYPER_JAILHOUSE,
+ X86_HYPER_ACRN,
};
#ifdef CONFIG_HYPERVISOR_GUEST
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 310118805f57..0278aa66ef62 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -56,6 +56,7 @@
#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+#define INTEL_FAM6_ICELAKE_NNPI 0x9D
/* "Small Core" Processors (Atom) */
@@ -76,6 +77,7 @@
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+
#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 8f3bee821e6c..187ce59aea28 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
{
- return this_cpu_read(irq_regs);
+ return __this_cpu_read(irq_regs);
}
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
struct pt_regs *old_regs;
old_regs = get_irq_regs();
- this_cpu_write(irq_regs, new_regs);
+ __this_cpu_write(irq_regs, new_regs);
return old_regs;
}
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..06c3cc22a058 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
+#define HAVE_JUMP_LABEL_BATCH
+
#define JUMP_LABEL_NOP_SIZE 5
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5e7d6b46de97 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -71,22 +71,6 @@ struct kimage;
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
- newregs->sp = (unsigned long)&(oldregs->sp);
- asm volatile("xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->ss));
-#endif
-}
-
-/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
* mode exception.
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
- crash_fixup_ss_esp(newregs, oldregs);
} else {
#ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..f4fa8a9d5d0b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -105,6 +105,17 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+#define hv_get_time_ref_count(val) \
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
+
+#define hv_get_reference_tsc(val) \
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_reference_tsc(val) \
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_clocksource_vdso(val) \
+ ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
+#define hv_get_raw_timer() rdtsc_ordered()
+
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
@@ -133,7 +144,6 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
-extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
@@ -387,73 +397,4 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_HYPERV_TSCPAGE
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- u64 scale, offset;
- u32 sequence;
-
- /*
- * The protocol for reading Hyper-V TSC page is specified in Hypervisor
- * Top-Level Functional Specification ver. 3.0 and above. To get the
- * reference time we must do the following:
- * - READ ReferenceTscSequence
- * A special '0' value indicates the time source is unreliable and we
- * need to use something else. The currently published specification
- * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
- * instead of '0' as the special value, see commit c35b82ef0294.
- * - ReferenceTime =
- * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
- * - READ ReferenceTscSequence again. In case its value has changed
- * since our first reading we need to discard ReferenceTime and repeat
- * the whole sequence as the hypervisor was updating the page in
- * between.
- */
- do {
- sequence = READ_ONCE(tsc_pg->tsc_sequence);
- if (!sequence)
- return U64_MAX;
- /*
- * Make sure we read sequence before we read other values from
- * TSC page.
- */
- smp_rmb();
-
- scale = READ_ONCE(tsc_pg->tsc_scale);
- offset = READ_ONCE(tsc_pg->tsc_offset);
- *cur_tsc = rdtsc_ordered();
-
- /*
- * Make sure we read sequence after we read all other values
- * from TSC page.
- */
- smp_rmb();
-
- } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
-
- return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
- u64 cur_tsc;
-
- return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
-}
-
-#else
-static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return NULL;
-}
-
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- BUG();
- return U64_MAX;
-}
-#endif
#endif
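
The TSC page helpers removed above move to the common Hyper-V clocksource code; for reference, a hedged sketch of a caller converting the partition reference time (100ns units, per HV_CLOCK_HZ) into nanoseconds, assuming hv_get_tsc_page() and hv_read_tsc_page_tsc() keep their signatures in the new location:

static u64 hv_ref_time_ns(void)
{
	u64 cur_tsc, ticks;

	ticks = hv_read_tsc_page_tsc(hv_get_tsc_page(), &cur_tsc);
	if (ticks == U64_MAX)		/* TSC page marked unreliable */
		return 0;

	return ticks * (NSEC_PER_SEC / HV_CLOCK_HZ);	/* 100ns -> ns */
}
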
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 979ef971cc78..6b4fc2788078 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,6 +61,15 @@
#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+#define MSR_IA32_UMWAIT_CONTROL 0xe1
+#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0)
+#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1)
+/*
+ * The time field occupies bits [31:2], representing a 32-bit value
+ * whose bits [1:0] are always zero.
+ */
+#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
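
A small sketch (helper name hypothetical) of composing a value for the new MSR: the maximum wait time must have bits [1:0] clear, and bit 0 of the register selects whether the deeper C0.2 state is disabled.

static u32 umwait_control(u32 max_time, bool disable_c02)
{
	/* Bits [1:0] of the time field must be zero. */
	u32 val = max_time & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;

	if (disable_c02)
		val |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;

	return val;
}
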
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index eb0f80ce8524..e28f8b723b5c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
-
trace_hardirqs_on();
+
+ mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2474e434a6f7..946f8f1f1efc 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -88,7 +88,7 @@ struct pv_init_ops {
* the number of bytes of code generated, as we nop pad the
* rest in generic code.
*/
- unsigned (*patch)(u8 type, void *insnbuf,
+ unsigned (*patch)(u8 type, void *insn_buff,
unsigned long addr, unsigned len);
} __no_randomize_layout;
@@ -370,18 +370,11 @@ extern struct paravirt_patch_template pv_ops;
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-#define DEF_NATIVE(ops, name, code) \
- __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
+unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
+unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
- unsigned long addr, unsigned len);
-
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
- const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len);
+unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
int paravirt_disable_iospace(void);
@@ -679,8 +672,8 @@ u64 _paravirt_ident_64(u64);
/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 instrtype; /* type of this instruction */
+ u8 *instr; /* original instructions */
+ u8 type; /* type of this instruction */
u8 len; /* length of original instruction */
};
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..2278797c769d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -87,7 +87,7 @@
* don't give an lvalue though). */
extern void __bad_percpu_size(void);
-#define percpu_to_op(op, var, val) \
+#define percpu_to_op(qual, op, var, val) \
do { \
typedef typeof(var) pto_T__; \
if (0) { \
@@ -97,22 +97,22 @@ do { \
} \
switch (sizeof(var)) { \
case 1: \
- asm(op "b %1,"__percpu_arg(0) \
+ asm qual (op "b %1,"__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pto_T__)(val))); \
break; \
case 2: \
- asm(op "w %1,"__percpu_arg(0) \
+ asm qual (op "w %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 4: \
- asm(op "l %1,"__percpu_arg(0) \
+ asm qual (op "l %1,"__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pto_T__)(val))); \
break; \
case 8: \
- asm(op "q %1,"__percpu_arg(0) \
+ asm qual (op "q %1,"__percpu_arg(0) \
: "+m" (var) \
: "re" ((pto_T__)(val))); \
break; \
@@ -124,7 +124,7 @@ do { \
* Generate a percpu add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(var, val) \
+#define percpu_add_op(qual, var, val) \
do { \
typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
@@ -138,41 +138,41 @@ do { \
switch (sizeof(var)) { \
case 1: \
if (pao_ID__ == 1) \
- asm("incb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decb "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addb %1, "__percpu_arg(0) \
+ asm qual ("addb %1, "__percpu_arg(0) \
: "+m" (var) \
: "qi" ((pao_T__)(val))); \
break; \
case 2: \
if (pao_ID__ == 1) \
- asm("incw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decw "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addw %1, "__percpu_arg(0) \
+ asm qual ("addw %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 4: \
if (pao_ID__ == 1) \
- asm("incl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decl "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addl %1, "__percpu_arg(0) \
+ asm qual ("addl %1, "__percpu_arg(0) \
: "+m" (var) \
: "ri" ((pao_T__)(val))); \
break; \
case 8: \
if (pao_ID__ == 1) \
- asm("incq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
else if (pao_ID__ == -1) \
- asm("decq "__percpu_arg(0) : "+m" (var)); \
+ asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
else \
- asm("addq %1, "__percpu_arg(0) \
+ asm qual ("addq %1, "__percpu_arg(0) \
: "+m" (var) \
: "re" ((pao_T__)(val))); \
break; \
@@ -180,27 +180,27 @@ do { \
} \
} while (0)
-#define percpu_from_op(op, var) \
+#define percpu_from_op(qual, op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm volatile(op "b "__percpu_arg(1)",%0"\
+ asm qual (op "b "__percpu_arg(1)",%0" \
: "=q" (pfo_ret__) \
: "m" (var)); \
break; \
case 2: \
- asm volatile(op "w "__percpu_arg(1)",%0"\
+ asm qual (op "w "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 4: \
- asm volatile(op "l "__percpu_arg(1)",%0"\
+ asm qual (op "l "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
case 8: \
- asm volatile(op "q "__percpu_arg(1)",%0"\
+ asm qual (op "q "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: "m" (var)); \
break; \
@@ -238,23 +238,23 @@ do { \
pfo_ret__; \
})
-#define percpu_unary_op(op, var) \
+#define percpu_unary_op(qual, op, var) \
({ \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(0) \
+ asm qual (op "b "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 2: \
- asm(op "w "__percpu_arg(0) \
+ asm qual (op "w "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 4: \
- asm(op "l "__percpu_arg(0) \
+ asm qual (op "l "__percpu_arg(0) \
: "+m" (var)); \
break; \
case 8: \
- asm(op "q "__percpu_arg(0) \
+ asm qual (op "q "__percpu_arg(0) \
: "+m" (var)); \
break; \
default: __bad_percpu_size(); \
@@ -264,27 +264,27 @@ do { \
/*
* Add return operation
*/
-#define percpu_add_return_op(var, val) \
+#define percpu_add_return_op(qual, var, val) \
({ \
typeof(var) paro_ret__ = val; \
switch (sizeof(var)) { \
case 1: \
- asm("xaddb %0, "__percpu_arg(1) \
+ asm qual ("xaddb %0, "__percpu_arg(1) \
: "+q" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 2: \
- asm("xaddw %0, "__percpu_arg(1) \
+ asm qual ("xaddw %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 4: \
- asm("xaddl %0, "__percpu_arg(1) \
+ asm qual ("xaddl %0, "__percpu_arg(1) \
: "+r" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
case 8: \
- asm("xaddq %0, "__percpu_arg(1) \
+ asm qual ("xaddq %0, "__percpu_arg(1) \
: "+re" (paro_ret__), "+m" (var) \
: : "memory"); \
break; \
@@ -299,13 +299,13 @@ do { \
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
-#define percpu_xchg_op(var, nval) \
+#define percpu_xchg_op(qual, var, nval) \
({ \
typeof(var) pxo_ret__; \
typeof(var) pxo_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("\n\tmov "__percpu_arg(1)",%%al" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
"\n1:\tcmpxchgb %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -313,7 +313,7 @@ do { \
: "memory"); \
break; \
case 2: \
- asm("\n\tmov "__percpu_arg(1)",%%ax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \
"\n1:\tcmpxchgw %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -321,7 +321,7 @@ do { \
: "memory"); \
break; \
case 4: \
- asm("\n\tmov "__percpu_arg(1)",%%eax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
"\n1:\tcmpxchgl %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -329,7 +329,7 @@ do { \
: "memory"); \
break; \
case 8: \
- asm("\n\tmov "__percpu_arg(1)",%%rax" \
+ asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
"\n1:\tcmpxchgq %2, "__percpu_arg(1) \
"\n\tjnz 1b" \
: "=&a" (pxo_ret__), "+m" (var) \
@@ -345,32 +345,32 @@ do { \
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
-#define percpu_cmpxchg_op(var, oval, nval) \
+#define percpu_cmpxchg_op(qual, var, oval, nval) \
({ \
typeof(var) pco_ret__; \
typeof(var) pco_old__ = (oval); \
typeof(var) pco_new__ = (nval); \
switch (sizeof(var)) { \
case 1: \
- asm("cmpxchgb %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgb %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "q" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 2: \
- asm("cmpxchgw %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgw %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 4: \
- asm("cmpxchgl %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgl %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
break; \
case 8: \
- asm("cmpxchgq %2, "__percpu_arg(1) \
+ asm qual ("cmpxchgq %2, "__percpu_arg(1) \
: "=a" (pco_ret__), "+m" (var) \
: "r" (pco_new__), "0" (pco_old__) \
: "memory"); \
@@ -391,58 +391,70 @@ do { \
*/
#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
-
-#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val)
+
+/*
+ * raw_cpu_xchg() can use a load-store since it is not required to be
+ * IRQ-safe.
+ */
+#define raw_percpu_xchg_op(var, nval) \
+({ \
+ typeof(var) pxo_ret__ = raw_cpu_read(var); \
+ raw_cpu_write(var, (nval)); \
+ pxo_ret__; \
+})
+
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -466,23 +478,23 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp)
+#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
+
+#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction
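
The net effect of threading the new qual argument through these macros is that this_cpu_*() operations expand to volatile asm (a single instruction that is IRQ- and preemption-safe and that the compiler may not elide or reorder), while raw_cpu_*() operations omit the qualifier. A usage sketch with an invented counter:

DEFINE_PER_CPU(unsigned int, demo_count);

static void demo(void)
{
	this_cpu_inc(demo_count);	/* volatile asm: safe anywhere */

	preempt_disable();
	raw_cpu_inc(demo_count);	/* non-volatile: caller prevents
					 * migration; the compiler may
					 * combine or move adjacent raw ops */
	preempt_enable();
}
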
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c34a35c78618..3eab6ece52b4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,7 +105,7 @@ struct cpuinfo_x86 {
int x86_power;
unsigned long loops_per_jiffy;
/* cpuid returned max cores value: */
- u16 x86_max_cores;
+ u16 x86_max_cores;
u16 apicid;
u16 initial_apicid;
u16 x86_clflush_size;
@@ -117,6 +117,8 @@ struct cpuinfo_x86 {
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
+ u16 cpu_die_id;
+ u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
@@ -144,7 +146,8 @@ enum cpuid_regs_idx {
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_NUM 11
#define X86_VENDOR_UNKNOWN 0xff
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..78cf265c5b58 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -102,8 +102,7 @@ extern unsigned long profile_pc(struct pt_regs *regs);
extern unsigned long
convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code);
+extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code);
static inline unsigned long regs_return_value(struct pt_regs *regs)
@@ -166,14 +165,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-#endif
#define GET_IP(regs) ((regs)->ip)
#define GET_FP(regs) ((regs)->bp)
@@ -201,14 +196,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
#ifdef CONFIG_X86_32
- /*
- * Traps from the kernel do not save sp and ss.
- * Use the helper function to retrieve sp.
- */
- if (offset == offsetof(struct pt_regs, sp) &&
- regs->cs == __KERNEL_CS)
- return kernel_stack_pointer(regs);
-
/* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +221,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
- return ((addr & ~(THREAD_SIZE - 1)) ==
- (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+ return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
}
/**
@@ -249,7 +235,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *addr = (unsigned long *)regs->sp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index b6033680d458..19b695ff2c68 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
/* some helper functions for xen and kvm pv clock sources */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index da545df207b2..e1356a3b8223 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
@@ -162,7 +163,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
-#define raw_smp_processor_id() (this_cpu_read(cpu_number))
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
#ifdef CONFIG_X86_32
extern int safe_smp_processor_id(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..b2e84d113f2a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
#ifdef __KERNEL__
#include <asm/nops.h>
+#include <asm/processor-flags.h>
+#include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,10 @@
*/
extern unsigned long __force_order;
+/* Starts false and gets enabled once CPU feature detection is done. */
+DECLARE_STATIC_KEY_FALSE(cr_pinning);
+extern unsigned long cr4_pinned_bits;
+
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
@@ -25,7 +31,20 @@ static inline unsigned long native_read_cr0(void)
static inline void native_write_cr0(unsigned long val)
{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+ bits_missing = X86_CR0_WP;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+ }
}
static inline unsigned long native_read_cr2(void)
@@ -74,7 +93,21 @@ static inline unsigned long native_read_cr4(void)
static inline void native_write_cr4(unsigned long val)
{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+ bits_missing = ~val & cr4_pinned_bits;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+ bits_missing);
+ }
}
#ifdef CONFIG_X86_64
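
A hedged sketch of the boot-side arming these write paths expect (the actual setup lands in arch/x86/kernel/cpu/common.c): the pinned-bit mask and the static key are only enabled once feature detection is done, so early CR writes stay unrestricted.

static void __init setup_cr_pinning_sketch(void)
{
	/* Pin only the security-sensitive bits this CPU supports. */
	cr4_pinned_bits = __read_cr4() &
			  (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
	static_branch_enable(&cr_pinning);
}
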
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a8d0cdf48616..14db05086bbf 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,7 +78,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
if (regs)
- return (unsigned long *)kernel_stack_pointer(regs);
+ return (unsigned long *)regs->sp;
if (task == current)
return __builtin_frame_address(0);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 880b5515b1d6..70c09967a999 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
+/*
+ * Currently, the maximum size observed in the kernel code is
+ * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, both of which are 5 bytes.
+ * Raise it if needed.
+ */
+#define POKE_MAX_OPCODE_SIZE 5
+
+struct text_poke_loc {
+ void *detour;
+ void *addr;
+ size_t len;
+ const char opcode[POKE_MAX_OPCODE_SIZE];
+};
+
extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
@@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -51,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5
-#ifdef CONFIG_X86_64
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -69,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func);
}
-#endif /* CONFIG_X86_64 */
#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
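
A sketch (call site invented) of the new batch interface: two patch sites are handled behind a single set of INT3/IPI synchronization rounds instead of one round trip per site. Entries are assumed sorted by addr, since the INT3 handler looks them up with bsearch(), and len must not exceed POKE_MAX_OPCODE_SIZE.

static void poke_two_sites(void *addr1, const void *opc1,
			   void *addr2, const void *opc2, size_t len)
{
	struct text_poke_loc vec[2] = {
		{ .addr = addr1, .len = len },	/* assumes addr1 < addr2 */
		{ .addr = addr2, .len = len },
	};

	/* Cast needed because opcode[] is const-qualified. */
	memcpy((void *)vec[0].opcode, opc1, len);
	memcpy((void *)vec[1].opcode, opc2, len);
	text_poke_bp_batch(vec, 2);
}
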
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..8ac563abb567 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,6 +7,7 @@
extern void hpet_time_init(void);
extern void time_init(void);
+extern bool pit_timer_init(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..4b14d2318251 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
+#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
+#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef CONFIG_SMP
+#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
+extern unsigned int __max_die_per_package;
+
+static inline int topology_max_die_per_package(void)
+{
+ return __max_die_per_package;
+}
+
extern int __max_smt_threads;
static inline int topology_max_smt_threads(void)
@@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void)
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
#else
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int
+topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_phys_to_logical_die(unsigned int die,
+ unsigned int cpu) { return 0; }
+static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
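
With the new die level in place, a trivial sketch (helper name invented) of deriving the system-wide logical die count from the two limits exported above:

static int max_logical_dies(void)
{
	return topology_max_packages() * topology_max_die_per_package();
}
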
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..ae91429129a6
--- /dev/null
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * Copyright (C) 2019 ARM Limited.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <asm/pvclock.h>
+#include <clocksource/hyperv_timer.h>
+
+#define __vdso_data (VVAR(_vdso_data))
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+/*
+ * Declare the memory-mapped vclock data pages. These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const. The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+/*
+ * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page hvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifndef BUILD_VDSO32
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+#else
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
+ : "memory", "edx");
+
+ return ret;
+}
+
+static __always_inline long
+clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+static u64 vread_pvclock(void)
+{
+ const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
+ u32 version;
+ u64 ret;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes, or that the
+ * version is increased whenever the underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+
+ do {
+ version = pvclock_read_begin(pvti);
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
+
+ ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
+ } while (pvclock_read_retry(pvti, version));
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static u64 vread_hvclock(void)
+{
+ return hv_read_tsc_page(&hvclock_page);
+}
+#endif
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ if (clock_mode == VCLOCK_TSC)
+ return (u64)rdtsc_ordered();
+ /*
+ * For any memory-mapped vclock type, we need to make sure that gcc
+ * doesn't cleverly hoist a load before the mode check. Otherwise we
+ * might end up touching the memory-mapped page even if the vclock in
+ * question isn't enabled, which will segfault. Hence the barriers.
+ */
+#ifdef CONFIG_PARAVIRT_CLOCK
+ if (clock_mode == VCLOCK_PVCLOCK) {
+ barrier();
+ return vread_pvclock();
+ }
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ if (clock_mode == VCLOCK_HVCLOCK) {
+ barrier();
+ return vread_hvclock();
+ }
+#endif
+ return U64_MAX;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return __vdso_data;
+}
+
+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles is greater than
+ * @last. If not, use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction, because the
+ * clocksource mask of all VDSO-capable clocksources on x86 is U64_MAX,
+ * which would make the masking a pointless operation. The compiler cannot
+ * optimize it away, as the mask comes from the vdso data and is not a
+ * compile-time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
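
For context, a simplified sketch of the generic consumer in lib/vdso/ that these hooks plug into: the counter is sampled under the vdso seqcount and the x86 delta rule above is applied. Field names follow the common struct vdso_data; this is an abbreviation, not the in-tree implementation.

static __always_inline u64 sketch_hres_ns(const struct vdso_data *vd)
{
	u64 cycles, ns;
	u32 seq;

	do {
		seq = vdso_read_begin(vd);
		cycles = __arch_get_hw_counter(vd->clock_mode);
		ns = vd->basetime[CLOCK_MONOTONIC].nsec;
		ns += vdso_calc_delta(cycles, vd->cycle_last,
				      vd->mask, vd->mult);
	} while (unlikely(vdso_read_retry(vd, seq)));

	return ns >> vd->shift;
}
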
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..0026ab2123ce
--- /dev/null
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+int vclocks_used __read_mostly;
+
+DEFINE_VVAR(struct vdso_data, _vdso_data);
+/*
+ * Update the vDSO data page to keep it in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__x86_get_k_vdso_data(void)
+{
+ return _vdso_data;
+}
+#define __arch_get_k_vdso_data __x86_get_k_vdso_data
+
+static __always_inline
+int __x86_get_clock_mode(struct timekeeper *tk)
+{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
+ return vclock_mode;
+}
+#define __arch_get_clock_mode __x86_get_clock_mode
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 913a133f8e6f..a2638c6124ed 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -3,7 +3,9 @@
#define _ASM_X86_VGTOD_H
#include <linux/compiler.h>
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
#include <uapi/linux/time.h>
@@ -13,81 +15,10 @@ typedef u64 gtod_long_t;
typedef unsigned long gtod_long_t;
#endif
-/*
- * There is one of these objects in the vvar page for each
- * vDSO-accelerated clockid. For high-resolution clocks, this encodes
- * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
- * clocks, this encodes the actual time.
- *
- * To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
- */
-struct vgtod_ts {
- u64 sec;
- u64 nsec;
-};
-
-#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
-#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
-
-/*
- * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
- * so be carefull by modifying this structure.
- */
-struct vsyscall_gtod_data {
- unsigned int seq;
-
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- struct vgtod_ts basetime[VGTOD_BASES];
-
- int tz_minuteswest;
- int tz_dsttime;
-};
-extern struct vsyscall_gtod_data vsyscall_gtod_data;
-
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
-static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
-{
- unsigned int ret;
-
-repeat:
- ret = READ_ONCE(s->seq);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
- smp_rmb();
- return ret;
-}
-
-static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
- unsigned int start)
-{
- smp_rmb();
- return unlikely(s->seq != start);
-}
-
-static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
-{
- ++s->seq;
- smp_wmb();
-}
-
-static inline void gtod_write_end(struct vsyscall_gtod_data *s)
-{
- smp_wmb();
- ++s->seq;
-}
-
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index b986b2ca688a..ab60a71a8dcb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
* Called on instruction fetch fault in vsyscall page.
* Returns true if handled.
*/
-extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+extern bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
-static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
+static inline bool emulate_vsyscall(unsigned long error_code,
+ struct pt_regs *regs, unsigned long address)
{
return false;
}
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index e474f5c6e387..32f5d9a0b90e 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -32,19 +32,20 @@
extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name __attribute__((visibility("hidden")));
+ extern type vvar_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden")));
#define VVAR(name) (vvar_ ## name)
#define DEFINE_VVAR(type, name) \
- type name \
+ type name[CS_BASES] \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
#endif
/* DECLARE_VVAR(offset, type, name) */
-DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
+DECLARE_VVAR(128, struct vdso_data, _vdso_data)
#undef DECLARE_VVAR
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index ce1b5cc360a2..3578ad248bc9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
ifdef CONFIG_FRAME_POINTER
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index a5e5484988fd..caf2edccbad2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
c->x86_stepping >= 0x0e))
flags->bm_check = 1;
}
+
+ if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
+ /*
+ * All Zhaoxin CPUs that support C3 share their cache, and
+ * the cache should not be flushed by software while
+ * entering a C3-type state.
+ */
+ flags->bm_check = 1;
+ /*
+ * On all recent Zhaoxin platforms, ARB_DISABLE is a nop,
+ * so set bm_control to zero to indicate that ARB_DISABLE
+ * is not required while entering a C3-type state.
+ */
+ flags->bm_control = 0;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 390596b761e3..99ef8b6f9a1a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -14,6 +14,7 @@
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
+#include <linux/bsearch.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -277,7 +278,7 @@ static inline bool is_jmp(const u8 opcode)
}
static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
u8 *next_rip, *tgt_rip;
s32 n_dspl, o_dspl;
@@ -286,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
if (a->replacementlen != 5)
return;
- o_dspl = *(s32 *)(insnbuf + 1);
+ o_dspl = *(s32 *)(insn_buff + 1);
/* next_rip of the replacement JMP */
next_rip = repl_insn + a->replacementlen;
@@ -312,9 +313,9 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
two_byte_jmp:
n_dspl -= 2;
- insnbuf[0] = 0xeb;
- insnbuf[1] = (s8)n_dspl;
- add_nops(insnbuf + 2, 3);
+ insn_buff[0] = 0xeb;
+ insn_buff[1] = (s8)n_dspl;
+ add_nops(insn_buff + 2, 3);
repl_len = 2;
goto done;
@@ -322,8 +323,8 @@ two_byte_jmp:
five_byte_jmp:
n_dspl -= 5;
- insnbuf[0] = 0xe9;
- *(s32 *)&insnbuf[1] = n_dspl;
+ insn_buff[0] = 0xe9;
+ *(s32 *)&insn_buff[1] = n_dspl;
repl_len = 5;
@@ -370,7 +371,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
{
struct alt_instr *a;
u8 *instr, *replacement;
- u8 insnbuf[MAX_PATCH_LEN];
+ u8 insn_buff[MAX_PATCH_LEN];
DPRINTK("alt table %px, -> %px", start, end);
/*
@@ -383,11 +384,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* order.
*/
for (a = start; a < end; a++) {
- int insnbuf_sz = 0;
+ int insn_buff_sz = 0;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
if (!boot_cpu_has(a->cpuid)) {
if (a->padlen > 1)
@@ -405,8 +406,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
- memcpy(insnbuf, replacement, a->replacementlen);
- insnbuf_sz = a->replacementlen;
+ memcpy(insn_buff, replacement, a->replacementlen);
+ insn_buff_sz = a->replacementlen;
/*
* 0xe8 is a relative jump; fix the offset.
@@ -414,24 +415,24 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* Instruction length is checked before the opcode to avoid
* accessing uninitialized bytes for zero-length replacements.
*/
- if (a->replacementlen == 5 && *insnbuf == 0xe8) {
- *(s32 *)(insnbuf + 1) += replacement - instr;
+ if (a->replacementlen == 5 && *insn_buff == 0xe8) {
+ *(s32 *)(insn_buff + 1) += replacement - instr;
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
- *(s32 *)(insnbuf + 1),
- (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+ *(s32 *)(insn_buff + 1),
+ (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
}
if (a->replacementlen && is_jmp(replacement[0]))
- recompute_jump(a, instr, replacement, insnbuf);
+ recompute_jump(a, instr, replacement, insn_buff);
if (a->instrlen > a->replacementlen) {
- add_nops(insnbuf + a->replacementlen,
+ add_nops(insn_buff + a->replacementlen,
a->instrlen - a->replacementlen);
- insnbuf_sz += a->instrlen - a->replacementlen;
+ insn_buff_sz += a->instrlen - a->replacementlen;
}
- DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
+ DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insnbuf, insnbuf_sz);
+ text_poke_early(instr, insn_buff, insn_buff_sz);
}
}
@@ -593,33 +594,104 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
struct paravirt_patch_site *p;
- char insnbuf[MAX_PATCH_LEN];
+ char insn_buff[MAX_PATCH_LEN];
for (p = start; p < end; p++) {
unsigned int used;
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
- memcpy(insnbuf, p->instr, p->len);
- used = pv_ops.init.patch(p->instrtype, insnbuf,
- (unsigned long)p->instr, p->len);
+ memcpy(insn_buff, p->instr, p->len);
+ used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
/* Pad the rest with nops */
- add_nops(insnbuf + used, p->len - used);
- text_poke_early(p->instr, insnbuf, p->len);
+ add_nops(insn_buff + used, p->len - used);
+ text_poke_early(p->instr, insn_buff, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
+/*
+ * Self-test for the INT3 based CALL emulation code.
+ *
+ * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
+ * properly and that there is a stack gap between the INT3 frame and the
+ * previous context. Without this gap doing a virtual PUSH on the interrupted
+ * stack would corrupt the INT3 IRET frame.
+ *
+ * See entry_{32,64}.S for more details.
+ */
+static void __init int3_magic(unsigned int *ptr)
+{
+ *ptr = 1;
+}
+
+extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
+
+static int __init
+int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!regs || user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (val != DIE_INT3)
+ return NOTIFY_DONE;
+
+ if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
+ return NOTIFY_DONE;
+
+ int3_emulate_call(regs, (unsigned long)&int3_magic);
+ return NOTIFY_STOP;
+}
+
+static void __init int3_selftest(void)
+{
+ static __initdata struct notifier_block int3_exception_nb = {
+ .notifier_call = int3_exception_notify,
+ .priority = INT_MAX-1, /* last */
+ };
+ unsigned int val = 0;
+
+ BUG_ON(register_die_notifier(&int3_exception_nb));
+
+ /*
+ * Basically: int3_magic(&val); but really complicated :-)
+ *
+ * Stick the address of the INT3 instruction into int3_selftest_ip,
+ * then trigger the INT3, padded with NOPs to match a CALL instruction
+ * length.
+ */
+ asm volatile ("1: int3; nop; nop; nop; nop\n\t"
+ ".pushsection .init.data,\"aw\"\n\t"
+ ".align " __ASM_SEL(4, 8) "\n\t"
+ ".type int3_selftest_ip, @object\n\t"
+ ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
+ "int3_selftest_ip:\n\t"
+ __ASM_SEL(.long, .quad) " 1b\n\t"
+ ".popsection\n\t"
+ : : __ASM_SEL_RAW(a, D) (&val) : "memory");
+
+ BUG_ON(val != 1);
+
+ unregister_die_notifier(&int3_exception_nb);
+}
+
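For reference, the emulation the selftest exercises amounts to a virtual PUSH of the return address followed by a virtual JMP to the target. The standalone model below mirrors that arithmetic under stated assumptions (fake_regs and the modeled stack are illustrative, not kernel API): the trap leaves ip one byte past the INT3, so the pushed return address is ip - INT3_INSN_SIZE + CALL_INSN_SIZE, which lands just past the four NOPs that pad the INT3 up to CALL length.

/* Standalone model of the CALL emulation arithmetic; compiles with any
 * hosted C compiler. Not kernel code: fake_regs stands in for pt_regs. */
#include <stdio.h>

#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5

struct fake_regs { unsigned long ip; unsigned long sp; };

static void emulate_call(struct fake_regs *regs, unsigned long *stack,
                         unsigned long func)
{
	/* Return address: first byte after where a 5-byte CALL would end. */
	unsigned long ret = regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE;

	stack[--regs->sp] = ret; /* virtual PUSH onto the modeled stack */
	regs->ip = func;         /* virtual JMP to the call target      */
}

int main(void)
{
	unsigned long stack[8];
	struct fake_regs regs = { .ip = 0x1001, .sp = 8 }; /* ip is past INT3 */

	emulate_call(&regs, stack, 0x2000);
	printf("ip=%#lx, pushed return=%#lx\n", regs.ip, stack[regs.sp]);
	return 0;
}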
void __init alternative_instructions(void)
{
- /* The patching is not fully atomic, so try to avoid local interruptions
- that might execute the to be patched code.
- Other CPUs are not running. */
+ int3_selftest();
+
+ /*
+ * The patching is not fully atomic, so try to avoid local
+ * interruptions that might execute the code that is about to be patched.
+ * Other CPUs are not running.
+ */
stop_nmi();
/*
@@ -644,10 +716,11 @@ void __init alternative_instructions(void)
_text, _etext);
}
- if (!uniproc_patched || num_possible_cpus() == 1)
+ if (!uniproc_patched || num_possible_cpus() == 1) {
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
+ }
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
@@ -848,81 +921,133 @@ static void do_sync_core(void *info)
sync_core();
}
-static bool bp_patching_in_progress;
-static void *bp_int3_handler, *bp_int3_addr;
+static struct bp_patching_desc {
+ struct text_poke_loc *vec;
+ int nr_entries;
+} bp_patching;
+
+static int patch_cmp(const void *key, const void *elt)
+{
+ struct text_poke_loc *tp = (struct text_poke_loc *) elt;
+
+ if (key < tp->addr)
+ return -1;
+ if (key > tp->addr)
+ return 1;
+ return 0;
+}
+NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
+ struct text_poke_loc *tp;
+ unsigned char int3 = 0xcc;
+ void *ip;
+
/*
* Having observed our INT3 instruction, we now must observe
- * bp_patching_in_progress.
+ * bp_patching.nr_entries.
*
- * in_progress = TRUE INT3
+ * nr_entries != 0 INT3
* WMB RMB
- * write INT3 if (in_progress)
+ * write INT3 if (nr_entries)
*
- * Idem for bp_int3_handler.
+ * Idem for other elements in bp_patching.
*/
smp_rmb();
- if (likely(!bp_patching_in_progress))
+ if (likely(!bp_patching.nr_entries))
return 0;
- if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ if (user_mode(regs))
return 0;
- /* set up the specified breakpoint handler */
- regs->ip = (unsigned long) bp_int3_handler;
+ /*
+ * Discount the sizeof(int3). See text_poke_bp_batch().
+ */
+ ip = (void *) regs->ip - sizeof(int3);
+
+ /*
+ * Skip the binary search if there is a single member in the vector.
+ */
+ if (unlikely(bp_patching.nr_entries > 1)) {
+ tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+ sizeof(struct text_poke_loc),
+ patch_cmp);
+ if (!tp)
+ return 0;
+ } else {
+ tp = bp_patching.vec;
+ if (tp->addr != ip)
+ return 0;
+ }
+
+ /* set up the specified breakpoint detour */
+ regs->ip = (unsigned long) tp->detour;
return 1;
}
NOKPROBE_SYMBOL(poke_int3_handler);
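Two things make the lookup above work: the smp_wmb()/smp_rmb() pairing guarantees bp_patching is visible before any INT3 byte can trap, and text_poke_bp_batch() callers must hand in a vector sorted by address so bsearch() can resolve the trapping ip. A minimal userspace model of the comparator trick follows; note that, as in patch_cmp() above, the key pointer itself carries the address being looked up rather than pointing at a key object.

/* Userspace model of the sorted-vector lookup (illustrative only). */
#include <stdio.h>
#include <stdlib.h>

struct loc { void *addr; };

static int loc_cmp(const void *key, const void *elt)
{
	const struct loc *tp = elt;

	/* The key is the searched-for address itself, not a pointer to one. */
	if (key < tp->addr)
		return -1;
	if (key > tp->addr)
		return 1;
	return 0;
}

int main(void)
{
	/* Must be sorted by addr, or bsearch() may miss entries. */
	struct loc vec[] = {
		{ (void *)0x100 }, { (void *)0x200 }, { (void *)0x300 },
	};
	struct loc *hit = bsearch((void *)0x200, vec, 3, sizeof(*vec), loc_cmp);

	printf("%s\n", hit ? "found" : "miss");
	return 0;
}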
/**
- * text_poke_bp() -- update instructions on live kernel on SMP
- * @addr: address to patch
- * @opcode: opcode of new instruction
- * @len: length to copy
- * @handler: address to jump to when the temporary breakpoint is hit
+ * text_poke_bp_batch() -- update instructions on live kernel on SMP
+ * @tp: vector of instructions to patch
+ * @nr_entries: number of entries in the vector
*
* Modify multi-byte instructions by using an int3 breakpoint on SMP.
* We completely avoid stop_machine() here, and achieve the
* synchronization using an int3 breakpoint.
*
* The way it is done:
- * - add a int3 trap to the address that will be patched
+ * - For each entry in the vector:
+ * - add an int3 trap to the address that will be patched
* - sync cores
- * - update all but the first byte of the patched range
+ * - For each entry in the vector:
+ * - update all but the first byte of the patched range
* - sync cores
- * - replace the first byte (int3) by the first byte of
- * replacing opcode
+ * - For each entry in the vector:
+ * - replace the first byte (int3) by the first byte of
+ * replacing opcode
* - sync cores
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
+ int patched_all_but_first = 0;
unsigned char int3 = 0xcc;
-
- bp_int3_handler = handler;
- bp_int3_addr = (u8 *)addr + sizeof(int3);
- bp_patching_in_progress = true;
+ unsigned int i;
lockdep_assert_held(&text_mutex);
+ bp_patching.vec = tp;
+ bp_patching.nr_entries = nr_entries;
+
/*
* Corresponding read barrier in int3 notifier for making sure the
- * in_progress and handler are correctly ordered wrt. patching.
+ * nr_entries and handler are correctly ordered wrt. patching.
*/
smp_wmb();
- text_poke(addr, &int3, sizeof(int3));
+ /*
+ * First step: add an int3 trap to the address that will be patched.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, &int3, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
- if (len - sizeof(int3) > 0) {
- /* patch all but the first byte */
- text_poke((char *)addr + sizeof(int3),
- (const char *) opcode + sizeof(int3),
- len - sizeof(int3));
+ /*
+ * Second step: update all but the first byte of the patched range.
+ */
+ for (i = 0; i < nr_entries; i++) {
+ if (tp[i].len - sizeof(int3) > 0) {
+ text_poke((char *)tp[i].addr + sizeof(int3),
+ (const char *)tp[i].opcode + sizeof(int3),
+ tp[i].len - sizeof(int3));
+ patched_all_but_first++;
+ }
+ }
+
+ if (patched_all_but_first) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -931,14 +1056,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
on_each_cpu(do_sync_core, NULL, 1);
}
- /* patch the first byte */
- text_poke(addr, opcode, sizeof(int3));
+ /*
+ * Third step: replace the first byte (int3) by the first byte of
+ * replacing opcode.
+ */
+ for (i = 0; i < nr_entries; i++)
+ text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
*/
- bp_patching_in_progress = false;
+ bp_patching.vec = NULL;
+ bp_patching.nr_entries = 0;
}
+/**
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @handler: address to jump to when the temporary breakpoint is hit
+ *
+ * Update a single instruction using a one-entry vector on the stack,
+ * avoiding dynamically allocated memory. This function should be used
+ * when it is not possible to allocate memory.
+ */
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+{
+ struct text_poke_loc tp = {
+ .detour = handler,
+ .addr = addr,
+ .len = len,
+ };
+
+ if (len > POKE_MAX_OPCODE_SIZE) {
+ WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
+ return;
+ }
+
+ memcpy((void *)tp.opcode, opcode, len);
+
+ text_poke_bp_batch(&tp, 1);
+}
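A hedged sketch of what a batching caller could look like, assuming the text_poke_loc layout used above (addr, len, detour, plus an embedded opcode buffer) and that text_mutex is held; it is not a call site from this series. The payoff of batching is that the three sync_core() IPI rounds are paid once per batch instead of once per patched location.

/* Illustrative only: patch two sites in one batch. */
static void example_patch_two_sites(void *site1, void *site2,
				    const void *insn1, const void *insn2,
				    size_t len, void *detour)
{
	/* site1 < site2 assumed: entries must be sorted by addr for
	 * poke_int3_handler()'s bsearch(). */
	struct text_poke_loc vec[2] = {
		{ .addr = site1, .len = len, .detour = detour },
		{ .addr = site2, .len = len, .detour = detour },
	};

	/* Caller must hold text_mutex and keep len <= POKE_MAX_OPCODE_SIZE. */
	memcpy((void *)vec[0].opcode, insn1, len);
	memcpy((void *)vec[1].opcode, insn2, len);

	text_poke_bp_batch(vec, 2);
}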
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 002aedc69393..d63e63b7d1d9 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -72,7 +72,7 @@ static const struct pci_device_id hygon_root_ids[] = {
{}
};
-const struct pci_device_id hygon_nb_misc_ids[] = {
+static const struct pci_device_id hygon_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 85be316665b4..1bd91cb7b320 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -195,7 +195,7 @@ static struct resource lapic_resource = {
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
-unsigned int lapic_timer_frequency = 0;
+unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
@@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
- __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
+ __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
return 0;
}
@@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
static int __init lapic_init_clockevent(void)
{
- if (!lapic_timer_frequency)
+ if (!lapic_timer_period)
return -1;
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
TICK_NSEC, lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
@@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void)
return 0;
}
+bool __init apic_needs_pit(void)
+{
+ /*
+ * If the frequencies are not known, PIT is required for both TSC
+ * and apic timer calibration.
+ */
+ if (!tsc_khz || !cpu_khz)
+ return true;
+
+ /* Is there an APIC at all? */
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return true;
+
+ /* Deadline timer is based on TSC so no further PIT action required */
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+
+ /* APIC timer disabled? */
+ if (disable_apic_timer)
+ return true;
+ /*
+ * The APIC timer frequency is known already, no PIT calibration
+ * required. If unknown, let the PIT be initialized.
+ */
+ return lapic_timer_period == 0;
+}
+
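The decision tree above is compact enough to model in isolation. The standalone sketch below restates it with plain booleans (all parameter names are illustrative); it returns true exactly when the PIT must be kept around for calibration duty.

/* Standalone model of the PIT-needed decision; flags are illustrative. */
#include <stdbool.h>
#include <stdio.h>

static bool needs_pit(unsigned int tsc_khz, unsigned int cpu_khz,
		      bool have_apic, bool tsc_deadline,
		      bool apic_timer_disabled,
		      unsigned int lapic_timer_period)
{
	if (!tsc_khz || !cpu_khz)	/* no known frequencies: calibrate */
		return true;
	if (!have_apic)			/* the PIT is the only timer left */
		return true;
	if (tsc_deadline)		/* deadline timer runs off the TSC */
		return false;
	if (apic_timer_disabled)
		return true;
	return lapic_timer_period == 0;	/* unknown period: calibrate */
}

int main(void)
{
	/* Known TSC frequency plus deadline timer: the PIT can stay off. */
	printf("%d\n", needs_pit(2400000, 2400000, true, true, false, 0));
	return 0;
}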
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
+ lapic_timer_period);
/*
* Direct calibration methods must have an always running
* local APIC timer, no need for broadcast timer.
@@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
- lapic_timer_frequency);
+ lapic_timer_period);
if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
"%u.%04u MHz.\n",
- lapic_timer_frequency / (1000000 / HZ),
- lapic_timer_frequency % (1000000 / HZ));
+ lapic_timer_period / (1000000 / HZ),
+ lapic_timer_period % (1000000 / HZ));
/*
* Do a sanity check on the APIC calibration result
*/
- if (lapic_timer_frequency < (1000000 / HZ)) {
+ if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
@@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void)
apic_write(APIC_LVT1, value);
}
+static void __init apic_bsp_setup(bool upmode);
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
@@ -2041,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
entering_irq();
trace_spurious_apic_entry(vector);
+ inc_irq_stat(irq_spurious_count);
+
+ /*
+ * If this is a spurious interrupt then do not acknowledge it.
+ */
+ if (vector == SPURIOUS_APIC_VECTOR) {
+ /* See SDM vol 3 */
+ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+ smp_processor_id());
+ goto out;
+ }
+
/*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
+ * If it is a vectored one, verify it's set in the ISR. If set,
+ * acknowledge it.
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
- if (v & (1 << (vector & 0x1f)))
+ if (v & (1 << (vector & 0x1f))) {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ vector, smp_processor_id());
ack_APIC_irq();
-
- inc_irq_stat(irq_spurious_count);
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
- "should never happen.\n", vector, smp_processor_id());
-
+ } else {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ vector, smp_processor_id());
+ }
+out:
trace_spurious_apic_exit(vector);
exiting_irq();
}
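The ISR indexing in the hunk above is terse: the 256 vectors are spread across eight 32-bit ISR words, and each word sits at a 16-byte stride in the APIC register window, which is why the word offset comes out as (vector & ~0x1f) >> 1. A quick standalone check of that arithmetic:

/* Standalone check of the APIC ISR indexing arithmetic. */
#include <stdio.h>

int main(void)
{
	unsigned int vector = 0xec;
	unsigned int reg_off = (vector & ~0x1f) >> 1; /* (vector/32) * 0x10 */
	unsigned int bit = vector & 0x1f;             /* bit within the word */

	/* vector 0xec: word 7 at offset 0x70, bit 12 */
	printf("ISR word offset %#x, bit %u\n", reg_off, bit);
	return 0;
}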
@@ -2416,11 +2456,8 @@ static void __init apic_bsp_up_setup(void)
/**
* apic_bsp_setup - Setup function for local apic and io-apic
* @upmode: Force UP mode (for APIC_init_uniprocessor)
- *
- * Returns:
- * apic_id of BSP APIC
*/
-void __init apic_bsp_setup(bool upmode)
+static void __init apic_bsp_setup(bool upmode)
{
connect_bsp_APIC();
if (upmode)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index bf083c3f1d73..bbdca603f94a 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
int cpu = smp_processor_id();
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
@@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector)
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
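The switch from clear_bit() to __clear_bit() here (and in the x2apic cluster path below) is safe because mask is a private on-stack snapshot: with no concurrent writers, the locked read-modify-write of the atomic variant buys nothing. A trivial model of the non-atomic form:

/* Standalone model: a private copy of the CPU mask needs no atomics. */
#include <stdio.h>

int main(void)
{
	unsigned long mask = 0xff;	/* local snapshot, no concurrent writers */
	int cpu = 3;			/* current CPU, excluded from the IPI */

	mask &= ~(1UL << cpu);		/* what __clear_bit() does, no lock prefix */
	printf("IPI mask: %#lx\n", mask);
	return 0;
}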
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53aa234a6803..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -58,6 +58,7 @@
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
+#include <asm/time.h>
#include <asm/i8259.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
@@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
return ret;
}
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it, it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+ raw_spin_lock(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. Its
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */
+ if (rentry.irr && rentry.trigger) {
+ *state = true;
+ break;
+ }
+ }
+ raw_spin_unlock(&ioapic_lock);
+ return 0;
+}
+
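With the callback wired up below, the generic interrupt core can poll IRQCHIP_STATE_ACTIVE from __synchronize_hardirq() and hold off free_irq() until the Remote-IRR bits drain. As a hedged sketch (not code from this series), a waiter built on the existing irq_get_irqchip_state() helper would look roughly like:

/* Illustrative only: spin until a level-triggered interrupt drains. */
#include <linux/interrupt.h>
#include <linux/delay.h>

static void example_wait_for_ioapic_quiesce(unsigned int irq)
{
	bool inflight = true;

	while (!irq_get_irqchip_state(irq, IRQCHIP_STATE_ACTIVE, &inflight) &&
	       inflight)
		udelay(10);	/* Remote-IRR still set: still in flight */
}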
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_eoi = ioapic_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_eoi = ioapic_ir_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -2083,6 +2130,9 @@ static inline void __init check_timer(void)
unsigned long flags;
int no_pin1 = 0;
+ if (!global_clock_event)
+ return;
+
local_irq_save(flags);
/*
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index dad0dd759de2..7f7533462474 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
return d;
}
-int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
+int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
int dev_num)
{
struct irq_alloc_info info;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.hpet_data = dev;
+ info.hpet_data = hc;
info.hpet_id = hpet_dev_id(domain);
info.hpet_index = dev_num;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index e7cb78aed644..fdacb864c3dd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -340,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd)
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
apicd->prev_cpu);
- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
apicd->vector = 0;
@@ -349,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd)
if (!vector)
return;
- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 7685444a106b..609e499387a1 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
cpumask_copy(tmpmsk, mask);
/* If IPI should not be sent to self, clear current CPU */
if (apic_dest != APIC_DEST_ALLINC)
- cpumask_clear_cpu(smp_processor_id(), tmpmsk);
+ __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..da64452584b0 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,7 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5102bf7c8192..d7a1e5a9331c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -24,6 +24,7 @@ obj-y += match.o
obj-y += bugs.o
obj-y += aperfmperf.o
obj-y += cpuid-deps.o
+obj-y += umwait.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
+obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
@@ -47,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+obj-$(CONFIG_ACRN_GUEST) += acrn.o
ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
@@ -54,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@
cpufeature = $(src)/../../include/asm/cpufeatures.h
-targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
endif
-clean-files += capflags.c
+targets += capflags.c
diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c
new file mode 100644
index 000000000000..676022e71791
--- /dev/null
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN detection support
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ *
+ * Jason Chen CJ <jason.cj.chen@intel.com>
+ * Zhao Yakui <yakui.zhao@intel.com>
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <asm/acrn.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+#include <asm/hypervisor.h>
+#include <asm/irq_regs.h>
+
+static uint32_t __init acrn_detect(void)
+{
+ return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0);
+}
+
+static void __init acrn_init_platform(void)
+{
+ /* Setup the IDT for ACRN hypervisor callback */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector);
+}
+
+static bool acrn_x2apic_available(void)
+{
+ /*
+ * x2apic is not supported for now. Future enablement will have to check
+ * X86_FEATURE_X2APIC to determine whether x2apic is supported in the
+ * guest.
+ */
+ return false;
+}
+
+static void (*acrn_intr_handler)(void);
+
+__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ /*
+ * The hypervisor requires that the APIC EOI should be acked.
+ * If the APIC EOI is not acked, the APIC ISR bit for the
+ * HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it
+ * will block the interrupt whose vector is lower than
+ * HYPERVISOR_CALLBACK_VECTOR.
+ */
+ entering_ack_irq();
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (acrn_intr_handler)
+ acrn_intr_handler();
+
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
+const __initconst struct hypervisor_x86 x86_hyper_acrn = {
+ .name = "ACRN",
+ .detect = acrn_detect,
+ .type = X86_HYPER_ACRN,
+ .init.init_platform = acrn_init_platform,
+ .init.x2apic_available = acrn_x2apic_available,
+};
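hypervisor_cpuid_base() scans the 0x40000000 range for the 12-byte signature in EBX:ECX:EDX. A userspace approximation using the compiler's <cpuid.h> intrinsics is below; unlike the kernel helper it does not first check the CPUID hypervisor bit, so on bare metal the leaf contents are unreliable and the loop simply fails to match.

/* Userspace sketch: probe the hypervisor CPUID range for a signature
 * (gcc/clang, x86 only; illustrative, not the kernel implementation). */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

static unsigned int find_hv_base(const char *sig12)
{
	unsigned int base, eax, signature[3];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, signature[0], signature[1], signature[2]);
		if (!memcmp(sig12, signature, 12))
			return base;
	}
	return 0;
}

int main(void)
{
	printf("ACRN base: %#x\n", find_hv_base("ACRNACRNACRN"));
	return 0;
}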
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index e71a6ff8a67e..e2f319dc992d 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -13,6 +13,7 @@
#include <linux/percpu.h>
#include <linux/cpufreq.h>
#include <linux/smp.h>
+#include <linux/sched/isolation.h>
#include "cpu.h"
@@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
return per_cpu(samples.khz, cpu);
}
@@ -101,9 +105,12 @@ void arch_freq_prepare_all(void)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ continue;
if (!aperfmperf_snapshot_cpu(cpu, now, false))
wait = true;
+ }
if (wait)
msleep(APERFMPERF_REFRESH_DELAY_MS);
@@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
+ if (!housekeeping_cpu(cpu, HK_FLAG_MISC))
+ return 0;
+
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
return per_cpu(samples.khz, cpu);
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 395d46f78582..c7503be92f35 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
- } else if (c->x86 == 0x17 &&
- c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2c57fffebf9b..309b6b9b49d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,25 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+EXPORT_SYMBOL(cr_pinning);
+unsigned long cr4_pinned_bits __ro_after_init;
+EXPORT_SYMBOL(cr4_pinned_bits);
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+ unsigned long mask;
+
+ mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ static_key_enable(&cr_pinning.key);
+}
+
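The enforcement half of CR pinning lives in the CR4 write path rather than in this hunk; as a hedged sketch mirroring the names introduced above, a pinning-aware write helper would OR the recorded bits back in and warn once if they ever went missing:

/* Illustrative sketch of a pinning-aware CR4 write (not from this hunk). */
static inline void example_write_cr4(unsigned long val)
{
	unsigned long bits_missing = 0;

	if (static_branch_likely(&cr_pinning)) {
		if ((val & cr4_pinned_bits) != cr4_pinned_bits) {
			bits_missing = ~val & cr4_pinned_bits;
			val |= bits_missing;	/* never drop pinned bits */
		}
	}
	asm volatile("mov %0,%%cr4" : : "r" (val) : "memory");
	if (bits_missing)
		WARN_ONCE(1, "CR4 bits went missing: %lx!?\n", bits_missing);
}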
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -801,6 +820,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
}
}
+static void init_cqm(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
+ c->x86_cache_max_rmid = -1;
+ c->x86_cache_occ_scale = -1;
+ return;
+ }
+
+ /* will be overridden if occupancy monitoring exists */
+ c->x86_cache_max_rmid = cpuid_ebx(0xf);
+
+ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
+ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* QoS sub-leaf, EAX=0Fh, ECX=1 */
+ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_cache_max_rmid = ecx;
+ c->x86_cache_occ_scale = ebx;
+ }
+}
+
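The same enumeration can be reproduced from userspace with the compiler's CPUID intrinsics: sub-leaf 0 of leaf 0xf reports the maximum RMID in EBX, and sub-leaf 1 refines that for L3 monitoring and adds the occupancy scale. A small gcc/clang-only probe (illustrative, not kernel code):

/* Userspace probe of CPUID leaf 0xf (platform QoS monitoring). */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(0xf, 0, &eax, &ebx, &ecx, &edx))
		return 1;		/* leaf 0xf not supported */
	printf("max RMID (all events): %u\n", ebx);

	if (!__get_cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx))
		return 1;
	printf("L3 max RMID: %u, occupancy scale: %u bytes\n", ecx, ebx);
	return 0;
}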
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -823,6 +866,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
c->x86_capability[CPUID_7_EDX] = edx;
+
+ /* Check valid sub-leaf index before accessing it */
+ if (eax >= 1) {
+ cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_7_1_EAX] = eax;
+ }
}
/* Extended state features: level 0x0000000d */
@@ -832,33 +881,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* Additional Intel-defined flags: level 0x0000000F */
- if (c->cpuid_level >= 0x0000000F) {
-
- /* QoS sub-leaf, EAX=0Fh, ECX=0 */
- cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_0_EDX] = edx;
-
- if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
- /* will be overridden if occupancy monitoring exists */
- c->x86_cache_max_rmid = ebx;
-
- /* QoS sub-leaf, EAX=0Fh, ECX=1 */
- cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_F_1_EDX] = edx;
-
- if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
- ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
- (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
- c->x86_cache_max_rmid = ecx;
- c->x86_cache_occ_scale = ebx;
- }
- } else {
- c->x86_cache_max_rmid = -1;
- c->x86_cache_occ_scale = -1;
- }
- }
-
/* AMD-defined flags: level 0x80000001 */
eax = cpuid_eax(0x80000000);
c->extended_cpuid_level = eax;
@@ -889,6 +911,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
init_speculation_control(c);
+ init_cqm(c);
/*
* Clear/Set all flags overridden by options, after probe.
@@ -1299,6 +1322,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
cpu, apicid, c->initial_apicid);
}
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
+ BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
c->logical_proc_id = 0;
#endif
@@ -1464,6 +1488,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu();
#endif
cpu_detect_tlb(&boot_cpu_data);
+ setup_cr_pinning();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 2c0bd38a44ab..b5353244749b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -20,6 +20,7 @@ struct cpuid_dep {
* but it's difficult to tell that to the init reference checker.
*/
static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_FXSR, X86_FEATURE_FPU },
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
@@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMX, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
@@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{}
};
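The point of this table is transitive clearing: disabling a feature must also disable everything that directly or indirectly depends on it, so losing FXSR takes MMX and MMXEXT with it. The kernel walks the table iteratively; the standalone model below uses simple recursion to show the cascade:

/* Standalone model of transitive feature clearing over a dependency table. */
#include <stdbool.h>
#include <stdio.h>

enum { FPU, FXSR, MMX, MMXEXT, NFEAT };

struct dep { int feature, depends_on; };

static const struct dep deps[] = {
	{ FXSR,   FPU  },
	{ MMX,    FXSR },
	{ MMXEXT, MMX  },
};

static bool caps[NFEAT] = { true, true, true, true };

static void clear_feature(int f)
{
	size_t i;

	if (!caps[f])
		return;
	caps[f] = false;

	/* Everything that depends on f must go too, recursively. */
	for (i = 0; i < sizeof(deps) / sizeof(deps[0]); i++)
		if (deps[i].depends_on == f)
			clear_feature(deps[i].feature);
}

int main(void)
{
	clear_feature(FXSR);
	printf("fpu=%d fxsr=%d mmx=%d mmxext=%d\n",
	       caps[FPU], caps[FXSR], caps[MMX], caps[MMXEXT]);
	return 0;
}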
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 479ca4728de0..87e39ad8d873 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -32,6 +32,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_pv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
extern const struct hypervisor_x86 x86_hyper_jailhouse;
+extern const struct hypervisor_x86 x86_hyper_acrn;
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
@@ -49,6 +50,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#ifdef CONFIG_JAILHOUSE_GUEST
&x86_hyper_jailhouse,
#endif
+#ifdef CONFIG_ACRN_GUEST
+ &x86_hyper_acrn,
+#endif
};
enum x86_hypervisor_type x86_hyper_type;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f17c1a714779..8d6d92ebeb54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+/*
+ * Processors which have self-snooping capability can handle conflicting
+ * memory type across CPUs by snooping its own cache. However, there exist
+ * CPU models in which having conflicting memory types still leads to
+ * unpredictable behavior, machine check errors, or hangs. Clear this
+ * feature to prevent its use on machines with known errata.
+ */
+static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_model) {
+ case INTEL_FAM6_CORE_YONAH:
+ case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_G:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_SANDYBRIDGE:
+ setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
+ }
+}
+
static bool ring3mwait_disabled __read_mostly;
static int __init ring3mwait_disable(char *__unused)
@@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
}
check_mpx_erratum(c);
+ check_memory_type_self_snoop_errata(c);
/*
* Get the number of SMT siblings early from the extended topology
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 785050af85e5..6ea7fdc82f3c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = {
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
-static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
-{
- [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
-};
-
static const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */
+/* Map of banks that have more than MCA_MISC0 available. */
+static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
+static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
+{
+ u32 low, high;
+
+ /*
+ * For SMCA enabled processors, the BLKPTR field of the first MISC
+ * register (MCx_MISC0) indicates the presence of an additional set of
+ * MISC registers (MISC1-4).
+ */
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return;
+
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
+ return;
+
+ if (low & MASK_BLKPTR_LO)
+ per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
+
+}
+
static void smca_configure(unsigned int bank, unsigned int cpu)
{
unsigned int i, hwid_mcatype;
@@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
wrmsr(smca_config, low, high);
}
+ smca_set_misc_banks_map(bank, cpu);
+
/* Return early if this bank was already initialized. */
if (smca_banks[bank].hwid)
return;
@@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int bank, unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
- u32 low, high;
- u32 addr = 0;
-
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
- /* Check our cache first: */
- if (smca_bank_addrs[bank][block] != -1)
- return smca_bank_addrs[bank][block];
-
- /*
- * For SMCA enabled processors, BLKPTR field of the first MISC register
- * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
- */
- if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- goto out;
-
- if (!(low & MCI_CONFIG_MCAX))
- goto out;
-
- if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+ return 0;
-out:
- smca_bank_addrs[bank][block] = addr;
- return addr;
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
}
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
- unsigned int bank, unsigned int block)
+ unsigned int bank, unsigned int block,
+ unsigned int cpu)
{
u32 addr = 0, offset = 0;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return addr;
if (mce_flags.smca)
- return smca_get_block_address(bank, block);
+ return smca_get_block_address(bank, block, cpu);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id();
+ u32 low = 0, high = 0, address = 0;
int offset = -1;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
disable_err_thresholding(c, bank);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block, cpu);
if (!address)
break;
@@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank)
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
log_error_deferred(bank);
}
@@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void)
struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
unsigned int bank, cpu = smp_processor_id();
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
@@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
u32 low, high;
int err;
- if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
return 0;
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block, cpu);
if (!address)
return 0;
@@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu)
{
unsigned int bank;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
threshold_remove_bank(cpu, bank);
@@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu)
if (bp)
return 0;
- bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
+ bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *),
GFP_KERNEL);
if (!bp)
return -ENOMEM;
per_cpu(threshold_banks, cpu) = bp;
- for (bank = 0; bank < mca_cfg.banks; ++bank) {
+ for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
err = threshold_create_bank(cpu, bank);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 282916f3b8d8..743370ee4983 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
DEFINE_PER_CPU(unsigned, mce_exception_count);
-struct mce_bank *mce_banks __read_mostly;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
+
+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+ bool init; /* initialise bank? */
+};
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
+#define ATTR_LEN 16
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank_dev {
+ struct device_attribute attr; /* device attribute */
+ char attrname[ATTR_LEN]; /* attribute name */
+ u8 bank; /* bank number */
+};
+static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
+
struct mce_vendor_flags mce_flags __read_mostly;
struct mca_config mca_cfg __read_mostly = {
@@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
bool error_seen = false;
struct mce m;
int i;
@@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (flags & MCP_TIMESTAMP)
m.tsc = rdtsc();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
char *tmp;
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
m->status = mce_rdmsrl(msr_ops.status(i));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (test_bit(i, toclear))
mce_wrmsrl(msr_ops.status(i), 0);
}
@@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i;
- for (i = 0; i < cfg->banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1330,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
local_irq_enable();
if (kill_it || do_memory_failure(&m))
- force_sig(SIGBUS, current);
+ force_sig(SIGBUS);
local_irq_disable();
ist_end_non_atomic();
} else {
@@ -1463,27 +1481,29 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int __mcheck_cpu_mce_banks_init(void)
+static void __mcheck_cpu_mce_banks_init(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u8 n_banks = this_cpu_read(mce_num_banks);
int i;
- mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
- if (!mce_banks)
- return -ENOMEM;
-
- for (i = 0; i < MAX_NR_BANKS; i++) {
+ for (i = 0; i < n_banks; i++) {
struct mce_bank *b = &mce_banks[i];
+ /*
+ * Init them all; __mcheck_cpu_apply_quirks() is going to apply
+ * the required vendor quirks before
+ * __mcheck_cpu_init_clear_banks() does the final bank setup.
+ */
b->ctl = -1ULL;
b->init = 1;
}
- return 0;
}
/*
* Initialize Machine Checks for a CPU.
*/
-static int __mcheck_cpu_cap_init(void)
+static void __mcheck_cpu_cap_init(void)
{
u64 cap;
u8 b;
@@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (WARN_ON_ONCE(b > MAX_NR_BANKS))
+
+ if (b > MAX_NR_BANKS) {
+ pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
+ smp_processor_id(), MAX_NR_BANKS, b);
b = MAX_NR_BANKS;
+ }
- mca_cfg.banks = max(mca_cfg.banks, b);
+ this_cpu_write(mce_num_banks, b);
- if (!mce_banks) {
- int err = __mcheck_cpu_mce_banks_init();
- if (err)
- return err;
- }
+ __mcheck_cpu_mce_banks_init();
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void)
if (cap & MCG_SER_P)
mca_cfg.ser = 1;
-
- return 0;
}
static void __mcheck_cpu_init_generic(void)
@@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void)
static void __mcheck_cpu_init_clear_banks(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1549,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void)
}
/*
+ * Do a final check to see if there are any unused/RAZ banks.
+ *
+ * This must be done after the banks have been initialized and any quirks have
+ * been applied.
+ *
+ * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
+ * Otherwise, a user who disables a bank will not be able to re-enable it
+ * without a system reboot.
+ */
+static void __mcheck_cpu_check_banks(void)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+ u64 msrval;
+ int i;
+
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
+ struct mce_bank *b = &mce_banks[i];
+
+ if (!b->init)
+ continue;
+
+ rdmsrl(msr_ops.ctl(i), msrval);
+ b->init = !!msrval;
+ }
+}
+
+/*
* During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
* EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
* Vol 3B Table 15-20). But this confuses both the code that determines
@@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && cfg->banks > 4) {
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6 && cfg->banks > 0)
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].ctl = 0;
/*
@@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
+ if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
mce_banks[0].init = 0;
/*
@@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;
- if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
+ __mcheck_cpu_cap_init();
+
+ if (__mcheck_cpu_apply_quirks(c) < 0) {
mca_cfg.disabled = 1;
return;
}
@@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_clear_banks();
+ __mcheck_cpu_check_banks();
__mcheck_cpu_setup_timer();
}
@@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg)
void mce_disable_bank(int bank)
{
- if (bank >= mca_cfg.banks) {
+ if (bank >= this_cpu_read(mce_num_banks)) {
pr_warn(FW_BUG
"Ignoring request to disable invalid MCA bank %d.\n",
bank);
@@ -1949,9 +1999,10 @@ int __init mcheck_init(void)
*/
static void mce_disable_error_reporting(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = {
DEFINE_PER_CPU(struct device *, mce_device);
-static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
+static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
{
- return container_of(attr, struct mce_bank, attr);
+ return container_of(attr, struct mce_bank_dev, attr);
}
static ssize_t show_bank(struct device *s, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
+
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ return sprintf(buf, "%llx\n", b->ctl);
}
static ssize_t set_bank(struct device *s, struct device_attribute *attr,
const char *buf, size_t size)
{
+ u8 bank = attr_to_bank(attr)->bank;
+ struct mce_bank *b;
u64 new;
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
- attr_to_bank(attr)->ctl = new;
+ if (bank >= per_cpu(mce_num_banks, s->id))
+ return -EINVAL;
+
+ b = &per_cpu(mce_banks_array, s->id)[bank];
+
+ if (!b->init)
+ return -ENODEV;
+
+ b->ctl = new;
mce_restart();
return size;
@@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev)
kfree(dev);
}
-/* Per cpu device init. All of the cpus still share the same ctrl bank: */
+/* Per CPU device init. All of the CPUs still share the same bank device: */
static int mce_device_create(unsigned int cpu)
{
struct device *dev;
@@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
- for (j = 0; j < mca_cfg.banks; j++) {
- err = device_create_file(dev, &mce_banks[j].attr);
+ for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
+ err = device_create_file(dev, &mce_bank_devs[j].attr);
if (err)
goto error2;
}
@@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- device_remove_file(dev, &mce_banks[j].attr);
+ device_remove_file(dev, &mce_bank_devs[j].attr);
error:
while (--i >= 0)
device_remove_file(dev, mce_device_attrs[i]);
@@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
- for (i = 0; i < mca_cfg.banks; i++)
- device_remove_file(dev, &mce_banks[i].attr);
+ for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
+ device_remove_file(dev, &mce_bank_devs[i].attr);
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
@@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void)
static void mce_reenable_cpu(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;
if (!mce_available(raw_cpu_ptr(&cpu_info)))
@@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void)
if (!cpuhp_tasks_frozen)
cmci_reenable();
- for (i = 0; i < mca_cfg.banks; i++) {
+ for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void)
{
int i;
- for (i = 0; i < mca_cfg.banks; i++) {
- struct mce_bank *b = &mce_banks[i];
+ for (i = 0; i < MAX_NR_BANKS; i++) {
+ struct mce_bank_dev *b = &mce_bank_devs[i];
struct device_attribute *a = &b->attr;
+ b->bank = i;
+
sysfs_attr_init(&a->attr);
a->attr.name = b->attrname;
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
@@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
"%llu\n");
-static int __init mcheck_debugfs_init(void)
+static void __init mcheck_debugfs_init(void)
{
- struct dentry *dmce, *ffake_panic;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- return -ENOMEM;
- ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
- NULL, &fake_panic_fops);
- if (!ffake_panic)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
+ &fake_panic_fops);
}
#else
-static int __init mcheck_debugfs_init(void) { return -EINVAL; }
+static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
@@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
- pr_info("Using %d MCE banks\n", mca_cfg.banks);
-
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5d108f70f315..1f30117b24ba 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -645,7 +645,6 @@ static const struct file_operations readme_fops = {
static struct dfs_node {
char *name;
- struct dentry *d;
const struct file_operations *fops;
umode_t perm;
} dfs_fls[] = {
@@ -659,49 +658,23 @@ static struct dfs_node {
{ .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
-static int __init debugfs_init(void)
+static void __init debugfs_init(void)
{
unsigned int i;
dfs_inj = debugfs_create_dir("mce-inject", NULL);
- if (!dfs_inj)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
- dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
- dfs_fls[i].perm,
- dfs_inj,
- &i_mce,
- dfs_fls[i].fops);
-
- if (!dfs_fls[i].d)
- goto err_dfs_add;
- }
-
- return 0;
-
-err_dfs_add:
- while (i-- > 0)
- debugfs_remove(dfs_fls[i].d);
- debugfs_remove(dfs_inj);
- dfs_inj = NULL;
-
- return -ENODEV;
+ for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
+ debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
+ &i_mce, dfs_fls[i].fops);
}
static int __init inject_init(void)
{
- int err;
-
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
- err = debugfs_init();
- if (err) {
- free_cpumask_var(mce_inject_cpumask);
- return err;
- }
+ debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
mce_register_injector_chain(&inject_nb);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a34b55baa7aa..43031db429d2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -22,17 +22,8 @@ enum severity_level {
extern struct blocking_notifier_head x86_mce_decoder_chain;
-#define ATTR_LEN 16
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
-/* One object for each MCE bank, shared by all CPUs */
-struct mce_bank {
- u64 ctl; /* subevents to enable */
- unsigned char init; /* initialise bank? */
- struct device_attribute attr; /* device attribute */
- char attrname[ATTR_LEN]; /* attribute name */
-};
-
struct mce_evt_llist {
struct llist_node llnode;
struct mce mce;
@@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
-extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
@@ -128,7 +118,6 @@ struct mca_config {
bios_cmci_threshold : 1,
__reserved : 59;
- u8 banks;
s8 bootlog;
int tolerant;
int monarch_timeout;
@@ -137,6 +126,7 @@ struct mca_config {
};
extern struct mca_config mca_cfg;
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
struct mce_vendor_flags {
/*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 2d33a26d257e..210f1f5db5f7 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = {
static int __init severities_debugfs_init(void)
{
- struct dentry *dmce, *fsev;
+ struct dentry *dmce;
dmce = mce_get_debugfs_dir();
- if (!dmce)
- goto err_out;
-
- fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
- &severities_coverage_fops);
- if (!fsev)
- goto err_out;
+ debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+ &severities_coverage_fops);
return 0;
-
-err_out:
- return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index d0dfb892c72f..aed45b8895d5 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -4,6 +4,8 @@
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
+set -e
+
IN=$1
OUT=$2
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 7df29f08871b..062f77279ce3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -17,6 +17,7 @@
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
@@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
exiting_irq();
@@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
{
*vector = HYPERV_STIMER0_VECTOR;
- *irq = 0; /* Unused on x86/x64 */
+ *irq = -1; /* Unused on x86/x64 */
hv_stimer0_handler = handler;
return 0;
}
@@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
- lapic_timer_frequency = hv_lapic_frequency;
+ lapic_timer_period = hv_lapic_frequency;
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ lapic_timer_period);
}
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9356c1c9024d..aa5c064a6a22 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
- wbinvd();
+
+ /*
+ * Cache flushing is the most time-consuming step when programming
+ * the MTRRs. Fortunately, as per the Intel Software Development
+ * Manual, we can skip it if the processor supports cache self-
+ * snooping.
+ */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (boot_cpu_has(X86_FEATURE_PGE)) {
@@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
- wbinvd();
+
+ /* Again, only flush caches if we have to. */
+ if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
+ wbinvd();
}
static void post_set(void) __releases(set_atomicity_lock)
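
A minimal sketch of the conditional flush these hunks introduce, using only the feature flag and wbinvd() already shown above; the helper name is hypothetical:

	static inline void flush_caches_unless_selfsnoop(void)
	{
		/* self-snooping CPUs keep caches coherent across MTRR updates */
		if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
			wbinvd();
	}
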
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 604c0e3bcc83..d7623e1b927d 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
#else
register unsigned int line_size asm("esi");
register unsigned int size asm("edi");
-#ifdef CONFIG_X86_64
- register void *mem_r asm("rbx");
-#else
- register void *mem_r asm("ebx");
-#endif /* CONFIG_X86_64 */
+ register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */
/*
@@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2f4824793798..bf3034994754 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2488,21 +2488,21 @@ out_destroy:
* modification to the CBM if the default does not satisfy the
* requirements.
*/
-static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
+static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
{
- unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
+ unsigned long val = _val;
- if (val == 0)
- return;
+ if (!val)
+ return 0;
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
- *_val = (u32)val;
+ return (u32)val;
}
/*
@@ -2560,7 +2560,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
* Force the initial CBM to be valid, user can
* modify the CBM based on system availability.
*/
- cbm_ensure_valid(&d->new_ctrl, r);
+ d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r);
/*
* Assign the u32 CBM to an unsigned long to ensure that
* bitmap_weight() does not access out-of-bound memory.
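
For illustration, the same contiguity fixup written with plain C bit operations instead of the kernel bitmap helpers; a sketch only, and make_cbm_contiguous() is a hypothetical name:

	static u32 make_cbm_contiguous(u32 val, unsigned int cbm_len)
	{
		unsigned int zero_bit;

		if (!val)
			return 0;

		/* lowest set bit, then the first hole above it */
		zero_bit = __builtin_ctz(val);
		while (zero_bit < cbm_len && (val & (1u << zero_bit)))
			zero_bit++;

		/* clear everything from the hole upwards */
		if (zero_bit >= 32)
			return val;
		return val & ((1u << zero_bit) - 1);
	}
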
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 94aa1c72ca98..adf9b71386ef 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -26,6 +26,10 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
+ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
+ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8f6c784141d1..ee48c3fc8a65 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -15,33 +15,66 @@
/* leaf 0xb SMT level */
#define SMT_LEVEL 0
-/* leaf 0xb sub-leaf types */
+/* extended topology sub-leaf types */
#define INVALID_TYPE 0
#define SMT_TYPE 1
#define CORE_TYPE 2
+#define DIE_TYPE 5
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
-{
#ifdef CONFIG_SMP
+unsigned int __max_die_per_package __read_mostly = 1;
+EXPORT_SYMBOL(__max_die_per_package);
+
+/*
+ * Check if the given CPUID extended topology "leaf" is implemented

+ */
+static int check_extended_topology_leaf(int leaf)
+{
unsigned int eax, ebx, ecx, edx;
- if (c->cpuid_level < 0xb)
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return -1;
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ return 0;
+}
+/*
+ * Return the best CPUID Extended Topology Leaf supported
+ */
+static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+{
+ if (c->cpuid_level >= 0x1f) {
+ if (check_extended_topology_leaf(0x1f) == 0)
+ return 0x1f;
+ }
- /*
- * check if the cpuid leaf 0xb is actually implemented.
- */
- if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+ if (c->cpuid_level >= 0xb) {
+ if (check_extended_topology_leaf(0xb) == 0)
+ return 0xb;
+ }
+
+ return -1;
+}
+#endif
+
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+ unsigned int eax, ebx, ecx, edx;
+ int leaf;
+
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
/*
* initial apic id, which also represents 32-bit extended x2apic id.
*/
@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
}
/*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * Check for extended topology enumeration cpuid leaf, and if it
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
- unsigned int ht_mask_width, core_plus_mask_width;
+ unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
+ unsigned int die_select_mask, die_level_siblings;
+ int leaf;
- if (detect_extended_topology_early(c) < 0)
+ leaf = detect_extended_topology_leaf(c);
+ if (leaf < 0)
return -1;
/*
* Populate HT related information from sub-leaf level 0.
*/
- cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ c->initial_apicid = edx;
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
do {
- cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+ cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
/*
* Check for the Core type in the implemented sub leaves.
@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
- break;
+ die_level_siblings = core_level_siblings;
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+ }
+ if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
+ die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
sub_index++;
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
-
- c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
- & core_select_mask;
- c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+ die_select_mask = (~(-1 << die_plus_mask_width)) >>
+ core_plus_mask_width;
+
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
+ ht_mask_width) & core_select_mask;
+ c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
+ core_plus_mask_width) & die_select_mask;
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+ die_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+ __max_die_per_package = (die_level_siblings / core_level_siblings);
#endif
return 0;
}
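
The die ID derivation above can be condensed into a sketch; the helper is hypothetical, assumes die_plus_mask_width < 32, and assumes phys_pkg_id() reduces to a right shift of the APIC ID, as it does in the flat case:

	static unsigned int sketch_die_id(unsigned int apicid,
					  unsigned int core_plus_mask_width,
					  unsigned int die_plus_mask_width)
	{
		/* low die_plus_mask_width bits, minus the SMT+core field */
		unsigned int die_select_mask =
			(~(~0u << die_plus_mask_width)) >> core_plus_mask_width;

		/* shift out SMT+core bits, keep only the die field */
		return (apicid >> core_plus_mask_width) & die_select_mask;
	}
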
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
new file mode 100644
index 000000000000..6a204e7336c1
--- /dev/null
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/syscore_ops.h>
+#include <linux/suspend.h>
+#include <linux/cpu.h>
+
+#include <asm/msr.h>
+
+#define UMWAIT_C02_ENABLE 0
+
+#define UMWAIT_CTRL_VAL(max_time, c02_disable) \
+ (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \
+ ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE))
+
+/*
+ * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default,
+ * umwait max time is 100000 in TSC-quanta and C0.2 is enabled.
+ */
+static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
+
+/*
+ * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
+ * the sysfs write functions.
+ */
+static DEFINE_MUTEX(umwait_lock);
+
+static void umwait_update_control_msr(void *unused)
+{
+ lockdep_assert_irqs_disabled();
+ wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0);
+}
+
+/*
+ * The CPU hotplug callback sets the control MSR to the global control
+ * value.
+ *
+ * Disable interrupts so the read of umwait_control_cached and the WRMSR
+ * are protected against a concurrent sysfs write. Otherwise the sysfs
+ * write could update the cached value after it had been read on this CPU
+ * and issue the IPI before the old value had been written. The IPI would
+ * interrupt, write the new value, and after return from the IPI the previous
+ * value would be written by this CPU.
+ *
+ * With interrupts disabled the upcoming CPU either sees the new control
+ * value or the IPI is updating this CPU to the new control value after
+ * interrupts have been reenabled.
+ */
+static int umwait_cpu_online(unsigned int cpu)
+{
+ local_irq_disable();
+ umwait_update_control_msr(NULL);
+ local_irq_enable();
+ return 0;
+}
+
+/*
+ * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
+ * is the only active CPU at this time. The MSR is set up on the APs via the
+ * CPU hotplug callback.
+ *
+ * This function is invoked on resume from suspend and hibernation. On
+ * resume from suspend the restore should be not required, but we neither
+ * trust the firmware nor does it matter if the same value is written
+ * again.
+ */
+static void umwait_syscore_resume(void)
+{
+ umwait_update_control_msr(NULL);
+}
+
+static struct syscore_ops umwait_syscore_ops = {
+ .resume = umwait_syscore_resume,
+};
+
+/* sysfs interface */
+
+/*
+ * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
+ * Otherwise, C0.2 is enabled.
+ */
+static inline bool umwait_ctrl_c02_enabled(u32 ctrl)
+{
+ return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE);
+}
+
+static inline u32 umwait_ctrl_max_time(u32 ctrl)
+{
+ return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+}
+
+static inline void umwait_update_control(u32 maxtime, bool c02_enable)
+{
+ u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;
+
+ if (!c02_enable)
+ ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
+
+ WRITE_ONCE(umwait_control_cached, ctrl);
+ /* Propagate to all CPUs */
+ on_each_cpu(umwait_update_control_msr, NULL, 1);
+}
+
+static ssize_t
+enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl));
+}
+
+static ssize_t enable_c02_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool c02_enable;
+ u32 ctrl;
+ int ret;
+
+ ret = kstrtobool(buf, &c02_enable);
+ if (ret)
+ return ret;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (c02_enable != umwait_ctrl_c02_enabled(ctrl))
+ umwait_update_control(ctrl, c02_enable);
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(enable_c02);
+
+static ssize_t
+max_time_show(struct device *kobj, struct device_attribute *attr, char *buf)
+{
+ u32 ctrl = READ_ONCE(umwait_control_cached);
+
+ return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl));
+}
+
+static ssize_t max_time_store(struct device *kobj,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 max_time, ctrl;
+ int ret;
+
+ ret = kstrtou32(buf, 0, &max_time);
+ if (ret)
+ return ret;
+
+ /* bits[1:0] must be zero */
+ if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK)
+ return -EINVAL;
+
+ mutex_lock(&umwait_lock);
+
+ ctrl = READ_ONCE(umwait_control_cached);
+ if (max_time != umwait_ctrl_max_time(ctrl))
+ umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl));
+
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(max_time);
+
+static struct attribute *umwait_attrs[] = {
+ &dev_attr_enable_c02.attr,
+ &dev_attr_max_time.attr,
+ NULL
+};
+
+static struct attribute_group umwait_attr_group = {
+ .attrs = umwait_attrs,
+ .name = "umwait_control",
+};
+
+static int __init umwait_init(void)
+{
+ struct device *dev;
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_WAITPKG))
+ return -ENODEV;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
+ umwait_cpu_online, NULL);
+
+ register_syscore_ops(&umwait_syscore_ops);
+
+ /*
+ * Add umwait control interface. Ignore failure, so at least the
+ * default values are set up in case the machine manages to boot.
+ */
+ dev = cpu_subsys.dev_root;
+ return sysfs_create_group(&dev->kobj, &umwait_attr_group);
+}
+device_initcall(umwait_init);
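
For reference, a sketch of how the control value is packed; with the default of 100000 TSC quanta (0x186A0, whose two low bits are already zero) and C0.2 enabled, the resulting MSR value is simply 0x186A0. The helper name is illustrative:

	static inline u32 umwait_ctrl_pack(u32 max_time, bool c02_enable)
	{
		/* bits [31:2]: max wait in TSC quanta; bit 0: C0.2 disable */
		u32 ctrl = max_time & MSR_IA32_UMWAIT_CONTROL_TIME_MASK;

		if (!c02_enable)
			ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE;
		return ctrl;
	}
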
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0eda91f8eeac..3c648476d4fb 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)
#ifdef CONFIG_X86_LOCAL_APIC
/* Skip lapic calibration since we know the bus frequency. */
- lapic_timer_frequency = ecx / HZ;
+ lapic_timer_period = ecx / HZ;
pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
ecx);
#endif
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
new file mode 100644
index 000000000000..8e6f2f4b4afe
--- /dev/null
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+
+#include <asm/cpufeature.h>
+
+#include "cpu.h"
+
+#define MSR_ZHAOXIN_FCR57 0x00001257
+
+#define ACE_PRESENT (1 << 6)
+#define ACE_ENABLED (1 << 7)
+#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */
+
+#define RNG_PRESENT (1 << 2)
+#define RNG_ENABLED (1 << 3)
+#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */
+
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
+static void init_zhaoxin_cap(struct cpuinfo_x86 *c)
+{
+ u32 lo, hi;
+
+ /* Test for Extended Feature Flags presence */
+ if (cpuid_eax(0xC0000000) >= 0xC0000001) {
+ u32 tmp = cpuid_edx(0xC0000001);
+
+ /* Enable ACE unit, if present and disabled */
+ if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
+ rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ /* Enable ACE unit */
+ lo |= ACE_FCR;
+ wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ pr_info("CPU: Enabled ACE h/w crypto\n");
+ }
+
+ /* Enable RNG unit, if present and disabled */
+ if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
+ rdmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ /* Enable RNG unit */
+ lo |= RNG_ENABLE;
+ wrmsr(MSR_ZHAOXIN_FCR57, lo, hi);
+ pr_info("CPU: Enabled h/w RNG\n");
+ }
+
+ /*
+ * Store Extended Feature Flags as word 5 of the CPU
+ * capability bit array
+ */
+ c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
+ }
+
+ if (c->x86 >= 0x6)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ cpu_detect_cache_sizes(c);
+}
+
+static void early_init_zhaoxin(struct cpuinfo_x86 *c)
+{
+ if (c->x86 >= 0x6)
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_SYSENTER32);
+#endif
+ if (c->x86_power & (1 << 8)) {
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ }
+
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ /*
+ * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+ * apicids which are reserved per package. Store the resulting
+ * shift value for the package management code.
+ */
+ if (edx & (1U << 28))
+ c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+ }
+
+}
+
+static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
+static void init_zhaoxin(struct cpuinfo_x86 *c)
+{
+ early_init_zhaoxin(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
+ if (c->x86 >= 0x6)
+ init_zhaoxin_cap(c);
+#ifdef CONFIG_X86_64
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ zhaoxin_detect_vmx_virtcap(c);
+}
+
+#ifdef CONFIG_X86_32
+static unsigned int
+zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size)
+{
+ return size;
+}
+#endif
+
+static const struct cpu_dev zhaoxin_cpu_dev = {
+ .c_vendor = "zhaoxin",
+ .c_ident = { " Shanghai " },
+ .c_early_init = early_init_zhaoxin,
+ .c_init = init_zhaoxin,
+#ifdef CONFIG_X86_32
+ .legacy_cache_size = zhaoxin_size_cache,
+#endif
+ .c_x86_vendor = X86_VENDOR_ZHAOXIN,
+};
+
+cpu_dev_register(zhaoxin_cpu_dev);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 576b2e1bfc12..a55094b5f452 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -56,7 +56,6 @@ struct crash_memmap_data {
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -73,14 +72,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
-#ifdef CONFIG_X86_32
- struct pt_regs fixed_regs;
-
- if (!user_mode(regs)) {
- crash_fixup_ss_esp(&fixed_regs, regs);
- regs = &fixed_regs;
- }
-#endif
crash_save_cpu(regs, cpu);
/*
@@ -181,6 +172,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
+
+static unsigned long crash_zero_bytes;
+
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 649fbc3fcf9f..12c70840980e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-static void kernel_fpu_disable(void)
-{
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
-}
-
-static void kernel_fpu_enable(void)
-{
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, false);
-}
-
static bool kernel_fpu_disabled(void)
{
return this_cpu_read(in_kernel_fpu);
@@ -94,42 +82,33 @@ bool irq_fpu_usable(void)
}
EXPORT_SYMBOL(irq_fpu_usable);
-static void __kernel_fpu_begin(void)
+void kernel_fpu_begin(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ preempt_disable();
WARN_ON_FPU(!irq_fpu_usable());
+ WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- kernel_fpu_disable();
+ this_cpu_write(in_kernel_fpu, true);
- if (!(current->flags & PF_KTHREAD)) {
- if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
- set_thread_flag(TIF_NEED_FPU_LOAD);
- /*
- * Ignore return value -- we don't care if reg state
- * is clobbered.
- */
- copy_fpregs_to_fpstate(fpu);
- }
+ if (!(current->flags & PF_KTHREAD) &&
+ !test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
+ copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
}
-
-static void __kernel_fpu_end(void)
-{
- kernel_fpu_enable();
-}
-
-void kernel_fpu_begin(void)
-{
- preempt_disable();
- __kernel_fpu_begin();
-}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
void kernel_fpu_end(void)
{
- __kernel_fpu_end();
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_after_save(fpu);
fpregs_unlock();
}
-EXPORT_SYMBOL_GPL(fpu__save);
/*
* Legacy x87 fpstate state init:
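
A hypothetical caller, sketching how the simplified kernel_fpu_begin()/kernel_fpu_end() pair above is meant to be used; the function name and body are illustrative only:

	static void simd_memop_sketch(void *dst, const void *src, size_t len)
	{
		kernel_fpu_begin();	/* disables preemption, defers user FPU state */
		/* ... SSE/AVX instructions may operate on dst/src here ... */
		kernel_fpu_end();	/* re-enables preemption */
	}
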
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index ef0030e3fe6b..6ce7e0a23268 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
if (!boot_cpu_has(X86_FEATURE_FPU)) {
- /*
- * Disable xsave as we do not support it if i387
- * emulation is enabled.
- */
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
fpu_kernel_xstate_size = sizeof(struct swregs_state);
} else {
if (boot_cpu_has(X86_FEATURE_FXSR))
@@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void)
char *argptr = arg;
int bit;
+#ifdef CONFIG_X86_32
if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+ pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
- if (cmdline_find_option_bool(boot_command_line, "nofxsr")) {
+ if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- }
+#endif
if (cmdline_find_option_bool(boot_command_line, "noxsave"))
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 3c36dd1784db..e5cb67d67c03 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -8,6 +8,8 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -68,15 +70,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
unsigned int fpu_user_xstate_size;
/*
- * Clear all of the X86_FEATURE_* bits that are unavailable
- * when the CPU has no XSAVE support.
- */
-void fpu__xstate_clear_all_cpu_caps(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-}
-
-/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
@@ -709,7 +702,7 @@ static void fpu__init_disable_system_xstate(void)
{
xfeatures_mask = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
- fpu__xstate_clear_all_cpu_caps();
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
}
/*
@@ -1240,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Report the time elapsed in milliseconds since the last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+ unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ long delta;
+
+ if (!timestamp) {
+ /*
+ * Report -1 if no AVX512 usage
+ */
+ delta = -1;
+ } else {
+ delta = (long)(jiffies - timestamp);
+ /*
+ * Cap to LONG_MAX if time difference > LONG_MAX
+ */
+ if (delta < 0)
+ delta = LONG_MAX;
+ delta = jiffies_to_msecs(delta);
+ }
+
+ seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+ seq_putc(m, '\n');
+}
+
+/*
+ * Report architecture specific information
+ */
+int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ /*
+ * Report AVX512 state if the processor and build option support it.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_AVX512F))
+ avx512_status(m, task);
+
+ return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
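
The timestamp arithmetic above, isolated as a sketch with a hypothetical helper name; jiffies wraps, so the subtraction is done in signed arithmetic and a negative delta is capped:

	static long avx512_elapsed_ms_sketch(unsigned long timestamp)
	{
		long delta;

		if (!timestamp)
			return -1;			/* AVX512 never used */

		delta = (long)(jiffies - timestamp);	/* wrap-safe subtraction */
		if (delta < 0)
			delta = LONG_MAX;		/* cap a wrapped difference */

		return jiffies_to_msecs(delta);
	}
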
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..4b73f5937f41 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/memory.h>
#include <trace/syscall.h>
@@ -34,16 +35,25 @@
#ifdef CONFIG_DYNAMIC_FTRACE
int ftrace_arch_code_modify_prepare(void)
+ __acquires(&text_mutex)
{
+ /*
+ * Need to grab text_mutex to prevent a race from module loading
+ * and live kernel patching from changing the text permissions while
+ * ftrace has it set to "read/write".
+ */
+ mutex_lock(&text_mutex);
set_kernel_text_rw();
set_all_modules_text_rw();
return 0;
}
int ftrace_arch_code_modify_post_process(void)
+ __releases(&text_mutex)
{
set_all_modules_text_ro();
set_kernel_text_ro();
+ mutex_unlock(&text_mutex);
return 0;
}
@@ -300,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
ip = regs->ip - INT3_INSN_SIZE;
-#ifdef CONFIG_X86_64
if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1;
@@ -312,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
int3_emulate_call(regs, ftrace_update_func_call);
return 1;
}
-#else
- if (ftrace_location(ip) || is_ftrace_caller(ip)) {
- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
- return 1;
- }
-#endif
return 0;
}
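
Sketch of the resulting bracket around a text-modification window (an illustrative sequence, not code from this patch):

	static void ftrace_text_poke_window_sketch(void)
	{
		ftrace_arch_code_modify_prepare();	/* text_mutex held, text RW */
		/* ... ftrace rewrites mcount/fentry call sites ... */
		ftrace_arch_code_modify_post_process();	/* text RO, text_mutex dropped */
	}
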
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 2ba914a34b06..073aab525d80 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -9,6 +9,8 @@
#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/nospec-branch.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
# define function_hook __fentry__
EXPORT_SYMBOL(__fentry__)
@@ -89,26 +91,38 @@ END(ftrace_caller)
ENTRY(ftrace_regs_caller)
/*
- * i386 does not save SS and ESP when coming from kernel.
- * Instead, to get sp, &regs->sp is used (see ptrace.h).
- * Unfortunately, that means eflags must be at the same location
- * as the current return ip is. We move the return ip into the
- * regs->ip location, and move flags into the return ip location.
+ * We're here from an mcount/fentry CALL, and the stack frame looks like:
+ *
+ * <previous context>
+ * RET-IP
+ *
+ * The purpose of this function is to call out in an emulated INT3
+ * environment with a stack frame like:
+ *
+ * <previous context>
+ * gap / RET-IP
+ * gap
+ * gap
+ * gap
+ * pt_regs
+ *
+ * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/
- pushl $__KERNEL_CS
- pushl 4(%esp) /* Save the return ip */
- pushl $0 /* Load 0 into orig_ax */
+ subl $3*4, %esp # RET-IP + 3 gaps
+ pushl %ss # ss
+ pushl %esp # points at ss
+ addl $5*4, (%esp) # make it point at <previous context>
+ pushfl # flags
+ pushl $__KERNEL_CS # cs
+ pushl 7*4(%esp) # ip <- RET-IP
+ pushl $0 # orig_eax
+
pushl %gs
pushl %fs
pushl %es
pushl %ds
- pushl %eax
-
- /* Get flags and place them into the return ip slot */
- pushf
- popl %eax
- movl %eax, 8*4(%esp)
+ pushl %eax
pushl %ebp
pushl %edi
pushl %esi
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
pushl %ecx
pushl %ebx
- movl 12*4(%esp), %eax /* Load ip (1st parameter) */
- subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
- movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
- movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
- pushl %esp /* Save pt_regs as 4th parameter */
+ ENCODE_FRAME_POINTER
+
+ movl PT_EIP(%esp), %eax # 1st argument: IP
+ subl $MCOUNT_INSN_SIZE, %eax
+ movl 21*4(%esp), %edx # 2nd argument: parent ip
+ movl function_trace_op, %ecx # 3rd argument: ftrace_pos
+ pushl %esp # 4th argument: pt_regs
GLOBAL(ftrace_regs_call)
call ftrace_stub
- addl $4, %esp /* Skip pt_regs */
+ addl $4, %esp # skip 4th argument
- /* restore flags */
- push 14*4(%esp)
- popf
+ /* place IP below the new SP */
+ movl PT_OLDESP(%esp), %eax
+ movl PT_EIP(%esp), %ecx
+ movl %ecx, -4(%eax)
- /* Move return ip back to its original location */
- movl 12*4(%esp), %eax
- movl %eax, 14*4(%esp)
+ /* place EAX below that */
+ movl PT_EAX(%esp), %ecx
+ movl %ecx, -8(%eax)
popl %ebx
popl %ecx
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
popl %esi
popl %edi
popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- /* use lea to not affect flags */
- lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
+ lea -8(%eax), %esp
+ popl %eax
jmp .Lftrace_ret
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 10eb2760ef2c..809d54397dba 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,7 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.code64
.section .entry.text, "ax"
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
movq %rcx, RSP(%rsp)
+ ENCODE_FRAME_POINTER
+
/* regs go into 4th parameter */
leaq (%rsp), %rcx
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a0573f2e7763..c43e96a938d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,32 +1,44 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
-#include <linux/irq.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/i8253.h>
-#include <linux/slab.h>
#include <linux/hpet.h>
-#include <linux/init.h>
#include <linux/cpu.h>
-#include <linux/pm.h>
-#include <linux/io.h>
+#include <linux/irq.h>
-#include <asm/cpufeature.h>
-#include <asm/irqdomain.h>
-#include <asm/fixmap.h>
#include <asm/hpet.h>
#include <asm/time.h>
-#define HPET_MASK CLOCKSOURCE_MASK(32)
+#undef pr_fmt
+#define pr_fmt(fmt) "hpet: " fmt
-#define HPET_DEV_USED_BIT 2
-#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
-#define HPET_DEV_VALID 0x8
-#define HPET_DEV_FSB_CAP 0x1000
-#define HPET_DEV_PERI_CAP 0x2000
+enum hpet_mode {
+ HPET_MODE_UNUSED,
+ HPET_MODE_LEGACY,
+ HPET_MODE_CLOCKEVT,
+ HPET_MODE_DEVICE,
+};
+
+struct hpet_channel {
+ struct clock_event_device evt;
+ unsigned int num;
+ unsigned int cpu;
+ unsigned int irq;
+ unsigned int in_use;
+ enum hpet_mode mode;
+ unsigned int boot_cfg;
+ char name[10];
+};
+
+struct hpet_base {
+ unsigned int nr_channels;
+ unsigned int nr_clockevents;
+ unsigned int boot_cfg;
+ struct hpet_channel *channels;
+};
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
#define HPET_MIN_CYCLES 128
#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
@@ -39,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */
bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned int hpet_num_timers;
+static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
+static struct irq_domain *hpet_domain;
#endif
+
static void __iomem *hpet_virt_address;
-struct hpet_dev {
- struct clock_event_device evt;
- unsigned int num;
- int cpu;
- unsigned int irq;
- unsigned int flags;
- char name[10];
-};
+static struct hpet_base hpet_base;
+
+static bool hpet_legacy_int_enabled;
+static unsigned long hpet_freq;
-static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
+
+static inline
+struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
{
- return container_of(evtdev, struct hpet_dev, evt);
+ return container_of(evt, struct hpet_channel, evt);
}
inline unsigned int hpet_readl(unsigned int a)
@@ -67,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
writel(d, hpet_virt_address + a);
}
-#ifdef CONFIG_X86_64
-#include <asm/pgtable.h>
-#endif
-
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
@@ -85,10 +96,6 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-bool boot_hpet_disable;
-bool hpet_force_user;
-static bool hpet_verbose;
-
static int __init hpet_setup(char *str)
{
while (str) {
@@ -120,13 +127,8 @@ static inline int is_hpet_capable(void)
return !boot_hpet_disable && hpet_address;
}
-/*
- * HPET timer interrupt enable / disable
- */
-static bool hpet_legacy_int_enabled;
-
/**
- * is_hpet_enabled - check whether the hpet timer interrupt is enabled
+ * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled
*/
int is_hpet_enabled(void)
{
@@ -136,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
static void _hpet_print_config(const char *function, int line)
{
- u32 i, timers, l, h;
- printk(KERN_INFO "hpet: %s(%d):\n", function, line);
- l = hpet_readl(HPET_ID);
- h = hpet_readl(HPET_PERIOD);
- timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
- l = hpet_readl(HPET_CFG);
- h = hpet_readl(HPET_STATUS);
- printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+ u32 i, id, period, cfg, status, channels, l, h;
+
+ pr_info("%s(%d):\n", function, line);
+
+ id = hpet_readl(HPET_ID);
+ period = hpet_readl(HPET_PERIOD);
+ pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period);
+
+ cfg = hpet_readl(HPET_CFG);
+ status = hpet_readl(HPET_STATUS);
+ pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status);
+
l = hpet_readl(HPET_COUNTER);
h = hpet_readl(HPET_COUNTER+4);
- printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+ pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
- for (i = 0; i < timers; i++) {
+ for (i = 0; i < channels; i++) {
l = hpet_readl(HPET_Tn_CFG(i));
h = hpet_readl(HPET_Tn_CFG(i)+4);
- printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_CMP(i));
h = hpet_readl(HPET_Tn_CMP(i)+4);
- printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
+
l = hpet_readl(HPET_Tn_ROUTE(i));
h = hpet_readl(HPET_Tn_ROUTE(i)+4);
- printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
- i, l, h);
+ pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
}
}
@@ -172,31 +178,20 @@ do { \
} while (0)
/*
- * When the hpet driver (/dev/hpet) is enabled, we need to reserve
+ * When the HPET driver (/dev/hpet) is enabled, we need to reserve
* timer 0 and timer 1 in case of RTC emulation.
*/
#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd);
-
-static void hpet_reserve_platform_timers(unsigned int id)
+static void __init hpet_reserve_platform_timers(void)
{
- struct hpet __iomem *hpet = hpet_virt_address;
- struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
- unsigned int nrtimers, i;
struct hpet_data hd;
-
- nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+ unsigned int i;
memset(&hd, 0, sizeof(hd));
hd.hd_phys_address = hpet_address;
- hd.hd_address = hpet;
- hd.hd_nirqs = nrtimers;
- hpet_reserve_timer(&hd, 0);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
+ hd.hd_address = hpet_virt_address;
+ hd.hd_nirqs = hpet_base.nr_channels;
/*
* NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
@@ -206,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id)
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
- for (i = 2; i < nrtimers; timer++, i++) {
- hd.hd_irq[i] = (readl(&timer->hpet_config) &
- Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
- }
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ if (i >= 2)
+ hd.hd_irq[i] = hc->irq;
- hpet_reserve_msi_timers(&hd);
+ switch (hc->mode) {
+ case HPET_MODE_UNUSED:
+ case HPET_MODE_DEVICE:
+ hc->mode = HPET_MODE_DEVICE;
+ break;
+ case HPET_MODE_CLOCKEVT:
+ case HPET_MODE_LEGACY:
+ hpet_reserve_timer(&hd, hc->num);
+ break;
+ }
+ }
hpet_alloc(&hd);
+}
+static void __init hpet_select_device_channel(void)
+{
+ int i;
+
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+
+ /* Associate the first unused channel to /dev/hpet */
+ if (hc->mode == HPET_MODE_UNUSED) {
+ hc->mode = HPET_MODE_DEVICE;
+ return;
+ }
+ }
}
+
#else
-static void hpet_reserve_platform_timers(unsigned int id) { }
+static inline void hpet_reserve_platform_timers(void) { }
+static inline void hpet_select_device_channel(void) {}
#endif
-/*
- * Common hpet info
- */
-static unsigned long hpet_freq;
-
-static struct clock_event_device hpet_clockevent;
-
+/* Common HPET functions */
static void hpet_stop_counter(void)
{
u32 cfg = hpet_readl(HPET_CFG);
+
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -243,6 +260,7 @@ static void hpet_reset_counter(void)
static void hpet_start_counter(void)
{
unsigned int cfg = hpet_readl(HPET_CFG);
+
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -274,24 +292,9 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = true;
}
-static void hpet_legacy_clockevent_register(void)
-{
- /* Start HPET legacy interrupts */
- hpet_enable_legacy_int();
-
- /*
- * Start hpet with the boot cpu mask and make it
- * global after the IO_APIC has been initialized.
- */
- hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
- clockevents_config_and_register(&hpet_clockevent, hpet_freq,
- HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
- global_clock_event = &hpet_clockevent;
- printk(KERN_DEBUG "hpet clockevent registered\n");
-}
-
-static int hpet_set_periodic(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg, cmp, now;
uint64_t delta;
@@ -300,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned int)delta;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
- hpet_writel(cmp, HPET_Tn_CMP(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
+ hpet_writel(cmp, HPET_Tn_CMP(channel));
udelay(1);
/*
* HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
@@ -313,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer)
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer));
+ hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel));
hpet_start_counter();
hpet_print_config();
return 0;
}
-static int hpet_set_oneshot(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_shutdown(struct clock_event_device *evt, int timer)
+static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
- cfg = hpet_readl(HPET_Tn_CFG(timer));
+ cfg = hpet_readl(HPET_Tn_CFG(channel));
cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cfg, HPET_Tn_CFG(channel));
return 0;
}
-static int hpet_resume(struct clock_event_device *evt)
+static int hpet_clkevt_legacy_resume(struct clock_event_device *evt)
{
hpet_enable_legacy_int();
hpet_print_config();
return 0;
}
-static int hpet_next_event(unsigned long delta,
- struct clock_event_device *evt, int timer)
+static int
+hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt)
{
+ unsigned int channel = clockevent_to_channel(evt)->num;
u32 cnt;
s32 res;
cnt = hpet_readl(HPET_COUNTER);
cnt += (u32) delta;
- hpet_writel(cnt, HPET_Tn_CMP(timer));
+ hpet_writel(cnt, HPET_Tn_CMP(channel));
/*
* HPETs are a complete disaster. The compare register is
@@ -387,360 +393,250 @@ static int hpet_next_event(unsigned long delta,
return res < HPET_MIN_CYCLES ? -ETIME : 0;
}
-static int hpet_legacy_shutdown(struct clock_event_device *evt)
+static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
{
- return hpet_shutdown(evt, 0);
-}
+ struct clock_event_device *evt = &hc->evt;
-static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
-{
- return hpet_set_oneshot(evt, 0);
-}
+ evt->rating = rating;
+ evt->irq = hc->irq;
+ evt->name = hc->name;
+ evt->cpumask = cpumask_of(hc->cpu);
+ evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
+ evt->set_next_event = hpet_clkevt_set_next_event;
+ evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
-static int hpet_legacy_set_periodic(struct clock_event_device *evt)
-{
- return hpet_set_periodic(evt, 0);
+ evt->features = CLOCK_EVT_FEAT_ONESHOT;
+ if (hc->boot_cfg & HPET_TN_PERIODIC) {
+ evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+ evt->set_state_periodic = hpet_clkevt_set_state_periodic;
+ }
}
-static int hpet_legacy_resume(struct clock_event_device *evt)
+static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
{
- return hpet_resume(evt);
-}
+ /*
+ * Start HPET with the boot CPU's cpumask and make it global after
+ * the IO_APIC has been initialized.
+ */
+ hc->cpu = boot_cpu_data.cpu_index;
+ strncpy(hc->name, "hpet", sizeof(hc->name));
+ hpet_init_clockevent(hc, 50);
-static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- return hpet_next_event(delta, evt, 0);
-}
+ hc->evt.tick_resume = hpet_clkevt_legacy_resume;
-/*
- * The hpet clock event device
- */
-static struct clock_event_device hpet_clockevent = {
- .name = "hpet",
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .set_state_periodic = hpet_legacy_set_periodic,
- .set_state_oneshot = hpet_legacy_set_oneshot,
- .set_state_shutdown = hpet_legacy_shutdown,
- .tick_resume = hpet_legacy_resume,
- .set_next_event = hpet_legacy_next_event,
- .irq = 0,
- .rating = 50,
-};
+ /*
+ * Legacy horrors and sins from the past. HPET used periodic mode
+ * unconditionally forever on the legacy channel 0. Removing the
+ * below hack and using the conditional in hpet_init_clockevent()
+ * makes at least Qemu and one hardware machine fail to boot.
+ * There are two issues which cause the boot failure:
+ *
+ * #1 After the timer delivery test in IOAPIC and the IOAPIC setup
+ * the next interrupt is not delivered despite the HPET channel
+ * being programmed correctly. Reprogramming the HPET after
+ * switching to IOAPIC makes it work again. After fixing this,
+ * the next issue surfaces:
+ *
+ * #2 Due to the unconditional periodic mode availability the Local
+ * APIC timer calibration can hijack the global clockevents
+ * event handler without causing damage. Using oneshot at this
+ * stage makes it hang because the HPET does not get
+ * reprogrammed due to the handler hijacking. Duh, stupid me!
+ *
+ * Both issues require major surgery and especially the kick HPET
+ * again after enabling IOAPIC results in really nasty hackery.
+ * This 'assume periodic works' magic has survived since HPET
+ * support got added, so it's questionable whether this should be
+ * fixed. Both Qemu and the failing hardware machine support
+ * periodic mode despite the fact that both don't advertise it in
+ * the configuration register and both need that extra kick after
+ * switching to IOAPIC. Seems to be a feature...
+ */
+ hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC;
+ hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic;
+
+ /* Start HPET legacy interrupts */
+ hpet_enable_legacy_int();
+
+ clockevents_config_and_register(&hc->evt, hpet_freq,
+ HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
+ global_clock_event = &hc->evt;
+ pr_debug("Clockevent registered\n");
+}
/*
* HPET MSI Support
*/
#ifdef CONFIG_PCI_MSI
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-static struct hpet_dev *hpet_devs;
-static struct irq_domain *hpet_domain;
-
void hpet_msi_unmask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* unmask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
void hpet_msi_mask(struct irq_data *data)
{
- struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);
+ struct hpet_channel *hc = irq_data_get_irq_handler_data(data);
unsigned int cfg;
- /* mask it */
- cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+ cfg = hpet_readl(HPET_Tn_CFG(hc->num));
cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
- hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
-}
-
-void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
-{
- hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
- hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+ hpet_writel(cfg, HPET_Tn_CFG(hc->num));
}
-void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
+void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
{
- msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
- msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
- msg->address_hi = 0;
+ hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num));
+ hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
}
-static int hpet_msi_shutdown(struct clock_event_device *evt)
+static int hpet_clkevt_msi_resume(struct clock_event_device *evt)
{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_shutdown(evt, hdev->num);
-}
-
-static int hpet_msi_set_oneshot(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_oneshot(evt, hdev->num);
-}
-
-static int hpet_msi_set_periodic(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
- return hpet_set_periodic(evt, hdev->num);
-}
-
-static int hpet_msi_resume(struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- struct irq_data *data = irq_get_irq_data(hdev->irq);
+ struct hpet_channel *hc = clockevent_to_channel(evt);
+ struct irq_data *data = irq_get_irq_data(hc->irq);
struct msi_msg msg;
/* Restore the MSI msg and unmask the interrupt */
irq_chip_compose_msi_msg(data, &msg);
- hpet_msi_write(hdev, &msg);
+ hpet_msi_write(hc, &msg);
hpet_msi_unmask(data);
return 0;
}
-static int hpet_msi_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
- return hpet_next_event(delta, evt, hdev->num);
-}
-
-static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data)
{
- struct hpet_dev *dev = (struct hpet_dev *)data;
- struct clock_event_device *hevt = &dev->evt;
+ struct hpet_channel *hc = data;
+ struct clock_event_device *evt = &hc->evt;
- if (!hevt->event_handler) {
- printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
- dev->num);
+ if (!evt->event_handler) {
+ pr_info("Spurious interrupt HPET channel %d\n", hc->num);
return IRQ_HANDLED;
}
- hevt->event_handler(hevt);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
-static int hpet_setup_irq(struct hpet_dev *dev)
+static int hpet_setup_msi_irq(struct hpet_channel *hc)
{
-
- if (request_irq(dev->irq, hpet_interrupt_handler,
+ if (request_irq(hc->irq, hpet_msi_interrupt_handler,
IRQF_TIMER | IRQF_NOBALANCING,
- dev->name, dev))
+ hc->name, hc))
return -1;
- disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
- enable_irq(dev->irq);
+ disable_irq(hc->irq);
+ irq_set_affinity(hc->irq, cpumask_of(hc->cpu));
+ enable_irq(hc->irq);
- printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
- dev->name, dev->irq);
+ pr_debug("%s irq %u for MSI\n", hc->name, hc->irq);
return 0;
}
-/* This should be called in specific @cpu */
-static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+/* Invoked from the hotplug callback on @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
{
- struct clock_event_device *evt = &hdev->evt;
-
- WARN_ON(cpu != smp_processor_id());
- if (!(hdev->flags & HPET_DEV_VALID))
- return;
-
- hdev->cpu = cpu;
- per_cpu(cpu_hpet_dev, cpu) = hdev;
- evt->name = hdev->name;
- hpet_setup_irq(hdev);
- evt->irq = hdev->irq;
+ struct clock_event_device *evt = &hc->evt;
- evt->rating = 110;
- evt->features = CLOCK_EVT_FEAT_ONESHOT;
- if (hdev->flags & HPET_DEV_PERI_CAP) {
- evt->features |= CLOCK_EVT_FEAT_PERIODIC;
- evt->set_state_periodic = hpet_msi_set_periodic;
- }
+ hc->cpu = cpu;
+ per_cpu(cpu_hpet_channel, cpu) = hc;
+ hpet_setup_msi_irq(hc);
- evt->set_state_shutdown = hpet_msi_shutdown;
- evt->set_state_oneshot = hpet_msi_set_oneshot;
- evt->tick_resume = hpet_msi_resume;
- evt->set_next_event = hpet_msi_next_event;
- evt->cpumask = cpumask_of(hdev->cpu);
+ hpet_init_clockevent(hc, 110);
+ evt->tick_resume = hpet_clkevt_msi_resume;
clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
0x7FFFFFFF);
}
-#ifdef CONFIG_HPET
-/* Reserve at least one timer for userspace (/dev/hpet) */
-#define RESERVE_TIMERS 1
-#else
-#define RESERVE_TIMERS 0
-#endif
-
-static void hpet_msi_capability_lookup(unsigned int start_timer)
+static struct hpet_channel *hpet_get_unused_clockevent(void)
{
- unsigned int id;
- unsigned int num_timers;
- unsigned int num_timers_used = 0;
- int i, irq;
-
- if (hpet_msi_disable)
- return;
-
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return;
- id = hpet_readl(HPET_ID);
-
- num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
- num_timers++; /* Value read out starts from 0 */
- hpet_print_config();
-
- hpet_domain = hpet_create_irq_domain(hpet_blockid);
- if (!hpet_domain)
- return;
-
- hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL);
- if (!hpet_devs)
- return;
-
- hpet_num_timers = num_timers;
-
- for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
- struct hpet_dev *hdev = &hpet_devs[num_timers_used];
- unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
-
- /* Only consider HPET timer with MSI support */
- if (!(cfg & HPET_TN_FSB_CAP))
- continue;
+ int i;
- hdev->flags = 0;
- if (cfg & HPET_TN_PERIODIC_CAP)
- hdev->flags |= HPET_DEV_PERI_CAP;
- sprintf(hdev->name, "hpet%d", i);
- hdev->num = i;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
- irq = hpet_assign_irq(hpet_domain, hdev, hdev->num);
- if (irq <= 0)
+ if (hc->mode != HPET_MODE_CLOCKEVT || hc->in_use)
continue;
-
- hdev->irq = irq;
- hdev->flags |= HPET_DEV_FSB_CAP;
- hdev->flags |= HPET_DEV_VALID;
- num_timers_used++;
- if (num_timers_used == num_possible_cpus())
- break;
+ hc->in_use = 1;
+ return hc;
}
-
- printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
- num_timers, num_timers_used);
+ return NULL;
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
+static int hpet_cpuhp_online(unsigned int cpu)
{
- int i;
-
- if (!hpet_devs)
- return;
+ struct hpet_channel *hc = hpet_get_unused_clockevent();
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ if (hc)
+ init_one_hpet_msi_clockevent(hc, cpu);
+ return 0;
+}
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
+ struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu);
- hd->hd_irq[hdev->num] = hdev->irq;
- hpet_reserve_timer(hd, hdev->num);
- }
+ if (!hc)
+ return 0;
+ free_irq(hc->irq, hc);
+ hc->in_use = 0;
+ per_cpu(cpu_hpet_channel, cpu) = NULL;
+ return 0;
}
-#endif
-static struct hpet_dev *hpet_get_unused_timer(void)
+static void __init hpet_select_clockevents(void)
{
- int i;
+ unsigned int i;
- if (!hpet_devs)
- return NULL;
+ hpet_base.nr_clockevents = 0;
- for (i = 0; i < hpet_num_timers; i++) {
- struct hpet_dev *hdev = &hpet_devs[i];
+ /* No point if MSI is disabled or CPU has an Always Running APIC Timer */
+ if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT))
+ return;
- if (!(hdev->flags & HPET_DEV_VALID))
- continue;
- if (test_and_set_bit(HPET_DEV_USED_BIT,
- (unsigned long *)&hdev->flags))
- continue;
- return hdev;
- }
- return NULL;
-}
+ hpet_print_config();
-struct hpet_work_struct {
- struct delayed_work work;
- struct completion complete;
-};
+ hpet_domain = hpet_create_irq_domain(hpet_blockid);
+ if (!hpet_domain)
+ return;
-static void hpet_work(struct work_struct *w)
-{
- struct hpet_dev *hdev;
- int cpu = smp_processor_id();
- struct hpet_work_struct *hpet_work;
+ for (i = 0; i < hpet_base.nr_channels; i++) {
+ struct hpet_channel *hc = hpet_base.channels + i;
+ int irq;
- hpet_work = container_of(w, struct hpet_work_struct, work.work);
+ if (hc->mode != HPET_MODE_UNUSED)
+ continue;
- hdev = hpet_get_unused_timer();
- if (hdev)
- init_one_hpet_msi_clockevent(hdev, cpu);
+ /* Only consider HPET channel with MSI support */
+ if (!(hc->boot_cfg & HPET_TN_FSB_CAP))
+ continue;
- complete(&hpet_work->complete);
-}
+ sprintf(hc->name, "hpet%d", i);
-static int hpet_cpuhp_online(unsigned int cpu)
-{
- struct hpet_work_struct work;
-
- INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
- init_completion(&work.complete);
- /* FIXME: add schedule_work_on() */
- schedule_delayed_work_on(cpu, &work.work, 0);
- wait_for_completion(&work.complete);
- destroy_delayed_work_on_stack(&work.work);
- return 0;
-}
+ irq = hpet_assign_irq(hpet_domain, hc, hc->num);
+ if (irq <= 0)
+ continue;
-static int hpet_cpuhp_dead(unsigned int cpu)
-{
- struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+ hc->irq = irq;
+ hc->mode = HPET_MODE_CLOCKEVT;
- if (!hdev)
- return 0;
- free_irq(hdev->irq, hdev);
- hdev->flags &= ~HPET_DEV_USED;
- per_cpu(cpu_hpet_dev, cpu) = NULL;
- return 0;
-}
-#else
+ if (++hpet_base.nr_clockevents == num_possible_cpus())
+ break;
+ }
-static void hpet_msi_capability_lookup(unsigned int start_timer)
-{
- return;
+ pr_info("%d channels of %d reserved for per-cpu timers\n",
+ hpet_base.nr_clockevents, hpet_base.nr_channels);
}
-#ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
- return;
-}
-#endif
+#else
+
+static inline void hpet_select_clockevents(void) { }
#define hpet_cpuhp_online NULL
#define hpet_cpuhp_dead NULL
@@ -754,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
/*
* Reading the HPET counter is a very slow operation. If a large number of
* CPUs are trying to access the HPET counter simultaneously, it can cause
- * massive delay and slow down system performance dramatically. This may
+ * massive delays and slow down system performance dramatically. This may
* happen when HPET is the default clock source instead of TSC. For a
* really large system with hundreds of CPUs, the slowdown may be so
- * severe that it may actually crash the system because of a NMI watchdog
+ * severe that it can actually crash the system because of an NMI watchdog
* soft lockup, for example.
*
* If multiple CPUs are trying to access the HPET counter at the same time,
@@ -766,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd)
*
* This special feature is only enabled on x86-64 systems. It is unlikely
* that 32-bit x86 systems will have enough CPUs to require this feature
- * with its associated locking overhead. And we also need 64-bit atomic
- * read.
+ * with its associated locking overhead. We also need 64-bit atomic read.
*
- * The lock and the hpet value are stored together and can be read in a
+ * The lock and the HPET value are stored together and can be read in a
* single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
* is 32 bits in size.
*/
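To make that assumption concrete, here is a minimal sketch of the layout the comment describes (mirroring what hpet.c's read_hpet() does elsewhere; the hpet instance and the old/new locals are assumed context, not part of this hunk):

	union hpet_lock {
		struct {
			arch_spinlock_t lock;	/* assumed to be 32 bits wide */
			u32 value;		/* last counter readout */
		};
		u64 lockval;		/* lock and value as one 64-bit word */
	};

	/*
	 * Contended-read sketch: instead of spinning for the lock, wait
	 * until either the cached value changes or the lock holder drops
	 * the lock, observing both with a single atomic 64-bit load.
	 */
	do {
		cpu_relax();
		new.lockval = READ_ONCE(hpet.lockval);
	} while ((new.value == old.value) && arch_spin_is_locked(&new.lock));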
@@ -858,15 +753,40 @@ static struct clocksource clocksource_hpet = {
.resume = hpet_resume_counter,
};
-static int hpet_clocksource_register(void)
+/*
+ * AMD SB700 based systems with spread spectrum enabled use an SMM-based
+ * HPET emulation to provide proper frequency setting.
+ *
+ * On such systems the SMM code is initialized with the first HPET register
+ * access and takes some time to complete. During this time the config
+ * register reads 0xffffffff. We check up to 1000 times whether the
+ * config register reads a non-0xffffffff value to make sure that the
+ * HPET is up and running before we proceed any further.
+ *
+ * A counting loop is safe, as the HPET access takes thousands of CPU cycles.
+ *
+ * On non-SB700 based machines this check is only done once and has no
+ * side effects.
+ */
+static bool __init hpet_cfg_working(void)
{
- u64 start, now;
- u64 t1;
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (hpet_readl(HPET_CFG) != 0xFFFFFFFF)
+ return true;
+ }
+
+ pr_warn("Config register invalid. Disabling HPET\n");
+ return false;
+}
+
+static bool __init hpet_counting(void)
+{
+ u64 start, now, t1;
- /* Start the counter */
hpet_restart_counter();
- /* Verify whether hpet counter works */
t1 = hpet_readl(HPET_COUNTER);
start = rdtsc();
@@ -877,30 +797,24 @@ static int hpet_clocksource_register(void)
* 1 GHz == 200us
*/
do {
- rep_nop();
+ if (t1 != hpet_readl(HPET_COUNTER))
+ return true;
now = rdtsc();
} while ((now - start) < 200000UL);
- if (t1 == hpet_readl(HPET_COUNTER)) {
- printk(KERN_WARNING
- "HPET counter not counting. HPET disabled\n");
- return -ENODEV;
- }
-
- clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
- return 0;
+ pr_warn("Counter not counting. HPET disabled\n");
+ return false;
}
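As a sanity check on the 200000-cycle bound above: at 1 GHz that window is 200 us and at 4 GHz it is 50 us, while the HPET specification mandates a main counter frequency of at least 10 MHz, i.e. one tick every 100 ns or faster. Even the slowest compliant counter must therefore advance hundreds of times inside the window, so a single unchanged readout is a reliable sign of a dead counter.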
-static u32 *hpet_boot_cfg;
-
/**
* hpet_enable - Try to set up the HPET timer. Returns 1 on success.
*/
int __init hpet_enable(void)
{
- u32 hpet_period, cfg, id;
+ u32 hpet_period, cfg, id, irq;
+ unsigned int i, channels;
+ struct hpet_channel *hc;
u64 freq;
- unsigned int i, last;
if (!is_hpet_capable())
return 0;
@@ -909,40 +823,22 @@ int __init hpet_enable(void)
if (!hpet_virt_address)
return 0;
+ /* Validate that the config register is working */
+ if (!hpet_cfg_working())
+ goto out_nohpet;
+
+ /* Validate that the counter is counting */
+ if (!hpet_counting())
+ goto out_nohpet;
+
/*
* Read the period and check for a sane value:
*/
hpet_period = hpet_readl(HPET_PERIOD);
-
- /*
- * AMD SB700 based systems with spread spectrum enabled use a
- * SMM based HPET emulation to provide proper frequency
- * setting. The SMM code is initialized with the first HPET
- * register access and takes some time to complete. During
- * this time the config register reads 0xffffffff. We check
- * for max. 1000 loops whether the config register reads a non
- * 0xffffffff value to make sure that HPET is up and running
- * before we go further. A counting loop is safe, as the HPET
- * access takes thousands of CPU cycles. On non SB700 based
- * machines this check is only done once and has no side
- * effects.
- */
- for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
- if (i == 1000) {
- printk(KERN_WARNING
- "HPET config register value = 0xFFFFFFFF. "
- "Disabling HPET\n");
- goto out_nohpet;
- }
- }
-
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
- /*
- * The period is a femto seconds value. Convert it to a
- * frequency.
- */
+ /* The period is a femtoseconds value. Convert it to a frequency. */
freq = FSEC_PER_SEC;
do_div(freq, hpet_period);
hpet_freq = freq;
@@ -954,72 +850,90 @@ int __init hpet_enable(void)
id = hpet_readl(HPET_ID);
hpet_print_config();
- last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ /* The ID field is the zero-based index of the last channel, hence the +1 */
+ channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
-#ifdef CONFIG_HPET_EMULATE_RTC
/*
* The legacy routing mode needs at least two channels, tick timer
* and the rtc emulation channel.
*/
- if (!last)
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2)
goto out_nohpet;
-#endif
+ hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL);
+ if (!hc) {
+ pr_warn("Disabling HPET.\n");
+ goto out_nohpet;
+ }
+ hpet_base.channels = hc;
+ hpet_base.nr_channels = channels;
+
+ /* Read, store and sanitize the global configuration */
cfg = hpet_readl(HPET_CFG);
- hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg),
- GFP_KERNEL);
- if (hpet_boot_cfg)
- *hpet_boot_cfg = cfg;
- else
- pr_warn("HPET initial state will not be saved\n");
+ hpet_base.boot_cfg = cfg;
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
- pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
+ pr_warn("Global config: Unknown bits %#x\n", cfg);
+
+ /* Read, store and sanitize the per channel configuration */
+ for (i = 0; i < channels; i++, hc++) {
+ hc->num = i;
- for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i));
- if (hpet_boot_cfg)
- hpet_boot_cfg[i + 1] = cfg;
+ hc->boot_cfg = cfg;
+ irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
+ hc->irq = irq;
+
cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(i));
+
cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg)
- pr_warn("Unrecognized bits %#x set in cfg#%u\n",
- cfg, i);
+ pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg);
}
hpet_print_config();
- if (hpet_clocksource_register())
- goto out_nohpet;
+ clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
- hpet_legacy_clockevent_register();
+ hpet_legacy_clockevent_register(&hpet_base.channels[0]);
+ hpet_base.channels[0].mode = HPET_MODE_LEGACY;
+ if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
+ hpet_base.channels[1].mode = HPET_MODE_LEGACY;
return 1;
}
return 0;
out_nohpet:
+ kfree(hpet_base.channels);
+ hpet_base.channels = NULL;
+ hpet_base.nr_channels = 0;
hpet_clear_mapping();
hpet_address = 0;
return 0;
}
/*
- * Needs to be late, as the reserve_timer code calls kalloc !
+ * The late initialization runs after the PCI quirks have been invoked
+ * which might have detected a system on which the HPET can be enforced.
+ *
+ * Also, the MSI machinery is not working yet when the HPET is initialized
+ * early.
*
- * Not a problem on i386 as hpet_enable is called from late_time_init,
- * but on x86_64 it is necessary !
+ * If the HPET is enabled, then:
+ *
+ * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y
+ * 2) Reserve up to num_possible_cpus() channels as per-CPU clockevents
+ * 3) Setup /dev/hpet if CONFIG_HPET=y
+ * 4) Register hotplug callbacks when clockevents are available
*/
static __init int hpet_late_init(void)
{
int ret;
- if (boot_hpet_disable)
- return -ENODEV;
-
if (!hpet_address) {
if (!force_hpet_address)
return -ENODEV;
@@ -1031,21 +945,14 @@ static __init int hpet_late_init(void)
if (!hpet_virt_address)
return -ENODEV;
- if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP)
- hpet_msi_capability_lookup(2);
- else
- hpet_msi_capability_lookup(0);
-
- hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+ hpet_select_device_channel();
+ hpet_select_clockevents();
+ hpet_reserve_platform_timers();
hpet_print_config();
- if (hpet_msi_disable)
+ if (!hpet_base.nr_clockevents)
return 0;
- if (boot_cpu_has(X86_FEATURE_ARAT))
- return 0;
-
- /* This notifier should be called after workqueue is ready */
ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
hpet_cpuhp_online, NULL);
if (ret)
@@ -1064,47 +971,47 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
- if (is_hpet_capable() && hpet_virt_address) {
- unsigned int cfg = hpet_readl(HPET_CFG), id, last;
-
- if (hpet_boot_cfg)
- cfg = *hpet_boot_cfg;
- else if (hpet_legacy_int_enabled) {
- cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = false;
- }
- cfg &= ~HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
+ unsigned int i;
+ u32 cfg;
- if (!hpet_boot_cfg)
- return;
+ if (!is_hpet_capable() || !hpet_virt_address)
+ return;
- id = hpet_readl(HPET_ID);
- last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ /* Restore boot configuration with the enable bit cleared */
+ cfg = hpet_base.boot_cfg;
+ cfg &= ~HPET_CFG_ENABLE;
+ hpet_writel(cfg, HPET_CFG);
- for (id = 0; id <= last; ++id)
- hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id));
+ /* Restore the channel boot configuration */
+ for (i = 0; i < hpet_base.nr_channels; i++)
+ hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i));
- if (*hpet_boot_cfg & HPET_CFG_ENABLE)
- hpet_writel(*hpet_boot_cfg, HPET_CFG);
- }
+ /* If the HPET was enabled at boot time, reenable it */
+ if (hpet_base.boot_cfg & HPET_CFG_ENABLE)
+ hpet_writel(hpet_base.boot_cfg, HPET_CFG);
}
#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
+/*
+ * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
* is enabled, we support RTC interrupt functionality in software.
+ *
* RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
+ *
+ * 1) Update Interrupt - generate an interrupt, every second, when the
+ * RTC clock is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2)
+ *
+ * (1) and (2) above are implemented using polling at a frequency of 64 Hz:
+ * DEFAULT_RTC_INT_FREQ.
+ *
+ * The exact frequency is a tradeoff between accuracy and interrupt overhead.
+ *
+ * For (3), we use interrupts at 64 Hz or the user-specified periodic
+ * frequency, whichever is higher.
*/
#include <linux/mc146818rtc.h>
#include <linux/rtc.h>
@@ -1125,7 +1032,7 @@ static unsigned long hpet_pie_limit;
static rtc_irq_handler irq_handler;
/*
- * Check that the hpet counter c1 is ahead of the c2
+ * Check that the HPET counter c1 is ahead of c2
*/
static inline int hpet_cnt_ahead(u32 c1, u32 c2)
{
@@ -1163,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler)
EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
/*
- * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
- * is not supported by all HPET implementations for timer 1.
+ * Channel 1 for RTC emulation. We use one shot mode, as periodic mode
+ * is not supported by all HPET implementations for channel 1.
*
* hpet_rtc_timer_init() is called when the rtc is initialized.
*/
@@ -1177,10 +1084,11 @@ int hpet_rtc_timer_init(void)
return 0;
if (!hpet_default_delta) {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
uint64_t clc;
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
- clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
+ clc >>= evt->shift + DEFAULT_RTC_SHIFT;
hpet_default_delta = clc;
}
@@ -1209,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
u32 cfg = hpet_readl(HPET_T1_CFG);
+
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
@@ -1250,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
}
EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit);
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
- unsigned char sec)
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
{
if (!is_hpet_enabled())
return 0;
@@ -1271,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq)
if (!is_hpet_enabled())
return 0;
- if (freq <= DEFAULT_RTC_INT_FREQ)
+ if (freq <= DEFAULT_RTC_INT_FREQ) {
hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
- else {
- clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
+ } else {
+ struct clock_event_device *evt = &hpet_base.channels[0].evt;
+
+ clc = (uint64_t) evt->mult * NSEC_PER_SEC;
do_div(clc, freq);
- clc >>= hpet_clockevent.shift;
+ clc >>= evt->shift;
hpet_pie_delta = clc;
hpet_pie_limit = 0;
}
+
return 1;
}
EXPORT_SYMBOL_GPL(hpet_set_periodic_freq);
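As a worked example of the conversion in hpet_set_periodic_freq() (numbers illustrative, with DEFAULT_RTC_INT_FREQ == 64 as stated in the comment above):

	hpet_set_periodic_freq(2);    /* 2 Hz <= 64: pie_limit = 64 / 2 = 32,
	                                 i.e. deliver RTC_PF on every 32nd
	                                 64 Hz poll */
	hpet_set_periodic_freq(1024); /* 1024 Hz > 64: program the channel
	                                 directly, pie_delta =
	                                 ((mult * NSEC_PER_SEC) / 1024) >> shift */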
@@ -1317,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void)
if (hpet_rtc_flags & RTC_PIE)
hpet_pie_count += lost_ints;
if (printk_ratelimit())
- printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
- lost_ints);
+ pr_warn("Lost %d RTC interrupts\n", lost_ints);
}
}
@@ -1340,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
hpet_prev_update_sec = curr_time.tm_sec;
}
- if (hpet_rtc_flags & RTC_PIE &&
- ++hpet_pie_count >= hpet_pie_limit) {
+ if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) {
rtc_int_flag |= RTC_PF;
hpet_pie_count = 0;
}
@@ -1350,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
(curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
(curr_time.tm_min == hpet_alarm_time.tm_min) &&
(curr_time.tm_hour == hpet_alarm_time.tm_hour))
- rtc_int_flag |= RTC_AF;
+ rtc_int_flag |= RTC_AF;
if (rtc_int_flag) {
rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 0d307a657abb..2b7999a1a50a 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -8,6 +8,7 @@
#include <linux/timex.h>
#include <linux/i8253.h>
+#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/smp.h>
@@ -18,10 +19,32 @@
*/
struct clock_event_device *global_clock_event;
-void __init setup_pit_timer(void)
+/*
+ * Modern chipsets can disable the PIT clock which makes it unusable. It
+ * would be possible to enable the clock but the registers are chipset
+ * specific and not discoverable. Avoid the whack-a-mole game.
+ *
+ * These platforms have discoverable TSC/CPU frequencies, but this also
+ * requires knowing the local APIC timer frequency, as it is normally
+ * calibrated against the PIT interrupt.
+ */
+static bool __init use_pit(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
+ return true;
+
+ /* This also returns true when APIC is disabled */
+ return apic_needs_pit();
+}
+
+bool __init pit_timer_init(void)
{
+ if (!use_pit())
+ return false;
+
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
+ return true;
}
#ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d2482bbbe3d0..87ef69a72c52 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -319,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void)
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
set_bit(i, system_vectors);
- set_intr_gate(i, spurious_interrupt);
+ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
+ set_intr_gate(i, entry);
}
#endif
}
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 64b973f0e985..4c407833faca 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -11,10 +11,11 @@ extern struct boot_params boot_params;
static enum efi_secureboot_mode get_sb_mode(void)
{
efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
+ efi_char16_t efi_SetupMode_name[] = L"SetupMode";
efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_status_t status;
unsigned long size;
- u8 secboot;
+ u8 secboot, setupmode;
size = sizeof(secboot);
@@ -36,7 +37,14 @@ static enum efi_secureboot_mode get_sb_mode(void)
return efi_secureboot_mode_unknown;
}
- if (secboot == 0) {
+ size = sizeof(setupmode);
+ status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+ NULL, &size, &setupmode);
+
+ if (status != EFI_SUCCESS) /* ignore unknown SetupMode */
+ setupmode = 0;
+
+ if (secboot == 0 || setupmode == 1) {
pr_info("ima: secureboot mode disabled\n");
return efi_secureboot_mode_disabled;
}
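Restated as a predicate (a hypothetical helper for illustration, not code from this patch): the platform is treated as secure-boot enabled only when SecureBoot is set and the firmware is not in setup mode:

	static bool ima_secureboot_active(u8 secboot, u8 setupmode)
	{
		/* SetupMode == 1 means key enrollment is open, so the
		 * SecureBoot value cannot be trusted even when it reads
		 * as enabled. */
		return secboot != 0 && setupmode != 1;
	}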
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 805b7a341aca..fdb6506ceaaa 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -13,7 +13,22 @@
#include <linux/dmi.h>
#include <linux/io.h>
-int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
+#define IO_DELAY_TYPE_0X80 0
+#define IO_DELAY_TYPE_0XED 1
+#define IO_DELAY_TYPE_UDELAY 2
+#define IO_DELAY_TYPE_NONE 3
+
+#if defined(CONFIG_IO_DELAY_0X80)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0X80
+#elif defined(CONFIG_IO_DELAY_0XED)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0XED
+#elif defined(CONFIG_IO_DELAY_UDELAY)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_UDELAY
+#elif defined(CONFIG_IO_DELAY_NONE)
+#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_NONE
+#endif
+
+int io_delay_type __read_mostly = DEFAULT_IO_DELAY_TYPE;
static int __initdata io_delay_override;
@@ -24,13 +39,13 @@ void native_io_delay(void)
{
switch (io_delay_type) {
default:
- case CONFIG_IO_DELAY_TYPE_0X80:
+ case IO_DELAY_TYPE_0X80:
asm volatile ("outb %al, $0x80");
break;
- case CONFIG_IO_DELAY_TYPE_0XED:
+ case IO_DELAY_TYPE_0XED:
asm volatile ("outb %al, $0xed");
break;
- case CONFIG_IO_DELAY_TYPE_UDELAY:
+ case IO_DELAY_TYPE_UDELAY:
/*
* 2 usecs is an upper-bound for the outb delay but
* note that udelay doesn't have the bus-level
@@ -39,7 +54,8 @@ void native_io_delay(void)
* are shorter until calibrated):
*/
udelay(2);
- case CONFIG_IO_DELAY_TYPE_NONE:
+ break;
+ case IO_DELAY_TYPE_NONE:
break;
}
}
@@ -47,9 +63,9 @@ EXPORT_SYMBOL(native_io_delay);
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
- if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
+ if (io_delay_type == IO_DELAY_TYPE_0X80) {
pr_notice("%s: using 0xed I/O delay port\n", id->ident);
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
}
return 0;
@@ -115,13 +131,13 @@ static int __init io_delay_param(char *s)
return -EINVAL;
if (!strcmp(s, "0x80"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0X80;
+ io_delay_type = IO_DELAY_TYPE_0X80;
else if (!strcmp(s, "0xed"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
+ io_delay_type = IO_DELAY_TYPE_0XED;
else if (!strcmp(s, "udelay"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY;
+ io_delay_type = IO_DELAY_TYPE_UDELAY;
else if (!strcmp(s, "none"))
- io_delay_type = CONFIG_IO_DELAY_TYPE_NONE;
+ io_delay_type = IO_DELAY_TYPE_NONE;
else
return -EINVAL;
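The accepted strings correspond one-to-one to the io_delay= boot parameter documented in kernel-parameters.txt, so the change here is purely internal; for example, booting with:

	io_delay=udelay

now selects the local IO_DELAY_TYPE_UDELAY constant instead of the former Kconfig-derived CONFIG_IO_DELAY_TYPE_UDELAY value.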
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 9b68b5b00ac9..4215653f8a8e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -135,7 +135,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
-#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
seq_printf(p, "%*s: ", prec, "HYP");
for_each_online_cpu(j)
@@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
- if (desc != VECTOR_RETRIGGERED) {
+ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 1b2ee55a2dfb..6857b4577f17 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
static void __init jailhouse_timer_init(void)
{
- lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
+ lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
}
static unsigned long jailhouse_get_tsc(void)
@@ -203,7 +203,7 @@ bool jailhouse_paravirt(void)
return jailhouse_cpuid_base() != 0;
}
-static bool jailhouse_x2apic_available(void)
+static bool __init jailhouse_x2apic_available(void)
{
/*
* The x2APIC is only available if the root cell enabled it. Jailhouse
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e631c358f7f4..044053235302 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
-static void __ref __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+static void __jump_label_set_jump_code(struct jump_entry *entry,
+ enum jump_label_type type,
+ union jump_code_union *code,
+ int init)
{
- union jump_code_union jmp;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
- const void *expect, *code;
+ const void *expect;
int line;
- jmp.jump = 0xe9;
- jmp.offset = jump_entry_target(entry) -
- (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ code->jump = 0xe9;
+ code->offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
- if (type == JUMP_LABEL_JMP) {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = ideal_nop; line = __LINE__;
- }
-
- code = &jmp.code;
+ if (init) {
+ expect = default_nop; line = __LINE__;
+ } else if (type == JUMP_LABEL_JMP) {
+ expect = ideal_nop; line = __LINE__;
} else {
- if (init) {
- expect = default_nop; line = __LINE__;
- } else {
- expect = &jmp.code; line = __LINE__;
- }
-
- code = ideal_nop;
+ expect = code->code; line = __LINE__;
}
if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE))
bug_at((void *)jump_entry_code(entry), line);
+ if (type == JUMP_LABEL_NOP)
+ memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE);
+}
+
+static void __ref __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
+{
+ union jump_code_union code;
+
+ __jump_label_set_jump_code(entry, type, &code, init);
+
/*
* As long as only a single processor is running and the code is still
* not marked as RO, text_poke_early() can be used; Checking that
@@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
* always nop being the 'currently valid' instruction
*/
if (init || system_state == SYSTEM_BOOTING) {
- text_poke_early((void *)jump_entry_code(entry), code,
+ text_poke_early((void *)jump_entry_code(entry), &code,
JUMP_LABEL_NOP_SIZE);
return;
}
- text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE,
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
(void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
}
@@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry,
mutex_unlock(&text_mutex);
}
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
+bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct text_poke_loc *tp;
+ void *entry_code;
+
+ if (system_state == SYSTEM_BOOTING) {
+ /*
+ * Fallback to the non-batching mode.
+ */
+ arch_jump_label_transform(entry, type);
+ return true;
+ }
+
+ /*
+ * No more space in the vector, tell upper layer to apply
+ * the queue before continuing.
+ */
+ if (tp_vec_nr == TP_VEC_MAX)
+ return false;
+
+ tp = &tp_vec[tp_vec_nr];
+
+ entry_code = (void *)jump_entry_code(entry);
+
+ /*
+ * The INT3 handler will do a bsearch in the queue, so we need entries
+ * to be sorted. We can survive an unsorted list by rejecting the entry,
+ * forcing the generic jump_label code to apply the queue. Warn once to
+ * draw attention to the unsorted case, which should not happen; in the
+ * worst case we simply behave as we would without batching, with some
+ * extra overhead.
+ */
+ if (tp_vec_nr > 0) {
+ int prev = tp_vec_nr - 1;
+ struct text_poke_loc *prev_tp = &tp_vec[prev];
+
+ if (WARN_ON_ONCE(prev_tp->addr > entry_code))
+ return false;
+ }
+
+ __jump_label_set_jump_code(entry, type,
+ (union jump_code_union *) &tp->opcode, 0);
+
+ tp->addr = entry_code;
+ tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
+ tp->len = JUMP_LABEL_NOP_SIZE;
+
+ tp_vec_nr++;
+
+ return true;
+}
+
+void arch_jump_label_transform_apply(void)
+{
+ if (!tp_vec_nr)
+ return;
+
+ mutex_lock(&text_mutex);
+ text_poke_bp_batch(tp_vec, tp_vec_nr);
+ mutex_unlock(&text_mutex);
+
+ tp_vec_nr = 0;
+}
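A sketch of how a caller is expected to drive this queue/apply pair (the real consumer is the generic jump_label batching code, which is not part of this patch; the loop below is illustrative only):

	static void transform_all(struct jump_entry *start,
				  struct jump_entry *stop,
				  enum jump_label_type type)
	{
		struct jump_entry *entry;

		for (entry = start; entry < stop; entry++) {
			if (!arch_jump_label_transform_queue(entry, type)) {
				/* Vector full or unsorted entry: flush what
				 * is queued, then queue this entry again. */
				arch_jump_label_transform_apply();
				arch_jump_label_transform_queue(entry, type);
			}
		}
		arch_jump_label_transform_apply();
	}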
+
static enum {
JL_STATE_START,
JL_STATE_NO_UPDATE,
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 6690c5652aeb..23297ea64f5f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
- case GDB_SS:
- if (!user_mode(regs))
- *(unsigned long *)mem = __KERNEL_DS;
- break;
- case GDB_SP:
- if (!user_mode(regs))
- *(unsigned long *)mem = kernel_stack_pointer(regs);
- break;
case GDB_GS:
case GDB_FS:
*(unsigned long *)mem = 0xFFFF;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2b949f4fd4d8..7d3a2e2daf01 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -5,15 +5,10 @@
/* Kprobes and Optprobes common header */
#include <asm/asm.h>
-
-#ifdef CONFIG_FRAME_POINTER
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
- " mov %" _ASM_SP ", %" _ASM_BP "\n"
-#else
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
-#endif
+#include <asm/frame.h>
#ifdef CONFIG_X86_64
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
" subq $24, %rsp\n" \
@@ -27,11 +22,13 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- SAVE_RBP_STRING \
+ " pushq %rbp\n" \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
- " pushq %r15\n"
+ " pushq %r15\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popq %r15\n" \
" popq %r14\n" \
@@ -51,19 +48,22 @@
/* Skip orig_ax, ip, cs */ \
" addq $24, %rsp\n"
#else
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
+ " subl $4*4, %esp\n" \
" pushl %fs\n" \
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- SAVE_RBP_STRING \
+ " pushl %ebp\n" \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
" pushl %ecx\n" \
- " pushl %ebx\n"
+ " pushl %ebx\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popl %ebx\n" \
" popl %ecx\n" \
@@ -72,8 +72,8 @@
" popl %edi\n" \
" popl %ebp\n" \
" popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
+ /* Skip ds, es, fs, gs, orig_ax, ip, and cs. */\
+ " addl $7*4, %esp\n"
#endif
/* Ensure if the instruction can be boostable */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6afd8061dbae..bd17dbb15d6a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -56,7 +56,7 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
+#define stack_addr(regs) ((unsigned long *)regs->sp)
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -718,29 +718,27 @@ asm(
".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
-#ifdef CONFIG_X86_64
/* We don't bother saving the ss register */
+#ifdef CONFIG_X86_64
" pushq %rsp\n"
" pushfq\n"
SAVE_REGS_STRING
" movq %rsp, %rdi\n"
" call trampoline_handler\n"
/* Replace saved sp with true return address. */
- " movq %rax, 152(%rsp)\n"
+ " movq %rax, 19*8(%rsp)\n"
RESTORE_REGS_STRING
" popfq\n"
#else
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %eax\n"
" call trampoline_handler\n"
- /* Move flags to cs */
- " movl 56(%esp), %edx\n"
- " movl %edx, 52(%esp)\n"
- /* Replace saved flags with true return address. */
- " movl %eax, 56(%esp)\n"
+ /* Replace saved sp with true return address. */
+ " movl %eax, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " popf\n"
+ " popfl\n"
#endif
" ret\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n"
@@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
- /* On x86-64, we use pt_regs->sp for return address holder. */
- frame_pointer = &regs->sp;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
- /* On x86-32, we use pt_regs->flags for return address holder. */
- frame_pointer = &regs->flags;
#endif
+ /* We use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7c361a24c6df..9d4aedece363 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -102,14 +102,15 @@ asm (
"optprobe_template_call:\n"
ASM_NOP5
/* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
+ " movq 18*8(%rsp), %rdx\n"
+ " movq %rdx, 19*8(%rsp)\n"
RESTORE_REGS_STRING
/* Skip flags entry */
" addq $8, %rsp\n"
" popfq\n"
#else /* CONFIG_X86_32 */
- " pushf\n"
+ " pushl %esp\n"
+ " pushfl\n"
SAVE_REGS_STRING
" movl %esp, %edx\n"
".global optprobe_template_val\n"
@@ -118,9 +119,13 @@ asm (
".global optprobe_template_call\n"
"optprobe_template_call:\n"
ASM_NOP5
+ /* Move flags to esp */
+ " movl 14*4(%esp), %edx\n"
+ " movl %edx, 15*4(%esp)\n"
RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
+ /* Skip flags entry */
+ " addl $4, %esp\n"
+ " popfl\n"
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n"
@@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
} else {
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
-#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
+#ifdef CONFIG_X86_32
+ regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
@@ -418,7 +422,7 @@ err:
void arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
s32 rel = (s32)((long)op->optinsn.insn -
@@ -430,10 +434,10 @@ void arch_optimize_kprobes(struct list_head *oplist)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
+ insn_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
list_del_init(&op->list);
@@ -443,12 +447,12 @@ void arch_optimize_kprobes(struct list_head *oplist)
/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
- u8 insn_buf[RELATIVEJUMP_SIZE];
+ u8 insn_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ insn_buff[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
op->optinsn.insn);
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 06f6bb48d018..98039d7fb998 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -58,24 +58,24 @@ struct branch {
u32 delta;
} __attribute__((packed));
-static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+static unsigned paravirt_patch_call(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
- unsigned long delta = (unsigned long)target - (addr+5);
-
- if (len < 5) {
-#ifdef CONFIG_RETPOLINE
- WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
-#endif
- return len; /* call too long for patch site */
+ const int call_len = 5;
+ struct branch *b = insn_buff;
+ unsigned long delta = (unsigned long)target - (addr+call_len);
+
+ if (len < call_len) {
+ pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+ /* Kernel might not be viable if patching fails, bail out: */
+ BUG_ON(1);
}
b->opcode = 0xe8; /* call */
b->delta = delta;
- BUILD_BUG_ON(sizeof(*b) != 5);
+ BUILD_BUG_ON(sizeof(*b) != call_len);
- return 5;
+ return call_len;
}
#ifdef CONFIG_PARAVIRT_XXL
@@ -85,10 +85,10 @@ u64 notrace _paravirt_ident_64(u64 x)
return x;
}
-static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
unsigned long addr, unsigned len)
{
- struct branch *b = insnbuf;
+ struct branch *b = insn_buff;
unsigned long delta = (unsigned long)target - (addr+5);
if (len < 5) {
@@ -113,7 +113,7 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
}
-unsigned paravirt_patch_default(u8 type, void *insnbuf,
+unsigned paravirt_patch_default(u8 type, void *insn_buff,
unsigned long addr, unsigned len)
{
/*
@@ -125,36 +125,36 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
+ ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == _paravirt_nop)
ret = 0;
#ifdef CONFIG_PARAVIRT_XXL
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_64)
- ret = paravirt_patch_ident_64(insnbuf, len);
+ ret = paravirt_patch_ident_64(insn_buff, len);
else if (type == PARAVIRT_PATCH(cpu.iret) ||
type == PARAVIRT_PATCH(cpu.usergs_sysret64))
/* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
#endif
else
/* Otherwise call the function. */
- ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
+ ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
return ret;
}
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
+unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
const char *start, const char *end)
{
unsigned insn_len = end - start;
- if (insn_len > len || start == NULL)
- insn_len = len;
- else
- memcpy(insnbuf, start, insn_len);
+ /* Alternative instruction is too large for the patch site and we cannot continue: */
+ BUG_ON(insn_len > len || start == NULL);
+
+ memcpy(insn_buff, start, insn_len);
return insn_len;
}
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
new file mode 100644
index 000000000000..3eff63c090d2
--- /dev/null
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+
+#include <asm/paravirt.h>
+#include <asm/asm-offsets.h>
+
+#define PSTART(d, m) \
+ patch_data_##d.m
+
+#define PEND(d, m) \
+ (PSTART(d, m) + sizeof(patch_data_##d.m))
+
+#define PATCH(d, m, insn_buff, len) \
+ paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
+
+#define PATCH_CASE(ops, m, data, insn_buff, len) \
+ case PARAVIRT_PATCH(ops.m): \
+ return PATCH(data, ops##_##m, insn_buff, len)
+
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+ const unsigned char irq_irq_disable[1];
+ const unsigned char irq_irq_enable[1];
+ const unsigned char irq_save_fl[2];
+ const unsigned char mmu_read_cr2[3];
+ const unsigned char mmu_read_cr3[3];
+ const unsigned char mmu_write_cr3[3];
+ const unsigned char irq_restore_fl[2];
+# ifdef CONFIG_X86_64
+ const unsigned char cpu_wbinvd[2];
+ const unsigned char cpu_usergs_sysret64[6];
+ const unsigned char cpu_swapgs[3];
+ const unsigned char mov64[3];
+# else
+ const unsigned char cpu_iret[1];
+# endif
+};
+
+static const struct patch_xxl patch_data_xxl = {
+ .irq_irq_disable = { 0xfa }, // cli
+ .irq_irq_enable = { 0xfb }, // sti
+ .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
+ .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
+ .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
+# ifdef CONFIG_X86_64
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
+ .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
+ .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
+ .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8,
+ 0x48, 0x0f, 0x07 }, // swapgs; sysretq
+ .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
+ .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
+# else
+ .mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
+ .irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf
+ .cpu_iret = { 0xcf }, // iret
+# endif
+};
+
+unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
+{
+#ifdef CONFIG_X86_64
+ return PATCH(xxl, mov64, insn_buff, len);
+#endif
+ return 0;
+}
+#endif /* CONFIG_PARAVIRT_XXL */
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+struct patch_lock {
+ unsigned char queued_spin_unlock[3];
+ unsigned char vcpu_is_preempted[2];
+};
+
+static const struct patch_lock patch_data_lock = {
+ .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
+
+# ifdef CONFIG_X86_64
+ .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
+# else
+ .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
+# endif
+};
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
+ unsigned int len)
+{
+ switch (type) {
+
+#ifdef CONFIG_PARAVIRT_XXL
+ PATCH_CASE(irq, restore_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
+ PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
+
+ PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
+ PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
+ PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
+
+# ifdef CONFIG_X86_64
+ PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
+ PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
+ PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
+# else
+ PATCH_CASE(cpu, iret, xxl, insn_buff, len);
+# endif
+#endif
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+ case PARAVIRT_PATCH(lock.queued_spin_unlock):
+ if (pv_is_native_spin_unlock())
+ return PATCH(lock, queued_spin_unlock, insn_buff, len);
+ break;
+
+ case PARAVIRT_PATCH(lock.vcpu_is_preempted):
+ if (pv_is_native_vcpu_is_preempted())
+ return PATCH(lock, vcpu_is_preempted, insn_buff, len);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return paravirt_patch_default(type, insn_buff, addr, len);
+}
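For readers tracing the macros in this new file, a single PATCH_CASE line expands roughly as follows (hand-expanded for illustration), so each case copies the prebuilt byte sequence from patch_data_xxl into the patch site:

	/* PATCH_CASE(irq, save_fl, xxl, insn_buff, len) becomes: */
	case PARAVIRT_PATCH(irq.save_fl):
		return paravirt_patch_insns(insn_buff, len,
				patch_data_xxl.irq_save_fl,
				patch_data_xxl.irq_save_fl +
					sizeof(patch_data_xxl.irq_save_fl));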
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
deleted file mode 100644
index de138d3912e4..000000000000
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- /* arg in %edx:%eax, return in %edx:%eax */
- return 0;
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(cpu, iret);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
deleted file mode 100644
index 9d9e04b31077..000000000000
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
-
-#ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
-
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
-{
- return paravirt_patch_insns(insnbuf, len,
- start__mov64, end__mov64);
-}
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
-{
-#define PATCH_SITE(ops, x) \
- case PARAVIRT_PATCH(ops.x): \
- return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x)
-
- switch (type) {
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_SITE(irq, restore_fl);
- PATCH_SITE(irq, save_fl);
- PATCH_SITE(irq, irq_enable);
- PATCH_SITE(irq, irq_disable);
- PATCH_SITE(cpu, usergs_sysret64);
- PATCH_SITE(cpu, swapgs);
- PATCH_SITE(cpu, wbinvd);
- PATCH_SITE(mmu, read_cr2);
- PATCH_SITE(mmu, read_cr3);
- PATCH_SITE(mmu, write_cr3);
-#endif
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return paravirt_patch_insns(ibuf, len,
- start_lock_queued_spin_unlock,
- end_lock_queued_spin_unlock);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return paravirt_patch_insns(ibuf, len,
- start_lock_vcpu_is_preempted,
- end_lock_vcpu_is_preempted);
- break;
-#endif
-
- default:
- break;
- }
-#undef PATCH_SITE
- return paravirt_patch_default(type, ibuf, addr, len);
-}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2399e910d109..b8ceec4974fe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
- unsigned long sp;
- unsigned short ss, gs;
+ unsigned short gs;
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss;
+ if (user_mode(regs))
gs = get_user_gs(regs);
- } else {
- sp = kernel_stack_pointer(regs);
- savesegment(ss, ss);
+ else
savesegment(gs, gs);
- }
show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
- regs->si, regs->di, regs->bp, sp);
+ regs->si, regs->di, regs->bp, regs->sp);
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags);
if (mode != SHOW_REGS_ALL)
return;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a166c960bc9e..71691a8310e7 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -25,6 +25,7 @@
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/context_tracking.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -154,35 +155,6 @@ static inline bool invalid_selector(u16 value)
#define FLAG_MASK FLAG_MASK_32
-/*
- * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
- * when it traps. The previous stack will be directly underneath the saved
- * registers, and 'sp/ss' won't even have been saved. Thus the '&regs->sp'.
- *
- * Now, if the stack is empty, '&regs->sp' is out of range. In this
- * case we try to take the previous stack. To always return a non-null
- * stack pointer we fall back to regs as stack if no previous stack
- * exists.
- *
- * This is valid only for kernel mode traps.
- */
-unsigned long kernel_stack_pointer(struct pt_regs *regs)
-{
- unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1);
- unsigned long sp = (unsigned long)&regs->sp;
- u32 *prev_esp;
-
- if (context == (sp & ~(THREAD_SIZE - 1)))
- return sp;
-
- prev_esp = (u32 *)(context);
- if (*prev_esp)
- return (unsigned long)*prev_esp;
-
- return (unsigned long)regs;
-}
-EXPORT_SYMBOL_GPL(kernel_stack_pointer);
-
static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
{
BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
@@ -397,22 +369,12 @@ static int putreg(struct task_struct *child,
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
- /*
- * When changing the FS base, use do_arch_prctl_64()
- * to set the index to zero and to set the base
- * as requested.
- */
- if (child->thread.fsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_FS, value);
+ x86_fsbase_write_task(child, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
- /*
- * Exactly the same here as the %fs handling above.
- */
if (value >= TASK_SIZE_MAX)
return -EIO;
- if (child->thread.gsbase != value)
- return do_arch_prctl_64(child, ARCH_SET_GS, value);
+ x86_gsbase_write_task(child, value);
return 0;
#endif
}
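x86_fsbase_write_task() and its gsbase twin live in process_64.c; as a rough sketch of what putreg() now relies on (an assumed shape, not the verbatim helper), they reduce to a plain store of the new base:

	void x86_fsbase_write_task(struct task_struct *task,
				   unsigned long fsbase)
	{
		/* ptrace writes target remote, stopped tasks */
		WARN_ON_ONCE(task == current);
		task->thread.fsbase = fsbase;
	}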
@@ -645,7 +607,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
unsigned long val = 0;
if (n < HBP_NUM) {
- struct perf_event *bp = thread->ptrace_bps[n];
+ int index = array_index_nospec(n, HBP_NUM);
+ struct perf_event *bp = thread->ptrace_bps[index];
if (bp)
val = bp->hw.info.address;
@@ -747,9 +710,6 @@ static int ioperm_get(struct task_struct *target,
void ptrace_disable(struct task_struct *child)
{
user_disable_single_step(child);
-#ifdef TIF_SYSCALL_EMU
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-#endif
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1361,18 +1321,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
{
+ struct task_struct *tsk = current;
+
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
/* Send us the fake SIGTRAP */
force_sig_fault(SIGTRAP, si_code,
- user_mode(regs) ? (void __user *)regs->ip : NULL, tsk);
+ user_mode(regs) ? (void __user *)regs->ip : NULL);
}
void user_single_step_report(struct pt_regs *regs)
{
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ send_sigtrap(regs, 0, TRAP_BRKPT);
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 0ff3e294d0e5..10125358b9c4 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -3,6 +3,7 @@
*/
+#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 364813cea647..7cf508f78c8c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
pr_cont("\n");
}
- force_sig(SIGSEGV, me);
+ force_sig(SIGSEGV);
}
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 4693e2f3a03e..96421f97e75c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
}
cpumask_copy(allbutself, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), allbutself);
+ __cpumask_clear_cpu(smp_processor_id(), allbutself);
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 362dd8953f48..f78801114ee1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+/* representing HT, core, and die siblings of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
+EXPORT_PER_CPU_SYMBOL(cpu_die_map);
+
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
@@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
+static unsigned int logical_die __read_mostly;
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
@@ -205,13 +210,19 @@ static int enable_start_cpu0;
*/
static void notrace start_secondary(void *unused)
{
+ unsigned long cr4 = __read_cr4();
+
/*
* Don't put *anything* except direct CPU state initialization
* before cpu_init(), SMP booting is too fragile that we want to
* limit the things done here to the most necessary things.
*/
if (boot_cpu_has(X86_FEATURE_PCID))
- __write_cr4(__read_cr4() | X86_CR4_PCIDE);
+ cr4 |= X86_CR4_PCIDE;
+ if (static_branch_likely(&cr_pinning))
+ cr4 |= cr4_pinned_bits;
+
+ __write_cr4(cr4);
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
@@ -300,6 +311,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+/**
+ * topology_phys_to_logical_die - Map a physical die id to a logical one
+ * @die_id:  The physical die id as retrieved via CPUID
+ * @cur_cpu: A CPU in the same package, used to resolve the package id
+ *
+ * Returns the logical die id or -1 if not found
+ */
+int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+{
+ int cpu;
+ int proc_id = cpu_data(cur_cpu).phys_proc_id;
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && c->cpu_die_id == die_id &&
+ c->phys_proc_id == proc_id)
+ return c->logical_die_id;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
@@ -324,6 +355,29 @@ found:
cpu_data(cpu).logical_proc_id = new;
return 0;
}
+/**
+ * topology_update_die_map - Update the physical to logical die map
+ * @die: The die id as retrieved via CPUID
+ * @cpu: The cpu for which this is updated
+ */
+int topology_update_die_map(unsigned int die, unsigned int cpu)
+{
+ int new;
+
+ /* Already available somewhere? */
+ new = topology_phys_to_logical_die(die, cpu);
+ if (new >= 0)
+ goto found;
+
+ new = logical_die++;
+ if (new != die) {
+ pr_info("CPU %u Converting physical %u to logical die %u\n",
+ cpu, die, new);
+ }
+found:
+ cpu_data(cpu).logical_die_id = new;
+ return 0;
+}
void __init smp_store_boot_cpu_info(void)
{
@@ -333,6 +387,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
topology_update_package_map(c->phys_proc_id, id);
+ topology_update_die_map(c->cpu_die_id, id);
c->initialized = true;
}
@@ -387,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
if (c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
@@ -398,6 +454,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
}
} else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id &&
c->cpu_core_id == o->cpu_core_id) {
return topology_sane(c, o, "smt");
}
@@ -460,6 +517,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if ((c->phys_proc_id == o->phys_proc_id) &&
+ (c->cpu_die_id == o->cpu_die_id))
+ return true;
+ return false;
+}
+
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
@@ -522,6 +588,7 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
+ cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
return;
}
@@ -570,6 +637,9 @@ void set_cpu_sibling_map(int cpu)
}
if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
+
+ if ((i == cpu) || (has_mp && match_die(c, o)))
+ link_mask(topology_die_cpumask, cpu, i);
}
threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1174,6 +1244,7 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
+ cpumask_set_cpu(0, topology_die_cpumask(0));
}
/*
@@ -1269,6 +1340,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
@@ -1489,6 +1561,8 @@ static void remove_siblinginfo(int cpu)
cpu_data(sibling).booted_cores--;
}
+ for_each_cpu(sibling, topology_die_cpumask(cpu))
+ cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
for_each_cpu(sibling, topology_sibling_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
@@ -1496,6 +1570,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
+ cpumask_clear(topology_die_cpumask(cpu));
c->cpu_core_id = 0;
c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0e14f6c0d35e..7ce29cee9f9e 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
- unsigned long *sp =
- (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *sp = (unsigned long *)regs->sp;
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
@@ -82,8 +81,11 @@ static void __init setup_default_timer_irq(void)
/* Default timer init function */
void __init hpet_time_init(void)
{
- if (!hpet_enable())
- setup_pit_timer();
+ if (!hpet_enable()) {
+ if (!pit_timer_init())
+ return;
+ }
+
setup_default_timer_irq();
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index a5b802a12212..71d3fef1edc9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -5,6 +5,7 @@
#include <linux/user.h>
#include <linux/regset.h>
#include <linux/syscalls.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/desc.h>
@@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *u_info)
{
struct user_desc info;
+ int index;
if (idx == -1 && get_user(idx, &u_info->entry_number))
return -EFAULT;
@@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx,
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- fill_user_desc(&info, idx,
- &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]);
+ index = idx - GDT_ENTRY_TLS_MIN;
+ index = array_index_nospec(index,
+ GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1);
+
+ fill_user_desc(&info, idx, &p->thread.tls_array[index]);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
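The do_get_thread_area() change is the standard Spectre v1 hardening: after the architectural bounds check, array_index_nospec() clamps the index so that even speculative execution cannot read past the table. A minimal sketch of the pattern, with a hypothetical table and bound (NR_ENTRIES and read_entry() are illustrative):

#include <linux/nospec.h>

#define NR_ENTRIES 8	/* hypothetical bound */

static int read_entry(const int *table, unsigned int idx)
{
	if (idx >= NR_ENTRIES)		/* architectural bounds check */
		return -EINVAL;
	/* Clamp so mispredicted execution also stays within the table. */
	idx = array_index_nospec(idx, NR_ENTRIES);
	return table[idx];
}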
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8b6d03e55d2f..87095a477154 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -254,9 +254,9 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
show_signal(tsk, signr, "trap ", str, regs, error_code);
if (!sicode)
- force_sig(signr, tsk);
+ force_sig(signr);
else
- force_sig_fault(signr, sicode, addr, tsk);
+ force_sig_fault(signr, sicode, addr);
}
NOKPROBE_SYMBOL(do_trap);
@@ -566,7 +566,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
show_signal(tsk, SIGSEGV, "", desc, regs, error_code);
- force_sig(SIGSEGV, tsk);
+ force_sig(SIGSEGV);
}
NOKPROBE_SYMBOL(do_general_protection);
@@ -805,7 +805,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
}
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
- send_sigtrap(tsk, regs, error_code, si_code);
+ send_sigtrap(regs, error_code, si_code);
cond_local_irq_disable(regs);
debug_stack_usage_dec();
@@ -856,7 +856,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
force_sig_fault(SIGFPE, si_code,
- (void __user *)uprobe_get_trap_addr(regs), task);
+ (void __user *)uprobe_get_trap_addr(regs));
}
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0b29e58f288e..57d87f79558f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -59,7 +59,7 @@ struct cyc2ns {
static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
-void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
int seq, idx;
@@ -76,7 +76,7 @@ void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
}
-void __always_inline cyc2ns_read_end(void)
+__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
}
@@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void)
crystal_khz = ecx_hz / 1000;
- if (crystal_khz == 0) {
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
- crystal_khz = 24000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_X:
- crystal_khz = 25000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- crystal_khz = 19200; /* 19.2 MHz */
- break;
- }
- }
+ /*
+ * Denverton SoCs don't report the crystal clock, and also don't support
+ * CPUID.0x16 for the calculation below, so hardcode the 25 MHz crystal
+ * clock.
+ */
+ if (crystal_khz == 0 &&
+ boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+ crystal_khz = 25000;
- if (crystal_khz == 0)
- return 0;
/*
- * TSC frequency determined by CPUID is a "hardware reported"
+ * TSC frequency reported directly by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
* is considered a known frequency.
*/
- setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ if (crystal_khz != 0)
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+ * clock, but we can easily calculate it to a high degree of accuracy
+ * by considering the crystal ratio and the CPU speed.
+ */
+ if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+ unsigned int eax_base_mhz, ebx, ecx, edx;
+
+ cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+ crystal_khz = eax_base_mhz * 1000 *
+ eax_denominator / ebx_numerator;
+ }
+
+ if (crystal_khz == 0)
+ return 0;
/*
* For Atom SoCs TSC is the only reliable clocksource.
@@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void)
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * The local APIC appears to be fed by the core crystal clock
+ * (which sounds entirely sensible). We can set the global
+ * lapic_timer_period here to avoid having to calibrate the APIC
+ * timer later.
+ */
+ lapic_timer_period = crystal_khz * 1000 / HZ;
+#endif
+
return crystal_khz * ebx_numerator / eax_denominator;
}
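The reworked calibration combines CPUID.15H (the TSC/crystal ratio, plus the crystal frequency when the CPU reports it) with CPUID.16H (base frequency in MHz) to recover a missing crystal frequency. A worked sketch with made-up sample values; the 24 MHz crystal and the 100/2 ratio are illustrative only:

static void tsc_calibration_sketch(void)
{
	unsigned int eax_denominator = 2;	/* hypothetical CPUID.15H EAX */
	unsigned int ebx_numerator = 100;	/* hypothetical CPUID.15H EBX */
	unsigned int eax_base_mhz = 1200;	/* hypothetical CPUID.16H EAX */
	unsigned int crystal_khz, tsc_khz;

	/* Crystal not reported: back-compute it from the base frequency
	 * and the TSC/crystal ratio. */
	crystal_khz = eax_base_mhz * 1000 * eax_denominator / ebx_numerator;
	/* 1200 * 1000 * 2 / 100 = 24000 kHz, i.e. a 24 MHz crystal */

	tsc_khz = crystal_khz * ebx_numerator / eax_denominator;
	/* 24000 * 100 / 2 = 1200000 kHz, i.e. a 1.2 GHz TSC; with HZ == 250,
	 * lapic_timer_period would be 24000 * 1000 / 250 = 96000. */
	(void)tsc_khz;
}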
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 3d0e9aeea7c8..067858fe4db8 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
/*
* MSR-based CPU/TSC frequency discovery for certain CPUs.
*
- * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
+ * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
* Return processor base frequency in KHz, or 0 on failure.
*/
unsigned long cpu_khz_from_msr(void)
@@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void)
res = freq * ratio;
#ifdef CONFIG_X86_LOCAL_APIC
- lapic_timer_frequency = (freq * 1000) / HZ;
+ lapic_timer_period = (freq * 1000) / HZ;
#endif
/*
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f8f3cfda01ae..5b345add550f 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk);
+ force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
return;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 6106760de716..a224b5ab103f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state)
}
}
-static size_t regs_size(struct pt_regs *regs)
-{
- /* x86_32 regs from kernel mode are two words shorter: */
- if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
- return sizeof(*regs) - 2*sizeof(long);
-
- return sizeof(*regs);
-}
-
static bool in_entry_code(unsigned long ip)
{
char *addr = (char *)ip;
@@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
}
#endif
-#ifdef CONFIG_X86_32
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
-#else
-#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
-#endif
-
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
{
@@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state,
size_t len;
if (state->regs)
- prev_frame_end = (void *)state->regs + regs_size(state->regs);
+ prev_frame_end = (void *)state->regs + sizeof(*state->regs);
else
prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE;
@@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = KERNEL_REGS_SIZE;
+ len = sizeof(*regs);
state->got_irq = true;
} else {
frame = next_bp;
@@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
- /*
- * On 32-bit with user mode regs, make sure the last two regs are safe
- * to access:
- */
- if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
- !on_stack(info, frame, len + 2*sizeof(long)))
- return false;
-
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
* Pretend that the frame is complete and that BP points to it, but save
* the real BP so that we can use it when looking for the next frame.
*/
- if (regs && regs->ip == 0 &&
- (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+ if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) {
state->next_bp = bp;
- bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+ bp = ((unsigned long *)regs->sp) - 1;
}
/* Initialize stack info and make sure the frame data is accessible: */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 72b997eaa1fc..332ae6530fa8 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -598,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
goto done;
state->ip = regs->ip;
- state->sp = kernel_stack_pointer(regs);
+ state->sp = regs->sp;
state->bp = regs->bp;
state->regs = regs;
state->full_regs = true;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 918b5092a85f..d8359ebeea70 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1074,7 +1074,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
current->pid, regs->sp, regs->ip);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
return -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6a38717d179c..a76c12b38e92 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
return 1; /* we let this be handled by the calling routine */
current->thread.trap_nr = trapno;
current->thread.error_code = error_code;
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
return 0;
}
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5b6d1f..d78a61408243 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
- [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
- [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX},
[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},
[CPUID_6_EAX] = { 6, 0, CPUID_EAX},
[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a21c440ff356..4dabc318adb8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2339,7 +2339,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!apic_enabled(apic))
+ if (!kvm_apic_hw_enabled(apic))
return -1;
__apic_update_ppr(apic, &ppr);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 132d149494d6..ab73a9a639ae 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -261,10 +261,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
ctr_val = rdtsc();
break;
case VMWARE_BACKDOOR_PMC_REAL_TIME:
- ctr_val = ktime_get_boot_ns();
+ ctr_val = ktime_get_boottime_ns();
break;
case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
- ctr_val = ktime_get_boot_ns() +
+ ctr_val = ktime_get_boottime_ns() +
vcpu->kvm->arch.kvmclock_offset;
break;
default:
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 5f9c1a200201..46af3a5e9209 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5240,9 +5240,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
- kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
-
if (nested_vmx_allowed(vcpu) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
@@ -5251,6 +5248,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx_has_valid_vmcs12(vcpu)) {
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
+ if (vmx->nested.hv_evmcs)
+ kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
+
if (is_guest_mode(vcpu) &&
nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull)
@@ -5350,6 +5350,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
return -EINVAL;
+ /*
+ * KVM_STATE_NESTED_EVMCS used to signal that KVM should
+ * enable the eVMCS capability on the vCPU. However, the code
+ * has since been changed such that the flag signals that
+ * vmcs12 should be copied into the eVMCS in guest memory.
+ *
+ * To preserve backwards compatibility, allow the user
+ * to set this flag even when there is no VMXON region.
+ */
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
@@ -5358,7 +5367,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
return -EINVAL;
- }
+ }
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
@@ -5373,20 +5382,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
* must be zero.
*/
- if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags)
+ if (is_smm(vcpu) ?
+ (kvm_state->flags &
+ (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+ : kvm_state->hdr.vmx.smm.flags)
return -EINVAL;
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
!(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
- vmx_leave_nested(vcpu);
- if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
- if (!nested_vmx_allowed(vcpu))
+ if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
+ (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;
- nested_enable_evmcs(vcpu, NULL);
- }
+ vmx_leave_nested(vcpu);
if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9857992d4e58..63bb1ee8258e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -1554,7 +1555,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
vcpu->arch.tsc_always_catchup = 1;
return 0;
} else {
- WARN(1, "user requested TSC rate below hardware speed\n");
+ pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
return -1;
}
}
@@ -1564,8 +1565,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
user_tsc_khz, tsc_khz);
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
- WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
+ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
return -1;
}
@@ -1728,7 +1729,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = ktime_get_boot_ns();
+ ns = ktime_get_boottime_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
@@ -2070,7 +2071,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
- return ktime_get_boot_ns() + ka->kvmclock_offset;
+ return ktime_get_boottime_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
@@ -2086,7 +2087,7 @@ u64 get_kvmclock_ns(struct kvm *kvm)
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
- ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+ ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
put_cpu();
@@ -2185,7 +2186,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
if (!use_master_clock) {
host_tsc = rdtsc();
- kernel_ns = ktime_get_boot_ns();
+ kernel_ns = ktime_get_boottime_ns();
}
tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
@@ -9015,7 +9016,7 @@ int kvm_arch_hardware_enable(void)
* before any KVM threads can be running. Unfortunately, we can't
* bring the TSCs fully up to date with real time, as we aren't yet far
* enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boot_ns() will be using boot
+ * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
* variables that haven't been updated yet.
*
* So we simply find the maximum observed TSC above, then record the
@@ -9243,7 +9244,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
- kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+ kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
pvclock_update_vm_gtod_copy(kvm);
kvm->arch.guest_can_read_msr_platform_info = true;
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
index 1811fa4a1b1a..7c48ff4ae8d1 100644
--- a/arch/x86/lib/cache-smp.c
+++ b/arch/x86/lib/cache-smp.c
@@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu);
int wbinvd_on_all_cpus(void)
{
- return on_each_cpu(__wbinvd, NULL, 1);
+ on_each_cpu(__wbinvd, NULL, 1);
+ return 0;
}
EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6aae46..794f364cb882 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address,
* To avoid leaking information about the kernel page
* table layout, pretend that user-mode accesses to
* kernel addresses are always protection faults.
+ *
+ * NB: This means that failed vsyscalls with vsyscall=none
+ * will have the PROT bit set. This doesn't leak any
+ * information and does not appear to cause any problems.
*/
if (address >= TASK_SIZE_MAX)
error_code |= X86_PF_PROT;
@@ -756,8 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
set_signal_archinfo(address, error_code);
/* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address,
- tsk);
+ force_sig_fault(signal, si_code, (void __user *)address);
}
/*
@@ -918,7 +921,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (si_code == SEGV_PKUERR)
force_sig_pkuerr((void __user *)address, pkey);
- force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
+ force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
}
@@ -1015,8 +1018,6 @@ static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
vm_fault_t fault)
{
- struct task_struct *tsk = current;
-
/* Kernel mode? Handle exceptions or die: */
if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
@@ -1031,6 +1032,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
#ifdef CONFIG_MEMORY_FAILURE
if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ struct task_struct *tsk = current;
unsigned lsb = 0;
pr_err(
@@ -1040,11 +1042,11 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
if (fault & VM_FAULT_HWPOISON)
lsb = PAGE_SHIFT;
- force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk);
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
return;
}
#endif
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
}
static noinline void
@@ -1369,16 +1371,18 @@ void do_user_addr_fault(struct pt_regs *regs,
#ifdef CONFIG_X86_64
/*
- * Instruction fetch faults in the vsyscall page might need
- * emulation. The vsyscall page is at a high address
- * (>PAGE_OFFSET), but is considered to be part of the user
- * address space.
+ * Faults in the vsyscall page might need emulation. The
+ * vsyscall page is at a high address (>PAGE_OFFSET), but is
+ * considered to be part of the user address space.
*
* The vsyscall page does not have a "real" VMA, so do this
* emulation before we go searching for VMAs.
+ *
+ * PKRU never rejects instruction fetches, so we don't need
+ * to consider the PF_PK bit.
*/
- if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) {
- if (emulate_vsyscall(regs, address))
+ if (is_vsyscall_vaddr(address)) {
+ if (emulate_vsyscall(hw_error_code, regs, address))
return;
}
#endif
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 0d1c47cbbdd6..895fb7a9294d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -912,7 +912,7 @@ void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
ret = mpx_unmap_tables(mm, start, end);
if (ret)
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
}
/* MPX cannot handle addresses above 47 bits yet. */
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 8d4daca81eda..c33f744b5388 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -20,7 +20,6 @@
#include <linux/moduleparam.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index 136974ec9a90..73a3f49b4eb6 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
#include <linux/dmi.h>
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 2c24d8d30436..163e1b545517 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -18,7 +18,6 @@
#include <linux/string.h>
#include <linux/leds.h>
#include <linux/platform_device.h>
-#include <linux/gpio.h>
#include <linux/input.h>
#include <linux/gpio_keys.h>
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index a9c3db125222..9ad6842de4b4 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -11,3 +11,13 @@ config RAS_CEC
Bear in mind that this is absolutely useless if your platform doesn't
have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
+
+config RAS_CEC_DEBUG
+ bool "CEC debugging machinery"
+ default n
+ depends on RAS_CEC
+ help
+ Add extra files to (debugfs)/ras/cec to test the correctable error
+ collector feature. "pfn" is a writable file that allows the user to
+ simulate an error in a particular page frame. "array" is a read-only
+ file that dumps out the current state of all pages logged so far.
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index e455349e0ab5..34eda63c124b 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -111,7 +111,7 @@ static void parse_args(int argc, char **argv)
int main(int argc, char **argv)
{
char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
- unsigned char insn_buf[16];
+ unsigned char insn_buff[16];
struct insn insn;
int insns = 0;
int warnings = 0;
@@ -130,7 +130,7 @@ int main(int argc, char **argv)
}
insns++;
- memset(insn_buf, 0, 16);
+ memset(insn_buff, 0, 16);
strcpy(copy, line);
tab1 = strchr(copy, '\t');
if (!tab1)
@@ -143,13 +143,13 @@ int main(int argc, char **argv)
*tab2 = '\0'; /* Characters beyond tab2 aren't examined */
while (s < tab2) {
if (sscanf(s, "%x", &b) == 1) {
- insn_buf[nb++] = (unsigned char) b;
+ insn_buff[nb++] = (unsigned char) b;
s += 3;
} else
break;
}
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.length != nb) {
warnings++;
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 14cf07916081..185ceba9d289 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -83,7 +83,7 @@ static void dump_insn(FILE *fp, struct insn *insn)
}
static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
- unsigned char *insn_buf, struct insn *insn)
+ unsigned char *insn_buff, struct insn *insn)
{
int i;
@@ -96,7 +96,7 @@ static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
/* Input a decoded instruction sequence directly */
fprintf(fp, " $ echo ");
for (i = 0; i < MAX_INSN_SIZE; i++)
- fprintf(fp, " %02x", insn_buf[i]);
+ fprintf(fp, " %02x", insn_buff[i]);
fprintf(fp, " | %s -i -\n", prog);
if (!input_file) {
@@ -124,7 +124,7 @@ fail:
}
/* Read given instruction sequence from the input file */
-static int read_next_insn(unsigned char *insn_buf)
+static int read_next_insn(unsigned char *insn_buff)
{
char buf[256] = "", *tmp;
int i;
@@ -134,7 +134,7 @@ static int read_next_insn(unsigned char *insn_buf)
return 0;
for (i = 0; i < MAX_INSN_SIZE; i++) {
- insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+ insn_buff[i] = (unsigned char)strtoul(tmp, &tmp, 16);
if (*tmp != ' ')
break;
}
@@ -142,19 +142,19 @@ static int read_next_insn(unsigned char *insn_buf)
return i;
}
-static int generate_insn(unsigned char *insn_buf)
+static int generate_insn(unsigned char *insn_buff)
{
int i;
if (input_file)
- return read_next_insn(insn_buf);
+ return read_next_insn(insn_buff);
/* Fills buffer with random binary up to MAX_INSN_SIZE */
for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
- *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+ *(unsigned short *)(&insn_buff[i]) = random() & 0xffff;
while (i < MAX_INSN_SIZE)
- insn_buf[i++] = random() & 0xff;
+ insn_buff[i++] = random() & 0xff;
return i;
}
@@ -226,31 +226,31 @@ int main(int argc, char **argv)
int insns = 0;
int errors = 0;
unsigned long i;
- unsigned char insn_buf[MAX_INSN_SIZE * 2];
+ unsigned char insn_buff[MAX_INSN_SIZE * 2];
parse_args(argc, argv);
/* Prepare stop bytes with NOPs */
- memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+ memset(insn_buff + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
for (i = 0; i < iter_end; i++) {
- if (generate_insn(insn_buf) <= 0)
+ if (generate_insn(insn_buff) <= 0)
break;
if (i < iter_start) /* Skip to given iteration number */
continue;
/* Decode an instruction */
- insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+ insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
insn_get_length(&insn);
if (insn.next_byte <= insn.kaddr ||
insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
/* Access out-of-range memory */
- dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+ dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
errors++;
} else if (verbose && !insn_complete(&insn))
- dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+ dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
else if (verbose >= 2)
dump_insn(stdout, &insn);
insns++;
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 8b4a71efe7ee..7c11c9e5d7ea 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -471,7 +471,7 @@ long sys_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
@@ -577,6 +577,6 @@ long sys_rt_sigreturn(void)
return PT_REGS_SYSCALL_RET(&current->thread.regs);
segfault:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e07abefd3d26..ba5a41828e9d 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,6 +7,7 @@ config XEN
bool "Xen guest support"
depends on PARAVIRT
select PARAVIRT_CLOCK
+ select X86_HV_CALLBACK_VECTOR
depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_LOCAL_APIC && X86_TSC
help
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 590fcf863006..77d81c1a63e9 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -251,6 +251,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index dc22a238ed9c..fbedf2aba09d 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -270,7 +270,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
return ret;
badframe:
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 454d53096bc9..f060348c1b23 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -184,7 +184,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
"\tEXCCAUSE is %ld\n",
current->comm, task_pid_nr(current), regs->pc,
exccause);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
/*
@@ -306,7 +306,7 @@ do_illegal_instruction(struct pt_regs *regs)
pr_info_ratelimited("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
current->comm, task_pid_nr(current), regs->pc);
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -330,7 +330,7 @@ do_unaligned_user (struct pt_regs *regs)
"(pid = %d, pc = %#010lx)\n",
regs->excvaddr, current->comm,
task_pid_nr(current), regs->pc);
- force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current);
+ force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr);
}
#endif
@@ -354,7 +354,7 @@ do_debug(struct pt_regs *regs)
/* If in user mode, send SIGTRAP signal to current process */
- force_sig(SIGTRAP, current);
+ force_sig(SIGTRAP);
}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 2ab0e0dcd166..f81b1478da61 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -157,7 +157,7 @@ bad_area:
if (user_mode(regs)) {
current->thread.bad_vaddr = address;
current->thread.error_code = is_write;
- force_sig_fault(SIGSEGV, code, (void *) address, current);
+ force_sig_fault(SIGSEGV, code, (void *) address);
return;
}
bad_page_fault(regs, address, SIGSEGV);
@@ -182,7 +182,7 @@ do_sigbus:
* or user mode.
*/
current->thread.bad_vaddr = address;
- force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address);
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs))
diff --git a/block/Kconfig b/block/Kconfig
index 2466dcc3ef1d..56cb1695cd87 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -89,7 +89,7 @@ config BLK_DEV_THROTTLING
one needs to mount and use blkio cgroup controller for creating
cgroups and specifying per device IO rate policies.
- See Documentation/cgroup-v1/blkio-controller.txt for more information.
+ See Documentation/cgroup-v1/blkio-controller.rst for more information.
config BLK_DEV_THROTTLING_LOW
bool "Block throttling .low limit interface support (EXPERIMENTAL)"
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 2489ddbb21db..3afe327f816f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -934,6 +934,13 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
{
struct elevator_type *e = q->elevator->type;
+ /*
+ * If the parent directory has not been created yet, return; we will be
+ * called again later and the directory/files will be created then.
+ */
+ if (!q->debugfs_dir)
+ return;
+
if (!e->queue_debugfs_attrs)
return;
diff --git a/certs/blacklist.c b/certs/blacklist.c
index f1a8672123c3..93d70b885f8e 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -89,8 +89,7 @@ int mark_hash_blacklisted(const char *hash)
hash,
NULL,
0,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW),
+ &internal_key_acl,
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN);
if (IS_ERR(key)) {
@@ -124,7 +123,7 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
*p = 0;
kref = keyring_search(make_key_ref(blacklist_keyring, true),
- &key_type_blacklist, buffer);
+ &key_type_blacklist, buffer, false);
if (!IS_ERR(kref)) {
key_ref_put(kref);
ret = -EKEYREJECTED;
@@ -149,9 +148,7 @@ static int __init blacklist_init(void)
keyring_alloc(".blacklist",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ |
- KEY_USR_SEARCH,
+ &internal_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA |
KEY_FLAG_KEEP,
NULL, NULL);
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 1eba08a1af82..57be78b5fdfc 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -99,9 +99,7 @@ static __init int system_trusted_keyring_init(void)
builtin_trusted_keys =
keyring_alloc(".builtin_trusted_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
- KEY_ALLOC_NOT_IN_QUOTA,
+ &internal_key_acl, KEY_ALLOC_NOT_IN_QUOTA,
NULL, NULL);
if (IS_ERR(builtin_trusted_keys))
panic("Can't allocate builtin trusted keyring\n");
@@ -110,10 +108,7 @@ static __init int system_trusted_keyring_init(void)
secondary_trusted_keys =
keyring_alloc(".secondary_trusted_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
- KEY_USR_WRITE),
- KEY_ALLOC_NOT_IN_QUOTA,
+ &internal_writable_keyring_acl, KEY_ALLOC_NOT_IN_QUOTA,
get_builtin_and_secondary_restriction(),
NULL);
if (IS_ERR(secondary_trusted_keys))
@@ -163,8 +158,7 @@ static __init int load_system_certificate_list(void)
NULL,
p,
plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
+ &internal_key_acl,
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN |
KEY_ALLOC_BYPASS_RESTRICTION);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 3d056e7da65f..e801450bcb1c 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -61,7 +61,6 @@ config CRYPTO_BLKCIPHER2
tristate
select CRYPTO_ALGAPI2
select CRYPTO_RNG2
- select CRYPTO_WORKQUEUE
config CRYPTO_HASH
tristate
@@ -137,10 +136,11 @@ config CRYPTO_USER
Userspace configuration for cryptographic instantiations such as
cbc(aes).
+if CRYPTO_MANAGER2
+
config CRYPTO_MANAGER_DISABLE_TESTS
bool "Disable run-time self tests"
default y
- depends on CRYPTO_MANAGER2
help
Disable run-time self tests that normally take place at
algorithm registration.
@@ -155,14 +155,10 @@ config CRYPTO_MANAGER_EXTRA_TESTS
This is intended for developer use only, as these tests take much
longer to run than the normal self tests.
+endif # if CRYPTO_MANAGER2
+
config CRYPTO_GF128MUL
- tristate "GF(2^128) multiplication functions"
- help
- Efficient table driven implementation of multiplications in the
- field GF(2^128). This is needed by some cypher modes. This
- option will be selected automatically if you select such a
- cipher mode. Only select this option by hand if you expect to load
- an external module that requires these functions.
+ tristate
config CRYPTO_NULL
tristate "Null algorithms"
@@ -186,15 +182,11 @@ config CRYPTO_PCRYPT
This converts an arbitrary crypto algorithm into a parallel
algorithm that executes in kernel threads.
-config CRYPTO_WORKQUEUE
- tristate
-
config CRYPTO_CRYPTD
tristate "Software async crypto daemon"
select CRYPTO_BLKCIPHER
select CRYPTO_HASH
select CRYPTO_MANAGER
- select CRYPTO_WORKQUEUE
help
This is a generic software asynchronous crypto daemon that
converts an arbitrary synchronous software crypto algorithm
@@ -279,6 +271,7 @@ config CRYPTO_CCM
select CRYPTO_CTR
select CRYPTO_HASH
select CRYPTO_AEAD
+ select CRYPTO_MANAGER
help
Support for Counter with CBC MAC. Required for IPsec.
@@ -288,6 +281,7 @@ config CRYPTO_GCM
select CRYPTO_AEAD
select CRYPTO_GHASH
select CRYPTO_NULL
+ select CRYPTO_MANAGER
help
Support for Galois/Counter Mode (GCM) and Galois Message
Authentication Code (GMAC). Required for IPSec.
@@ -297,6 +291,7 @@ config CRYPTO_CHACHA20POLY1305
select CRYPTO_CHACHA20
select CRYPTO_POLY1305
select CRYPTO_AEAD
+ select CRYPTO_MANAGER
help
ChaCha20-Poly1305 AEAD support, RFC7539.
@@ -411,6 +406,7 @@ config CRYPTO_SEQIV
select CRYPTO_BLKCIPHER
select CRYPTO_NULL
select CRYPTO_RNG_DEFAULT
+ select CRYPTO_MANAGER
help
This IV generator generates an IV based on a sequence number by
xoring it with a salt. This algorithm is mainly useful for CTR
@@ -420,7 +416,7 @@ config CRYPTO_ECHAINIV
select CRYPTO_AEAD
select CRYPTO_NULL
select CRYPTO_RNG_DEFAULT
- default m
+ select CRYPTO_MANAGER
help
This IV generator generates an IV based on the encryption of
a sequence number xored with a salt. This is the default
@@ -456,6 +452,7 @@ config CRYPTO_CTR
config CRYPTO_CTS
tristate "CTS support"
select CRYPTO_BLKCIPHER
+ select CRYPTO_MANAGER
help
CTS: Cipher Text Stealing
This is the Cipher Text Stealing mode as described by
@@ -521,6 +518,7 @@ config CRYPTO_XTS
config CRYPTO_KEYWRAP
tristate "Key wrapping support"
select CRYPTO_BLKCIPHER
+ select CRYPTO_MANAGER
help
Support for key wrapping (NIST SP800-38F / RFC3394) without
padding.
@@ -551,6 +549,7 @@ config CRYPTO_ADIANTUM
select CRYPTO_CHACHA20
select CRYPTO_POLY1305
select CRYPTO_NHPOLY1305
+ select CRYPTO_MANAGER
help
Adiantum is a tweakable, length-preserving encryption mode
designed for fast and secure disk encryption, especially on
@@ -684,6 +683,14 @@ config CRYPTO_CRC32_MIPS
instructions, when available.
+config CRYPTO_XXHASH
+ tristate "xxHash hash algorithm"
+ select CRYPTO_HASH
+ select XXHASH
+ help
+ xxHash non-cryptographic hash algorithm. Extremely fast, working at
+ speeds close to RAM limits.
+
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF algorithm"
select CRYPTO_HASH
@@ -1230,9 +1237,13 @@ config CRYPTO_ANUBIS
<https://www.cosic.esat.kuleuven.be/nessie/reports/>
<http://www.larc.usp.br/~pbarreto/AnubisPage.html>
+config CRYPTO_LIB_ARC4
+ tristate
+
config CRYPTO_ARC4
tristate "ARC4 cipher algorithm"
select CRYPTO_BLKCIPHER
+ select CRYPTO_LIB_ARC4
help
ARC4 cipher algorithm.
diff --git a/crypto/Makefile b/crypto/Makefile
index 266a4cdbb9e2..9479e1a45d8c 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -6,8 +6,6 @@
obj-$(CONFIG_CRYPTO) += crypto.o
crypto-y := api.o cipher.o compress.o memneq.o
-obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o
-
obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
@@ -131,6 +129,7 @@ obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
+obj-$(CONFIG_CRYPTO_XXHASH) += xxhash_generic.o
obj-$(CONFIG_CRYPTO_842) += 842.o
obj-$(CONFIG_CRYPTO_RNG2) += rng.o
obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
diff --git a/crypto/aead.c b/crypto/aead.c
index c3c158ba9883..fbf0ec93bc8e 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -84,6 +84,42 @@ int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
}
EXPORT_SYMBOL_GPL(crypto_aead_setauthsize);
+int crypto_aead_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_alg *alg = aead->base.__crt_alg;
+ unsigned int cryptlen = req->cryptlen;
+ int ret;
+
+ crypto_stats_get(alg);
+ if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
+ ret = -ENOKEY;
+ else
+ ret = crypto_aead_alg(aead)->encrypt(req);
+ crypto_stats_aead_encrypt(cryptlen, alg, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_aead_encrypt);
+
+int crypto_aead_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_alg *alg = aead->base.__crt_alg;
+ unsigned int cryptlen = req->cryptlen;
+ int ret;
+
+ crypto_stats_get(alg);
+ if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
+ ret = -ENOKEY;
+ else if (req->cryptlen < crypto_aead_authsize(aead))
+ ret = -EINVAL;
+ else
+ ret = crypto_aead_alg(aead)->decrypt(req);
+ crypto_stats_aead_decrypt(cryptlen, alg, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_aead_decrypt);
+
static void crypto_aead_exit_tfm(struct crypto_tfm *tfm)
{
struct crypto_aead *aead = __crypto_aead_cast(tfm);
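With the encrypt/decrypt entry points now out of line, the checks above are enforced in one place: -ENOKEY for an unkeyed tfm, and -EINVAL for a ciphertext shorter than the authentication tag. A hedged usage sketch of the AEAD API follows; the algorithm name and all buffers are illustrative, and error paths are kept minimal:

#include <crypto/aead.h>

static int aead_encrypt_sketch(struct scatterlist *src, struct scatterlist *dst,
			       const u8 *key, unsigned int keylen,
			       unsigned int assoclen, unsigned int cryptlen,
			       u8 *iv)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);	/* example algorithm */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);	/* else encrypt gives -ENOKEY */
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, src, dst, cryptlen, iv);
	err = crypto_aead_encrypt(req);		/* the helper shown above */

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}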
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 313a7682cef1..de30ddc952d8 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -21,23 +21,6 @@
static LIST_HEAD(crypto_template_list);
-static inline int crypto_set_driver_name(struct crypto_alg *alg)
-{
- static const char suffix[] = "-generic";
- char *driver_name = alg->cra_driver_name;
- int len;
-
- if (*driver_name)
- return 0;
-
- len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
- if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
- return -ENAMETOOLONG;
-
- memcpy(driver_name + len, suffix, sizeof(suffix));
- return 0;
-}
-
static inline void crypto_check_module_sig(struct module *mod)
{
if (fips_enabled && mod && !module_sig_ok(mod))
@@ -49,6 +32,9 @@ static int crypto_check_alg(struct crypto_alg *alg)
{
crypto_check_module_sig(alg->cra_module);
+ if (!alg->cra_name[0] || !alg->cra_driver_name[0])
+ return -EINVAL;
+
if (alg->cra_alignmask & (alg->cra_alignmask + 1))
return -EINVAL;
@@ -74,7 +60,7 @@ static int crypto_check_alg(struct crypto_alg *alg)
refcount_set(&alg->cra_refcnt, 1);
- return crypto_set_driver_name(alg);
+ return 0;
}
static void crypto_free_instance(struct crypto_instance *inst)
@@ -947,19 +933,6 @@ struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
}
EXPORT_SYMBOL_GPL(crypto_dequeue_request);
-int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm)
-{
- struct crypto_async_request *req;
-
- list_for_each_entry(req, &queue->list, list) {
- if (req->tfm == tfm)
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_tfm_in_queue);
-
static inline void crypto_inc_byte(u8 *a, unsigned int size)
{
u8 *b = (a + size);
diff --git a/crypto/anubis.c b/crypto/anubis.c
index 673927de0eb9..f9ce78fde6ee 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -673,6 +673,7 @@ static void anubis_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
static struct crypto_alg anubis_alg = {
.cra_name = "anubis",
+ .cra_driver_name = "anubis-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = ANUBIS_BLOCK_SIZE,
.cra_ctxsize = sizeof (struct anubis_ctx),
diff --git a/crypto/arc4.c b/crypto/arc4.c
index a2120e06bf84..aa79571dbd49 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -13,84 +13,15 @@
#include <linux/init.h>
#include <linux/module.h>
-struct arc4_ctx {
- u32 S[256];
- u32 x, y;
-};
-
-static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- struct arc4_ctx *ctx = crypto_tfm_ctx(tfm);
- int i, j = 0, k = 0;
-
- ctx->x = 1;
- ctx->y = 0;
-
- for (i = 0; i < 256; i++)
- ctx->S[i] = i;
-
- for (i = 0; i < 256; i++) {
- u32 a = ctx->S[i];
- j = (j + in_key[k] + a) & 0xff;
- ctx->S[i] = ctx->S[j];
- ctx->S[j] = a;
- if (++k >= key_len)
- k = 0;
- }
-
- return 0;
-}
-
-static int arc4_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int crypto_arc4_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
{
- return arc4_set_key(&tfm->base, in_key, key_len);
-}
-
-static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in,
- unsigned int len)
-{
- u32 *const S = ctx->S;
- u32 x, y, a, b;
- u32 ty, ta, tb;
-
- if (len == 0)
- return;
-
- x = ctx->x;
- y = ctx->y;
-
- a = S[x];
- y = (y + a) & 0xff;
- b = S[y];
-
- do {
- S[y] = a;
- a = (a + b) & 0xff;
- S[x] = b;
- x = (x + 1) & 0xff;
- ta = S[x];
- ty = (y + ta) & 0xff;
- tb = S[ty];
- *out++ = *in++ ^ S[a];
- if (--len == 0)
- break;
- y = ty;
- a = ta;
- b = tb;
- } while (true);
-
- ctx->x = x;
- ctx->y = y;
-}
+ struct arc4_ctx *ctx = crypto_skcipher_ctx(tfm);
-static void arc4_crypt_one(struct crypto_tfm *tfm, u8 *out, const u8 *in)
-{
- arc4_crypt(crypto_tfm_ctx(tfm), out, in, 1);
+ return arc4_setkey(ctx, in_key, key_len);
}
-static int ecb_arc4_crypt(struct skcipher_request *req)
+static int crypto_arc4_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct arc4_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -108,54 +39,32 @@ static int ecb_arc4_crypt(struct skcipher_request *req)
return err;
}
-static struct crypto_alg arc4_cipher = {
- .cra_name = "arc4",
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = ARC4_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct arc4_ctx),
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = ARC4_MIN_KEY_SIZE,
- .cia_max_keysize = ARC4_MAX_KEY_SIZE,
- .cia_setkey = arc4_set_key,
- .cia_encrypt = arc4_crypt_one,
- .cia_decrypt = arc4_crypt_one,
- },
- },
-};
-
-static struct skcipher_alg arc4_skcipher = {
+static struct skcipher_alg arc4_alg = {
+ /*
+ * For legacy reasons, this is named "ecb(arc4)", not "arc4".
+ * Nevertheless it's actually a stream cipher, not a block cipher.
+ */
.base.cra_name = "ecb(arc4)",
+ .base.cra_driver_name = "ecb(arc4)-generic",
.base.cra_priority = 100,
.base.cra_blocksize = ARC4_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct arc4_ctx),
.base.cra_module = THIS_MODULE,
.min_keysize = ARC4_MIN_KEY_SIZE,
.max_keysize = ARC4_MAX_KEY_SIZE,
- .setkey = arc4_set_key_skcipher,
- .encrypt = ecb_arc4_crypt,
- .decrypt = ecb_arc4_crypt,
+ .setkey = crypto_arc4_setkey,
+ .encrypt = crypto_arc4_crypt,
+ .decrypt = crypto_arc4_crypt,
};
static int __init arc4_init(void)
{
- int err;
-
- err = crypto_register_alg(&arc4_cipher);
- if (err)
- return err;
-
- err = crypto_register_skcipher(&arc4_skcipher);
- if (err)
- crypto_unregister_alg(&arc4_cipher);
- return err;
+ return crypto_register_skcipher(&arc4_alg);
}
static void __exit arc4_exit(void)
{
- crypto_unregister_alg(&arc4_cipher);
- crypto_unregister_skcipher(&arc4_skcipher);
+ crypto_unregister_skcipher(&arc4_alg);
}
subsys_initcall(arc4_init);
@@ -164,4 +73,4 @@ module_exit(arc4_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ARC4 Cipher Algorithm");
MODULE_AUTHOR("Jon Oberheide <jon@oberheide.org>");
-MODULE_ALIAS_CRYPTO("arc4");
+MODULE_ALIAS_CRYPTO("ecb(arc4)");
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index be70ca6c85d3..1f1f004dc757 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -15,6 +15,7 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
select MPILIB
select CRYPTO_HASH_INFO
select CRYPTO_AKCIPHER
+ select CRYPTO_HASH
help
This option provides support for asymmetric public key type handling.
If signature generation and/or verification are to be used,
@@ -65,6 +66,7 @@ config TPM_KEY_PARSER
config PKCS7_MESSAGE_PARSER
tristate "PKCS#7 message parser"
depends on X509_CERTIFICATE_PARSER
+ select CRYPTO_HASH
select ASN1
select OID_REGISTRY
help
@@ -87,6 +89,7 @@ config SIGNED_PE_FILE_VERIFICATION
bool "Support for PE file signature verification"
depends on PKCS7_MESSAGE_PARSER=y
depends on SYSTEM_DATA_VERIFICATION
+ select CRYPTO_HASH
select ASN1
select OID_REGISTRY
help
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 01945ab46382..6e5fc8e31f01 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -83,7 +83,7 @@ struct key *find_asymmetric_key(struct key *keyring,
pr_debug("Look up: \"%s\"\n", req);
ref = keyring_search(make_key_ref(keyring, 1),
- &key_type_asymmetric, req);
+ &key_type_asymmetric, req, true);
if (IS_ERR(ref))
pr_debug("Request for key '%s' err %ld\n", req, PTR_ERR(ref));
kfree(req);
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 2db7eac4bf3b..74e824e537e6 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -61,6 +61,8 @@ struct chachapoly_req_ctx {
unsigned int cryptlen;
/* Actual AD, excluding IV */
unsigned int assoclen;
+ /* request flags, with MAY_SLEEP cleared if needed */
+ u32 flags;
union {
struct poly_req poly;
struct chacha_req chacha;
@@ -70,8 +72,12 @@ struct chachapoly_req_ctx {
static inline void async_done_continue(struct aead_request *req, int err,
int (*cont)(struct aead_request *))
{
- if (!err)
+ if (!err) {
+ struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+
+ rctx->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
err = cont(req);
+ }
if (err != -EINPROGRESS && err != -EBUSY)
aead_request_complete(req, err);
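The new flags field snapshots the caller's request flags once per operation; whenever a step resumes from an asynchronous completion (typically softirq context), CRYPTO_TFM_REQ_MAY_SLEEP is stripped so later sub-requests never sleep. A sketch of the pattern with a hypothetical request context (sketch_req_ctx and both functions are illustrative):

#include <crypto/internal/aead.h>

struct sketch_req_ctx {			/* hypothetical per-request context */
	u32 flags;
};

static int sketch_start(struct aead_request *req)
{
	struct sketch_req_ctx *rctx = aead_request_ctx(req);

	rctx->flags = aead_request_flags(req);	/* snapshot the caller's flags */
	return 0;
}

static void sketch_step_done(struct aead_request *req)
{
	struct sketch_req_ctx *rctx = aead_request_ctx(req);

	/* Resumed from an async completion: sleeping is no longer safe. */
	rctx->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
}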
@@ -129,16 +135,12 @@ static int chacha_decrypt(struct aead_request *req)
chacha_iv(creq->iv, req, 1);
- sg_init_table(rctx->src, 2);
src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen);
dst = src;
-
- if (req->src != req->dst) {
- sg_init_table(rctx->dst, 2);
+ if (req->src != req->dst)
dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen);
- }
- skcipher_request_set_callback(&creq->req, aead_request_flags(req),
+ skcipher_request_set_callback(&creq->req, rctx->flags,
chacha_decrypt_done, req);
skcipher_request_set_tfm(&creq->req, ctx->chacha);
skcipher_request_set_crypt(&creq->req, src, dst,
@@ -172,17 +174,13 @@ static int poly_tail(struct aead_request *req)
struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
struct poly_req *preq = &rctx->u.poly;
- __le64 len;
int err;
- sg_init_table(preq->src, 1);
- len = cpu_to_le64(rctx->assoclen);
- memcpy(&preq->tail.assoclen, &len, sizeof(len));
- len = cpu_to_le64(rctx->cryptlen);
- memcpy(&preq->tail.cryptlen, &len, sizeof(len));
- sg_set_buf(preq->src, &preq->tail, sizeof(preq->tail));
+ preq->tail.assoclen = cpu_to_le64(rctx->assoclen);
+ preq->tail.cryptlen = cpu_to_le64(rctx->cryptlen);
+ sg_init_one(preq->src, &preq->tail, sizeof(preq->tail));
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_tail_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, preq->src,
@@ -205,15 +203,14 @@ static int poly_cipherpad(struct aead_request *req)
struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
struct poly_req *preq = &rctx->u.poly;
- unsigned int padlen, bs = POLY1305_BLOCK_SIZE;
+ unsigned int padlen;
int err;
- padlen = (bs - (rctx->cryptlen % bs)) % bs;
+ padlen = -rctx->cryptlen % POLY1305_BLOCK_SIZE;
memset(preq->pad, 0, sizeof(preq->pad));
- sg_init_table(preq->src, 1);
- sg_set_buf(preq->src, &preq->pad, padlen);
+ sg_init_one(preq->src, preq->pad, padlen);
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_cipherpad_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
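The simplified padlen computation leans on unsigned wraparound: for an unsigned len and a power-of-two block size B, -len % B equals the classic (B - len % B) % B, because B divides 2^32 exactly. A worked check with len = 17:

static unsigned int padlen_sketch(void)
{
	unsigned int len = 17;			/* sample length */
	unsigned int padlen = -len % 16;	/* (2^32 - 17) % 16 = 15 */

	/* Classic form agrees: (16 - 17 % 16) % 16 = 15. The shortcut is
	 * valid only because 16 is a power of two dividing 2^32. */
	return padlen;
}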
@@ -241,10 +238,9 @@ static int poly_cipher(struct aead_request *req)
if (rctx->cryptlen == req->cryptlen) /* encrypting */
crypt = req->dst;
- sg_init_table(rctx->src, 2);
crypt = scatterwalk_ffwd(rctx->src, crypt, req->assoclen);
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_cipher_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, crypt, NULL, rctx->cryptlen);
@@ -266,15 +262,14 @@ static int poly_adpad(struct aead_request *req)
struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
struct poly_req *preq = &rctx->u.poly;
- unsigned int padlen, bs = POLY1305_BLOCK_SIZE;
+ unsigned int padlen;
int err;
- padlen = (bs - (rctx->assoclen % bs)) % bs;
+ padlen = -rctx->assoclen % POLY1305_BLOCK_SIZE;
memset(preq->pad, 0, sizeof(preq->pad));
- sg_init_table(preq->src, 1);
- sg_set_buf(preq->src, preq->pad, padlen);
+ sg_init_one(preq->src, preq->pad, padlen);
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_adpad_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
@@ -298,7 +293,7 @@ static int poly_ad(struct aead_request *req)
struct poly_req *preq = &rctx->u.poly;
int err;
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_ad_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, req->src, NULL, rctx->assoclen);
@@ -322,10 +317,9 @@ static int poly_setkey(struct aead_request *req)
struct poly_req *preq = &rctx->u.poly;
int err;
- sg_init_table(preq->src, 1);
- sg_set_buf(preq->src, rctx->key, sizeof(rctx->key));
+ sg_init_one(preq->src, rctx->key, sizeof(rctx->key));
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_setkey_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
ahash_request_set_crypt(&preq->req, preq->src, NULL, sizeof(rctx->key));
@@ -349,7 +343,7 @@ static int poly_init(struct aead_request *req)
struct poly_req *preq = &rctx->u.poly;
int err;
- ahash_request_set_callback(&preq->req, aead_request_flags(req),
+ ahash_request_set_callback(&preq->req, rctx->flags,
poly_init_done, req);
ahash_request_set_tfm(&preq->req, ctx->poly);
@@ -381,13 +375,12 @@ static int poly_genkey(struct aead_request *req)
rctx->assoclen -= 8;
}
- sg_init_table(creq->src, 1);
memset(rctx->key, 0, sizeof(rctx->key));
- sg_set_buf(creq->src, rctx->key, sizeof(rctx->key));
+ sg_init_one(creq->src, rctx->key, sizeof(rctx->key));
chacha_iv(creq->iv, req, 0);
- skcipher_request_set_callback(&creq->req, aead_request_flags(req),
+ skcipher_request_set_callback(&creq->req, rctx->flags,
poly_genkey_done, req);
skcipher_request_set_tfm(&creq->req, ctx->chacha);
skcipher_request_set_crypt(&creq->req, creq->src, creq->src,
@@ -418,16 +411,12 @@ static int chacha_encrypt(struct aead_request *req)
chacha_iv(creq->iv, req, 1);
- sg_init_table(rctx->src, 2);
src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen);
dst = src;
-
- if (req->src != req->dst) {
- sg_init_table(rctx->dst, 2);
+ if (req->src != req->dst)
dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen);
- }
- skcipher_request_set_callback(&creq->req, aead_request_flags(req),
+ skcipher_request_set_callback(&creq->req, rctx->flags,
chacha_encrypt_done, req);
skcipher_request_set_tfm(&creq->req, ctx->chacha);
skcipher_request_set_crypt(&creq->req, src, dst,
@@ -445,6 +434,7 @@ static int chachapoly_encrypt(struct aead_request *req)
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
rctx->cryptlen = req->cryptlen;
+ rctx->flags = aead_request_flags(req);
/* encrypt call chain:
* - chacha_encrypt/done()
@@ -466,6 +456,7 @@ static int chachapoly_decrypt(struct aead_request *req)
struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
rctx->cryptlen = req->cryptlen - POLY1305_DIGEST_SIZE;
+ rctx->flags = aead_request_flags(req);
/* decrypt call chain:
* - poly_genkey/done()
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
index 04404c479e68..085d8d219987 100644
--- a/crypto/chacha_generic.c
+++ b/crypto/chacha_generic.c
@@ -32,7 +32,7 @@ static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
}
static int chacha_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+ const struct chacha_ctx *ctx, const u8 *iv)
{
struct skcipher_walk walk;
u32 state[16];
@@ -56,7 +56,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
return err;
}
-void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv)
+void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
{
state[0] = 0x61707865; /* "expa" */
state[1] = 0x3320646e; /* "nd 3" */
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 1ce1bf6d3bab..3748f9b4516d 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -16,7 +16,6 @@
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/cryptd.h>
-#include <crypto/crypto_wq.h>
#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -26,11 +25,14 @@
#include <linux/scatterlist.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
static unsigned int cryptd_max_cpu_qlen = 1000;
module_param(cryptd_max_cpu_qlen, uint, 0);
MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth");
+static struct workqueue_struct *cryptd_wq;
+
struct cryptd_cpu_queue {
struct crypto_queue queue;
struct work_struct work;
@@ -136,7 +138,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
if (err == -ENOSPC)
goto out_put_cpu;
- queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
+ queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
if (!atomic_read(refcnt))
goto out_put_cpu;
@@ -179,7 +181,7 @@ static void cryptd_queue_worker(struct work_struct *work)
req->complete(req, 0);
if (cpu_queue->queue.qlen)
- queue_work(kcrypto_wq, &cpu_queue->work);
+ queue_work(cryptd_wq, &cpu_queue->work);
}
static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
@@ -388,6 +390,7 @@ static void cryptd_skcipher_free(struct skcipher_instance *inst)
struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
crypto_drop_skcipher(&ctx->spawn);
+ kfree(inst);
}
static int cryptd_create_skcipher(struct crypto_template *tmpl,
@@ -918,7 +921,7 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
case CRYPTO_ALG_TYPE_BLKCIPHER:
return cryptd_create_skcipher(tmpl, tb, &queue);
- case CRYPTO_ALG_TYPE_DIGEST:
+ case CRYPTO_ALG_TYPE_HASH:
return cryptd_create_hash(tmpl, tb, &queue);
case CRYPTO_ALG_TYPE_AEAD:
return cryptd_create_aead(tmpl, tb, &queue);
@@ -1118,19 +1121,31 @@ static int __init cryptd_init(void)
{
int err;
+ cryptd_wq = alloc_workqueue("cryptd", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
+ 1);
+ if (!cryptd_wq)
+ return -ENOMEM;
+
err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen);
if (err)
- return err;
+ goto err_destroy_wq;
err = crypto_register_template(&cryptd_tmpl);
if (err)
- cryptd_fini_queue(&queue);
+ goto err_fini_queue;
+ return 0;
+
+err_fini_queue:
+ cryptd_fini_queue(&queue);
+err_destroy_wq:
+ destroy_workqueue(cryptd_wq);
return err;
}
static void __exit cryptd_exit(void)
{
+ destroy_workqueue(cryptd_wq);
cryptd_fini_queue(&queue);
crypto_unregister_template(&cryptd_tmpl);
}
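The reworked cryptd_init() uses the common kernel unwind idiom: resources are released in reverse order of acquisition through a chain of goto labels that fall through into one another, so every failure path tears down exactly what was already set up. A minimal standalone sketch of the pattern (the setup/teardown names are illustrative, not from the patch):

	#include <stdlib.h>

	static int setup_a(void) { return 0; }	/* like alloc_workqueue() */
	static int setup_b(void) { return 0; }	/* like cryptd_init_queue() */
	static int setup_c(void) { return 0; }	/* like crypto_register_template() */
	static void teardown_a(void) { }
	static void teardown_b(void) { }

	static int demo_init(void)
	{
		int err;

		err = setup_a();
		if (err)
			return err;

		err = setup_b();
		if (err)
			goto err_undo_a;

		err = setup_c();
		if (err)
			goto err_undo_b;

		return 0;

	err_undo_b:
		teardown_b();
	err_undo_a:		/* labels fall through: reverse order of setup */
		teardown_a();
		return err;
	}

	int main(void)
	{
		return demo_init() ? EXIT_FAILURE : EXIT_SUCCESS;
	}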
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 0d341ddecd54..5b84b0f7cc17 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -100,6 +100,7 @@ static struct shash_alg digest_null = {
.final = null_final,
.base = {
.cra_name = "digest_null",
+ .cra_driver_name = "digest_null-generic",
.cra_blocksize = NULL_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
@@ -122,6 +123,7 @@ static struct skcipher_alg skcipher_null = {
static struct crypto_alg null_algs[] = { {
.cra_name = "cipher_null",
+ .cra_driver_name = "cipher_null-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = NULL_BLOCK_SIZE,
.cra_ctxsize = 0,
@@ -134,6 +136,7 @@ static struct crypto_alg null_algs[] = { {
.cia_decrypt = null_crypt } }
}, {
.cra_name = "compress_null",
+ .cra_driver_name = "compress_null-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_blocksize = NULL_BLOCK_SIZE,
.cra_ctxsize = 0,
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index d5d5d155340b..c65e39005ce2 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -44,6 +44,9 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
list_for_each_entry(q, &crypto_alg_list, cra_list) {
int match = 0;
+ if (crypto_is_larval(q))
+ continue;
+
if ((q->cra_flags ^ p->cru_type) & p->cru_mask)
continue;
diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c
deleted file mode 100644
index 80501928e0bb..000000000000
--- a/crypto/crypto_wq.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Workqueue for crypto subsystem
- *
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- */
-
-#include <linux/workqueue.h>
-#include <linux/module.h>
-#include <crypto/algapi.h>
-#include <crypto/crypto_wq.h>
-
-struct workqueue_struct *kcrypto_wq;
-EXPORT_SYMBOL_GPL(kcrypto_wq);
-
-static int __init crypto_wq_init(void)
-{
- kcrypto_wq = alloc_workqueue("crypto",
- WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1);
- if (unlikely(!kcrypto_wq))
- return -ENOMEM;
- return 0;
-}
-
-static void __exit crypto_wq_exit(void)
-{
- destroy_workqueue(kcrypto_wq);
-}
-
-subsys_initcall(crypto_wq_init);
-module_exit(crypto_wq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Workqueue for crypto subsystem");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 65be51456398..4c0e6c9d942a 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -275,6 +275,7 @@ static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src,
static struct crypto_alg alg = {
.cra_name = "deflate",
+ .cra_driver_name = "deflate-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct deflate_ctx),
.cra_module = THIS_MODULE,
diff --git a/crypto/drbg.c b/crypto/drbg.c
index 2a5b16bb000c..b6929eb5f565 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -220,6 +220,57 @@ static inline unsigned short drbg_sec_strength(drbg_flag_t flags)
}
/*
+ * FIPS 140-2 continuous self test for the noise source
+ * The test is performed on the noise source input data. Thus, the function
+ * implicitly knows the size of the buffer to be equal to the security
+ * strength.
+ *
+ * Note, this function disregards the nonce trailing the entropy data during
+ * initial seeding.
+ *
+ * drbg->drbg_mutex must have been taken.
+ *
+ * @drbg DRBG handle
+ * @entropy buffer of seed data to be checked
+ *
+ * return:
+ * 0 on success
+ * -EAGAIN when the CTRNG is not yet primed
+ * < 0 on error
+ */
+static int drbg_fips_continuous_test(struct drbg_state *drbg,
+ const unsigned char *entropy)
+{
+ unsigned short entropylen = drbg_sec_strength(drbg->core->flags);
+ int ret = 0;
+
+ if (!IS_ENABLED(CONFIG_CRYPTO_FIPS))
+ return 0;
+
+ /* skip test if we test the overall system */
+ if (list_empty(&drbg->test_data.list))
+ return 0;
+ /* only perform test in FIPS mode */
+ if (!fips_enabled)
+ return 0;
+
+ if (!drbg->fips_primed) {
+ /* Priming of FIPS test */
+ memcpy(drbg->prev, entropy, entropylen);
+ drbg->fips_primed = true;
+ /* priming: another round is needed */
+ return -EAGAIN;
+ }
+ ret = memcmp(drbg->prev, entropy, entropylen);
+ if (!ret)
+ panic("DRBG continuous self test failed\n");
+ memcpy(drbg->prev, entropy, entropylen);
+
+ /* the test shall pass when the two values are not equal */
+ return 0;
+}
+
+/*
* Convert an integer into a byte representation of this integer.
* The byte representation is big-endian
*
@@ -998,6 +1049,22 @@ static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed,
return ret;
}
+static inline int drbg_get_random_bytes(struct drbg_state *drbg,
+ unsigned char *entropy,
+ unsigned int entropylen)
+{
+ int ret;
+
+ do {
+ get_random_bytes(entropy, entropylen);
+ ret = drbg_fips_continuous_test(drbg, entropy);
+ if (ret && ret != -EAGAIN)
+ return ret;
+ } while (ret);
+
+ return 0;
+}
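Taken together, drbg_fips_continuous_test() and drbg_get_random_bytes() implement the FIPS 140-2 continuous test: the first sample only primes the comparison buffer and is discarded (-EAGAIN forces a retry), and each later sample must differ from its predecessor or the kernel panics. A userspace model of the same control flow (the noise stub and 32-byte length are placeholders):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define SEEDLEN 32

	static unsigned char prev[SEEDLEN];
	static bool primed;

	/* Stand-in noise source; the real code uses get_random_bytes(). */
	static void get_noise(unsigned char *buf, size_t len)
	{
		for (size_t i = 0; i < len; i++)
			buf[i] = (unsigned char)rand();
	}

	/* 0 = sample accepted, -1 = priming round, abort() = stuck source */
	static int continuous_test(const unsigned char *sample)
	{
		if (!primed) {
			memcpy(prev, sample, SEEDLEN);	/* prime only */
			primed = true;
			return -1;			/* caller must retry */
		}
		if (memcmp(prev, sample, SEEDLEN) == 0)
			abort();	/* identical back-to-back samples */
		memcpy(prev, sample, SEEDLEN);
		return 0;
	}

	int main(void)
	{
		unsigned char sample[SEEDLEN];
		int ret;

		do {	/* mirrors the loop in drbg_get_random_bytes() */
			get_noise(sample, SEEDLEN);
			ret = continuous_test(sample);
		} while (ret);

		puts("sample accepted");
		return 0;
	}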
+
static void drbg_async_seed(struct work_struct *work)
{
struct drbg_string data;
@@ -1006,16 +1073,20 @@ static void drbg_async_seed(struct work_struct *work)
seed_work);
unsigned int entropylen = drbg_sec_strength(drbg->core->flags);
unsigned char entropy[32];
+ int ret;
BUG_ON(!entropylen);
BUG_ON(entropylen > sizeof(entropy));
- get_random_bytes(entropy, entropylen);
drbg_string_fill(&data, entropy, entropylen);
list_add_tail(&data.list, &seedlist);
mutex_lock(&drbg->drbg_mutex);
+ ret = drbg_get_random_bytes(drbg, entropy, entropylen);
+ if (ret)
+ goto unlock;
+
/* If nonblocking pool is initialized, deactivate Jitter RNG */
crypto_free_rng(drbg->jent);
drbg->jent = NULL;
@@ -1030,6 +1101,7 @@ static void drbg_async_seed(struct work_struct *work)
if (drbg->seeded)
drbg->reseed_threshold = drbg_max_requests(drbg);
+unlock:
mutex_unlock(&drbg->drbg_mutex);
memzero_explicit(entropy, entropylen);
@@ -1081,7 +1153,9 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
BUG_ON((entropylen * 2) > sizeof(entropy));
/* Get seed from in-kernel /dev/urandom */
- get_random_bytes(entropy, entropylen);
+ ret = drbg_get_random_bytes(drbg, entropy, entropylen);
+ if (ret)
+ goto out;
if (!drbg->jent) {
drbg_string_fill(&data1, entropy, entropylen);
@@ -1094,7 +1168,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
entropylen);
if (ret) {
pr_devel("DRBG: jent failed with %d\n", ret);
- return ret;
+ goto out;
}
drbg_string_fill(&data1, entropy, entropylen * 2);
@@ -1121,6 +1195,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
ret = __drbg_seed(drbg, &seedlist, reseed);
+out:
memzero_explicit(entropy, entropylen * 2);
return ret;
@@ -1142,6 +1217,11 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
drbg->reseed_ctr = 0;
drbg->d_ops = NULL;
drbg->core = NULL;
+ if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) {
+ kzfree(drbg->prev);
+ drbg->prev = NULL;
+ drbg->fips_primed = false;
+ }
}
/*
@@ -1211,6 +1291,14 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1);
}
+ if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) {
+ drbg->prev = kzalloc(drbg_sec_strength(drbg->core->flags),
+ GFP_KERNEL);
+ if (!drbg->prev)
+ goto fini;
+ drbg->fips_primed = false;
+ }
+
return 0;
fini:
diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c
index 4e8704405a3b..58f935315cf8 100644
--- a/crypto/fcrypt.c
+++ b/crypto/fcrypt.c
@@ -391,6 +391,7 @@ static int fcrypt_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key
static struct crypto_alg fcrypt_alg = {
.cra_name = "fcrypt",
+ .cra_driver_name = "fcrypt-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = 8,
.cra_ctxsize = sizeof(struct fcrypt_ctx),
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index 6425b9cd718e..dad9e1f91a78 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -31,6 +31,7 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 k;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -39,7 +40,12 @@ static int ghash_setkey(struct crypto_shash *tfm,
if (ctx->gf128)
gf128mul_free_4k(ctx->gf128);
- ctx->gf128 = gf128mul_init_4k_lle((be128 *)key);
+
+ BUILD_BUG_ON(sizeof(k) != GHASH_BLOCK_SIZE);
+ memcpy(&k, key, GHASH_BLOCK_SIZE); /* avoid violating alignment rules */
+ ctx->gf128 = gf128mul_init_4k_lle(&k);
+ memzero_explicit(&k, GHASH_BLOCK_SIZE);
+
if (!ctx->gf128)
return -ENOMEM;
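The ghash_setkey() fix sidesteps an unaligned cast: the caller's key pointer carries no alignment guarantee, so the key is copied into a naturally aligned stack variable before use and wiped afterwards. The same pattern in a self-contained sketch (a plain 16-byte struct stands in for be128):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Stand-in for be128: two 64-bit halves, naturally aligned. */
	struct u128 {
		uint64_t hi, lo;
	};

	static void consume_aligned(const struct u128 *k)
	{
		printf("%016llx%016llx\n",
		       (unsigned long long)k->hi, (unsigned long long)k->lo);
	}

	static void setkey(const uint8_t *key)	/* key may be unaligned */
	{
		struct u128 k;

		/* Copy to an aligned temporary instead of casting the pointer. */
		memcpy(&k, key, sizeof(k));
		consume_aligned(&k);
		memset(&k, 0, sizeof(k));	/* the kernel uses memzero_explicit() */
	}

	int main(void)
	{
		uint8_t buf[17] = { 0 };

		setkey(buf + 1);	/* deliberately misaligned source */
		return 0;
	}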
diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c
index 787dccca3715..701b8d86ab49 100644
--- a/crypto/jitterentropy-kcapi.c
+++ b/crypto/jitterentropy-kcapi.c
@@ -56,11 +56,6 @@ void jent_entropy_collector_free(struct rand_data *entropy_collector);
* Helper function
***************************************************************************/
-__u64 jent_rol64(__u64 word, unsigned int shift)
-{
- return rol64(word, shift);
-}
-
void *jent_zalloc(unsigned int len)
{
return kzalloc(len, GFP_KERNEL);
diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
index acf44b2d2d1d..77fa2120fe0c 100644
--- a/crypto/jitterentropy.c
+++ b/crypto/jitterentropy.c
@@ -2,7 +2,7 @@
* Non-physical true random number generator based on timing jitter --
* Jitter RNG standalone code.
*
- * Copyright Stephan Mueller <smueller@chronox.de>, 2015
+ * Copyright Stephan Mueller <smueller@chronox.de>, 2015 - 2019
*
* Design
* ======
@@ -47,7 +47,7 @@
/*
* This Jitterentropy RNG is based on the jitterentropy library
- * version 1.1.0 provided at http://www.chronox.de/jent.html
+ * version 2.1.2 provided at http://www.chronox.de/jent.html
*/
#ifdef __OPTIMIZE__
@@ -71,10 +71,7 @@ struct rand_data {
#define DATA_SIZE_BITS ((sizeof(__u64)) * 8)
__u64 last_delta; /* SENSITIVE stuck test */
__s64 last_delta2; /* SENSITIVE stuck test */
- unsigned int stuck:1; /* Time measurement stuck */
unsigned int osr; /* Oversample rate */
- unsigned int stir:1; /* Post-processing stirring */
- unsigned int disable_unbias:1; /* Deactivate Von-Neuman unbias */
#define JENT_MEMORY_BLOCKS 64
#define JENT_MEMORY_BLOCKSIZE 32
#define JENT_MEMORY_ACCESSLOOPS 128
@@ -89,8 +86,6 @@ struct rand_data {
};
/* Flags that can be used to initialize the RNG */
-#define JENT_DISABLE_STIR (1<<0) /* Disable stirring the entropy pool */
-#define JENT_DISABLE_UNBIAS (1<<1) /* Disable the Von-Neuman Unbiaser */
#define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more
* entropy, saves MEMORY_SIZE RAM for
* entropy collector */
@@ -99,19 +94,16 @@ struct rand_data {
#define JENT_ENOTIME 1 /* Timer service not available */
#define JENT_ECOARSETIME 2 /* Timer too coarse for RNG */
#define JENT_ENOMONOTONIC 3 /* Timer is not monotonic increasing */
-#define JENT_EMINVARIATION 4 /* Timer variations too small for RNG */
#define JENT_EVARVAR 5 /* Timer does not produce variations of
* variations (2nd derivation of time is
* zero). */
-#define JENT_EMINVARVAR 6 /* Timer variations of variations is tooi
- * small. */
+#define JENT_ESTUCK 8 /* Too many stuck results during init. */
/***************************************************************************
* Helper functions
***************************************************************************/
void jent_get_nstime(__u64 *out);
-__u64 jent_rol64(__u64 word, unsigned int shift);
void *jent_zalloc(unsigned int len);
void jent_zfree(void *ptr);
int jent_fips_enabled(void);
@@ -140,16 +132,16 @@ static __u64 jent_loop_shuffle(struct rand_data *ec,
jent_get_nstime(&time);
/*
- * mix the current state of the random number into the shuffle
- * calculation to balance that shuffle a bit more
+ * Mix the current state of the random number into the shuffle
+ * calculation to balance that shuffle a bit more.
*/
if (ec)
time ^= ec->data;
/*
- * we fold the time value as much as possible to ensure that as many
- * bits of the time stamp are included as possible
+ * We fold the time value as much as possible to ensure that as many
+ * bits of the time stamp are included as possible.
*/
- for (i = 0; (DATA_SIZE_BITS / bits) > i; i++) {
+ for (i = 0; ((DATA_SIZE_BITS + bits - 1) / bits) > i; i++) {
shuffle ^= time & mask;
time = time >> bits;
}
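The new loop bound is the standard ceiling-division idiom, equivalent to the kernel's DIV_ROUND_UP(); with the old truncating division the top DATA_SIZE_BITS % bits stamp bits were never folded in. In isolation:

	#include <assert.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		/* 64 bits in 3-bit chunks: 21 iterations truncate, 22 cover all. */
		assert(64 / 3 == 21);
		assert(DIV_ROUND_UP(64, 3) == 22);
		return 0;
	}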
@@ -169,38 +161,28 @@ static __u64 jent_loop_shuffle(struct rand_data *ec,
* CPU Jitter noise source -- this is the noise source based on the CPU
* execution time jitter
*
- * This function folds the time into one bit units by iterating
- * through the DATA_SIZE_BITS bit time value as follows: assume our time value
- * is 0xabcd
- * 1st loop, 1st shift generates 0xd000
- * 1st loop, 2nd shift generates 0x000d
- * 2nd loop, 1st shift generates 0xcd00
- * 2nd loop, 2nd shift generates 0x000c
- * 3rd loop, 1st shift generates 0xbcd0
- * 3rd loop, 2nd shift generates 0x000b
- * 4th loop, 1st shift generates 0xabcd
- * 4th loop, 2nd shift generates 0x000a
- * Now, the values at the end of the 2nd shifts are XORed together.
+ * This function injects the individual bits of the time value into the
+ * entropy pool using an LFSR.
*
- * The code is deliberately inefficient and shall stay that way. This function
- * is the root cause why the code shall be compiled without optimization. This
- * function not only acts as folding operation, but this function's execution
- * is used to measure the CPU execution time jitter. Any change to the loop in
- * this function implies that careful retesting must be done.
+ * The code is deliberately inefficient with respect to the bit shifting
+ * and shall stay that way. This function is the root cause why the code
+ * shall be compiled without optimization. This function not only acts as
+ * folding operation, but this function's execution is used to measure
+ * the CPU execution time jitter. Any change to the loop in this function
+ * implies that careful retesting must be done.
*
* Input:
* @ec entropy collector struct -- may be NULL
- * @time time stamp to be folded
+ * @time time stamp to be injected
* @loop_cnt if a value not equal to 0 is set, use the given value as number of
* loops to perform the folding
*
* Output:
- * @folded result of folding operation
+ * updated ec->data
*
* @return Number of loops the folding operation is performed
*/
-static __u64 jent_fold_time(struct rand_data *ec, __u64 time,
- __u64 *folded, __u64 loop_cnt)
+static __u64 jent_lfsr_time(struct rand_data *ec, __u64 time, __u64 loop_cnt)
{
unsigned int i;
__u64 j = 0;
@@ -217,15 +199,34 @@ static __u64 jent_fold_time(struct rand_data *ec, __u64 time,
if (loop_cnt)
fold_loop_cnt = loop_cnt;
for (j = 0; j < fold_loop_cnt; j++) {
- new = 0;
+ new = ec->data;
for (i = 1; (DATA_SIZE_BITS) >= i; i++) {
__u64 tmp = time << (DATA_SIZE_BITS - i);
tmp = tmp >> (DATA_SIZE_BITS - 1);
+
+ /*
+ * Fibonacci LFSR with polynomial of
+ * x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is
+ * primitive according to
+ * http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf
+ * (the shift values are the polynomial values minus one
+ * due to counting bits from 0 to 63). As the current
+ * position is always the LSB, the polynomial only needs
+ * to shift data in from the left without wrap.
+ */
+ tmp ^= ((new >> 63) & 1);
+ tmp ^= ((new >> 60) & 1);
+ tmp ^= ((new >> 55) & 1);
+ tmp ^= ((new >> 30) & 1);
+ tmp ^= ((new >> 27) & 1);
+ tmp ^= ((new >> 22) & 1);
+ new <<= 1;
new ^= tmp;
}
}
- *folded = new;
+ ec->data = new;
+
return fold_loop_cnt;
}
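The added taps (bits 63, 60, 55, 30, 27 and 22, i.e. the polynomial exponents minus one) make each pass of the inner loop a step of a 64-bit Fibonacci LFSR that absorbs one time-stamp bit. Extracted into a standalone sketch:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * One LFSR step for x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1,
	 * absorbing one input bit, as jent_lfsr_time() does per stamp bit.
	 */
	static uint64_t lfsr_step(uint64_t pool, uint64_t in_bit)
	{
		uint64_t fb = in_bit & 1;

		fb ^= (pool >> 63) & 1;
		fb ^= (pool >> 60) & 1;
		fb ^= (pool >> 55) & 1;
		fb ^= (pool >> 30) & 1;
		fb ^= (pool >> 27) & 1;
		fb ^= (pool >> 22) & 1;

		return (pool << 1) ^ fb;
	}

	int main(void)
	{
		uint64_t pool = 0;
		uint64_t time = 0xabcdULL;	/* sample "time stamp" */

		/* Inject LSB-first, matching the i = 1..64 loop above. */
		for (int i = 0; i < 64; i++)
			pool = lfsr_step(pool, (time >> i) & 1);

		printf("pool = %016llx\n", (unsigned long long)pool);
		return 0;
	}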
@@ -258,7 +259,6 @@ static __u64 jent_fold_time(struct rand_data *ec, __u64 time,
*/
static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
{
- unsigned char *tmpval = NULL;
unsigned int wrap = 0;
__u64 i = 0;
#define MAX_ACC_LOOP_BIT 7
@@ -278,7 +278,7 @@ static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
acc_loop_cnt = loop_cnt;
for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) {
- tmpval = ec->mem + ec->memlocation;
+ unsigned char *tmpval = ec->mem + ec->memlocation;
/*
* memory access: just add 1 to one byte,
* wrap at 255 -- memory access implies read
@@ -316,7 +316,7 @@ static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
* 0 jitter measurement not stuck (good bit)
* 1 jitter measurement stuck (reject bit)
*/
-static void jent_stuck(struct rand_data *ec, __u64 current_delta)
+static int jent_stuck(struct rand_data *ec, __u64 current_delta)
{
__s64 delta2 = ec->last_delta - current_delta;
__s64 delta3 = delta2 - ec->last_delta2;
@@ -325,14 +325,15 @@ static void jent_stuck(struct rand_data *ec, __u64 current_delta)
ec->last_delta2 = delta2;
if (!current_delta || !delta2 || !delta3)
- ec->stuck = 1;
+ return 1;
+
+ return 0;
}
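jent_stuck() now returns its verdict instead of latching a flag; the test itself is unchanged: a measurement is stuck when the time delta or either of its next two discrete differences is zero. As a standalone function (state held in a small struct instead of the entropy collector):

	#include <stdint.h>
	#include <stdio.h>

	struct stuck_state {
		uint64_t last_delta;
		int64_t  last_delta2;
	};

	/* Returns 1 if the measurement shows no variation at some order. */
	static int stuck(struct stuck_state *s, uint64_t delta)
	{
		int64_t delta2 = s->last_delta - delta;
		int64_t delta3 = delta2 - s->last_delta2;

		s->last_delta = delta;
		s->last_delta2 = delta2;

		return !delta || !delta2 || !delta3;
	}

	int main(void)
	{
		struct stuck_state s = { 0 };
		uint64_t deltas[] = { 5, 5, 9, 14, 14 };	/* sample deltas */

		for (unsigned int i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++)
			printf("delta=%llu stuck=%d\n",
			       (unsigned long long)deltas[i], stuck(&s, deltas[i]));
		return 0;
	}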
/**
* This is the heart of the entropy generation: calculate time deltas and
- * use the CPU jitter in the time deltas. The jitter is folded into one
- * bit. You can call this function the "random bit generator" as it
- * produces one random bit per invocation.
+ * use the CPU jitter in the time deltas. The jitter is injected into the
+ * entropy pool.
*
* WARNING: ensure that ->prev_time is primed before using the output
* of this function! This can be done by calling this function
@@ -341,12 +342,11 @@ static void jent_stuck(struct rand_data *ec, __u64 current_delta)
* Input:
* @entropy_collector Reference to entropy collector
*
- * @return One random bit
+ * @return result of stuck test
*/
-static __u64 jent_measure_jitter(struct rand_data *ec)
+static int jent_measure_jitter(struct rand_data *ec)
{
__u64 time = 0;
- __u64 data = 0;
__u64 current_delta = 0;
/* Invoke one noise source before time measurement to add variations */
@@ -360,109 +360,11 @@ static __u64 jent_measure_jitter(struct rand_data *ec)
current_delta = time - ec->prev_time;
ec->prev_time = time;
- /* Now call the next noise sources which also folds the data */
- jent_fold_time(ec, current_delta, &data, 0);
-
- /*
- * Check whether we have a stuck measurement. The enforcement
- * is performed after the stuck value has been mixed into the
- * entropy pool.
- */
- jent_stuck(ec, current_delta);
-
- return data;
-}
-
-/**
- * Von Neuman unbias as explained in RFC 4086 section 4.2. As shown in the
- * documentation of that RNG, the bits from jent_measure_jitter are considered
- * independent which implies that the Von Neuman unbias operation is applicable.
- * A proof of the Von-Neumann unbias operation to remove skews is given in the
- * document "A proposal for: Functionality classes for random number
- * generators", version 2.0 by Werner Schindler, section 5.4.1.
- *
- * Input:
- * @entropy_collector Reference to entropy collector
- *
- * @return One random bit
- */
-static __u64 jent_unbiased_bit(struct rand_data *entropy_collector)
-{
- do {
- __u64 a = jent_measure_jitter(entropy_collector);
- __u64 b = jent_measure_jitter(entropy_collector);
-
- if (a == b)
- continue;
- if (1 == a)
- return 1;
- else
- return 0;
- } while (1);
-}
-
-/**
- * Shuffle the pool a bit by mixing some value with a bijective function (XOR)
- * into the pool.
- *
- * The function generates a mixer value that depends on the bits set and the
- * location of the set bits in the random number generated by the entropy
- * source. Therefore, based on the generated random number, this mixer value
- * can have 2**64 different values. That mixer value is initialized with the
- * first two SHA-1 constants. After obtaining the mixer value, it is XORed into
- * the random number.
- *
- * The mixer value is not assumed to contain any entropy. But due to the XOR
- * operation, it can also not destroy any entropy present in the entropy pool.
- *
- * Input:
- * @entropy_collector Reference to entropy collector
- */
-static void jent_stir_pool(struct rand_data *entropy_collector)
-{
- /*
- * to shut up GCC on 32 bit, we have to initialize the 64 variable
- * with two 32 bit variables
- */
- union c {
- __u64 u64;
- __u32 u32[2];
- };
- /*
- * This constant is derived from the first two 32 bit initialization
- * vectors of SHA-1 as defined in FIPS 180-4 section 5.3.1
- */
- union c constant;
- /*
- * The start value of the mixer variable is derived from the third
- * and fourth 32 bit initialization vector of SHA-1 as defined in
- * FIPS 180-4 section 5.3.1
- */
- union c mixer;
- unsigned int i = 0;
-
- /*
- * Store the SHA-1 constants in reverse order to make up the 64 bit
- * value -- this applies to a little endian system, on a big endian
- * system, it reverses as expected. But this really does not matter
- * as we do not rely on the specific numbers. We just pick the SHA-1
- * constants as they have a good mix of bit set and unset.
- */
- constant.u32[1] = 0x67452301;
- constant.u32[0] = 0xefcdab89;
- mixer.u32[1] = 0x98badcfe;
- mixer.u32[0] = 0x10325476;
+ /* Now call the next noise source, which also injects the data */
+ jent_lfsr_time(ec, current_delta, 0);
- for (i = 0; i < DATA_SIZE_BITS; i++) {
- /*
- * get the i-th bit of the input random number and only XOR
- * the constant into the mixer value when that bit is set
- */
- if ((entropy_collector->data >> i) & 1)
- mixer.u64 ^= constant.u64;
- mixer.u64 = jent_rol64(mixer.u64, 1);
- }
- entropy_collector->data ^= mixer.u64;
+ /* Check whether we have a stuck measurement. */
+ return jent_stuck(ec, current_delta);
}
/**
@@ -480,48 +382,9 @@ static void jent_gen_entropy(struct rand_data *ec)
jent_measure_jitter(ec);
while (1) {
- __u64 data = 0;
-
- if (ec->disable_unbias == 1)
- data = jent_measure_jitter(ec);
- else
- data = jent_unbiased_bit(ec);
-
- /* enforcement of the jent_stuck test */
- if (ec->stuck) {
- /*
- * We only mix in the bit considered not appropriate
- * without the LSFR. The reason is that if we apply
- * the LSFR and we do not rotate, the 2nd bit with LSFR
- * will cancel out the first LSFR application on the
- * bad bit.
- *
- * And we do not rotate as we apply the next bit to the
- * current bit location again.
- */
- ec->data ^= data;
- ec->stuck = 0;
+ /* If a stuck measurement is received, repeat measurement */
+ if (jent_measure_jitter(ec))
continue;
- }
-
- /*
- * Fibonacci LSFR with polynom of
- * x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is
- * primitive according to
- * http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf
- * (the shift values are the polynom values minus one
- * due to counting bits from 0 to 63). As the current
- * position is always the LSB, the polynom only needs
- * to shift data in from the left without wrap.
- */
- ec->data ^= data;
- ec->data ^= ((ec->data >> 63) & 1);
- ec->data ^= ((ec->data >> 60) & 1);
- ec->data ^= ((ec->data >> 55) & 1);
- ec->data ^= ((ec->data >> 30) & 1);
- ec->data ^= ((ec->data >> 27) & 1);
- ec->data ^= ((ec->data >> 22) & 1);
- ec->data = jent_rol64(ec->data, 1);
/*
* We multiply the loop value with ->osr to obtain the
@@ -530,8 +393,6 @@ static void jent_gen_entropy(struct rand_data *ec)
if (++k >= (DATA_SIZE_BITS * ec->osr))
break;
}
- if (ec->stir)
- jent_stir_pool(ec);
}
/**
@@ -639,12 +500,6 @@ struct rand_data *jent_entropy_collector_alloc(unsigned int osr,
osr = 1; /* minimum sampling rate is 1 */
entropy_collector->osr = osr;
- entropy_collector->stir = 1;
- if (flags & JENT_DISABLE_STIR)
- entropy_collector->stir = 0;
- if (flags & JENT_DISABLE_UNBIAS)
- entropy_collector->disable_unbias = 1;
-
/* fill the data pad with non-zero values */
jent_gen_entropy(entropy_collector);
@@ -656,7 +511,6 @@ void jent_entropy_collector_free(struct rand_data *entropy_collector)
jent_zfree(entropy_collector->mem);
entropy_collector->mem = NULL;
jent_zfree(entropy_collector);
- entropy_collector = NULL;
}
int jent_entropy_init(void)
@@ -665,8 +519,9 @@ int jent_entropy_init(void)
__u64 delta_sum = 0;
__u64 old_delta = 0;
int time_backwards = 0;
- int count_var = 0;
int count_mod = 0;
+ int count_stuck = 0;
+ struct rand_data ec = { 0 };
/* We could perform statistical tests here, but the problem is
* that we only have a few loop counts to do testing. These
@@ -695,12 +550,14 @@ int jent_entropy_init(void)
for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) {
__u64 time = 0;
__u64 time2 = 0;
- __u64 folded = 0;
__u64 delta = 0;
unsigned int lowdelta = 0;
+ int stuck;
+ /* Invoke core entropy collection logic */
jent_get_nstime(&time);
- jent_fold_time(NULL, time, &folded, 1<<MIN_FOLD_LOOP_BIT);
+ ec.prev_time = time;
+ jent_lfsr_time(&ec, time, 0);
jent_get_nstime(&time2);
/* test whether timer works */
@@ -715,6 +572,8 @@ int jent_entropy_init(void)
if (!delta)
return JENT_ECOARSETIME;
+ stuck = jent_stuck(&ec, delta);
+
/*
* up to here we did not modify any variable that will be
* evaluated later, but we already performed some work. Thus we
@@ -725,14 +584,14 @@ int jent_entropy_init(void)
if (CLEARCACHE > i)
continue;
+ if (stuck)
+ count_stuck++;
+
/* test whether we have an increasing timer */
if (!(time2 > time))
time_backwards++;
- /*
- * Avoid modulo of 64 bit integer to allow code to compile
- * on 32 bit architectures.
- */
+ /* use 32 bit value to ensure compilation on 32 bit arches */
lowdelta = time2 - time;
if (!(lowdelta % 100))
count_mod++;
@@ -743,14 +602,10 @@ int jent_entropy_init(void)
* only after the first loop is executed as we need to prime
* the old_data value
*/
- if (i) {
- if (delta != old_delta)
- count_var++;
- if (delta > old_delta)
- delta_sum += (delta - old_delta);
- else
- delta_sum += (old_delta - delta);
- }
+ if (delta > old_delta)
+ delta_sum += (delta - old_delta);
+ else
+ delta_sum += (old_delta - delta);
old_delta = delta;
}
@@ -763,25 +618,29 @@ int jent_entropy_init(void)
*/
if (3 < time_backwards)
return JENT_ENOMONOTONIC;
- /* Error if the time variances are always identical */
- if (!delta_sum)
- return JENT_EVARVAR;
/*
* Variations of deltas of time must on average be larger
* than 1 to ensure the entropy estimation
* implied with 1 is preserved
*/
- if (delta_sum <= 1)
- return JENT_EMINVARVAR;
+ if (delta_sum <= 1)
+ return JENT_EVARVAR;
/*
* Ensure that we have variations in the time stamp below 10 for at
- * least 10% of all checks -- on some platforms, the counter
- * increments in multiples of 100, but not always
+ * least 10% of all checks -- on some platforms, the counter increments
+ * in multiples of 100, but not always
*/
if ((TESTLOOPCOUNT/10 * 9) < count_mod)
return JENT_ECOARSETIME;
+ /*
+ * If we have more than 90% stuck results, then this Jitter RNG is
+ * likely to not work well.
+ */
+ if ((TESTLOOPCOUNT/10 * 9) < count_stuck)
+ return JENT_ESTUCK;
+
return 0;
}
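The reworked jent_entropy_init() thus reduces to four aggregate checks over the sampling loop. A condensed restatement of the verdict logic; TESTLOOPCOUNT is defined outside this hunk, so the value 300 below is an assumption for illustration only:

	#include <stdint.h>

	#define TESTLOOPCOUNT 300	/* assumed; not shown in this hunk */

	static int health_verdict(int time_backwards, uint64_t delta_sum,
				  int count_mod, int count_stuck)
	{
		if (time_backwards > 3)
			return 3;	/* JENT_ENOMONOTONIC */
		if (delta_sum <= 1)
			return 5;	/* JENT_EVARVAR */
		if (count_mod > TESTLOOPCOUNT / 10 * 9)
			return 2;	/* JENT_ECOARSETIME */
		if (count_stuck > TESTLOOPCOUNT / 10 * 9)
			return 8;	/* JENT_ESTUCK */
		return 0;		/* noise source looks usable */
	}

	int main(void)
	{
		return health_verdict(0, 1000, 30, 10);	/* expect 0 */
	}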
diff --git a/crypto/khazad.c b/crypto/khazad.c
index b50aa8a3ab4c..14ca7f1631c7 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -848,6 +848,7 @@ static void khazad_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
static struct crypto_alg khazad_alg = {
.cra_name = "khazad",
+ .cra_driver_name = "khazad-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = KHAZAD_BLOCK_SIZE,
.cra_ctxsize = sizeof (struct khazad_ctx),
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 58009cf63a6e..be829f6afc8e 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -384,7 +384,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_priority = alg->base.cra_priority;
inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
- (__alignof__(__be32) - 1);
+ (__alignof__(be128) - 1);
inst->alg.ivsize = LRW_BLOCK_SIZE;
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 54673cf88385..0606f8862e78 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -106,6 +106,7 @@ static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
static struct crypto_alg alg_lz4 = {
.cra_name = "lz4",
+ .cra_driver_name = "lz4-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lz4_ctx),
.cra_module = THIS_MODULE,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index daae7bfb385d..d7cc94aa2fcf 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -107,6 +107,7 @@ static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
static struct crypto_alg alg_lz4hc = {
.cra_name = "lz4hc",
+ .cra_driver_name = "lz4hc-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lz4hc_ctx),
.cra_module = THIS_MODULE,
diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c
index c4303e96f2b1..0631d975bfac 100644
--- a/crypto/lzo-rle.c
+++ b/crypto/lzo-rle.c
@@ -109,6 +109,7 @@ static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
static struct crypto_alg alg = {
.cra_name = "lzo-rle",
+ .cra_driver_name = "lzo-rle-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lzorle_ctx),
.cra_module = THIS_MODULE,
diff --git a/crypto/lzo.c b/crypto/lzo.c
index 97051a2ca08e..ebda132dd22b 100644
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
@@ -109,6 +109,7 @@ static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src,
static struct crypto_alg alg = {
.cra_name = "lzo",
+ .cra_driver_name = "lzo-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lzo_ctx),
.cra_module = THIS_MODULE,
diff --git a/crypto/md4.c b/crypto/md4.c
index 9a1a228a0c69..2e7f2f319f95 100644
--- a/crypto/md4.c
+++ b/crypto/md4.c
@@ -216,9 +216,10 @@ static struct shash_alg alg = {
.final = md4_final,
.descsize = sizeof(struct md4_ctx),
.base = {
- .cra_name = "md4",
- .cra_blocksize = MD4_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "md4",
+ .cra_driver_name = "md4-generic",
+ .cra_blocksize = MD4_HMAC_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
};
diff --git a/crypto/md5.c b/crypto/md5.c
index 221c2c0932f8..22dc60bc0437 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -228,9 +228,10 @@ static struct shash_alg alg = {
.descsize = sizeof(struct md5_state),
.statesize = sizeof(struct md5_state),
.base = {
- .cra_name = "md5",
- .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "md5",
+ .cra_driver_name = "md5-generic",
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
};
diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c
index b3d83ff709d3..20e6220f46f6 100644
--- a/crypto/michael_mic.c
+++ b/crypto/michael_mic.c
@@ -156,6 +156,7 @@ static struct shash_alg alg = {
.descsize = sizeof(struct michael_mic_desc_ctx),
.base = {
.cra_name = "michael_mic",
+ .cra_driver_name = "michael_mic-generic",
.cra_blocksize = 8,
.cra_alignmask = 3,
.cra_ctxsize = sizeof(struct michael_mic_ctx),
diff --git a/crypto/rmd128.c b/crypto/rmd128.c
index d6c031a9fd14..29308fb97e7e 100644
--- a/crypto/rmd128.c
+++ b/crypto/rmd128.c
@@ -298,6 +298,7 @@ static struct shash_alg alg = {
.descsize = sizeof(struct rmd128_ctx),
.base = {
.cra_name = "rmd128",
+ .cra_driver_name = "rmd128-generic",
.cra_blocksize = RMD128_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/crypto/rmd160.c b/crypto/rmd160.c
index f3add4d54a22..c5fe4034b153 100644
--- a/crypto/rmd160.c
+++ b/crypto/rmd160.c
@@ -342,6 +342,7 @@ static struct shash_alg alg = {
.descsize = sizeof(struct rmd160_ctx),
.base = {
.cra_name = "rmd160",
+ .cra_driver_name = "rmd160-generic",
.cra_blocksize = RMD160_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/crypto/rmd256.c b/crypto/rmd256.c
index 79ca3029848f..3c730e9de5fd 100644
--- a/crypto/rmd256.c
+++ b/crypto/rmd256.c
@@ -317,6 +317,7 @@ static struct shash_alg alg = {
.descsize = sizeof(struct rmd256_ctx),
.base = {
.cra_name = "rmd256",
+ .cra_driver_name = "rmd256-generic",
.cra_blocksize = RMD256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/crypto/rmd320.c b/crypto/rmd320.c
index b2392ef7467b..c919ad6c4705 100644
--- a/crypto/rmd320.c
+++ b/crypto/rmd320.c
@@ -366,6 +366,7 @@ static struct shash_alg alg = {
.descsize = sizeof(struct rmd320_ctx),
.base = {
.cra_name = "rmd320",
+ .cra_driver_name = "rmd320-generic",
.cra_blocksize = RMD320_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index 16f612b6dbca..56fa665a4f01 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -225,7 +225,13 @@
x4 ^= x2; \
})
-static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
+/*
+ * Both gcc and clang have misoptimized this function in the past,
+ * producing horrible object code from spilling temporary variables
+ * on the stack. Forcing this part out of line avoids that.
+ */
+static noinline void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2,
+ u32 r3, u32 r4, u32 *k)
{
k += 100;
S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
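The fix relies on the compiler attribute behind the kernel's noinline (i.e. __attribute__((__noinline__)), from the compiler attribute headers) to keep the S-box part of the key schedule in its own stack frame. A trivial standalone illustration:

	#define noinline __attribute__((__noinline__))

	static noinline unsigned int keyschedule_part(unsigned int x)
	{
		/* stands in for the S-box chain with many live temporaries */
		return x * 2654435761u;
	}

	int main(void)
	{
		return keyschedule_part(1) ? 0 : 1;
	}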
@@ -637,6 +643,7 @@ static struct crypto_alg srp_algs[2] = { {
.cia_decrypt = serpent_decrypt } }
}, {
.cra_name = "tnepres",
+ .cra_driver_name = "tnepres-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index df735148000f..5d836fc3df3e 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -837,6 +837,40 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
return 0;
}
+int crypto_skcipher_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_alg *alg = tfm->base.__crt_alg;
+ unsigned int cryptlen = req->cryptlen;
+ int ret;
+
+ crypto_stats_get(alg);
+ if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+ ret = -ENOKEY;
+ else
+ ret = tfm->encrypt(req);
+ crypto_stats_skcipher_encrypt(cryptlen, ret, alg);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_skcipher_encrypt);
+
+int crypto_skcipher_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_alg *alg = tfm->base.__crt_alg;
+ unsigned int cryptlen = req->cryptlen;
+ int ret;
+
+ crypto_stats_get(alg);
+ if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+ ret = -ENOKEY;
+ else
+ ret = tfm->decrypt(req);
+ crypto_stats_skcipher_decrypt(cryptlen, ret, alg);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_skcipher_decrypt);
+
static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
{
struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm);
diff --git a/crypto/tea.c b/crypto/tea.c
index 37a18a9be2f4..02efc5d81690 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -216,6 +216,7 @@ static void xeta_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
static struct crypto_alg tea_algs[3] = { {
.cra_name = "tea",
+ .cra_driver_name = "tea-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = TEA_BLOCK_SIZE,
.cra_ctxsize = sizeof (struct tea_ctx),
@@ -229,6 +230,7 @@ static struct crypto_alg tea_algs[3] = { {
.cia_decrypt = tea_decrypt } }
}, {
.cra_name = "xtea",
+ .cra_driver_name = "xtea-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = XTEA_BLOCK_SIZE,
.cra_ctxsize = sizeof (struct xtea_ctx),
@@ -242,6 +244,7 @@ static struct crypto_alg tea_algs[3] = { {
.cia_decrypt = xtea_decrypt } }
}, {
.cra_name = "xeta",
+ .cra_driver_name = "xeta-generic",
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = XTEA_BLOCK_SIZE,
.cra_ctxsize = sizeof (struct xtea_ctx),
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 658a7eeebab2..d0b5b33806a6 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1032,6 +1032,205 @@ static void crypto_reenable_simd_for_test(void)
}
#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
+static int build_hash_sglist(struct test_sglist *tsgl,
+ const struct hash_testvec *vec,
+ const struct testvec_config *cfg,
+ unsigned int alignmask,
+ const struct test_sg_division *divs[XBUFSIZE])
+{
+ struct kvec kv;
+ struct iov_iter input;
+
+ kv.iov_base = (void *)vec->plaintext;
+ kv.iov_len = vec->psize;
+ iov_iter_kvec(&input, WRITE, &kv, 1, vec->psize);
+ return build_test_sglist(tsgl, cfg->src_divs, alignmask, vec->psize,
+ &input, divs);
+}
+
+static int check_hash_result(const char *type,
+ const u8 *result, unsigned int digestsize,
+ const struct hash_testvec *vec,
+ const char *vec_name,
+ const char *driver,
+ const struct testvec_config *cfg)
+{
+ if (memcmp(result, vec->digest, digestsize) != 0) {
+ pr_err("alg: %s: %s test failed (wrong result) on test vector %s, cfg=\"%s\"\n",
+ type, driver, vec_name, cfg->name);
+ return -EINVAL;
+ }
+ if (!testmgr_is_poison(&result[digestsize], TESTMGR_POISON_LEN)) {
+ pr_err("alg: %s: %s overran result buffer on test vector %s, cfg=\"%s\"\n",
+ type, driver, vec_name, cfg->name);
+ return -EOVERFLOW;
+ }
+ return 0;
+}
+
+static inline int check_shash_op(const char *op, int err,
+ const char *driver, const char *vec_name,
+ const struct testvec_config *cfg)
+{
+ if (err)
+ pr_err("alg: shash: %s %s() failed with err %d on test vector %s, cfg=\"%s\"\n",
+ driver, op, err, vec_name, cfg->name);
+ return err;
+}
+
+static inline const void *sg_data(struct scatterlist *sg)
+{
+ return page_address(sg_page(sg)) + sg->offset;
+}
+
+/* Test one hash test vector in one configuration, using the shash API */
+static int test_shash_vec_cfg(const char *driver,
+ const struct hash_testvec *vec,
+ const char *vec_name,
+ const struct testvec_config *cfg,
+ struct shash_desc *desc,
+ struct test_sglist *tsgl,
+ u8 *hashstate)
+{
+ struct crypto_shash *tfm = desc->tfm;
+ const unsigned int alignmask = crypto_shash_alignmask(tfm);
+ const unsigned int digestsize = crypto_shash_digestsize(tfm);
+ const unsigned int statesize = crypto_shash_statesize(tfm);
+ const struct test_sg_division *divs[XBUFSIZE];
+ unsigned int i;
+ u8 result[HASH_MAX_DIGESTSIZE + TESTMGR_POISON_LEN];
+ int err;
+
+ /* Set the key, if specified */
+ if (vec->ksize) {
+ err = crypto_shash_setkey(tfm, vec->key, vec->ksize);
+ if (err) {
+ if (err == vec->setkey_error)
+ return 0;
+ pr_err("alg: shash: %s setkey failed on test vector %s; expected_error=%d, actual_error=%d, flags=%#x\n",
+ driver, vec_name, vec->setkey_error, err,
+ crypto_shash_get_flags(tfm));
+ return err;
+ }
+ if (vec->setkey_error) {
+ pr_err("alg: shash: %s setkey unexpectedly succeeded on test vector %s; expected_error=%d\n",
+ driver, vec_name, vec->setkey_error);
+ return -EINVAL;
+ }
+ }
+
+ /* Build the scatterlist for the source data */
+ err = build_hash_sglist(tsgl, vec, cfg, alignmask, divs);
+ if (err) {
+ pr_err("alg: shash: %s: error preparing scatterlist for test vector %s, cfg=\"%s\"\n",
+ driver, vec_name, cfg->name);
+ return err;
+ }
+
+ /* Do the actual hashing */
+
+ testmgr_poison(desc->__ctx, crypto_shash_descsize(tfm));
+ testmgr_poison(result, digestsize + TESTMGR_POISON_LEN);
+
+ if (cfg->finalization_type == FINALIZATION_TYPE_DIGEST ||
+ vec->digest_error) {
+ /* Just using digest() */
+ if (tsgl->nents != 1)
+ return 0;
+ if (cfg->nosimd)
+ crypto_disable_simd_for_test();
+ err = crypto_shash_digest(desc, sg_data(&tsgl->sgl[0]),
+ tsgl->sgl[0].length, result);
+ if (cfg->nosimd)
+ crypto_reenable_simd_for_test();
+ if (err) {
+ if (err == vec->digest_error)
+ return 0;
+ pr_err("alg: shash: %s digest() failed on test vector %s; expected_error=%d, actual_error=%d, cfg=\"%s\"\n",
+ driver, vec_name, vec->digest_error, err,
+ cfg->name);
+ return err;
+ }
+ if (vec->digest_error) {
+ pr_err("alg: shash: %s digest() unexpectedly succeeded on test vector %s; expected_error=%d, cfg=\"%s\"\n",
+ driver, vec_name, vec->digest_error, cfg->name);
+ return -EINVAL;
+ }
+ goto result_ready;
+ }
+
+ /* Using init(), zero or more update(), then final() or finup() */
+
+ if (cfg->nosimd)
+ crypto_disable_simd_for_test();
+ err = crypto_shash_init(desc);
+ if (cfg->nosimd)
+ crypto_reenable_simd_for_test();
+ err = check_shash_op("init", err, driver, vec_name, cfg);
+ if (err)
+ return err;
+
+ for (i = 0; i < tsgl->nents; i++) {
+ if (i + 1 == tsgl->nents &&
+ cfg->finalization_type == FINALIZATION_TYPE_FINUP) {
+ if (divs[i]->nosimd)
+ crypto_disable_simd_for_test();
+ err = crypto_shash_finup(desc, sg_data(&tsgl->sgl[i]),
+ tsgl->sgl[i].length, result);
+ if (divs[i]->nosimd)
+ crypto_reenable_simd_for_test();
+ err = check_shash_op("finup", err, driver, vec_name,
+ cfg);
+ if (err)
+ return err;
+ goto result_ready;
+ }
+ if (divs[i]->nosimd)
+ crypto_disable_simd_for_test();
+ err = crypto_shash_update(desc, sg_data(&tsgl->sgl[i]),
+ tsgl->sgl[i].length);
+ if (divs[i]->nosimd)
+ crypto_reenable_simd_for_test();
+ err = check_shash_op("update", err, driver, vec_name, cfg);
+ if (err)
+ return err;
+ if (divs[i]->flush_type == FLUSH_TYPE_REIMPORT) {
+ /* Test ->export() and ->import() */
+ testmgr_poison(hashstate + statesize,
+ TESTMGR_POISON_LEN);
+ err = crypto_shash_export(desc, hashstate);
+ err = check_shash_op("export", err, driver, vec_name,
+ cfg);
+ if (err)
+ return err;
+ if (!testmgr_is_poison(hashstate + statesize,
+ TESTMGR_POISON_LEN)) {
+ pr_err("alg: shash: %s export() overran state buffer on test vector %s, cfg=\"%s\"\n",
+ driver, vec_name, cfg->name);
+ return -EOVERFLOW;
+ }
+ testmgr_poison(desc->__ctx, crypto_shash_descsize(tfm));
+ err = crypto_shash_import(desc, hashstate);
+ err = check_shash_op("import", err, driver, vec_name,
+ cfg);
+ if (err)
+ return err;
+ }
+ }
+
+ if (cfg->nosimd)
+ crypto_disable_simd_for_test();
+ err = crypto_shash_final(desc, result);
+ if (cfg->nosimd)
+ crypto_reenable_simd_for_test();
+ err = check_shash_op("final", err, driver, vec_name, cfg);
+ if (err)
+ return err;
+result_ready:
+ return check_hash_result("shash", result, digestsize, vec, vec_name,
+ driver, cfg);
+}
+
static int do_ahash_op(int (*op)(struct ahash_request *req),
struct ahash_request *req,
struct crypto_wait *wait, bool nosimd)
@@ -1049,31 +1248,32 @@ static int do_ahash_op(int (*op)(struct ahash_request *req),
return crypto_wait_req(err, wait);
}
-static int check_nonfinal_hash_op(const char *op, int err,
- u8 *result, unsigned int digestsize,
- const char *driver, const char *vec_name,
- const struct testvec_config *cfg)
+static int check_nonfinal_ahash_op(const char *op, int err,
+ u8 *result, unsigned int digestsize,
+ const char *driver, const char *vec_name,
+ const struct testvec_config *cfg)
{
if (err) {
- pr_err("alg: hash: %s %s() failed with err %d on test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s %s() failed with err %d on test vector %s, cfg=\"%s\"\n",
driver, op, err, vec_name, cfg->name);
return err;
}
if (!testmgr_is_poison(result, digestsize)) {
- pr_err("alg: hash: %s %s() used result buffer on test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s %s() used result buffer on test vector %s, cfg=\"%s\"\n",
driver, op, vec_name, cfg->name);
return -EINVAL;
}
return 0;
}
-static int test_hash_vec_cfg(const char *driver,
- const struct hash_testvec *vec,
- const char *vec_name,
- const struct testvec_config *cfg,
- struct ahash_request *req,
- struct test_sglist *tsgl,
- u8 *hashstate)
+/* Test one hash test vector in one configuration, using the ahash API */
+static int test_ahash_vec_cfg(const char *driver,
+ const struct hash_testvec *vec,
+ const char *vec_name,
+ const struct testvec_config *cfg,
+ struct ahash_request *req,
+ struct test_sglist *tsgl,
+ u8 *hashstate)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
const unsigned int alignmask = crypto_ahash_alignmask(tfm);
@@ -1082,8 +1282,6 @@ static int test_hash_vec_cfg(const char *driver,
const u32 req_flags = CRYPTO_TFM_REQ_MAY_BACKLOG | cfg->req_flags;
const struct test_sg_division *divs[XBUFSIZE];
DECLARE_CRYPTO_WAIT(wait);
- struct kvec _input;
- struct iov_iter input;
unsigned int i;
struct scatterlist *pending_sgl;
unsigned int pending_len;
@@ -1096,26 +1294,22 @@ static int test_hash_vec_cfg(const char *driver,
if (err) {
if (err == vec->setkey_error)
return 0;
- pr_err("alg: hash: %s setkey failed on test vector %s; expected_error=%d, actual_error=%d, flags=%#x\n",
+ pr_err("alg: ahash: %s setkey failed on test vector %s; expected_error=%d, actual_error=%d, flags=%#x\n",
driver, vec_name, vec->setkey_error, err,
crypto_ahash_get_flags(tfm));
return err;
}
if (vec->setkey_error) {
- pr_err("alg: hash: %s setkey unexpectedly succeeded on test vector %s; expected_error=%d\n",
+ pr_err("alg: ahash: %s setkey unexpectedly succeeded on test vector %s; expected_error=%d\n",
driver, vec_name, vec->setkey_error);
return -EINVAL;
}
}
/* Build the scatterlist for the source data */
- _input.iov_base = (void *)vec->plaintext;
- _input.iov_len = vec->psize;
- iov_iter_kvec(&input, WRITE, &_input, 1, vec->psize);
- err = build_test_sglist(tsgl, cfg->src_divs, alignmask, vec->psize,
- &input, divs);
+ err = build_hash_sglist(tsgl, vec, cfg, alignmask, divs);
if (err) {
- pr_err("alg: hash: %s: error preparing scatterlist for test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s: error preparing scatterlist for test vector %s, cfg=\"%s\"\n",
driver, vec_name, cfg->name);
return err;
}
@@ -1135,13 +1329,13 @@ static int test_hash_vec_cfg(const char *driver,
if (err) {
if (err == vec->digest_error)
return 0;
- pr_err("alg: hash: %s digest() failed on test vector %s; expected_error=%d, actual_error=%d, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s digest() failed on test vector %s; expected_error=%d, actual_error=%d, cfg=\"%s\"\n",
driver, vec_name, vec->digest_error, err,
cfg->name);
return err;
}
if (vec->digest_error) {
- pr_err("alg: hash: %s digest() unexpectedly succeeded on test vector %s; expected_error=%d, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s digest() unexpectedly succeeded on test vector %s; expected_error=%d, cfg=\"%s\"\n",
driver, vec_name, vec->digest_error, cfg->name);
return -EINVAL;
}
@@ -1153,8 +1347,8 @@ static int test_hash_vec_cfg(const char *driver,
ahash_request_set_callback(req, req_flags, crypto_req_done, &wait);
ahash_request_set_crypt(req, NULL, result, 0);
err = do_ahash_op(crypto_ahash_init, req, &wait, cfg->nosimd);
- err = check_nonfinal_hash_op("init", err, result, digestsize,
- driver, vec_name, cfg);
+ err = check_nonfinal_ahash_op("init", err, result, digestsize,
+ driver, vec_name, cfg);
if (err)
return err;
@@ -1170,9 +1364,9 @@ static int test_hash_vec_cfg(const char *driver,
pending_len);
err = do_ahash_op(crypto_ahash_update, req, &wait,
divs[i]->nosimd);
- err = check_nonfinal_hash_op("update", err,
- result, digestsize,
- driver, vec_name, cfg);
+ err = check_nonfinal_ahash_op("update", err,
+ result, digestsize,
+ driver, vec_name, cfg);
if (err)
return err;
pending_sgl = NULL;
@@ -1183,23 +1377,23 @@ static int test_hash_vec_cfg(const char *driver,
testmgr_poison(hashstate + statesize,
TESTMGR_POISON_LEN);
err = crypto_ahash_export(req, hashstate);
- err = check_nonfinal_hash_op("export", err,
- result, digestsize,
- driver, vec_name, cfg);
+ err = check_nonfinal_ahash_op("export", err,
+ result, digestsize,
+ driver, vec_name, cfg);
if (err)
return err;
if (!testmgr_is_poison(hashstate + statesize,
TESTMGR_POISON_LEN)) {
- pr_err("alg: hash: %s export() overran state buffer on test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s export() overran state buffer on test vector %s, cfg=\"%s\"\n",
driver, vec_name, cfg->name);
return -EOVERFLOW;
}
testmgr_poison(req->__ctx, crypto_ahash_reqsize(tfm));
err = crypto_ahash_import(req, hashstate);
- err = check_nonfinal_hash_op("import", err,
- result, digestsize,
- driver, vec_name, cfg);
+ err = check_nonfinal_ahash_op("import", err,
+ result, digestsize,
+ driver, vec_name, cfg);
if (err)
return err;
}
@@ -1213,13 +1407,13 @@ static int test_hash_vec_cfg(const char *driver,
if (cfg->finalization_type == FINALIZATION_TYPE_FINAL) {
/* finish with update() and final() */
err = do_ahash_op(crypto_ahash_update, req, &wait, cfg->nosimd);
- err = check_nonfinal_hash_op("update", err, result, digestsize,
- driver, vec_name, cfg);
+ err = check_nonfinal_ahash_op("update", err, result, digestsize,
+ driver, vec_name, cfg);
if (err)
return err;
err = do_ahash_op(crypto_ahash_final, req, &wait, cfg->nosimd);
if (err) {
- pr_err("alg: hash: %s final() failed with err %d on test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s final() failed with err %d on test vector %s, cfg=\"%s\"\n",
driver, err, vec_name, cfg->name);
return err;
}
@@ -1227,31 +1421,49 @@ static int test_hash_vec_cfg(const char *driver,
/* finish with finup() */
err = do_ahash_op(crypto_ahash_finup, req, &wait, cfg->nosimd);
if (err) {
- pr_err("alg: hash: %s finup() failed with err %d on test vector %s, cfg=\"%s\"\n",
+ pr_err("alg: ahash: %s finup() failed with err %d on test vector %s, cfg=\"%s\"\n",
driver, err, vec_name, cfg->name);
return err;
}
}
result_ready:
- /* Check that the algorithm produced the correct digest */
- if (memcmp(result, vec->digest, digestsize) != 0) {
- pr_err("alg: hash: %s test failed (wrong result) on test vector %s, cfg=\"%s\"\n",
- driver, vec_name, cfg->name);
- return -EINVAL;
- }
- if (!testmgr_is_poison(&result[digestsize], TESTMGR_POISON_LEN)) {
- pr_err("alg: hash: %s overran result buffer on test vector %s, cfg=\"%s\"\n",
- driver, vec_name, cfg->name);
- return -EOVERFLOW;
+ return check_hash_result("ahash", result, digestsize, vec, vec_name,
+ driver, cfg);
+}
+
+static int test_hash_vec_cfg(const char *driver,
+ const struct hash_testvec *vec,
+ const char *vec_name,
+ const struct testvec_config *cfg,
+ struct ahash_request *req,
+ struct shash_desc *desc,
+ struct test_sglist *tsgl,
+ u8 *hashstate)
+{
+ int err;
+
+ /*
+ * For algorithms implemented as "shash", most bugs will be detected by
+ * both the shash and ahash tests. Test the shash API first so that
+ * failures involve less indirection and are easier to debug.
+ */
+
+ if (desc) {
+ err = test_shash_vec_cfg(driver, vec, vec_name, cfg, desc, tsgl,
+ hashstate);
+ if (err)
+ return err;
}
- return 0;
+ return test_ahash_vec_cfg(driver, vec, vec_name, cfg, req, tsgl,
+ hashstate);
}
static int test_hash_vec(const char *driver, const struct hash_testvec *vec,
unsigned int vec_num, struct ahash_request *req,
- struct test_sglist *tsgl, u8 *hashstate)
+ struct shash_desc *desc, struct test_sglist *tsgl,
+ u8 *hashstate)
{
char vec_name[16];
unsigned int i;
@@ -1262,7 +1474,7 @@ static int test_hash_vec(const char *driver, const struct hash_testvec *vec,
for (i = 0; i < ARRAY_SIZE(default_hash_testvec_configs); i++) {
err = test_hash_vec_cfg(driver, vec, vec_name,
&default_hash_testvec_configs[i],
- req, tsgl, hashstate);
+ req, desc, tsgl, hashstate);
if (err)
return err;
}
@@ -1276,9 +1488,10 @@ static int test_hash_vec(const char *driver, const struct hash_testvec *vec,
generate_random_testvec_config(&cfg, cfgname,
sizeof(cfgname));
err = test_hash_vec_cfg(driver, vec, vec_name, &cfg,
- req, tsgl, hashstate);
+ req, desc, tsgl, hashstate);
if (err)
return err;
+ cond_resched();
}
}
#endif
@@ -1290,14 +1503,12 @@ static int test_hash_vec(const char *driver, const struct hash_testvec *vec,
* Generate a hash test vector from the given implementation.
* Assumes the buffers in 'vec' were already allocated.
*/
-static void generate_random_hash_testvec(struct crypto_shash *tfm,
+static void generate_random_hash_testvec(struct shash_desc *desc,
struct hash_testvec *vec,
unsigned int maxkeysize,
unsigned int maxdatasize,
char *name, size_t max_namelen)
{
- SHASH_DESC_ON_STACK(desc, tfm);
-
/* Data */
vec->psize = generate_random_length(maxdatasize);
generate_random_bytes((u8 *)vec->plaintext, vec->psize);
@@ -1314,7 +1525,7 @@ static void generate_random_hash_testvec(struct crypto_shash *tfm,
vec->ksize = 1 + (prandom_u32() % maxkeysize);
generate_random_bytes((u8 *)vec->key, vec->ksize);
- vec->setkey_error = crypto_shash_setkey(tfm, vec->key,
+ vec->setkey_error = crypto_shash_setkey(desc->tfm, vec->key,
vec->ksize);
/* If the key couldn't be set, no need to continue to digest. */
if (vec->setkey_error)
@@ -1322,7 +1533,6 @@ static void generate_random_hash_testvec(struct crypto_shash *tfm,
}
/* Digest */
- desc->tfm = tfm;
vec->digest_error = crypto_shash_digest(desc, vec->plaintext,
vec->psize, (u8 *)vec->digest);
done:
@@ -1338,6 +1548,7 @@ static int test_hash_vs_generic_impl(const char *driver,
const char *generic_driver,
unsigned int maxkeysize,
struct ahash_request *req,
+ struct shash_desc *desc,
struct test_sglist *tsgl,
u8 *hashstate)
{
@@ -1348,10 +1559,11 @@ static int test_hash_vs_generic_impl(const char *driver,
const char *algname = crypto_hash_alg_common(tfm)->base.cra_name;
char _generic_driver[CRYPTO_MAX_ALG_NAME];
struct crypto_shash *generic_tfm = NULL;
+ struct shash_desc *generic_desc = NULL;
unsigned int i;
struct hash_testvec vec = { 0 };
char vec_name[64];
- struct testvec_config cfg;
+ struct testvec_config *cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
int err;
@@ -1381,6 +1593,20 @@ static int test_hash_vs_generic_impl(const char *driver,
return err;
}
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ generic_desc = kzalloc(sizeof(*desc) +
+ crypto_shash_descsize(generic_tfm), GFP_KERNEL);
+ if (!generic_desc) {
+ err = -ENOMEM;
+ goto out;
+ }
+ generic_desc->tfm = generic_tfm;
+
/* Check the algorithm properties for consistency. */
if (digestsize != crypto_shash_digestsize(generic_tfm)) {
@@ -1412,23 +1638,25 @@ static int test_hash_vs_generic_impl(const char *driver,
}
for (i = 0; i < fuzz_iterations * 8; i++) {
- generate_random_hash_testvec(generic_tfm, &vec,
+ generate_random_hash_testvec(generic_desc, &vec,
maxkeysize, maxdatasize,
vec_name, sizeof(vec_name));
- generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname));
+ generate_random_testvec_config(cfg, cfgname, sizeof(cfgname));
- err = test_hash_vec_cfg(driver, &vec, vec_name, &cfg,
- req, tsgl, hashstate);
+ err = test_hash_vec_cfg(driver, &vec, vec_name, cfg,
+ req, desc, tsgl, hashstate);
if (err)
goto out;
cond_resched();
}
err = 0;
out:
+ kfree(cfg);
kfree(vec.key);
kfree(vec.plaintext);
kfree(vec.digest);
crypto_free_shash(generic_tfm);
+ kzfree(generic_desc);
return err;
}
#else /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
@@ -1436,6 +1664,7 @@ static int test_hash_vs_generic_impl(const char *driver,
const char *generic_driver,
unsigned int maxkeysize,
struct ahash_request *req,
+ struct shash_desc *desc,
struct test_sglist *tsgl,
u8 *hashstate)
{
@@ -1443,26 +1672,67 @@ static int test_hash_vs_generic_impl(const char *driver,
}
#endif /* !CONFIG_CRYPTO_MANAGER_EXTRA_TESTS */
+static int alloc_shash(const char *driver, u32 type, u32 mask,
+ struct crypto_shash **tfm_ret,
+ struct shash_desc **desc_ret)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
+
+ tfm = crypto_alloc_shash(driver, type, mask);
+ if (IS_ERR(tfm)) {
+ if (PTR_ERR(tfm) == -ENOENT) {
+ /*
+ * This algorithm is only available through the ahash
+ * API, not the shash API, so skip the shash tests.
+ */
+ return 0;
+ }
+ pr_err("alg: hash: failed to allocate shash transform for %s: %ld\n",
+ driver, PTR_ERR(tfm));
+ return PTR_ERR(tfm);
+ }
+
+ desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
+ if (!desc) {
+ crypto_free_shash(tfm);
+ return -ENOMEM;
+ }
+ desc->tfm = tfm;
+
+ *tfm_ret = tfm;
+ *desc_ret = desc;
+ return 0;
+}
+
static int __alg_test_hash(const struct hash_testvec *vecs,
unsigned int num_vecs, const char *driver,
u32 type, u32 mask,
const char *generic_driver, unsigned int maxkeysize)
{
- struct crypto_ahash *tfm;
+ struct crypto_ahash *atfm = NULL;
struct ahash_request *req = NULL;
+ struct crypto_shash *stfm = NULL;
+ struct shash_desc *desc = NULL;
struct test_sglist *tsgl = NULL;
u8 *hashstate = NULL;
+ unsigned int statesize;
unsigned int i;
int err;
- tfm = crypto_alloc_ahash(driver, type, mask);
- if (IS_ERR(tfm)) {
+ /*
+ * Always test the ahash API. This works regardless of whether the
+ * algorithm is implemented as ahash or shash.
+ */
+
+ atfm = crypto_alloc_ahash(driver, type, mask);
+ if (IS_ERR(atfm)) {
pr_err("alg: hash: failed to allocate transform for %s: %ld\n",
- driver, PTR_ERR(tfm));
- return PTR_ERR(tfm);
+ driver, PTR_ERR(atfm));
+ return PTR_ERR(atfm);
}
- req = ahash_request_alloc(tfm, GFP_KERNEL);
+ req = ahash_request_alloc(atfm, GFP_KERNEL);
if (!req) {
pr_err("alg: hash: failed to allocate request for %s\n",
driver);
@@ -1470,6 +1740,14 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
goto out;
}
+ /*
+ * If available, also test the shash API, to cover corner cases that may
+ * be missed by testing only the ahash API.
+ */
+ err = alloc_shash(driver, type, mask, &stfm, &desc);
+ if (err)
+ goto out;
+
tsgl = kmalloc(sizeof(*tsgl), GFP_KERNEL);
if (!tsgl || init_test_sglist(tsgl) != 0) {
pr_err("alg: hash: failed to allocate test buffers for %s\n",
@@ -1480,8 +1758,10 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
goto out;
}
- hashstate = kmalloc(crypto_ahash_statesize(tfm) + TESTMGR_POISON_LEN,
- GFP_KERNEL);
+ statesize = crypto_ahash_statesize(atfm);
+ if (stfm)
+ statesize = max(statesize, crypto_shash_statesize(stfm));
+ hashstate = kmalloc(statesize + TESTMGR_POISON_LEN, GFP_KERNEL);
if (!hashstate) {
pr_err("alg: hash: failed to allocate hash state buffer for %s\n",
driver);
@@ -1490,20 +1770,24 @@ static int __alg_test_hash(const struct hash_testvec *vecs,
}
for (i = 0; i < num_vecs; i++) {
- err = test_hash_vec(driver, &vecs[i], i, req, tsgl, hashstate);
+ err = test_hash_vec(driver, &vecs[i], i, req, desc, tsgl,
+ hashstate);
if (err)
goto out;
+ cond_resched();
}
err = test_hash_vs_generic_impl(driver, generic_driver, maxkeysize, req,
- tsgl, hashstate);
+ desc, tsgl, hashstate);
out:
kfree(hashstate);
if (tsgl) {
destroy_test_sglist(tsgl);
kfree(tsgl);
}
+ kfree(desc);
+ crypto_free_shash(stfm);
ahash_request_free(req);
- crypto_free_ahash(tfm);
+ crypto_free_ahash(atfm);
return err;
}
@@ -1755,6 +2039,7 @@ static int test_aead_vec(const char *driver, int enc,
&cfg, req, tsgls);
if (err)
return err;
+ cond_resched();
}
}
#endif
@@ -1864,7 +2149,7 @@ static int test_aead_vs_generic_impl(const char *driver,
unsigned int i;
struct aead_testvec vec = { 0 };
char vec_name[64];
- struct testvec_config cfg;
+ struct testvec_config *cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
int err;
@@ -1894,6 +2179,12 @@ static int test_aead_vs_generic_impl(const char *driver,
return err;
}
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
generic_req = aead_request_alloc(generic_tfm, GFP_KERNEL);
if (!generic_req) {
err = -ENOMEM;
@@ -1948,13 +2239,13 @@ static int test_aead_vs_generic_impl(const char *driver,
generate_random_aead_testvec(generic_req, &vec,
maxkeysize, maxdatasize,
vec_name, sizeof(vec_name));
- generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname));
+ generate_random_testvec_config(cfg, cfgname, sizeof(cfgname));
- err = test_aead_vec_cfg(driver, ENCRYPT, &vec, vec_name, &cfg,
+ err = test_aead_vec_cfg(driver, ENCRYPT, &vec, vec_name, cfg,
req, tsgls);
if (err)
goto out;
- err = test_aead_vec_cfg(driver, DECRYPT, &vec, vec_name, &cfg,
+ err = test_aead_vec_cfg(driver, DECRYPT, &vec, vec_name, cfg,
req, tsgls);
if (err)
goto out;
@@ -1962,6 +2253,7 @@ static int test_aead_vs_generic_impl(const char *driver,
}
err = 0;
out:
+ kfree(cfg);
kfree(vec.key);
kfree(vec.iv);
kfree(vec.assoc);
@@ -1994,6 +2286,7 @@ static int test_aead(const char *driver, int enc,
tsgls);
if (err)
return err;
+ cond_resched();
}
return 0;
}
@@ -2336,6 +2629,7 @@ static int test_skcipher_vec(const char *driver, int enc,
&cfg, req, tsgls);
if (err)
return err;
+ cond_resched();
}
}
#endif
@@ -2409,7 +2703,7 @@ static int test_skcipher_vs_generic_impl(const char *driver,
unsigned int i;
struct cipher_testvec vec = { 0 };
char vec_name[64];
- struct testvec_config cfg;
+ struct testvec_config *cfg;
char cfgname[TESTVEC_CONFIG_NAMELEN];
int err;
@@ -2443,6 +2737,12 @@ static int test_skcipher_vs_generic_impl(const char *driver,
return err;
}
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
generic_req = skcipher_request_alloc(generic_tfm, GFP_KERNEL);
if (!generic_req) {
err = -ENOMEM;
@@ -2490,20 +2790,21 @@ static int test_skcipher_vs_generic_impl(const char *driver,
for (i = 0; i < fuzz_iterations * 8; i++) {
generate_random_cipher_testvec(generic_req, &vec, maxdatasize,
vec_name, sizeof(vec_name));
- generate_random_testvec_config(&cfg, cfgname, sizeof(cfgname));
+ generate_random_testvec_config(cfg, cfgname, sizeof(cfgname));
err = test_skcipher_vec_cfg(driver, ENCRYPT, &vec, vec_name,
- &cfg, req, tsgls);
+ cfg, req, tsgls);
if (err)
goto out;
err = test_skcipher_vec_cfg(driver, DECRYPT, &vec, vec_name,
- &cfg, req, tsgls);
+ cfg, req, tsgls);
if (err)
goto out;
cond_resched();
}
err = 0;
out:
+ kfree(cfg);
kfree(vec.key);
kfree(vec.iv);
kfree(vec.ptext);
@@ -2535,6 +2836,7 @@ static int test_skcipher(const char *driver, int enc,
tsgls);
if (err)
return err;
+ cond_resched();
}
return 0;
}
@@ -4125,6 +4427,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}, {
.alg = "ecb(arc4)",
+ .generic_driver = "ecb(arc4)-generic",
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(arc4_tv_template)
@@ -4790,6 +5093,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.test = alg_test_null,
.fips_allowed = 1,
}, {
+ .alg = "xxhash64",
+ .test = alg_test_hash,
+ .fips_allowed = 1,
+ .suite = {
+ .hash = __VECS(xxhash64_tv_template)
+ }
+ }, {
.alg = "zlib-deflate",
.test = alg_test_comp,
.fips_allowed = 1,
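The testmgr.c hunks above move what used to live on the stack (SHASH_DESC_ON_STACK() descriptors and struct testvec_config) onto the heap: a shash descriptor's size is only known once the tfm exists, and the config structure had grown too large for the stack. A minimal sketch of the resulting pattern, assuming a caller outside testmgr and an illustrative algorithm name:

#include <crypto/hash.h>
#include <linux/slab.h>

static int sketch_heap_shash_digest(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);	/* illustrative name */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* The descriptor size is per-algorithm, so size it at runtime. */
	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return err;
}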
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 1fdae5993bc3..073bd2efafca 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -38,7 +38,7 @@ struct hash_testvec {
const char *key;
const char *plaintext;
const char *digest;
- unsigned short psize;
+ unsigned int psize;
unsigned short ksize;
int setkey_error;
int digest_error;
@@ -69,7 +69,7 @@ struct cipher_testvec {
const char *ctext;
unsigned char wk; /* weak key flag */
unsigned short klen;
- unsigned short len;
+ unsigned int len;
bool fips_skip;
bool generates_iv;
int setkey_error;
@@ -105,9 +105,9 @@ struct aead_testvec {
unsigned char novrfy;
unsigned char wk;
unsigned char klen;
- unsigned short plen;
- unsigned short clen;
- unsigned short alen;
+ unsigned int plen;
+ unsigned int clen;
+ unsigned int alen;
int setkey_error;
int setauthsize_error;
int crypt_error;
@@ -33382,6 +33382,112 @@ static const struct hash_testvec crc32c_tv_template[] = {
}
};
+static const struct hash_testvec xxhash64_tv_template[] = {
+ {
+ .psize = 0,
+ .digest = "\x99\xe9\xd8\x51\x37\xdb\x46\xef",
+ },
+ {
+ .plaintext = "\x40",
+ .psize = 1,
+ .digest = "\x20\x5c\x91\xaa\x88\xeb\x59\xd0",
+ },
+ {
+ .plaintext = "\x40\x8b\xb8\x41\xe4\x42\x15\x2d"
+ "\x88\xc7\x9a\x09\x1a\x9b",
+ .psize = 14,
+ .digest = "\xa8\xe8\x2b\xa9\x92\xa1\x37\x4a",
+ },
+ {
+ .plaintext = "\x40\x8b\xb8\x41\xe4\x42\x15\x2d"
+ "\x88\xc7\x9a\x09\x1a\x9b\x42\xe0"
+ "\xd4\x38\xa5\x2a\x26\xa5\x19\x4b"
+ "\x57\x65\x7f\xad\xc3\x7d\xca\x40"
+ "\x31\x65\x05\xbb\x31\xae\x51\x11"
+ "\xa8\xc0\xb3\x28\x42\xeb\x3c\x46"
+ "\xc8\xed\xed\x0f\x8d\x0b\xfa\x6e"
+ "\xbc\xe3\x88\x53\xca\x8f\xc8\xd9"
+ "\x41\x26\x7a\x3d\x21\xdb\x1a\x3c"
+ "\x01\x1d\xc9\xe9\xb7\x3a\x78\x67"
+ "\x57\x20\x94\xf1\x1e\xfd\xce\x39"
+ "\x99\x57\x69\x39\xa5\xd0\x8d\xd9"
+ "\x43\xfe\x1d\x66\x04\x3c\x27\x6a"
+ "\xe1\x0d\xe7\xc9\xfa\xc9\x07\x56"
+ "\xa5\xb3\xec\xd9\x1f\x42\x65\x66"
+ "\xaa\xbf\x87\x9b\xc5\x41\x9c\x27"
+ "\x3f\x2f\xa9\x55\x93\x01\x27\x33"
+ "\x43\x99\x4d\x81\x85\xae\x82\x00"
+ "\x6c\xd0\xd1\xa3\x57\x18\x06\xcc"
+ "\xec\x72\xf7\x8e\x87\x2d\x1f\x5e"
+ "\xd7\x5b\x1f\x36\x4c\xfa\xfd\x18"
+ "\x89\x76\xd3\x5e\xb5\x5a\xc0\x01"
+ "\xd2\xa1\x9a\x50\xe6\x08\xb4\x76"
+ "\x56\x4f\x0e\xbc\x54\xfc\x67\xe6"
+ "\xb9\xc0\x28\x4b\xb5\xc3\xff\x79"
+ "\x52\xea\xa1\x90\xc3\xaf\x08\x70"
+ "\x12\x02\x0c\xdb\x94\x00\x38\x95"
+ "\xed\xfd\x08\xf7\xe8\x04",
+ .psize = 222,
+ .digest = "\x41\xfc\xd4\x29\xfe\xe7\x85\x17",
+ },
+ {
+ .psize = 0,
+ .key = "\xb1\x79\x37\x9e\x00\x00\x00\x00",
+ .ksize = 8,
+ .digest = "\xef\x17\x9b\x92\xa2\xfd\x75\xac",
+ },
+
+ {
+ .plaintext = "\x40",
+ .psize = 1,
+ .key = "\xb1\x79\x37\x9e\x00\x00\x00\x00",
+ .ksize = 8,
+ .digest = "\xd1\x70\x4f\x14\x02\xc4\x9e\x71",
+ },
+ {
+ .plaintext = "\x40\x8b\xb8\x41\xe4\x42\x15\x2d"
+ "\x88\xc7\x9a\x09\x1a\x9b",
+ .psize = 14,
+ .key = "\xb1\x79\x37\x9e\x00\x00\x00\x00",
+ .ksize = 8,
+ .digest = "\xa4\xcd\xfe\x8e\x37\xe2\x1c\x64"
+ },
+ {
+ .plaintext = "\x40\x8b\xb8\x41\xe4\x42\x15\x2d"
+ "\x88\xc7\x9a\x09\x1a\x9b\x42\xe0"
+ "\xd4\x38\xa5\x2a\x26\xa5\x19\x4b"
+ "\x57\x65\x7f\xad\xc3\x7d\xca\x40"
+ "\x31\x65\x05\xbb\x31\xae\x51\x11"
+ "\xa8\xc0\xb3\x28\x42\xeb\x3c\x46"
+ "\xc8\xed\xed\x0f\x8d\x0b\xfa\x6e"
+ "\xbc\xe3\x88\x53\xca\x8f\xc8\xd9"
+ "\x41\x26\x7a\x3d\x21\xdb\x1a\x3c"
+ "\x01\x1d\xc9\xe9\xb7\x3a\x78\x67"
+ "\x57\x20\x94\xf1\x1e\xfd\xce\x39"
+ "\x99\x57\x69\x39\xa5\xd0\x8d\xd9"
+ "\x43\xfe\x1d\x66\x04\x3c\x27\x6a"
+ "\xe1\x0d\xe7\xc9\xfa\xc9\x07\x56"
+ "\xa5\xb3\xec\xd9\x1f\x42\x65\x66"
+ "\xaa\xbf\x87\x9b\xc5\x41\x9c\x27"
+ "\x3f\x2f\xa9\x55\x93\x01\x27\x33"
+ "\x43\x99\x4d\x81\x85\xae\x82\x00"
+ "\x6c\xd0\xd1\xa3\x57\x18\x06\xcc"
+ "\xec\x72\xf7\x8e\x87\x2d\x1f\x5e"
+ "\xd7\x5b\x1f\x36\x4c\xfa\xfd\x18"
+ "\x89\x76\xd3\x5e\xb5\x5a\xc0\x01"
+ "\xd2\xa1\x9a\x50\xe6\x08\xb4\x76"
+ "\x56\x4f\x0e\xbc\x54\xfc\x67\xe6"
+ "\xb9\xc0\x28\x4b\xb5\xc3\xff\x79"
+ "\x52\xea\xa1\x90\xc3\xaf\x08\x70"
+ "\x12\x02\x0c\xdb\x94\x00\x38\x95"
+ "\xed\xfd\x08\xf7\xe8\x04",
+ .psize = 222,
+ .key = "\xb1\x79\x37\x9e\x00\x00\x00\x00",
+ .ksize = 8,
+ .digest = "\x58\xbc\x55\xf2\x42\x81\x5c\xf0"
+ },
+};
+
static const struct comp_testvec lz4_comp_tv_template[] = {
{
.inlen = 255,
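A detail worth noting in the keyed xxhash64 vectors above: the 8-byte key is the little-endian encoding of the xxhash seed. A worked check (not part of the patch):

#include <asm/unaligned.h>

static u64 sketch_xxhash64_seed(void)
{
	/* the test key "\xb1\x79\x37\x9e\x00\x00\x00\x00" as bytes */
	static const u8 key[8] = { 0xb1, 0x79, 0x37, 0x9e, 0, 0, 0, 0 };

	return get_unaligned_le64(key);	/* == 0x000000009e3779b1 */
}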
diff --git a/crypto/tgr192.c b/crypto/tgr192.c
index 702c2c89c7a1..052648e24909 100644
--- a/crypto/tgr192.c
+++ b/crypto/tgr192.c
@@ -630,9 +630,10 @@ static struct shash_alg tgr_algs[3] = { {
.final = tgr192_final,
.descsize = sizeof(struct tgr192_ctx),
.base = {
- .cra_name = "tgr192",
- .cra_blocksize = TGR192_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "tgr192",
+ .cra_driver_name = "tgr192-generic",
+ .cra_blocksize = TGR192_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
}, {
.digestsize = TGR160_DIGEST_SIZE,
@@ -641,9 +642,10 @@ static struct shash_alg tgr_algs[3] = { {
.final = tgr160_final,
.descsize = sizeof(struct tgr192_ctx),
.base = {
- .cra_name = "tgr160",
- .cra_blocksize = TGR192_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "tgr160",
+ .cra_driver_name = "tgr160-generic",
+ .cra_blocksize = TGR192_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
}, {
.digestsize = TGR128_DIGEST_SIZE,
@@ -652,9 +654,10 @@ static struct shash_alg tgr_algs[3] = { {
.final = tgr128_final,
.descsize = sizeof(struct tgr192_ctx),
.base = {
- .cra_name = "tgr128",
- .cra_blocksize = TGR192_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "tgr128",
+ .cra_driver_name = "tgr128-generic",
+ .cra_blocksize = TGR192_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
} };
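The only change to tgr192.c (and to wp512.c and zstd.c below) is the addition of a cra_driver_name. The new testmgr fuzz tests compare each driver against the generic implementation, which they look up by driver name; when an alg_test_desc does not set .generic_driver explicitly (as was done for "ecb(arc4)" above), the name is derived by convention, roughly as follows (a sketch of the convention, not a verbatim excerpt):

	if (strlen(algname) + sizeof("-generic") > CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;
	snprintf(_generic_driver, CRYPTO_MAX_ALG_NAME, "%s-generic", algname);

This only works if the generic implementation actually registers a "<name>-generic" driver name, which is what these hunks add.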
diff --git a/crypto/wp512.c b/crypto/wp512.c
index 1b8e502d999f..feadc13ccae0 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1126,9 +1126,10 @@ static struct shash_alg wp_algs[3] = { {
.final = wp512_final,
.descsize = sizeof(struct wp512_ctx),
.base = {
- .cra_name = "wp512",
- .cra_blocksize = WP512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "wp512",
+ .cra_driver_name = "wp512-generic",
+ .cra_blocksize = WP512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
}, {
.digestsize = WP384_DIGEST_SIZE,
@@ -1137,9 +1138,10 @@ static struct shash_alg wp_algs[3] = { {
.final = wp384_final,
.descsize = sizeof(struct wp512_ctx),
.base = {
- .cra_name = "wp384",
- .cra_blocksize = WP512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "wp384",
+ .cra_driver_name = "wp384-generic",
+ .cra_blocksize = WP512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
}, {
.digestsize = WP256_DIGEST_SIZE,
@@ -1148,9 +1150,10 @@ static struct shash_alg wp_algs[3] = { {
.final = wp256_final,
.descsize = sizeof(struct wp512_ctx),
.base = {
- .cra_name = "wp256",
- .cra_blocksize = WP512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
+ .cra_name = "wp256",
+ .cra_driver_name = "wp256-generic",
+ .cra_blocksize = WP512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
}
} };
diff --git a/crypto/xxhash_generic.c b/crypto/xxhash_generic.c
new file mode 100644
index 000000000000..4aad2c0f40a9
--- /dev/null
+++ b/crypto/xxhash_generic.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/xxhash.h>
+#include <asm/unaligned.h>
+
+#define XXHASH64_BLOCK_SIZE 32
+#define XXHASH64_DIGEST_SIZE 8
+
+struct xxhash64_tfm_ctx {
+ u64 seed;
+};
+
+struct xxhash64_desc_ctx {
+ struct xxh64_state xxhstate;
+};
+
+static int xxhash64_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct xxhash64_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen != sizeof(tctx->seed)) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+ tctx->seed = get_unaligned_le64(key);
+ return 0;
+}
+
+static int xxhash64_init(struct shash_desc *desc)
+{
+ struct xxhash64_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct xxhash64_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ xxh64_reset(&dctx->xxhstate, tctx->seed);
+
+ return 0;
+}
+
+static int xxhash64_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ struct xxhash64_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ xxh64_update(&dctx->xxhstate, data, length);
+
+ return 0;
+}
+
+static int xxhash64_final(struct shash_desc *desc, u8 *out)
+{
+ struct xxhash64_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ put_unaligned_le64(xxh64_digest(&dctx->xxhstate), out);
+
+ return 0;
+}
+
+static int xxhash64_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int length, u8 *out)
+{
+ struct xxhash64_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+
+ put_unaligned_le64(xxh64(data, length, tctx->seed), out);
+
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = XXHASH64_DIGEST_SIZE,
+ .setkey = xxhash64_setkey,
+ .init = xxhash64_init,
+ .update = xxhash64_update,
+ .final = xxhash64_final,
+ .digest = xxhash64_digest,
+ .descsize = sizeof(struct xxhash64_desc_ctx),
+ .base = {
+ .cra_name = "xxhash64",
+ .cra_driver_name = "xxhash64-generic",
+ .cra_priority = 100,
+ .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .cra_blocksize = XXHASH64_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct xxhash64_tfm_ctx),
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init xxhash_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit xxhash_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+subsys_initcall(xxhash_mod_init);
+module_exit(xxhash_mod_fini);
+
+MODULE_AUTHOR("Nikolay Borisov <nborisov@suse.com>");
+MODULE_DESCRIPTION("xxhash calculations wrapper for lib/xxhash.c");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("xxhash64");
+MODULE_ALIAS_CRYPTO("xxhash64-generic");
diff --git a/crypto/zstd.c b/crypto/zstd.c
index f1e4c70c9d24..5a3ff258d8f7 100644
--- a/crypto/zstd.c
+++ b/crypto/zstd.c
@@ -206,6 +206,7 @@ static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
static struct crypto_alg alg = {
.cra_name = "zstd",
+ .cra_driver_name = "zstd-generic",
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct zstd_ctx),
.cra_module = THIS_MODULE,
diff --git a/drivers/Kconfig b/drivers/Kconfig
index e8231663f201..61cf4ea2c229 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -188,8 +188,6 @@ source "drivers/ipack/Kconfig"
source "drivers/reset/Kconfig"
-source "drivers/fmc/Kconfig"
-
source "drivers/phy/Kconfig"
source "drivers/powercap/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 28b030d7988d..6d37564e783c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -168,7 +168,6 @@ obj-$(CONFIG_IIO) += iio/
obj-$(CONFIG_VME_BUS) += vme/
obj-$(CONFIG_IPACK_BUS) += ipack/
obj-$(CONFIG_NTB) += ntb/
-obj-$(CONFIG_FMC) += fmc/
obj-$(CONFIG_POWERCAP) += powercap/
obj-$(CONFIG_MCB) += mcb/
obj-$(CONFIG_PERF_EVENTS) += perf/
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 6b3f1217a237..e7dc0133f817 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -64,6 +64,7 @@ static void power_saving_mwait_init(void)
case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
case X86_VENDOR_INTEL:
+ case X86_VENDOR_ZHAOXIN:
/*
* AMD Fam10h TSC will tick in all
* C/P/S0/S1 states when this bit is set.
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index 89690a471360..e209081d644b 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -292,3 +292,29 @@ void __init acpi_set_irq_model(enum acpi_irq_model_id model,
acpi_irq_model = model;
acpi_gsi_domain_id = fwnode;
}
+
+/**
+ * acpi_irq_create_hierarchy - Create a hierarchical IRQ domain with the default
+ * GSI domain as its parent.
+ * @flags: Irq domain flags associated with the domain
+ * @size: Size of the domain.
+ * @fwnode: Optional fwnode of the interrupt controller
+ * @ops: Pointer to the interrupt domain callbacks
+ * @host_data: Controller private data pointer
+ */
+struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
+ unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
+ DOMAIN_BUS_ANY);
+
+ if (!d)
+ return NULL;
+
+ return irq_domain_create_hierarchy(d, flags, size, fwnode, ops,
+ host_data);
+}
+EXPORT_SYMBOL_GPL(acpi_irq_create_hierarchy);
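For context, an ACPI irqchip would call the new helper from its probe path roughly as below; the domain size, ops and private data are illustrative, not taken from this patch:

#include <linux/acpi.h>
#include <linux/irqdomain.h>

static const struct irq_domain_ops sketch_fic_ops;	/* callbacks elided */

static int sketch_fic_init(struct fwnode_handle *fwnode, void *priv)
{
	struct irq_domain *d;

	/* Stack this controller's domain on top of the GSI domain. */
	d = acpi_irq_create_hierarchy(0, 128, fwnode, &sketch_fic_ops, priv);
	return d ? 0 : -ENODEV;
}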
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index b72e6afaa8fb..1e7ac0bd0d3a 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -432,17 +432,40 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
}
}
+static bool flag_identical(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu)
+{
+ struct acpi_pptt_processor *next;
+
+ /* heterogeneous machines must use PPTT revision > 1 */
+ if (table_hdr->revision < 2)
+ return false;
+
+ /* Locate the last node in the tree with IDENTICAL set */
+ if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) {
+ next = fetch_pptt_node(table_hdr, cpu->parent);
+ if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL))
+ return true;
+ }
+
+ return false;
+}
+
/* Passing level values greater than this will result in search termination */
#define PPTT_ABORT_PACKAGE 0xFF
-static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
- struct acpi_pptt_processor *cpu,
- int level, int flag)
+static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu,
+ int level, int flag)
{
struct acpi_pptt_processor *prev_node;
while (cpu && level) {
- if (cpu->flags & flag)
+ /* special-case the identical flag to find the last identical node */
+ if (flag == ACPI_PPTT_ACPI_IDENTICAL) {
+ if (flag_identical(table_hdr, cpu))
+ break;
+ } else if (cpu->flags & flag)
break;
pr_debug("level %d\n", level);
prev_node = fetch_pptt_node(table_hdr, cpu->parent);
@@ -480,8 +503,8 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
if (cpu_node) {
- cpu_node = acpi_find_processor_package_id(table, cpu_node,
- level, flag);
+ cpu_node = acpi_find_processor_tag(table, cpu_node,
+ level, flag);
/*
* As per specification if the processor structure represents
* an actual processor, then ACPI processor ID must be valid.
@@ -660,3 +683,29 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
ACPI_PPTT_PHYSICAL_PACKAGE);
}
+
+/**
+ * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
+ * @cpu: Kernel logical CPU number
+ *
+ * Determine a unique heterogeneous tag for the given CPU. CPUs with the same
+ * implementation should have matching tags.
+ *
+ * The returned tag can be used to group peers with identical implementation.
+ *
+ * The search terminates when a level is found with the identical implementation
+ * flag set or we reach a root node.
+ *
+ * Due to limitations in the PPTT data structure, there may be rare
+ * situations in which two identical cores in a heterogeneous machine
+ * do not receive the same tag.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist or the CPU cannot be found.
+ * Otherwise returns a value which represents a group of identical cores
+ * similar to this CPU.
+ */
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+ return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
+ ACPI_PPTT_ACPI_IDENTICAL);
+}
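A small consumer sketch of the new tag (the helper here is hypothetical; the function itself is declared alongside the other topology helpers):

#include <linux/acpi.h>

/* Do two logical CPUs share an identical implementation per the PPTT? */
static bool sketch_cpus_identical(unsigned int a, unsigned int b)
{
	int ta = find_acpi_cpu_topology_hetero_id(a);
	int tb = find_acpi_cpu_topology_hetero_id(b);

	return ta >= 0 && ta == tb;
}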
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e387a258d649..ed56c6d20b08 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -196,6 +196,7 @@ static void tsc_check_state(int state)
case X86_VENDOR_AMD:
case X86_VENDOR_INTEL:
case X86_VENDOR_CENTAUR:
+ case X86_VENDOR_ZHAOXIN:
/*
* AMD Fam10h TSC will tick in all
* C/P/S0/S1 states when this bit is set.
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1739d7e1952a..9b09e31ae82f 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -43,7 +43,7 @@ static ssize_t cpu_capacity_show(struct device *dev,
{
struct cpu *cpu = container_of(dev, struct cpu, dev);
- return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
+ return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}
static void update_topology_flags_workfn(struct work_struct *work);
@@ -116,7 +116,7 @@ void topology_normalize_cpu_scale(void)
/ capacity_scale;
topology_set_cpu_scale(cpu, capacity);
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
- cpu, topology_get_cpu_scale(NULL, cpu));
+ cpu, topology_get_cpu_scale(cpu));
}
}
@@ -185,7 +185,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
for_each_cpu(cpu, policy->related_cpus) {
- raw_capacity[cpu] = topology_get_cpu_scale(NULL, cpu) *
+ raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
policy->cpuinfo.max_freq / 1000UL;
capacity_scale = max(raw_capacity[cpu], capacity_scale);
}
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index a7359535caf5..8827c60f51e2 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -213,6 +213,8 @@ int __weak cache_setup_acpi(unsigned int cpu)
return -ENOTSUPP;
}
+unsigned int coherency_max_size;
+
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -251,6 +253,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
}
}
+ /* record the maximum cache line size */
+ if (this_leaf->coherency_line_size > coherency_max_size)
+ coherency_max_size = this_leaf->coherency_line_size;
}
return 0;
diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index 6ad5ef48b61e..a4984136c19d 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -4,7 +4,7 @@
# subsystems should select the appropriate symbols.
config REGMAP
- default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ)
+ default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SCCB || REGMAP_I3C)
select IRQ_DOMAIN if REGMAP_IRQ
bool
@@ -49,3 +49,7 @@ config REGMAP_SOUNDWIRE
config REGMAP_SCCB
tristate
depends on I2C
+
+config REGMAP_I3C
+ tristate
+ depends on I3C
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index f5b4e8851d00..ff6c7d8ec1cd 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
obj-$(CONFIG_REGMAP_W1) += regmap-w1.o
obj-$(CONFIG_REGMAP_SOUNDWIRE) += regmap-sdw.o
obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o
+obj-$(CONFIG_REGMAP_I3C) += regmap-i3c.o
diff --git a/drivers/base/regmap/regcache-lzo.c b/drivers/base/regmap/regcache-lzo.c
index fc14e8b9344f..7886303eb026 100644
--- a/drivers/base/regmap/regcache-lzo.c
+++ b/drivers/base/regmap/regcache-lzo.c
@@ -148,20 +148,18 @@ static int regcache_lzo_init(struct regmap *map)
* that register.
*/
bmp_size = map->num_reg_defaults_raw;
- sync_bmp = kmalloc_array(BITS_TO_LONGS(bmp_size), sizeof(long),
- GFP_KERNEL);
+ sync_bmp = bitmap_zalloc(bmp_size, GFP_KERNEL);
if (!sync_bmp) {
ret = -ENOMEM;
goto err;
}
- bitmap_zero(sync_bmp, bmp_size);
/* allocate the lzo blocks and initialize them */
for (i = 0; i < blkcount; i++) {
lzo_blocks[i] = kzalloc(sizeof **lzo_blocks,
GFP_KERNEL);
if (!lzo_blocks[i]) {
- kfree(sync_bmp);
+ bitmap_free(sync_bmp);
ret = -ENOMEM;
goto err;
}
@@ -213,7 +211,7 @@ static int regcache_lzo_exit(struct regmap *map)
* only once.
*/
if (lzo_blocks[0])
- kfree(lzo_blocks[0]->sync_bmp);
+ bitmap_free(lzo_blocks[0]->sync_bmp);
for (i = 0; i < blkcount; i++) {
if (lzo_blocks[i]) {
kfree(lzo_blocks[i]->wmem);
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 263f82516ff4..e5e1b3a01b1a 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -579,6 +579,8 @@ void regmap_debugfs_init(struct regmap *map, const char *name)
}
if (!strcmp(name, "dummy")) {
+ kfree(map->debugfs_name);
+
map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d",
dummy_index);
name = map->debugfs_name;
diff --git a/drivers/base/regmap/regmap-i3c.c b/drivers/base/regmap/regmap-i3c.c
new file mode 100644
index 000000000000..1578fb506683
--- /dev/null
+++ b/drivers/base/regmap/regmap-i3c.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
+
+#include <linux/regmap.h>
+#include <linux/i3c/device.h>
+#include <linux/i3c/master.h>
+#include <linux/module.h>
+
+static int regmap_i3c_write(void *context, const void *data, size_t count)
+{
+ struct device *dev = context;
+ struct i3c_device *i3c = dev_to_i3cdev(dev);
+ struct i3c_priv_xfer xfers[] = {
+ {
+ .rnw = false,
+ .len = count,
+ .data.out = data,
+ },
+ };
+
+ return i3c_device_do_priv_xfers(i3c, xfers, 1);
+}
+
+static int regmap_i3c_read(void *context,
+ const void *reg, size_t reg_size,
+ void *val, size_t val_size)
+{
+ struct device *dev = context;
+ struct i3c_device *i3c = dev_to_i3cdev(dev);
+ struct i3c_priv_xfer xfers[2];
+
+ xfers[0].rnw = false;
+ xfers[0].len = reg_size;
+ xfers[0].data.out = reg;
+
+ xfers[1].rnw = true;
+ xfers[1].len = val_size;
+ xfers[1].data.in = val;
+
+ return i3c_device_do_priv_xfers(i3c, xfers, 2);
+}
+
+static struct regmap_bus regmap_i3c = {
+ .write = regmap_i3c_write,
+ .read = regmap_i3c_read,
+};
+
+struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c,
+ const struct regmap_config *config,
+ struct lock_class_key *lock_key,
+ const char *lock_name)
+{
+ return __devm_regmap_init(&i3c->dev, &regmap_i3c, &i3c->dev, config,
+ lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_i3c);
+
+MODULE_AUTHOR("Vitor Soares <vitor.soares@synopsys.com>");
+MODULE_DESCRIPTION("Regmap I3C Module");
+MODULE_LICENSE("GPL v2");
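A client driver would wire this up from its I3C probe roughly as below; this assumes the usual devm_regmap_init_i3c() wrapper macro around __devm_regmap_init_i3c(), and the register layout is illustrative:

#include <linux/i3c/device.h>
#include <linux/regmap.h>

static int sketch_i3c_probe(struct i3c_device *i3cdev)
{
	static const struct regmap_config cfg = {
		.reg_bits = 8,
		.val_bits = 8,
	};
	struct regmap *map;

	map = devm_regmap_init_i3c(i3cdev, &cfg);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* Register access now goes through I3C private transfers. */
	return regmap_write(map, 0x00, 0x01);
}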
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index f1025452bb39..19f57ccfbe1d 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1637,6 +1637,8 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
map->format.reg_bytes +
map->format.pad_bytes,
val, val_len);
+ else
+ ret = -ENOTSUPP;
/* If that didn't work fall back on linearising by hand. */
if (ret == -ENOTSUPP) {
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 5fd9f167ecc1..4e033d4cc0dc 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -43,6 +43,9 @@ static ssize_t name##_list_show(struct device *dev, \
define_id_show_func(physical_package_id);
static DEVICE_ATTR_RO(physical_package_id);
+define_id_show_func(die_id);
+static DEVICE_ATTR_RO(die_id);
+
define_id_show_func(core_id);
static DEVICE_ATTR_RO(core_id);
@@ -50,10 +53,22 @@ define_siblings_show_func(thread_siblings, sibling_cpumask);
static DEVICE_ATTR_RO(thread_siblings);
static DEVICE_ATTR_RO(thread_siblings_list);
+define_siblings_show_func(core_cpus, sibling_cpumask);
+static DEVICE_ATTR_RO(core_cpus);
+static DEVICE_ATTR_RO(core_cpus_list);
+
define_siblings_show_func(core_siblings, core_cpumask);
static DEVICE_ATTR_RO(core_siblings);
static DEVICE_ATTR_RO(core_siblings_list);
+define_siblings_show_func(die_cpus, die_cpumask);
+static DEVICE_ATTR_RO(die_cpus);
+static DEVICE_ATTR_RO(die_cpus_list);
+
+define_siblings_show_func(package_cpus, core_cpumask);
+static DEVICE_ATTR_RO(package_cpus);
+static DEVICE_ATTR_RO(package_cpus_list);
+
#ifdef CONFIG_SCHED_BOOK
define_id_show_func(book_id);
static DEVICE_ATTR_RO(book_id);
@@ -72,11 +87,18 @@ static DEVICE_ATTR_RO(drawer_siblings_list);
static struct attribute *default_attrs[] = {
&dev_attr_physical_package_id.attr,
+ &dev_attr_die_id.attr,
&dev_attr_core_id.attr,
&dev_attr_thread_siblings.attr,
&dev_attr_thread_siblings_list.attr,
+ &dev_attr_core_cpus.attr,
+ &dev_attr_core_cpus_list.attr,
&dev_attr_core_siblings.attr,
&dev_attr_core_siblings_list.attr,
+ &dev_attr_die_cpus.attr,
+ &dev_attr_die_cpus_list.attr,
+ &dev_attr_package_cpus.attr,
+ &dev_attr_package_cpus_list.attr,
#ifdef CONFIG_SCHED_BOOK
&dev_attr_book_id.attr,
&dev_attr_book_siblings.attr,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 31237f45247a..ddbf56014c51 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1960,7 +1960,7 @@ static inline void wake_ack_receiver(struct drbd_connection *connection)
{
struct task_struct *task = connection->ack_receiver.task;
if (task && get_t_state(&connection->ack_receiver) == RUNNING)
- force_sig(SIGXCPU, task);
+ send_sig(SIGXCPU, task, 1);
}
static inline void request_ping(struct drbd_connection *connection)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 541b31fa42b3..9604127dc839 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -465,7 +465,7 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
smp_mb();
init_completion(&thi->stop);
if (thi->task != current)
- force_sig(DRBD_SIGKILL, thi->task);
+ send_sig(DRBD_SIGKILL, thi->task, 1);
}
spin_unlock_irqrestore(&thi->t_lock, flags);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index cdd748b8116d..5d52a2d32155 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -599,7 +599,7 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection)
struct task_struct *opa;
kref_get(&connection->kref);
- /* We may just have force_sig()'ed this thread
+ /* We may have just sent a signal to this thread
* to get it out of some blocking network function.
* Clear signals; otherwise kthread_run(), which internally uses
* wait_on_completion_killable(), will mistake our pending signal
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index 658664a5a5aa..df1edb5ec0ad 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -1311,8 +1311,7 @@ static void ipi_handler(void *null)
void global_cache_flush(void)
{
- if (on_each_cpu(ipi_handler, NULL, 1) != 0)
- panic(PFX "timed out waiting for the other CPUs!\n");
+ on_each_cpu(ipi_handler, NULL, 1);
}
EXPORT_SYMBOL(global_cache_flush);
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
index 8b5a20b35293..92be1c0ab99f 100644
--- a/drivers/char/hw_random/iproc-rng200.c
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -220,6 +220,7 @@ static int iproc_rng200_probe(struct platform_device *pdev)
}
static const struct of_device_id iproc_rng200_of_match[] = {
+ { .compatible = "brcm,bcm7211-rng200", },
{ .compatible = "brcm,bcm7278-rng200", },
{ .compatible = "brcm,iproc-rng200", },
{},
diff --git a/drivers/char/hw_random/meson-rng.c b/drivers/char/hw_random/meson-rng.c
index 2e23be802a62..76e693da5dde 100644
--- a/drivers/char/hw_random/meson-rng.c
+++ b/drivers/char/hw_random/meson-rng.c
@@ -1,58 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
* Copyright (c) 2016 BayLibre, SAS.
* Author: Neil Armstrong <narmstrong@baylibre.com>
* Copyright (C) 2014 Amlogic, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- * The full GNU General Public License is included in this distribution
- * in the file called COPYING.
- *
- * BSD LICENSE
- *
- * Copyright (c) 2016 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- * Copyright (C) 2014 Amlogic, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/err.h>
#include <linux/module.h>
diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c
index 3e44362e469c..6bb023de17f1 100644
--- a/drivers/char/tpm/eventlog/efi.c
+++ b/drivers/char/tpm/eventlog/efi.c
@@ -16,10 +16,13 @@
int tpm_read_log_efi(struct tpm_chip *chip)
{
+ struct efi_tcg2_final_events_table *final_tbl = NULL;
struct linux_efi_tpm_eventlog *log_tbl;
struct tpm_bios_log *log;
u32 log_size;
u8 tpm_log_version;
+ void *tmp;
+ int ret;
if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
return -ENODEV;
@@ -47,15 +50,57 @@ int tpm_read_log_efi(struct tpm_chip *chip)
/* malloc EventLog space */
log->bios_event_log = kmemdup(log_tbl->log, log_size, GFP_KERNEL);
- if (!log->bios_event_log)
- goto err_memunmap;
- log->bios_event_log_end = log->bios_event_log + log_size;
+ if (!log->bios_event_log) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ log->bios_event_log_end = log->bios_event_log + log_size;
tpm_log_version = log_tbl->version;
- memunmap(log_tbl);
- return tpm_log_version;
-err_memunmap:
+ ret = tpm_log_version;
+
+ if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR ||
+ efi_tpm_final_log_size == 0 ||
+ tpm_log_version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2)
+ goto out;
+
+ final_tbl = memremap(efi.tpm_final_log,
+ sizeof(*final_tbl) + efi_tpm_final_log_size,
+ MEMREMAP_WB);
+ if (!final_tbl) {
+ pr_err("Could not map UEFI TPM final log\n");
+ kfree(log->bios_event_log);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ efi_tpm_final_log_size -= log_tbl->final_events_preboot_size;
+
+ tmp = krealloc(log->bios_event_log,
+ log_size + efi_tpm_final_log_size,
+ GFP_KERNEL);
+ if (!tmp) {
+ kfree(log->bios_event_log);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ log->bios_event_log = tmp;
+
+ /*
+ * Copy any of the final events log that didn't also end up in the
+ * main log. Events can be logged in both if events are generated
+ * between GetEventLog() and ExitBootServices().
+ */
+ memcpy((void *)log->bios_event_log + log_size,
+ final_tbl->events + log_tbl->final_events_preboot_size,
+ efi_tpm_final_log_size);
+ log->bios_event_log_end = log->bios_event_log +
+ log_size + efi_tpm_final_log_size;
+
+out:
+ memunmap(final_tbl);
memunmap(log_tbl);
- return -ENOMEM;
+ return ret;
}
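The krealloc() step above follows the usual grow-or-free rule: on failure, krealloc() leaves the original buffer allocated, so the caller must free it explicitly, which is why the result goes through tmp first. The pattern in isolation (a generic sketch, not TPM code):

#include <linux/slab.h>

/* Grow *bufp to new_size, freeing the old buffer on failure. */
static int sketch_grow(void **bufp, size_t new_size)
{
	void *tmp = krealloc(*bufp, new_size, GFP_KERNEL);

	if (!tmp) {
		kfree(*bufp);
		*bufp = NULL;
		return -ENOMEM;
	}
	*bufp = tmp;
	return 0;
}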
diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index d506362e046f..b9aeda1cbcd7 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -36,52 +36,7 @@
static size_t calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
struct tcg_pcr_event *event_header)
{
- struct tcg_efi_specid_event_head *efispecid;
- struct tcg_event_field *event_field;
- void *marker;
- void *marker_start;
- u32 halg_size;
- size_t size;
- u16 halg;
- int i;
- int j;
-
- marker = event;
- marker_start = marker;
- marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type)
- + sizeof(event->count);
-
- efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
-
- /* Check if event is malformed. */
- if (event->count > efispecid->num_algs)
- return 0;
-
- for (i = 0; i < event->count; i++) {
- halg_size = sizeof(event->digests[i].alg_id);
- memcpy(&halg, marker, halg_size);
- marker = marker + halg_size;
- for (j = 0; j < efispecid->num_algs; j++) {
- if (halg == efispecid->digest_sizes[j].alg_id) {
- marker +=
- efispecid->digest_sizes[j].digest_size;
- break;
- }
- }
- /* Algorithm without known length. Such event is unparseable. */
- if (j == efispecid->num_algs)
- return 0;
- }
-
- event_field = (struct tcg_event_field *)marker;
- marker = marker + sizeof(event_field->event_size)
- + event_field->event_size;
- size = marker - marker_start;
-
- if ((event->event_type == 0) && (event_field->event_size == 0))
- return 0;
-
- return size;
+ return __calc_tpm2_event_size(event, event_header, false);
}
static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos)
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 90325e1749fb..d47ad10a35fe 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -289,15 +289,15 @@ static int tpm_class_shutdown(struct device *dev)
{
struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
+ down_write(&chip->ops_sem);
if (chip->flags & TPM_CHIP_FLAG_TPM2) {
- down_write(&chip->ops_sem);
if (!tpm_chip_start(chip)) {
tpm2_shutdown(chip, TPM2_SU_CLEAR);
tpm_chip_stop(chip);
}
- chip->ops = NULL;
- up_write(&chip->ops_sem);
}
+ chip->ops = NULL;
+ up_write(&chip->ops_sem);
return 0;
}
diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index 85dcf2654d11..faacbe1ffa1a 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -510,7 +510,7 @@ struct tpm1_get_random_out {
*
* Return:
* * number of bytes read
- * * -errno or a TPM return code otherwise
+ * * -errno (positive TPM return codes are masked to -EIO)
*/
int tpm1_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
{
@@ -531,8 +531,11 @@ int tpm1_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
rc = tpm_transmit_cmd(chip, &buf, sizeof(out->rng_data_len),
"attempting get random");
- if (rc)
+ if (rc) {
+ if (rc > 0)
+ rc = -EIO;
goto out;
+ }
out = (struct tpm1_get_random_out *)&buf.data[TPM_HEADER_SIZE];
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 4de49924cfc4..d103545e4055 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -297,7 +297,7 @@ struct tpm2_get_random_out {
*
* Return:
* size of the buffer on success,
- * -errno otherwise
+ * -errno otherwise (positive TPM return codes are masked to -EIO)
*/
int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
{
@@ -324,8 +324,11 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
offsetof(struct tpm2_get_random_out,
buffer),
"attempting get random");
- if (err)
+ if (err) {
+ if (err > 0)
+ err = -EIO;
goto out;
+ }
out = (struct tpm2_get_random_out *)
&buf.data[TPM_HEADER_SIZE];
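With this masking (and the matching tpm1 change above), callers of the random API only ever observe a byte count or a negative errno, never a raw TPM return code. A hypothetical caller-side sketch:

#include <linux/tpm.h>

static int sketch_fill_random(struct tpm_chip *chip, u8 *buf, size_t len)
{
	int n = tpm_get_random(chip, buf, len);

	if (n < 0)
		return n;	/* -errno only; TPM rc already folded to -EIO */
	return (size_t)n == len ? 0 : -EIO;
}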
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 3300739edce4..5e9317dc3d39 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -43,6 +43,11 @@ config BCM_KONA_TIMER
help
Enables the support for the BCM Kona mobile timer driver.
+config DAVINCI_TIMER
+ bool "Texas Instruments DaVinci timer driver" if COMPILE_TEST
+ help
+ Enables the support for the TI DaVinci timer driver.
+
config DIGICOLOR_TIMER
bool "Digicolor timer driver" if COMPILE_TEST
select CLKSRC_MMIO
@@ -140,7 +145,7 @@ config TEGRA_TIMER
bool "Tegra timer driver" if COMPILE_TEST
select CLKSRC_MMIO
select TIMER_OF
- depends on ARM || ARM64
+ depends on ARCH_TEGRA || COMPILE_TEST
help
Enables support for the Tegra driver.
@@ -617,6 +622,13 @@ config CLKSRC_IMX_TPM
Enable this option to use IMX Timer/PWM Module (TPM) timer as
clocksource.
+config TIMER_IMX_SYS_CTR
+ bool "i.MX system counter timer" if COMPILE_TEST
+ select TIMER_OF
+ help
+ Enable this option to use i.MX system counter timer as a
+ clockevent.
+
config CLKSRC_ST_LPC
bool "Low power clocksource found in the LPC" if COMPILE_TEST
select TIMER_OF if OF
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 236858fa7fbf..2e7936e7833f 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o
obj-$(CONFIG_EM_TIMER_STI) += em_sti.o
obj-$(CONFIG_CLKBLD_I8253) += i8253.o
obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
+obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
@@ -36,7 +37,7 @@ obj-$(CONFIG_U300_TIMER) += timer-u300.o
obj-$(CONFIG_SUN4I_TIMER) += timer-sun4i.o
obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o
obj-$(CONFIG_MESON6_TIMER) += timer-meson6.o
-obj-$(CONFIG_TEGRA_TIMER) += timer-tegra20.o
+obj-$(CONFIG_TEGRA_TIMER) += timer-tegra.o
obj-$(CONFIG_VT8500_TIMER) += timer-vt8500.o
obj-$(CONFIG_NSPIRE_TIMER) += timer-zevio.o
obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
@@ -74,6 +75,7 @@ obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o
obj-$(CONFIG_CLKSRC_TANGO_XTAL) += timer-tango-xtal.o
obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o
obj-$(CONFIG_CLKSRC_IMX_TPM) += timer-imx-tpm.o
+obj-$(CONFIG_TIMER_IMX_SYS_CTR) += timer-imx-sysctr.o
obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o
obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o
obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o
@@ -84,3 +86,4 @@ obj-$(CONFIG_ATCPIT100_TIMER) += timer-atcpit100.o
obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o
obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o
obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o
+obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index ebfbccefc7b3..b29b5a75333e 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -13,6 +13,7 @@
*/
#include <linux/interrupt.h>
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clocksource.h>
@@ -139,7 +140,7 @@ static u64 arc_read_rtc(struct clocksource *cs)
l = read_aux_reg(AUX_RTC_LOW);
h = read_aux_reg(AUX_RTC_HIGH);
status = read_aux_reg(AUX_RTC_CTRL);
- } while (!(status & _BITUL(31)));
+ } while (!(status & BIT(31)));
return (((u64)h) << 32) | l;
}
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 07e57a49d1e8..9a5464c625b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -801,14 +801,7 @@ static void arch_timer_evtstrm_enable(int divider)
cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
| ARCH_TIMER_VIRT_EVT_EN;
arch_timer_set_cntkctl(cntkctl);
-#ifdef CONFIG_ARM64
- cpu_set_named_feature(EVTSTRM);
-#else
- elf_hwcap |= HWCAP_EVTSTRM;
-#endif
-#ifdef CONFIG_COMPAT
- compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
-#endif
+ arch_timer_set_evtstrm_feature();
cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
@@ -1037,11 +1030,7 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self,
} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
-#ifdef CONFIG_ARM64
- if (cpu_have_named_feature(EVTSTRM))
-#else
- if (elf_hwcap & HWCAP_EVTSTRM)
-#endif
+ if (arch_timer_have_evtstrm_feature())
cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
return NOTIFY_OK;
diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c
index e8eab16b154b..74cb299f5089 100644
--- a/drivers/clocksource/exynos_mct.c
+++ b/drivers/clocksource/exynos_mct.c
@@ -206,7 +206,7 @@ static void exynos4_frc_resume(struct clocksource *cs)
static struct clocksource mct_frc = {
.name = "mct-frc",
- .rating = 400,
+ .rating = 450, /* use a value higher than the ARM arch timer */
.read = exynos4_frc_read,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -461,7 +461,7 @@ static int exynos4_mct_starting_cpu(unsigned int cpu)
evt->set_state_oneshot_stopped = set_state_shutdown;
evt->tick_resume = set_state_shutdown;
evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
- evt->rating = 450;
+ evt->rating = 500; /* use a value higher than the ARM arch timer */
exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
new file mode 100644
index 000000000000..ba2c79e6a0ee
--- /dev/null
+++ b/drivers/clocksource/hyperv_timer.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Clocksource driver for the synthetic counter and timers
+ * provided by the Hyper-V hypervisor to guest VMs, as described
+ * in the Hyper-V Top Level Functional Spec (TLFS). This driver
+ * is instruction set architecture independent.
+ *
+ * Copyright (C) 2019, Microsoft, Inc.
+ *
+ * Author: Michael Kelley <mikelley@microsoft.com>
+ */
+
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/sched_clock.h>
+#include <linux/mm.h>
+#include <clocksource/hyperv_timer.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+static struct clock_event_device __percpu *hv_clock_event;
+
+/*
+ * If false, we're using the old mechanism for stimer0 interrupts
+ * where it sends a VMbus message when it expires. The old
+ * mechanism is used when running on older versions of Hyper-V
+ * that don't support Direct Mode. While Hyper-V provides
+ * four stimers per CPU, Linux uses only stimer0.
+ */
+static bool direct_mode_enabled;
+
+static int stimer0_irq;
+static int stimer0_vector;
+static int stimer0_message_sint;
+
+/*
+ * ISR for when stimer0 is operating in Direct Mode. Direct Mode
+ * does not use VMbus or any VMbus messages, so process here and not
+ * in the VMbus driver code.
+ */
+void hv_stimer0_isr(void)
+{
+ struct clock_event_device *ce;
+
+ ce = this_cpu_ptr(hv_clock_event);
+ ce->event_handler(ce);
+}
+EXPORT_SYMBOL_GPL(hv_stimer0_isr);
+
+static int hv_ce_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ u64 current_tick;
+
+ current_tick = hyperv_cs->read(NULL);
+ current_tick += delta;
+ hv_init_timer(0, current_tick);
+ return 0;
+}
+
+static int hv_ce_shutdown(struct clock_event_device *evt)
+{
+ hv_init_timer(0, 0);
+ hv_init_timer_config(0, 0);
+ if (direct_mode_enabled)
+ hv_disable_stimer0_percpu_irq(stimer0_irq);
+
+ return 0;
+}
+
+static int hv_ce_set_oneshot(struct clock_event_device *evt)
+{
+ union hv_stimer_config timer_cfg;
+
+ timer_cfg.as_uint64 = 0;
+ timer_cfg.enable = 1;
+ timer_cfg.auto_enable = 1;
+ if (direct_mode_enabled) {
+ /*
+ * When it expires, the timer will directly interrupt
+ * on the specified hardware vector/IRQ.
+ */
+ timer_cfg.direct_mode = 1;
+ timer_cfg.apic_vector = stimer0_vector;
+ hv_enable_stimer0_percpu_irq(stimer0_irq);
+ } else {
+ /*
+ * When it expires, the timer will generate a VMbus message,
+ * to be handled by the normal VMbus interrupt handler.
+ */
+ timer_cfg.direct_mode = 0;
+ timer_cfg.sintx = stimer0_message_sint;
+ }
+ hv_init_timer_config(0, timer_cfg.as_uint64);
+ return 0;
+}
+
+/*
+ * hv_stimer_init - Per-cpu initialization of the clockevent
+ */
+void hv_stimer_init(unsigned int cpu)
+{
+ struct clock_event_device *ce;
+
+ /*
+ * Synthetic timers are always available except on old versions of
+ * Hyper-V on x86. In that case, just return as Linux will use a
+ * clocksource based on emulated PIT or LAPIC timer hardware.
+ */
+ if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
+ return;
+
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ ce->name = "Hyper-V clockevent";
+ ce->features = CLOCK_EVT_FEAT_ONESHOT;
+ ce->cpumask = cpumask_of(cpu);
+ ce->rating = 1000;
+ ce->set_state_shutdown = hv_ce_shutdown;
+ ce->set_state_oneshot = hv_ce_set_oneshot;
+ ce->set_next_event = hv_ce_set_next_event;
+
+ clockevents_config_and_register(ce,
+ HV_CLOCK_HZ,
+ HV_MIN_DELTA_TICKS,
+ HV_MAX_MAX_DELTA_TICKS);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_init);
+
+/*
+ * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
+ */
+void hv_stimer_cleanup(unsigned int cpu)
+{
+ struct clock_event_device *ce;
+
+ /* Turn off clockevent device */
+ if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ hv_ce_shutdown(ce);
+ }
+}
+EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
+
+/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
+int hv_stimer_alloc(int sint)
+{
+ int ret;
+
+ hv_clock_event = alloc_percpu(struct clock_event_device);
+ if (!hv_clock_event)
+ return -ENOMEM;
+
+ direct_mode_enabled = ms_hyperv.misc_features &
+ HV_STIMER_DIRECT_MODE_AVAILABLE;
+ if (direct_mode_enabled) {
+ ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
+ hv_stimer0_isr);
+ if (ret) {
+ free_percpu(hv_clock_event);
+ hv_clock_event = NULL;
+ return ret;
+ }
+ }
+
+ stimer0_message_sint = sint;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hv_stimer_alloc);
+
+/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
+void hv_stimer_free(void)
+{
+ if (direct_mode_enabled && (stimer0_irq != 0)) {
+ hv_remove_stimer0_irq(stimer0_irq);
+ stimer0_irq = 0;
+ }
+ free_percpu(hv_clock_event);
+ hv_clock_event = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_stimer_free);
+
+/*
+ * Do a global cleanup of clockevents for the cases of kexec and
+ * vmbus exit
+ */
+void hv_stimer_global_cleanup(void)
+{
+ int cpu;
+ struct clock_event_device *ce;
+
+ if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
+ for_each_present_cpu(cpu) {
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ clockevents_unbind_device(ce, cpu);
+ }
+ }
+ hv_stimer_free();
+}
+EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
+
+/*
+ * Code and definitions for the Hyper-V clocksources. Two
+ * clocksources are defined: one that reads the Hyper-V defined MSR, and
+ * the other that uses the TSC reference page feature as defined in the
+ * TLFS. The MSR version is for compatibility with old versions of
+ * Hyper-V and 32-bit x86. The TSC reference page version is preferred.
+ */
+
+struct clocksource *hyperv_cs;
+EXPORT_SYMBOL_GPL(hyperv_cs);
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+
+static struct ms_hyperv_tsc_page *tsc_pg;
+
+struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+ return tsc_pg;
+}
+EXPORT_SYMBOL_GPL(hv_get_tsc_page);
+
+static u64 notrace read_hv_sched_clock_tsc(void)
+{
+ u64 current_tick = hv_read_tsc_page(tsc_pg);
+
+ if (current_tick == U64_MAX)
+ hv_get_time_ref_count(current_tick);
+
+ return current_tick;
+}
+
+static u64 read_hv_clock_tsc(struct clocksource *arg)
+{
+ return read_hv_sched_clock_tsc();
+}
+
+static struct clocksource hyperv_cs_tsc = {
+ .name = "hyperv_clocksource_tsc_page",
+ .rating = 400,
+ .read = read_hv_clock_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+#endif
+
+static u64 notrace read_hv_sched_clock_msr(void)
+{
+ u64 current_tick;
+ /*
+ * Read the partition counter to get the current tick count. This count
+ * is set to 0 when the partition is created and is incremented in
+ * 100 nanosecond units.
+ */
+ hv_get_time_ref_count(current_tick);
+ return current_tick;
+}
+
+static u64 read_hv_clock_msr(struct clocksource *arg)
+{
+ return read_hv_sched_clock_msr();
+}
+
+static struct clocksource hyperv_cs_msr = {
+ .name = "hyperv_clocksource_msr",
+ .rating = 400,
+ .read = read_hv_clock_msr,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static bool __init hv_init_tsc_clocksource(void)
+{
+ u64 tsc_msr;
+ phys_addr_t phys_addr;
+
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return false;
+
+ tsc_pg = vmalloc(PAGE_SIZE);
+ if (!tsc_pg)
+ return false;
+
+ hyperv_cs = &hyperv_cs_tsc;
+ phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
+
+ /*
+ * The Hyper-V TLFS specifies to preserve the value of reserved
+ * bits in registers. So read the existing value, preserve the
+ * low order 12 bits, and add in the guest physical address
+ * (which already has at least the low 12 bits set to zero since
+ * it is page aligned). Also set the "enable" bit, which is bit 0.
+ */
+ hv_get_reference_tsc(tsc_msr);
+ tsc_msr &= GENMASK_ULL(11, 0);
+ tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
+ hv_set_reference_tsc(tsc_msr);
+
+ hv_set_clocksource_vdso(hyperv_cs_tsc);
+ clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
+
+ /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
+ sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ);
+ return true;
+}
+#else
+static bool __init hv_init_tsc_clocksource(void)
+{
+ return false;
+}
+#endif
+
+
+void __init hv_init_clocksource(void)
+{
+ /*
+ * Try to set up the TSC page clocksource. If it succeeds, we're
+ * done. Otherwise, set up the MSR clocksource. At least one of
+ * these will always be available except on very old versions of
+ * Hyper-V on x86. In that case we won't have a Hyper-V
+ * clocksource, but Linux will still run with a clocksource based
+ * on the emulated PIT or LAPIC timer.
+ */
+ if (hv_init_tsc_clocksource())
+ return;
+
+ if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
+ return;
+
+ hyperv_cs = &hyperv_cs_msr;
+ clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+ /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
+ sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ);
+}
+EXPORT_SYMBOL_GPL(hv_init_clocksource);
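For reference, the TSC-page readout performed by hv_read_tsc_page() reduces to a 64x64->128-bit multiply-shift plus an offset, per the TLFS. A simplified sketch (field names assumed from struct ms_hyperv_tsc_page; the real helper also retries under the page's sequence counter):

#include <linux/math64.h>

static inline u64 sketch_tsc_page_to_time(const struct ms_hyperv_tsc_page *pg,
					  u64 tsc)
{
	/* time = ((tsc * scale) >> 64) + offset */
	return mul_u64_u64_shr(tsc, READ_ONCE(pg->tsc_scale), 64) +
	       READ_ONCE(pg->tsc_offset);
}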
diff --git a/drivers/clocksource/timer-davinci.c b/drivers/clocksource/timer-davinci.c
new file mode 100644
index 000000000000..62745c962049
--- /dev/null
+++ b/drivers/clocksource/timer-davinci.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * TI DaVinci clocksource driver
+ *
+ * Copyright (C) 2019 Texas Instruments
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ * (with tiny parts adopted from code by Kevin Hilman <khilman@baylibre.com>)
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+
+#include <clocksource/timer-davinci.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "%s: " fmt "\n", __func__
+
+#define DAVINCI_TIMER_REG_TIM12 0x10
+#define DAVINCI_TIMER_REG_TIM34 0x14
+#define DAVINCI_TIMER_REG_PRD12 0x18
+#define DAVINCI_TIMER_REG_PRD34 0x1c
+#define DAVINCI_TIMER_REG_TCR 0x20
+#define DAVINCI_TIMER_REG_TGCR 0x24
+
+#define DAVINCI_TIMER_TIMMODE_MASK GENMASK(3, 2)
+#define DAVINCI_TIMER_RESET_MASK GENMASK(1, 0)
+#define DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED BIT(2)
+#define DAVINCI_TIMER_UNRESET GENMASK(1, 0)
+
+#define DAVINCI_TIMER_ENAMODE_MASK GENMASK(1, 0)
+#define DAVINCI_TIMER_ENAMODE_DISABLED 0x00
+#define DAVINCI_TIMER_ENAMODE_ONESHOT BIT(0)
+#define DAVINCI_TIMER_ENAMODE_PERIODIC BIT(1)
+
+#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM12 6
+#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM34 22
+
+#define DAVINCI_TIMER_MIN_DELTA 0x01
+#define DAVINCI_TIMER_MAX_DELTA 0xfffffffe
+
+#define DAVINCI_TIMER_CLKSRC_BITS 32
+
+#define DAVINCI_TIMER_TGCR_DEFAULT \
+ (DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED | DAVINCI_TIMER_UNRESET)
+
+struct davinci_clockevent {
+ struct clock_event_device dev;
+ void __iomem *base;
+ unsigned int cmp_off;
+};
+
+/*
+ * This must be globally accessible by davinci_timer_read_sched_clock(), so
+ * let's keep it here.
+ */
+static struct {
+ struct clocksource dev;
+ void __iomem *base;
+ unsigned int tim_off;
+} davinci_clocksource;
+
+static struct davinci_clockevent *
+to_davinci_clockevent(struct clock_event_device *clockevent)
+{
+ return container_of(clockevent, struct davinci_clockevent, dev);
+}
+
+static unsigned int
+davinci_clockevent_read(struct davinci_clockevent *clockevent,
+ unsigned int reg)
+{
+ return readl_relaxed(clockevent->base + reg);
+}
+
+static void davinci_clockevent_write(struct davinci_clockevent *clockevent,
+ unsigned int reg, unsigned int val)
+{
+ writel_relaxed(val, clockevent->base + reg);
+}
+
+static void davinci_tim12_shutdown(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_DISABLED <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+ /*
+ * This function is only ever called if we're using both timer
+ * halves. In this case TIM34 runs in periodic mode and we must
+ * not modify it.
+ */
+ tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static void davinci_tim12_set_oneshot(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_ONESHOT <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+ /* Same as above. */
+ tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static int davinci_clockevent_shutdown(struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent;
+
+ clockevent = to_davinci_clockevent(dev);
+
+ davinci_tim12_shutdown(clockevent->base);
+
+ return 0;
+}
+
+static int davinci_clockevent_set_oneshot(struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
+
+ davinci_tim12_set_oneshot(clockevent->base);
+
+ return 0;
+}
+
+static int
+davinci_clockevent_set_next_event_std(unsigned long cycles,
+ struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+
+ davinci_clockevent_shutdown(dev);
+
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_PRD12, cycles);
+
+ davinci_clockevent_set_oneshot(dev);
+
+ return 0;
+}
+
+static int
+davinci_clockevent_set_next_event_cmp(unsigned long cycles,
+ struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+ unsigned int curr_time;
+
+ curr_time = davinci_clockevent_read(clockevent,
+ DAVINCI_TIMER_REG_TIM12);
+ davinci_clockevent_write(clockevent,
+ clockevent->cmp_off, curr_time + cycles);
+
+ return 0;
+}
+
+static irqreturn_t davinci_timer_irq_timer(int irq, void *data)
+{
+ struct davinci_clockevent *clockevent = data;
+
+ if (!clockevent_state_oneshot(&clockevent->dev))
+ davinci_tim12_shutdown(clockevent->base);
+
+ clockevent->dev.event_handler(&clockevent->dev);
+
+ return IRQ_HANDLED;
+}
+
+static u64 notrace davinci_timer_read_sched_clock(void)
+{
+ return readl_relaxed(davinci_clocksource.base +
+ davinci_clocksource.tim_off);
+}
+
+static u64 davinci_clocksource_read(struct clocksource *dev)
+{
+ return davinci_timer_read_sched_clock();
+}
+
+/*
+ * Standard use-case: we're using tim12 for clockevent and tim34 for
+ * clocksource. The default is making the former run in oneshot mode
+ * and the latter in periodic mode.
+ */
+static void davinci_clocksource_init_tim34(void __iomem *base)
+{
+ int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+ tcr |= DAVINCI_TIMER_ENAMODE_ONESHOT <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
+ writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD34);
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+/*
+ * Special use-case on da830: the DSP may use tim34. We're using tim12 for
+ * both clocksource and clockevent. We set tim12 to periodic and don't touch
+ * tim34.
+ */
+static void davinci_clocksource_init_tim12(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
+ writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD12);
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static void davinci_timer_init(void __iomem *base)
+{
+ /* Set clock to internal mode and disable it. */
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TCR);
+ /*
+ * Reset both 32-bit timers, set no prescaler for timer 34, set the
+ * timer to dual 32-bit unchained mode, unreset both 32-bit timers.
+ */
+ writel_relaxed(DAVINCI_TIMER_TGCR_DEFAULT,
+ base + DAVINCI_TIMER_REG_TGCR);
+ /* Init both counters to zero. */
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
+}
+
+int __init davinci_timer_register(struct clk *clk,
+ const struct davinci_timer_cfg *timer_cfg)
+{
+ struct davinci_clockevent *clockevent;
+ unsigned int tick_rate;
+ void __iomem *base;
+ int rv;
+
+ rv = clk_prepare_enable(clk);
+ if (rv) {
+ pr_err("Unable to prepare and enable the timer clock");
+ return rv;
+ }
+
+ if (!request_mem_region(timer_cfg->reg.start,
+ resource_size(&timer_cfg->reg),
+ "davinci-timer")) {
+ pr_err("Unable to request memory region");
+ return -EBUSY;
+ }
+
+ base = ioremap(timer_cfg->reg.start, resource_size(&timer_cfg->reg));
+ if (!base) {
+ pr_err("Unable to map the register range");
+ return -ENOMEM;
+ }
+
+ davinci_timer_init(base);
+ tick_rate = clk_get_rate(clk);
+
+ clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL);
+ if (!clockevent) {
+ pr_err("Error allocating memory for clockevent data");
+ return -ENOMEM;
+ }
+
+ clockevent->dev.name = "tim12";
+ clockevent->dev.features = CLOCK_EVT_FEAT_ONESHOT;
+ clockevent->dev.cpumask = cpumask_of(0);
+ clockevent->base = base;
+
+ if (timer_cfg->cmp_off) {
+ clockevent->cmp_off = timer_cfg->cmp_off;
+ clockevent->dev.set_next_event =
+ davinci_clockevent_set_next_event_cmp;
+ } else {
+ clockevent->dev.set_next_event =
+ davinci_clockevent_set_next_event_std;
+ clockevent->dev.set_state_oneshot =
+ davinci_clockevent_set_oneshot;
+ clockevent->dev.set_state_shutdown =
+ davinci_clockevent_shutdown;
+ }
+
+ rv = request_irq(timer_cfg->irq[DAVINCI_TIMER_CLOCKEVENT_IRQ].start,
+ davinci_timer_irq_timer, IRQF_TIMER,
+ "clockevent/tim12", clockevent);
+ if (rv) {
+ pr_err("Unable to request the clockevent interrupt");
+ return rv;
+ }
+
+ clockevents_config_and_register(&clockevent->dev, tick_rate,
+ DAVINCI_TIMER_MIN_DELTA,
+ DAVINCI_TIMER_MAX_DELTA);
+
+ davinci_clocksource.dev.rating = 300;
+ davinci_clocksource.dev.read = davinci_clocksource_read;
+ davinci_clocksource.dev.mask =
+ CLOCKSOURCE_MASK(DAVINCI_TIMER_CLKSRC_BITS);
+ davinci_clocksource.dev.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ davinci_clocksource.base = base;
+
+ if (timer_cfg->cmp_off) {
+ davinci_clocksource.dev.name = "tim12";
+ davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM12;
+ davinci_clocksource_init_tim12(base);
+ } else {
+ davinci_clocksource.dev.name = "tim34";
+ davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM34;
+ davinci_clocksource_init_tim34(base);
+ }
+
+ rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate);
+ if (rv) {
+ pr_err("Unable to register clocksource");
+ return rv;
+ }
+
+ sched_clock_register(davinci_timer_read_sched_clock,
+ DAVINCI_TIMER_CLKSRC_BITS, tick_rate);
+
+ return 0;
+}
+
+static int __init of_davinci_timer_register(struct device_node *np)
+{
+ struct davinci_timer_cfg timer_cfg = { };
+ struct clk *clk;
+ int rv;
+
+ rv = of_address_to_resource(np, 0, &timer_cfg.reg);
+ if (rv) {
+ pr_err("Unable to get the register range for timer");
+ return rv;
+ }
+
+ rv = of_irq_to_resource_table(np, timer_cfg.irq,
+ DAVINCI_TIMER_NUM_IRQS);
+ if (rv != DAVINCI_TIMER_NUM_IRQS) {
+ pr_err("Unable to get the interrupts for timer");
+ return rv;
+ }
+
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk)) {
+ pr_err("Unable to get the timer clock");
+ return PTR_ERR(clk);
+ }
+
+ rv = davinci_timer_register(clk, &timer_cfg);
+ if (rv)
+ clk_put(clk);
+
+ return rv;
+}
+TIMER_OF_DECLARE(davinci_timer, "ti,da830-timer", of_davinci_timer_register);
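The cmp_off path above programs the next event without ever stopping the timer: it samples the free-running TIM12 count and writes count + cycles into the compare register, whereas the std path must shut the timer down, reprogram the period and restart it. The cmp variant stays correct across counter wraparound because unsigned 32-bit addition wraps exactly as the counter does. A standalone sketch of that property (illustrative only):

/* Wrap-safe match value for a free-running 32-bit up-counter. */
static u32 next_match(u32 current_count, u32 cycles)
{
	/* u32 addition is modulo 2^32, matching the counter's own wraparound. */
	return current_count + cycles;
}

/* Ticks elapsed since 'start', likewise wrap-safe. */
static u32 ticks_elapsed(u32 start, u32 now)
{
	return now - start;
}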
diff --git a/drivers/clocksource/timer-imx-sysctr.c b/drivers/clocksource/timer-imx-sysctr.c
new file mode 100644
index 000000000000..fd7d68066efb
--- /dev/null
+++ b/drivers/clocksource/timer-imx-sysctr.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2017-2019 NXP
+
+#include <linux/interrupt.h>
+#include <linux/clockchips.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include "timer-of.h"
+
+#define CMP_OFFSET 0x10000
+
+#define CNTCV_LO 0x8
+#define CNTCV_HI 0xc
+#define CMPCV_LO (CMP_OFFSET + 0x20)
+#define CMPCV_HI (CMP_OFFSET + 0x24)
+#define CMPCR (CMP_OFFSET + 0x2c)
+
+#define SYS_CTR_EN 0x1
+#define SYS_CTR_IRQ_MASK 0x2
+
+static void __iomem *sys_ctr_base;
+static u32 cmpcr;
+
+static void sysctr_timer_enable(bool enable)
+{
+ writel(enable ? cmpcr | SYS_CTR_EN : cmpcr, sys_ctr_base + CMPCR);
+}
+
+static void sysctr_irq_acknowledge(void)
+{
+ /*
+	 * Clearing the enable bit (EN = 0) also clears the status
+	 * bit (ISTAT = 0), which negates (acknowledges) the
+	 * interrupt signal.
+ */
+ sysctr_timer_enable(false);
+}
+
+static inline u64 sysctr_read_counter(void)
+{
+ u32 cnt_hi, tmp_hi, cnt_lo;
+
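+	/*
+	 * Re-read the high word until it is stable, so the low word
+	 * cannot have wrapped between the two halves of the read.
+	 */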
+ do {
+ cnt_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
+ cnt_lo = readl_relaxed(sys_ctr_base + CNTCV_LO);
+ tmp_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
+ } while (tmp_hi != cnt_hi);
+
+ return ((u64) cnt_hi << 32) | cnt_lo;
+}
+
+static int sysctr_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ u32 cmp_hi, cmp_lo;
+ u64 next;
+
+ sysctr_timer_enable(false);
+
+ next = sysctr_read_counter();
+
+ next += delta;
+
+ cmp_hi = (next >> 32) & 0x00fffff;
+ cmp_lo = next & 0xffffffff;
+
+ writel_relaxed(cmp_hi, sys_ctr_base + CMPCV_HI);
+ writel_relaxed(cmp_lo, sys_ctr_base + CMPCV_LO);
+
+ sysctr_timer_enable(true);
+
+ return 0;
+}
+
+static int sysctr_set_state_oneshot(struct clock_event_device *evt)
+{
+ return 0;
+}
+
+static int sysctr_set_state_shutdown(struct clock_event_device *evt)
+{
+ sysctr_timer_enable(false);
+
+ return 0;
+}
+
+static irqreturn_t sysctr_timer_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+
+ sysctr_irq_acknowledge();
+
+ evt->event_handler(evt);
+
+ return IRQ_HANDLED;
+}
+
+static struct timer_of to_sysctr = {
+ .flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE,
+ .clkevt = {
+ .name = "i.MX system counter timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_DYNIRQ,
+ .set_state_oneshot = sysctr_set_state_oneshot,
+ .set_next_event = sysctr_set_next_event,
+ .set_state_shutdown = sysctr_set_state_shutdown,
+ .rating = 200,
+ },
+ .of_irq = {
+ .handler = sysctr_timer_interrupt,
+ .flags = IRQF_TIMER | IRQF_IRQPOLL,
+ },
+ .of_clk = {
+ .name = "per",
+ },
+};
+
+static void __init sysctr_clockevent_init(void)
+{
+ to_sysctr.clkevt.cpumask = cpumask_of(0);
+
+ clockevents_config_and_register(&to_sysctr.clkevt,
+ timer_of_rate(&to_sysctr),
+ 0xff, 0x7fffffff);
+}
+
+static int __init sysctr_timer_init(struct device_node *np)
+{
+ int ret = 0;
+
+ ret = timer_of_init(np, &to_sysctr);
+ if (ret)
+ return ret;
+
+ sys_ctr_base = timer_of_base(&to_sysctr);
+ cmpcr = readl(sys_ctr_base + CMPCR);
+ cmpcr &= ~SYS_CTR_EN;
+
+ sysctr_clockevent_init();
+
+ return 0;
+}
+TIMER_OF_DECLARE(sysctr_timer, "nxp,sysctr-timer", sysctr_timer_init);
diff --git a/drivers/clocksource/timer-ixp4xx.c b/drivers/clocksource/timer-ixp4xx.c
index 5c2190b654cd..9396745e1c17 100644
--- a/drivers/clocksource/timer-ixp4xx.c
+++ b/drivers/clocksource/timer-ixp4xx.c
@@ -75,14 +75,19 @@ to_ixp4xx_timer(struct clock_event_device *evt)
return container_of(evt, struct ixp4xx_timer, clkevt);
}
-static u64 notrace ixp4xx_read_sched_clock(void)
+static unsigned long ixp4xx_read_timer(void)
{
return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET);
}
+static u64 notrace ixp4xx_read_sched_clock(void)
+{
+ return ixp4xx_read_timer();
+}
+
static u64 ixp4xx_clocksource_read(struct clocksource *c)
{
- return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET);
+ return ixp4xx_read_timer();
}
static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id)
@@ -224,6 +229,13 @@ static __init int ixp4xx_timer_register(void __iomem *base,
sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq);
+#ifdef CONFIG_ARM
+ /* Also use this timer for delays */
+ tmr->delay_timer.read_current_timer = ixp4xx_read_timer;
+ tmr->delay_timer.freq = timer_freq;
+ register_current_timer_delay(&tmr->delay_timer);
+#endif
+
return 0;
}
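Reusing the clocksource counter as an ARM delay timer lets udelay() poll real hardware instead of a calibrated busy loop, so delays stay accurate across cpufreq changes. A conceptual sketch of a timer-backed delay, assuming an up-counting timer of at least 1MHz and ignoring overflow for brevity (the real logic lives in the ARM delay code):

static void timer_udelay_sketch(unsigned long usecs,
				unsigned long (*read)(void),
				unsigned long freq)
{
	unsigned long start = read();
	unsigned long ticks = usecs * (freq / 1000000);

	while ((read() - start) < ticks)
		cpu_relax();
}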
diff --git a/drivers/clocksource/timer-meson6.c b/drivers/clocksource/timer-meson6.c
index 84bd9479c3f8..9e8b467c71da 100644
--- a/drivers/clocksource/timer-meson6.c
+++ b/drivers/clocksource/timer-meson6.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Amlogic Meson6 SoCs timer handling.
*
* Copyright (C) 2014 Carlo Caione <carlo@caione.org>
*
* Based on code from Amlogic, Inc
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
*/
#include <linux/bitfield.h>
diff --git a/drivers/clocksource/timer-tegra.c b/drivers/clocksource/timer-tegra.c
new file mode 100644
index 000000000000..e9635c25eef4
--- /dev/null
+++ b/drivers/clocksource/timer-tegra.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * Author:
+ * Colin Cross <ccross@google.com>
+ */
+
+#define pr_fmt(fmt) "tegra-timer: " fmt
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/percpu.h>
+#include <linux/sched_clock.h>
+#include <linux/time.h>
+
+#include "timer-of.h"
+
+#define RTC_SECONDS 0x08
+#define RTC_SHADOW_SECONDS 0x0c
+#define RTC_MILLISECONDS 0x10
+
+#define TIMERUS_CNTR_1US 0x10
+#define TIMERUS_USEC_CFG 0x14
+#define TIMERUS_CNTR_FREEZE 0x4c
+
+#define TIMER_PTV 0x0
+#define TIMER_PTV_EN BIT(31)
+#define TIMER_PTV_PER BIT(30)
+#define TIMER_PCR 0x4
+#define TIMER_PCR_INTR_CLR BIT(30)
+
+#define TIMER1_BASE 0x00
+#define TIMER2_BASE 0x08
+#define TIMER3_BASE 0x50
+#define TIMER4_BASE 0x58
+#define TIMER10_BASE 0x90
+
+#define TIMER1_IRQ_IDX 0
+#define TIMER10_IRQ_IDX 10
+
+#define TIMER_1MHz 1000000
+
+static u32 usec_config;
+static void __iomem *timer_reg_base;
+
+static int tegra_timer_set_next_event(unsigned long cycles,
+ struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ /*
+	 * Tegra's timer uses an n+1 scheme for the counter, i.e. the timer
+	 * will fire after one tick if 0 is loaded.
+	 *
+	 * The minimum and maximum numbers of oneshot ticks are defined by
+	 * the clockevents_config_and_register(1, 0x1fffffff + 1) invocation
+	 * below, so the requested cycles (ticks) can never fall outside the
+	 * range the hardware supports.
+ */
+ writel_relaxed(TIMER_PTV_EN | (cycles - 1), reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static int tegra_timer_shutdown(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(0, reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static int tegra_timer_set_periodic(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+ unsigned long period = timer_of_period(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PTV_EN | TIMER_PTV_PER | (period - 1),
+ reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+ evt->event_handler(evt);
+
+ return IRQ_HANDLED;
+}
+
+static void tegra_timer_suspend(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+}
+
+static void tegra_timer_resume(struct clock_event_device *evt)
+{
+ writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
+}
+
+static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
+ .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
+
+ .clkevt = {
+ .name = "tegra_timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+ .set_next_event = tegra_timer_set_next_event,
+ .set_state_shutdown = tegra_timer_shutdown,
+ .set_state_periodic = tegra_timer_set_periodic,
+ .set_state_oneshot = tegra_timer_shutdown,
+ .tick_resume = tegra_timer_shutdown,
+ .suspend = tegra_timer_suspend,
+ .resume = tegra_timer_resume,
+ },
+};
+
+static int tegra_timer_setup(unsigned int cpu)
+{
+ struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+ writel_relaxed(0, timer_of_base(to) + TIMER_PTV);
+ writel_relaxed(TIMER_PCR_INTR_CLR, timer_of_base(to) + TIMER_PCR);
+
+ irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
+ enable_irq(to->clkevt.irq);
+
+ /*
+	 * Tegra's timer uses an n+1 scheme for the counter, i.e. the timer
+	 * will fire after one tick if 0 is loaded, so the minimum number of
+	 * ticks is 1. As a result both of the clockevent's tick limits are
+	 * one higher than the minimum and maximum the hardware register can
+	 * take; the set_next_event callback accounts for this.
+ */
+ clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+ 1, /* min */
+ 0x1fffffff + 1); /* max 29 bits + 1 */
+
+ return 0;
+}
+
+static int tegra_timer_stop(unsigned int cpu)
+{
+ struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+ to->clkevt.set_state_shutdown(&to->clkevt);
+ disable_irq_nosync(to->clkevt.irq);
+
+ return 0;
+}
+
+static u64 notrace tegra_read_sched_clock(void)
+{
+ return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
+}
+
+#ifdef CONFIG_ARM
+static unsigned long tegra_delay_timer_read_counter_long(void)
+{
+ return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
+}
+
+static struct delay_timer tegra_delay_timer = {
+ .read_current_timer = tegra_delay_timer_read_counter_long,
+ .freq = TIMER_1MHz,
+};
+#endif
+
+static struct timer_of suspend_rtc_to = {
+ .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
+};
+
+/*
+ * tegra_rtc_read - Reads the Tegra RTC registers
+ * To avoid races on the RTC shadow register, this function must not
+ * be called while the tegra_rtc driver could be executing.
+ */
+static u64 tegra_rtc_read_ms(struct clocksource *cs)
+{
+ void __iomem *reg_base = timer_of_base(&suspend_rtc_to);
+
+ u32 ms = readl_relaxed(reg_base + RTC_MILLISECONDS);
+ u32 s = readl_relaxed(reg_base + RTC_SHADOW_SECONDS);
+
+ return (u64)s * MSEC_PER_SEC + ms;
+}
+
+static struct clocksource suspend_rtc_clocksource = {
+ .name = "tegra_suspend_timer",
+ .rating = 200,
+ .read = tegra_rtc_read_ms,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
+};
+
+static inline unsigned int tegra_base_for_cpu(int cpu, bool tegra20)
+{
+ if (tegra20) {
+ switch (cpu) {
+ case 0:
+ return TIMER1_BASE;
+ case 1:
+ return TIMER2_BASE;
+ case 2:
+ return TIMER3_BASE;
+ default:
+ return TIMER4_BASE;
+ }
+ }
+
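+	/* Tegra210: per-CPU timers are TIMER10..TIMER13, spaced 8 bytes apart. */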
+ return TIMER10_BASE + cpu * 8;
+}
+
+static inline unsigned int tegra_irq_idx_for_cpu(int cpu, bool tegra20)
+{
+ if (tegra20)
+ return TIMER1_IRQ_IDX + cpu;
+
+ return TIMER10_IRQ_IDX + cpu;
+}
+
+static inline unsigned long tegra_rate_for_timer(struct timer_of *to,
+ bool tegra20)
+{
+ /*
+	 * TIMER1-9 run at a fixed 1MHz rate, while TIMER10-13 run off the
+	 * parent clock.
+ */
+ if (tegra20)
+ return TIMER_1MHz;
+
+ return timer_of_rate(to);
+}
+
+static int __init tegra_init_timer(struct device_node *np, bool tegra20,
+ int rating)
+{
+ struct timer_of *to;
+ int cpu, ret;
+
+ to = this_cpu_ptr(&tegra_to);
+ ret = timer_of_init(np, to);
+ if (ret)
+ goto out;
+
+ timer_reg_base = timer_of_base(to);
+
+ /*
+	 * Configure the microsecond timer to run at 1MHz. The config
+	 * register is 0xqqww, where qq is the "dividend" and ww is the
+	 * "divisor", both using the n+1 scheme.
+ */
+ switch (timer_of_rate(to)) {
+ case 12000000:
+ usec_config = 0x000b; /* (11+1)/(0+1) */
+ break;
+ case 12800000:
+ usec_config = 0x043f; /* (63+1)/(4+1) */
+ break;
+ case 13000000:
+ usec_config = 0x000c; /* (12+1)/(0+1) */
+ break;
+ case 16800000:
+ usec_config = 0x0453; /* (83+1)/(4+1) */
+ break;
+ case 19200000:
+ usec_config = 0x045f; /* (95+1)/(4+1) */
+ break;
+ case 26000000:
+ usec_config = 0x0019; /* (25+1)/(0+1) */
+ break;
+ case 38400000:
+ usec_config = 0x04bf; /* (191+1)/(4+1) */
+ break;
+ case 48000000:
+ usec_config = 0x002f; /* (47+1)/(0+1) */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
+
+ for_each_possible_cpu(cpu) {
+ struct timer_of *cpu_to = per_cpu_ptr(&tegra_to, cpu);
+ unsigned long flags = IRQF_TIMER | IRQF_NOBALANCING;
+ unsigned long rate = tegra_rate_for_timer(to, tegra20);
+ unsigned int base = tegra_base_for_cpu(cpu, tegra20);
+ unsigned int idx = tegra_irq_idx_for_cpu(cpu, tegra20);
+ unsigned int irq = irq_of_parse_and_map(np, idx);
+
+ if (!irq) {
+ pr_err("failed to map irq for cpu%d\n", cpu);
+ ret = -EINVAL;
+ goto out_irq;
+ }
+
+ cpu_to->clkevt.irq = irq;
+ cpu_to->clkevt.rating = rating;
+ cpu_to->clkevt.cpumask = cpumask_of(cpu);
+ cpu_to->of_base.base = timer_reg_base + base;
+ cpu_to->of_clk.period = rate / HZ;
+ cpu_to->of_clk.rate = rate;
+
+ irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
+
+ ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr, flags,
+ cpu_to->clkevt.name, &cpu_to->clkevt);
+ if (ret) {
+ pr_err("failed to set up irq for cpu%d: %d\n",
+ cpu, ret);
+ irq_dispose_mapping(cpu_to->clkevt.irq);
+ cpu_to->clkevt.irq = 0;
+ goto out_irq;
+ }
+ }
+
+ sched_clock_register(tegra_read_sched_clock, 32, TIMER_1MHz);
+
+ ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
+ "timer_us", TIMER_1MHz, 300, 32,
+ clocksource_mmio_readl_up);
+ if (ret)
+ pr_err("failed to register clocksource: %d\n", ret);
+
+#ifdef CONFIG_ARM
+ register_current_timer_delay(&tegra_delay_timer);
+#endif
+
+ ret = cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
+ "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
+ tegra_timer_stop);
+ if (ret)
+ pr_err("failed to set up cpu hp state: %d\n", ret);
+
+ return ret;
+
+out_irq:
+ for_each_possible_cpu(cpu) {
+ struct timer_of *cpu_to;
+
+ cpu_to = per_cpu_ptr(&tegra_to, cpu);
+ if (cpu_to->clkevt.irq) {
+ free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
+ irq_dispose_mapping(cpu_to->clkevt.irq);
+ }
+ }
+
+ to->of_base.base = timer_reg_base;
+out:
+ timer_of_cleanup(to);
+
+ return ret;
+}
+
+static int __init tegra210_init_timer(struct device_node *np)
+{
+ /*
+	 * Due to a system design shortcoming, the arch-timer doesn't
+	 * survive a power cycle of the CPU core or the CPUPORESET signal,
+	 * hence the tegra-timer is preferable on Tegra210.
+ */
+ return tegra_init_timer(np, false, 460);
+}
+TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra210_init_timer);
+
+static int __init tegra20_init_timer(struct device_node *np)
+{
+ int rating;
+
+ /*
+	 * Tegra20 and Tegra30 have a Cortex-A9 CPU whose TWD timer runs
+	 * off the CPU clock and is therefore subject to jitter from DVFS
+	 * clock-rate changes. The tegra-timer is preferable on these older
+	 * SoCs, while later generations use the arch-timer as the main
+	 * per-CPU timer, which is not affected by DVFS changes.
+ */
+ if (of_machine_is_compatible("nvidia,tegra20") ||
+ of_machine_is_compatible("nvidia,tegra30"))
+ rating = 460;
+ else
+ rating = 330;
+
+ return tegra_init_timer(np, true, rating);
+}
+TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
+
+static int __init tegra20_init_rtc(struct device_node *np)
+{
+ int ret;
+
+ ret = timer_of_init(np, &suspend_rtc_to);
+ if (ret)
+ return ret;
+
+ return clocksource_register_hz(&suspend_rtc_clocksource, 1000);
+}
+TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
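The usec_config table above encodes a rational divider: the microsecond counter runs at osc_rate * (qq+1)/(ww+1), with qq in bits 15:8 and ww in bits 7:0. For 38.4MHz, 0x04bf gives 38.4MHz * 5/192 = 1MHz. A hedged sketch that derives the same encodings; the bit layout is inferred from the table above, not from a datasheet:

#include <linux/gcd.h>
#include <linux/time.h>

static u32 tegra_usec_cfg_sketch(unsigned long osc_rate)
{
	unsigned long g = gcd(osc_rate, USEC_PER_SEC);
	unsigned long dividend = USEC_PER_SEC / g;	/* e.g. 5 for 38.4MHz */
	unsigned long divisor = osc_rate / g;		/* e.g. 192 for 38.4MHz */

	/* n+1 scheme: store (dividend - 1) in qq and (divisor - 1) in ww. */
	return ((dividend - 1) << 8) | (divisor - 1);
}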
diff --git a/drivers/clocksource/timer-tegra20.c b/drivers/clocksource/timer-tegra20.c
deleted file mode 100644
index 1e7ece279730..000000000000
--- a/drivers/clocksource/timer-tegra20.c
+++ /dev/null
@@ -1,379 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2010 Google, Inc.
- *
- * Author:
- * Colin Cross <ccross@google.com>
- */
-
-#include <linux/clk.h>
-#include <linux/clockchips.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/percpu.h>
-#include <linux/sched_clock.h>
-#include <linux/time.h>
-
-#include "timer-of.h"
-
-#ifdef CONFIG_ARM
-#include <asm/mach/time.h>
-#endif
-
-#define RTC_SECONDS 0x08
-#define RTC_SHADOW_SECONDS 0x0c
-#define RTC_MILLISECONDS 0x10
-
-#define TIMERUS_CNTR_1US 0x10
-#define TIMERUS_USEC_CFG 0x14
-#define TIMERUS_CNTR_FREEZE 0x4c
-
-#define TIMER_PTV 0x0
-#define TIMER_PTV_EN BIT(31)
-#define TIMER_PTV_PER BIT(30)
-#define TIMER_PCR 0x4
-#define TIMER_PCR_INTR_CLR BIT(30)
-
-#ifdef CONFIG_ARM
-#define TIMER_CPU0 0x50 /* TIMER3 */
-#else
-#define TIMER_CPU0 0x90 /* TIMER10 */
-#define TIMER10_IRQ_IDX 10
-#define IRQ_IDX_FOR_CPU(cpu) (TIMER10_IRQ_IDX + cpu)
-#endif
-#define TIMER_BASE_FOR_CPU(cpu) (TIMER_CPU0 + (cpu) * 8)
-
-static u32 usec_config;
-static void __iomem *timer_reg_base;
-#ifdef CONFIG_ARM
-static struct delay_timer tegra_delay_timer;
-#endif
-
-static int tegra_timer_set_next_event(unsigned long cycles,
- struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PTV_EN |
- ((cycles > 1) ? (cycles - 1) : 0), /* n+1 scheme */
- reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static int tegra_timer_shutdown(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(0, reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static int tegra_timer_set_periodic(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PTV_EN | TIMER_PTV_PER |
- ((timer_of_rate(to_timer_of(evt)) / HZ) - 1),
- reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
-{
- struct clock_event_device *evt = (struct clock_event_device *)dev_id;
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
- evt->event_handler(evt);
-
- return IRQ_HANDLED;
-}
-
-static void tegra_timer_suspend(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
-}
-
-static void tegra_timer_resume(struct clock_event_device *evt)
-{
- writel(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
-}
-
-#ifdef CONFIG_ARM64
-static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
- .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
-
- .clkevt = {
- .name = "tegra_timer",
- .rating = 460,
- .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
- .set_next_event = tegra_timer_set_next_event,
- .set_state_shutdown = tegra_timer_shutdown,
- .set_state_periodic = tegra_timer_set_periodic,
- .set_state_oneshot = tegra_timer_shutdown,
- .tick_resume = tegra_timer_shutdown,
- .suspend = tegra_timer_suspend,
- .resume = tegra_timer_resume,
- },
-};
-
-static int tegra_timer_setup(unsigned int cpu)
-{
- struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
-
- irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
- enable_irq(to->clkevt.irq);
-
- clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
- 1, /* min */
- 0x1fffffff); /* 29 bits */
-
- return 0;
-}
-
-static int tegra_timer_stop(unsigned int cpu)
-{
- struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
-
- to->clkevt.set_state_shutdown(&to->clkevt);
- disable_irq_nosync(to->clkevt.irq);
-
- return 0;
-}
-#else /* CONFIG_ARM */
-static struct timer_of tegra_to = {
- .flags = TIMER_OF_CLOCK | TIMER_OF_BASE | TIMER_OF_IRQ,
-
- .clkevt = {
- .name = "tegra_timer",
- .rating = 300,
- .features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_DYNIRQ,
- .set_next_event = tegra_timer_set_next_event,
- .set_state_shutdown = tegra_timer_shutdown,
- .set_state_periodic = tegra_timer_set_periodic,
- .set_state_oneshot = tegra_timer_shutdown,
- .tick_resume = tegra_timer_shutdown,
- .suspend = tegra_timer_suspend,
- .resume = tegra_timer_resume,
- .cpumask = cpu_possible_mask,
- },
-
- .of_irq = {
- .index = 2,
- .flags = IRQF_TIMER | IRQF_TRIGGER_HIGH,
- .handler = tegra_timer_isr,
- },
-};
-
-static u64 notrace tegra_read_sched_clock(void)
-{
- return readl(timer_reg_base + TIMERUS_CNTR_1US);
-}
-
-static unsigned long tegra_delay_timer_read_counter_long(void)
-{
- return readl(timer_reg_base + TIMERUS_CNTR_1US);
-}
-
-static struct timer_of suspend_rtc_to = {
- .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
-};
-
-/*
- * tegra_rtc_read - Reads the Tegra RTC registers
- * Care must be taken that this funciton is not called while the
- * tegra_rtc driver could be executing to avoid race conditions
- * on the RTC shadow register
- */
-static u64 tegra_rtc_read_ms(struct clocksource *cs)
-{
- u32 ms = readl(timer_of_base(&suspend_rtc_to) + RTC_MILLISECONDS);
- u32 s = readl(timer_of_base(&suspend_rtc_to) + RTC_SHADOW_SECONDS);
- return (u64)s * MSEC_PER_SEC + ms;
-}
-
-static struct clocksource suspend_rtc_clocksource = {
- .name = "tegra_suspend_timer",
- .rating = 200,
- .read = tegra_rtc_read_ms,
- .mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
-};
-#endif
-
-static int tegra_timer_common_init(struct device_node *np, struct timer_of *to)
-{
- int ret = 0;
-
- ret = timer_of_init(np, to);
- if (ret < 0)
- goto out;
-
- timer_reg_base = timer_of_base(to);
-
- /*
- * Configure microsecond timers to have 1MHz clock
- * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
- * Uses n+1 scheme
- */
- switch (timer_of_rate(to)) {
- case 12000000:
- usec_config = 0x000b; /* (11+1)/(0+1) */
- break;
- case 12800000:
- usec_config = 0x043f; /* (63+1)/(4+1) */
- break;
- case 13000000:
- usec_config = 0x000c; /* (12+1)/(0+1) */
- break;
- case 16800000:
- usec_config = 0x0453; /* (83+1)/(4+1) */
- break;
- case 19200000:
- usec_config = 0x045f; /* (95+1)/(4+1) */
- break;
- case 26000000:
- usec_config = 0x0019; /* (25+1)/(0+1) */
- break;
- case 38400000:
- usec_config = 0x04bf; /* (191+1)/(4+1) */
- break;
- case 48000000:
- usec_config = 0x002f; /* (47+1)/(0+1) */
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
-
- writel(usec_config, timer_of_base(to) + TIMERUS_USEC_CFG);
-
-out:
- return ret;
-}
-
-#ifdef CONFIG_ARM64
-static int __init tegra_init_timer(struct device_node *np)
-{
- int cpu, ret = 0;
- struct timer_of *to;
-
- to = this_cpu_ptr(&tegra_to);
- ret = tegra_timer_common_init(np, to);
- if (ret < 0)
- goto out;
-
- for_each_possible_cpu(cpu) {
- struct timer_of *cpu_to;
-
- cpu_to = per_cpu_ptr(&tegra_to, cpu);
- cpu_to->of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(cpu);
- cpu_to->of_clk.rate = timer_of_rate(to);
- cpu_to->clkevt.cpumask = cpumask_of(cpu);
- cpu_to->clkevt.irq =
- irq_of_parse_and_map(np, IRQ_IDX_FOR_CPU(cpu));
- if (!cpu_to->clkevt.irq) {
- pr_err("%s: can't map IRQ for CPU%d\n",
- __func__, cpu);
- ret = -EINVAL;
- goto out;
- }
-
- irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
- ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr,
- IRQF_TIMER | IRQF_NOBALANCING,
- cpu_to->clkevt.name, &cpu_to->clkevt);
- if (ret) {
- pr_err("%s: cannot setup irq %d for CPU%d\n",
- __func__, cpu_to->clkevt.irq, cpu);
- ret = -EINVAL;
- goto out_irq;
- }
- }
-
- cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
- "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
- tegra_timer_stop);
-
- return ret;
-out_irq:
- for_each_possible_cpu(cpu) {
- struct timer_of *cpu_to;
-
- cpu_to = per_cpu_ptr(&tegra_to, cpu);
- if (cpu_to->clkevt.irq) {
- free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
- irq_dispose_mapping(cpu_to->clkevt.irq);
- }
- }
-out:
- timer_of_cleanup(to);
- return ret;
-}
-#else /* CONFIG_ARM */
-static int __init tegra_init_timer(struct device_node *np)
-{
- int ret = 0;
-
- ret = tegra_timer_common_init(np, &tegra_to);
- if (ret < 0)
- goto out;
-
- tegra_to.of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(0);
- tegra_to.of_clk.rate = 1000000; /* microsecond timer */
-
- sched_clock_register(tegra_read_sched_clock, 32,
- timer_of_rate(&tegra_to));
- ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
- "timer_us", timer_of_rate(&tegra_to),
- 300, 32, clocksource_mmio_readl_up);
- if (ret) {
- pr_err("Failed to register clocksource\n");
- goto out;
- }
-
- tegra_delay_timer.read_current_timer =
- tegra_delay_timer_read_counter_long;
- tegra_delay_timer.freq = timer_of_rate(&tegra_to);
- register_current_timer_delay(&tegra_delay_timer);
-
- clockevents_config_and_register(&tegra_to.clkevt,
- timer_of_rate(&tegra_to),
- 0x1,
- 0x1fffffff);
-
- return ret;
-out:
- timer_of_cleanup(&tegra_to);
-
- return ret;
-}
-
-static int __init tegra20_init_rtc(struct device_node *np)
-{
- int ret;
-
- ret = timer_of_init(np, &suspend_rtc_to);
- if (ret)
- return ret;
-
- clocksource_register_hz(&suspend_rtc_clocksource, 1000);
-
- return 0;
-}
-TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
-#endif
-TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra_init_timer);
-TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra_init_timer);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 0af08081e305..603413f28fa3 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -520,10 +520,13 @@ config CRYPTO_DEV_ATMEL_SHA
To compile this driver as a module, choose M here: the module
will be called atmel-sha.
+config CRYPTO_DEV_ATMEL_I2C
+ tristate
+
config CRYPTO_DEV_ATMEL_ECC
tristate "Support for Microchip / Atmel ECC hw accelerator"
- depends on ARCH_AT91 || COMPILE_TEST
depends on I2C
+ select CRYPTO_DEV_ATMEL_I2C
select CRYPTO_ECDH
select CRC16
help
@@ -534,6 +537,21 @@ config CRYPTO_DEV_ATMEL_ECC
To compile this driver as a module, choose M here: the module
will be called atmel-ecc.
+config CRYPTO_DEV_ATMEL_SHA204A
+ tristate "Support for Microchip / Atmel SHA accelerator and RNG"
+ depends on I2C
+ select CRYPTO_DEV_ATMEL_I2C
+ select HW_RANDOM
+ select CRC16
+ help
+ Microchip / Atmel SHA accelerator and RNG.
+ Select this if you want to use the Microchip / Atmel SHA204A
+ module as a random number generator. (Other functions of the
+ chip are currently not exposed by this driver)
+
+ To compile this driver as a module, choose M here: the module
+ will be called atmel-sha204a.
+
config CRYPTO_DEV_CCP
bool "Support for AMD Secure Processor"
depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index a23a7197fcd7..afc4753b5d28 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -2,7 +2,9 @@
obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_I2C) += atmel-i2c.o
obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA204A) += atmel-sha204a.o
obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index 49f3e0ce242c..cbfc607282f4 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -67,12 +67,16 @@ static void set_dynamic_sa_command_1(struct dynamic_sa_ctl *sa, u32 cm,
}
static inline int crypto4xx_crypt(struct skcipher_request *req,
- const unsigned int ivlen, bool decrypt)
+ const unsigned int ivlen, bool decrypt,
+ bool check_blocksize)
{
struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
__le32 iv[AES_IV_SIZE];
+ if (check_blocksize && !IS_ALIGNED(req->cryptlen, AES_BLOCK_SIZE))
+ return -EINVAL;
+
if (ivlen)
crypto4xx_memcpy_to_le32(iv, req->iv, ivlen);
@@ -81,24 +85,34 @@ static inline int crypto4xx_crypt(struct skcipher_request *req,
ctx->sa_len, 0, NULL);
}
-int crypto4xx_encrypt_noiv(struct skcipher_request *req)
+int crypto4xx_encrypt_noiv_block(struct skcipher_request *req)
+{
+ return crypto4xx_crypt(req, 0, false, true);
+}
+
+int crypto4xx_encrypt_iv_stream(struct skcipher_request *req)
+{
+ return crypto4xx_crypt(req, AES_IV_SIZE, false, false);
+}
+
+int crypto4xx_decrypt_noiv_block(struct skcipher_request *req)
{
- return crypto4xx_crypt(req, 0, false);
+ return crypto4xx_crypt(req, 0, true, true);
}
-int crypto4xx_encrypt_iv(struct skcipher_request *req)
+int crypto4xx_decrypt_iv_stream(struct skcipher_request *req)
{
- return crypto4xx_crypt(req, AES_IV_SIZE, false);
+ return crypto4xx_crypt(req, AES_IV_SIZE, true, false);
}
-int crypto4xx_decrypt_noiv(struct skcipher_request *req)
+int crypto4xx_encrypt_iv_block(struct skcipher_request *req)
{
- return crypto4xx_crypt(req, 0, true);
+ return crypto4xx_crypt(req, AES_IV_SIZE, false, true);
}
-int crypto4xx_decrypt_iv(struct skcipher_request *req)
+int crypto4xx_decrypt_iv_block(struct skcipher_request *req)
{
- return crypto4xx_crypt(req, AES_IV_SIZE, true);
+ return crypto4xx_crypt(req, AES_IV_SIZE, true, true);
}
/**
@@ -269,8 +283,8 @@ crypto4xx_ctr_crypt(struct skcipher_request *req, bool encrypt)
return ret;
}
- return encrypt ? crypto4xx_encrypt_iv(req)
- : crypto4xx_decrypt_iv(req);
+ return encrypt ? crypto4xx_encrypt_iv_stream(req)
+ : crypto4xx_decrypt_iv_stream(req);
}
static int crypto4xx_sk_setup_fallback(struct crypto4xx_ctx *ctx,
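The renamed helpers make the mode semantics explicit: CFB, OFB and CTR behave as stream ciphers (cra_blocksize of 1 in the algorithm definitions that follow, no length check), while ECB and CBC remain block modes whose _block entry points must reject misaligned requests. A sketch of the check those variants now route through, mirroring the check_blocksize logic in crypto4xx_crypt() above:

/* Block modes accept only whole AES blocks; stream modes take any length. */
static int blocksize_check_sketch(unsigned int cryptlen, bool block_mode)
{
	if (block_mode && !IS_ALIGNED(cryptlen, AES_BLOCK_SIZE))
		return -EINVAL;

	return 0;
}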
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index 16d911aaa508..de5e9352e920 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -182,7 +182,6 @@ static u32 crypto4xx_build_pdr(struct crypto4xx_device *dev)
dev->pdr_pa);
return -ENOMEM;
}
- memset(dev->pdr, 0, sizeof(struct ce_pd) * PPC4XX_NUM_PD);
dev->shadow_sa_pool = dma_alloc_coherent(dev->core_dev->device,
sizeof(union shadow_sa_buf) * PPC4XX_NUM_PD,
&dev->shadow_sa_pool_pa,
@@ -1210,8 +1209,8 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cbc,
- .encrypt = crypto4xx_encrypt_iv,
- .decrypt = crypto4xx_decrypt_iv,
+ .encrypt = crypto4xx_encrypt_iv_block,
+ .decrypt = crypto4xx_decrypt_iv_block,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
@@ -1222,7 +1221,7 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
@@ -1230,8 +1229,8 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_cfb,
- .encrypt = crypto4xx_encrypt_iv,
- .decrypt = crypto4xx_decrypt_iv,
+ .encrypt = crypto4xx_encrypt_iv_stream,
+ .decrypt = crypto4xx_decrypt_iv_stream,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
@@ -1243,7 +1242,7 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.cra_flags = CRYPTO_ALG_NEED_FALLBACK |
CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
@@ -1263,7 +1262,7 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
@@ -1290,8 +1289,8 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.setkey = crypto4xx_setkey_aes_ecb,
- .encrypt = crypto4xx_encrypt_noiv,
- .decrypt = crypto4xx_decrypt_noiv,
+ .encrypt = crypto4xx_encrypt_noiv_block,
+ .decrypt = crypto4xx_decrypt_noiv_block,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
@@ -1302,7 +1301,7 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
.cra_flags = CRYPTO_ALG_ASYNC |
CRYPTO_ALG_KERN_DRIVER_ONLY,
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto4xx_ctx),
.cra_module = THIS_MODULE,
},
@@ -1310,8 +1309,8 @@ static struct crypto4xx_alg_common crypto4xx_alg[] = {
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_IV_SIZE,
.setkey = crypto4xx_setkey_aes_ofb,
- .encrypt = crypto4xx_encrypt_iv,
- .decrypt = crypto4xx_decrypt_iv,
+ .encrypt = crypto4xx_encrypt_iv_stream,
+ .decrypt = crypto4xx_decrypt_iv_stream,
.init = crypto4xx_sk_init,
.exit = crypto4xx_sk_exit,
} },
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index ca1c25c40c23..6b6841359190 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -173,10 +173,12 @@ int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
const u8 *key, unsigned int keylen);
int crypto4xx_encrypt_ctr(struct skcipher_request *req);
int crypto4xx_decrypt_ctr(struct skcipher_request *req);
-int crypto4xx_encrypt_iv(struct skcipher_request *req);
-int crypto4xx_decrypt_iv(struct skcipher_request *req);
-int crypto4xx_encrypt_noiv(struct skcipher_request *req);
-int crypto4xx_decrypt_noiv(struct skcipher_request *req);
+int crypto4xx_encrypt_iv_stream(struct skcipher_request *req);
+int crypto4xx_decrypt_iv_stream(struct skcipher_request *req);
+int crypto4xx_encrypt_iv_block(struct skcipher_request *req);
+int crypto4xx_decrypt_iv_block(struct skcipher_request *req);
+int crypto4xx_encrypt_noiv_block(struct skcipher_request *req);
+int crypto4xx_decrypt_noiv_block(struct skcipher_request *req);
int crypto4xx_rfc3686_encrypt(struct skcipher_request *req);
int crypto4xx_rfc3686_decrypt(struct skcipher_request *req);
int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm);
diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index ba00e4563ca0..ff02cc05affb 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -6,8 +6,6 @@
* Author: Tudor Ambarus <tudor.ambarus@microchip.com>
*/
-#include <linux/bitrev.h>
-#include <linux/crc16.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -23,42 +21,11 @@
#include <crypto/internal/kpp.h>
#include <crypto/ecdh.h>
#include <crypto/kpp.h>
-#include "atmel-ecc.h"
-
-/* Used for binding tfm objects to i2c clients. */
-struct atmel_ecc_driver_data {
- struct list_head i2c_client_list;
- spinlock_t i2c_list_lock;
-} ____cacheline_aligned;
+#include "atmel-i2c.h"
static struct atmel_ecc_driver_data driver_data;
/**
- * atmel_ecc_i2c_client_priv - i2c_client private data
- * @client : pointer to i2c client device
- * @i2c_client_list_node: part of i2c_client_list
- * @lock : lock for sending i2c commands
- * @wake_token : wake token array of zeros
- * @wake_token_sz : size in bytes of the wake_token
- * @tfm_count : number of active crypto transformations on i2c client
- *
- * Reads and writes from/to the i2c client are sequential. The first byte
- * transmitted to the device is treated as the byte size. Any attempt to send
- * more than this number of bytes will cause the device to not ACK those bytes.
- * After the host writes a single command byte to the input buffer, reads are
- * prohibited until after the device completes command execution. Use a mutex
- * when sending i2c commands.
- */
-struct atmel_ecc_i2c_client_priv {
- struct i2c_client *client;
- struct list_head i2c_client_list_node;
- struct mutex lock;
- u8 wake_token[WAKE_TOKEN_MAX_SIZE];
- size_t wake_token_sz;
- atomic_t tfm_count ____cacheline_aligned;
-};
-
-/**
* atmel_ecdh_ctx - transformation context
* @client : pointer to i2c client device
* @fallback : used for unsupported curves or when user wants to use its own
@@ -80,188 +47,12 @@ struct atmel_ecdh_ctx {
bool do_fallback;
};
-/**
- * atmel_ecc_work_data - data structure representing the work
- * @ctx : transformation context.
- * @cbk : pointer to a callback function to be invoked upon completion of this
- * request. This has the form:
- * callback(struct atmel_ecc_work_data *work_data, void *areq, u8 status)
- * where:
- * @work_data: data structure representing the work
- * @areq : optional pointer to an argument passed with the original
- * request.
- * @status : status returned from the i2c client device or i2c error.
- * @areq: optional pointer to a user argument for use at callback time.
- * @work: describes the task to be executed.
- * @cmd : structure used for communicating with the device.
- */
-struct atmel_ecc_work_data {
- struct atmel_ecdh_ctx *ctx;
- void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq,
- int status);
- void *areq;
- struct work_struct work;
- struct atmel_ecc_cmd cmd;
-};
-
-static u16 atmel_ecc_crc16(u16 crc, const u8 *buffer, size_t len)
-{
- return cpu_to_le16(bitrev16(crc16(crc, buffer, len)));
-}
-
-/**
- * atmel_ecc_checksum() - Generate 16-bit CRC as required by ATMEL ECC.
- * CRC16 verification of the count, opcode, param1, param2 and data bytes.
- * The checksum is saved in little-endian format in the least significant
- * two bytes of the command. CRC polynomial is 0x8005 and the initial register
- * value should be zero.
- *
- * @cmd : structure used for communicating with the device.
- */
-static void atmel_ecc_checksum(struct atmel_ecc_cmd *cmd)
-{
- u8 *data = &cmd->count;
- size_t len = cmd->count - CRC_SIZE;
- u16 *crc16 = (u16 *)(data + len);
-
- *crc16 = atmel_ecc_crc16(0, data, len);
-}
-
-static void atmel_ecc_init_read_cmd(struct atmel_ecc_cmd *cmd)
-{
- cmd->word_addr = COMMAND;
- cmd->opcode = OPCODE_READ;
- /*
- * Read the word from Configuration zone that contains the lock bytes
- * (UserExtra, Selector, LockValue, LockConfig).
- */
- cmd->param1 = CONFIG_ZONE;
- cmd->param2 = DEVICE_LOCK_ADDR;
- cmd->count = READ_COUNT;
-
- atmel_ecc_checksum(cmd);
-
- cmd->msecs = MAX_EXEC_TIME_READ;
- cmd->rxsize = READ_RSP_SIZE;
-}
-
-static void atmel_ecc_init_genkey_cmd(struct atmel_ecc_cmd *cmd, u16 keyid)
-{
- cmd->word_addr = COMMAND;
- cmd->count = GENKEY_COUNT;
- cmd->opcode = OPCODE_GENKEY;
- cmd->param1 = GENKEY_MODE_PRIVATE;
- /* a random private key will be generated and stored in slot keyID */
- cmd->param2 = cpu_to_le16(keyid);
-
- atmel_ecc_checksum(cmd);
-
- cmd->msecs = MAX_EXEC_TIME_GENKEY;
- cmd->rxsize = GENKEY_RSP_SIZE;
-}
-
-static int atmel_ecc_init_ecdh_cmd(struct atmel_ecc_cmd *cmd,
- struct scatterlist *pubkey)
-{
- size_t copied;
-
- cmd->word_addr = COMMAND;
- cmd->count = ECDH_COUNT;
- cmd->opcode = OPCODE_ECDH;
- cmd->param1 = ECDH_PREFIX_MODE;
- /* private key slot */
- cmd->param2 = cpu_to_le16(DATA_SLOT_2);
-
- /*
- * The device only supports NIST P256 ECC keys. The public key size will
- * always be the same. Use a macro for the key size to avoid unnecessary
- * computations.
- */
- copied = sg_copy_to_buffer(pubkey,
- sg_nents_for_len(pubkey,
- ATMEL_ECC_PUBKEY_SIZE),
- cmd->data, ATMEL_ECC_PUBKEY_SIZE);
- if (copied != ATMEL_ECC_PUBKEY_SIZE)
- return -EINVAL;
-
- atmel_ecc_checksum(cmd);
-
- cmd->msecs = MAX_EXEC_TIME_ECDH;
- cmd->rxsize = ECDH_RSP_SIZE;
-
- return 0;
-}
-
-/*
- * After wake and after execution of a command, there will be error, status, or
- * result bytes in the device's output register that can be retrieved by the
- * system. When the length of that group is four bytes, the codes returned are
- * detailed in error_list.
- */
-static int atmel_ecc_status(struct device *dev, u8 *status)
-{
- size_t err_list_len = ARRAY_SIZE(error_list);
- int i;
- u8 err_id = status[1];
-
- if (*status != STATUS_SIZE)
- return 0;
-
- if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR)
- return 0;
-
- for (i = 0; i < err_list_len; i++)
- if (error_list[i].value == err_id)
- break;
-
- /* if err_id is not in the error_list then ignore it */
- if (i != err_list_len) {
- dev_err(dev, "%02x: %s:\n", err_id, error_list[i].error_text);
- return err_id;
- }
-
- return 0;
-}
-
-static int atmel_ecc_wakeup(struct i2c_client *client)
-{
- struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
- u8 status[STATUS_RSP_SIZE];
- int ret;
-
- /*
- * The device ignores any levels or transitions on the SCL pin when the
- * device is idle, asleep or during waking up. Don't check for error
- * when waking up the device.
- */
- i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
-
- /*
- * Wait to wake the device. Typical execution times for ecdh and genkey
- * are around tens of milliseconds. Delta is chosen to 50 microseconds.
- */
- usleep_range(TWHI_MIN, TWHI_MAX);
-
- ret = i2c_master_recv(client, status, STATUS_SIZE);
- if (ret < 0)
- return ret;
-
- return atmel_ecc_status(&client->dev, status);
-}
-
-static int atmel_ecc_sleep(struct i2c_client *client)
-{
- u8 sleep = SLEEP_TOKEN;
-
- return i2c_master_send(client, &sleep, 1);
-}
-
-static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq,
+static void atmel_ecdh_done(struct atmel_i2c_work_data *work_data, void *areq,
int status)
{
struct kpp_request *req = areq;
struct atmel_ecdh_ctx *ctx = work_data->ctx;
- struct atmel_ecc_cmd *cmd = &work_data->cmd;
+ struct atmel_i2c_cmd *cmd = &work_data->cmd;
size_t copied, n_sz;
if (status)
@@ -282,82 +73,6 @@ free_work_data:
kpp_request_complete(req, status);
}
-/*
- * atmel_ecc_send_receive() - send a command to the device and receive its
- * response.
- * @client: i2c client device
- * @cmd : structure used to communicate with the device
- *
- * After the device receives a Wake token, a watchdog counter starts within the
- * device. After the watchdog timer expires, the device enters sleep mode
- * regardless of whether some I/O transmission or command execution is in
- * progress. If a command is attempted when insufficient time remains prior to
- * watchdog timer execution, the device will return the watchdog timeout error
- * code without attempting to execute the command. There is no way to reset the
- * counter other than to put the device into sleep or idle mode and then
- * wake it up again.
- */
-static int atmel_ecc_send_receive(struct i2c_client *client,
- struct atmel_ecc_cmd *cmd)
-{
- struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
- int ret;
-
- mutex_lock(&i2c_priv->lock);
-
- ret = atmel_ecc_wakeup(client);
- if (ret)
- goto err;
-
- /* send the command */
- ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE);
- if (ret < 0)
- goto err;
-
- /* delay the appropriate amount of time for command to execute */
- msleep(cmd->msecs);
-
- /* receive the response */
- ret = i2c_master_recv(client, cmd->data, cmd->rxsize);
- if (ret < 0)
- goto err;
-
- /* put the device into low-power mode */
- ret = atmel_ecc_sleep(client);
- if (ret < 0)
- goto err;
-
- mutex_unlock(&i2c_priv->lock);
- return atmel_ecc_status(&client->dev, cmd->data);
-err:
- mutex_unlock(&i2c_priv->lock);
- return ret;
-}
-
-static void atmel_ecc_work_handler(struct work_struct *work)
-{
- struct atmel_ecc_work_data *work_data =
- container_of(work, struct atmel_ecc_work_data, work);
- struct atmel_ecc_cmd *cmd = &work_data->cmd;
- struct i2c_client *client = work_data->ctx->client;
- int status;
-
- status = atmel_ecc_send_receive(client, cmd);
- work_data->cbk(work_data, work_data->areq, status);
-}
-
-static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data,
- void (*cbk)(struct atmel_ecc_work_data *work_data,
- void *areq, int status),
- void *areq)
-{
- work_data->cbk = (void *)cbk;
- work_data->areq = areq;
-
- INIT_WORK(&work_data->work, atmel_ecc_work_handler);
- schedule_work(&work_data->work);
-}
-
static unsigned int atmel_ecdh_supported_curve(unsigned int curve_id)
{
if (curve_id == ECC_CURVE_NIST_P256)
@@ -374,7 +89,7 @@ static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
unsigned int len)
{
struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
- struct atmel_ecc_cmd *cmd;
+ struct atmel_i2c_cmd *cmd;
void *public_key;
struct ecdh params;
int ret = -ENOMEM;
@@ -412,9 +127,9 @@ static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
ctx->do_fallback = false;
ctx->curve_id = params.curve_id;
- atmel_ecc_init_genkey_cmd(cmd, DATA_SLOT_2);
+ atmel_i2c_init_genkey_cmd(cmd, DATA_SLOT_2);
- ret = atmel_ecc_send_receive(ctx->client, cmd);
+ ret = atmel_i2c_send_receive(ctx->client, cmd);
if (ret)
goto free_public_key;
@@ -444,6 +159,9 @@ static int atmel_ecdh_generate_public_key(struct kpp_request *req)
return crypto_kpp_generate_public_key(req);
}
+ if (!ctx->public_key)
+ return -EINVAL;
+
/* might want less than we've got */
nbytes = min_t(size_t, ATMEL_ECC_PUBKEY_SIZE, req->dst_len);
@@ -461,7 +179,7 @@ static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
{
struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
- struct atmel_ecc_work_data *work_data;
+ struct atmel_i2c_work_data *work_data;
gfp_t gfp;
int ret;
@@ -482,12 +200,13 @@ static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
return -ENOMEM;
work_data->ctx = ctx;
+ work_data->client = ctx->client;
- ret = atmel_ecc_init_ecdh_cmd(&work_data->cmd, req->src);
+ ret = atmel_i2c_init_ecdh_cmd(&work_data->cmd, req->src);
if (ret)
goto free_work_data;
- atmel_ecc_enqueue(work_data, atmel_ecdh_done, req);
+ atmel_i2c_enqueue(work_data, atmel_ecdh_done, req);
return -EINPROGRESS;
@@ -498,7 +217,7 @@ free_work_data:
static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
{
- struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
+ struct atmel_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
struct i2c_client *client = ERR_PTR(-ENODEV);
int min_tfm_cnt = INT_MAX;
int tfm_cnt;
@@ -533,7 +252,7 @@ static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
static void atmel_ecc_i2c_client_free(struct i2c_client *client)
{
- struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+ struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
atomic_dec(&i2c_priv->tfm_count);
}
@@ -604,96 +323,18 @@ static struct kpp_alg atmel_ecdh = {
},
};
-static inline size_t atmel_ecc_wake_token_sz(u32 bus_clk_rate)
-{
- u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
-
- /* return the size of the wake_token in bytes */
- return DIV_ROUND_UP(no_of_bits, 8);
-}
-
-static int device_sanity_check(struct i2c_client *client)
-{
- struct atmel_ecc_cmd *cmd;
- int ret;
-
- cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- atmel_ecc_init_read_cmd(cmd);
-
- ret = atmel_ecc_send_receive(client, cmd);
- if (ret)
- goto free_cmd;
-
- /*
- * It is vital that the Configuration, Data and OTP zones be locked
- * prior to release into the field of the system containing the device.
- * Failure to lock these zones may permit modification of any secret
- * keys and may lead to other security problems.
- */
- if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) {
- dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n");
- ret = -ENOTSUPP;
- }
-
- /* fall through */
-free_cmd:
- kfree(cmd);
- return ret;
-}
-
static int atmel_ecc_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- struct atmel_ecc_i2c_client_priv *i2c_priv;
- struct device *dev = &client->dev;
+ struct atmel_i2c_client_priv *i2c_priv;
int ret;
- u32 bus_clk_rate;
-
- if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
- dev_err(dev, "I2C_FUNC_I2C not supported\n");
- return -ENODEV;
- }
- ret = of_property_read_u32(client->adapter->dev.of_node,
- "clock-frequency", &bus_clk_rate);
- if (ret) {
- dev_err(dev, "of: failed to read clock-frequency property\n");
- return ret;
- }
-
- if (bus_clk_rate > 1000000L) {
- dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n",
- bus_clk_rate);
- return -EINVAL;
- }
-
- i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL);
- if (!i2c_priv)
- return -ENOMEM;
-
- i2c_priv->client = client;
- mutex_init(&i2c_priv->lock);
-
- /*
- * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate -
- * 1MHz. The previous bus_clk_rate check ensures us that wake_token_sz
- * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE.
- */
- i2c_priv->wake_token_sz = atmel_ecc_wake_token_sz(bus_clk_rate);
-
- memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token));
-
- atomic_set(&i2c_priv->tfm_count, 0);
-
- i2c_set_clientdata(client, i2c_priv);
-
- ret = device_sanity_check(client);
+ ret = atmel_i2c_probe(client, id);
if (ret)
return ret;
+ i2c_priv = i2c_get_clientdata(client);
+
spin_lock(&driver_data.i2c_list_lock);
list_add_tail(&i2c_priv->i2c_client_list_node,
&driver_data.i2c_client_list);
@@ -705,10 +346,10 @@ static int atmel_ecc_probe(struct i2c_client *client,
list_del(&i2c_priv->i2c_client_list_node);
spin_unlock(&driver_data.i2c_list_lock);
- dev_err(dev, "%s alg registration failed\n",
+ dev_err(&client->dev, "%s alg registration failed\n",
atmel_ecdh.base.cra_driver_name);
} else {
- dev_info(dev, "atmel ecc algorithms registered in /proc/crypto\n");
+ dev_info(&client->dev, "atmel ecc algorithms registered in /proc/crypto\n");
}
return ret;
@@ -716,7 +357,7 @@ static int atmel_ecc_probe(struct i2c_client *client,
static int atmel_ecc_remove(struct i2c_client *client)
{
- struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+ struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
/* Return EBUSY if i2c client already allocated. */
if (atomic_read(&i2c_priv->tfm_count)) {
diff --git a/drivers/crypto/atmel-ecc.h b/drivers/crypto/atmel-ecc.h
deleted file mode 100644
index 643a3b947338..000000000000
--- a/drivers/crypto/atmel-ecc.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2017, Microchip Technology Inc.
- * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
- */
-
-#ifndef __ATMEL_ECC_H__
-#define __ATMEL_ECC_H__
-
-#define ATMEL_ECC_PRIORITY 300
-
-#define COMMAND 0x03 /* packet function */
-#define SLEEP_TOKEN 0x01
-#define WAKE_TOKEN_MAX_SIZE 8
-
-/* Definitions of Data and Command sizes */
-#define WORD_ADDR_SIZE 1
-#define COUNT_SIZE 1
-#define CRC_SIZE 2
-#define CMD_OVERHEAD_SIZE (COUNT_SIZE + CRC_SIZE)
-
-/* size in bytes of the n prime */
-#define ATMEL_ECC_NIST_P256_N_SIZE 32
-#define ATMEL_ECC_PUBKEY_SIZE (2 * ATMEL_ECC_NIST_P256_N_SIZE)
-
-#define STATUS_RSP_SIZE 4
-#define ECDH_RSP_SIZE (32 + CMD_OVERHEAD_SIZE)
-#define GENKEY_RSP_SIZE (ATMEL_ECC_PUBKEY_SIZE + \
- CMD_OVERHEAD_SIZE)
-#define READ_RSP_SIZE (4 + CMD_OVERHEAD_SIZE)
-#define MAX_RSP_SIZE GENKEY_RSP_SIZE
-
-/**
- * atmel_ecc_cmd - structure used for communicating with the device.
- * @word_addr: indicates the function of the packet sent to the device. This
- * byte should have a value of COMMAND for normal operation.
- * @count : number of bytes to be transferred to (or from) the device.
- * @opcode : the command code.
- * @param1 : the first parameter; always present.
- * @param2 : the second parameter; always present.
- * @data : optional remaining input data. Includes a 2-byte CRC.
- * @rxsize : size of the data received from i2c client.
- * @msecs : command execution time in milliseconds
- */
-struct atmel_ecc_cmd {
- u8 word_addr;
- u8 count;
- u8 opcode;
- u8 param1;
- u16 param2;
- u8 data[MAX_RSP_SIZE];
- u8 msecs;
- u16 rxsize;
-} __packed;
-
-/* Status/Error codes */
-#define STATUS_SIZE 0x04
-#define STATUS_NOERR 0x00
-#define STATUS_WAKE_SUCCESSFUL 0x11
-
-static const struct {
- u8 value;
- const char *error_text;
-} error_list[] = {
- { 0x01, "CheckMac or Verify miscompare" },
- { 0x03, "Parse Error" },
- { 0x05, "ECC Fault" },
- { 0x0F, "Execution Error" },
- { 0xEE, "Watchdog about to expire" },
- { 0xFF, "CRC or other communication error" },
-};
-
-/* Definitions for eeprom organization */
-#define CONFIG_ZONE 0
-
-/* Definitions for Indexes common to all commands */
-#define RSP_DATA_IDX 1 /* buffer index of data in response */
-#define DATA_SLOT_2 2 /* used for ECDH private key */
-
-/* Definitions for the device lock state */
-#define DEVICE_LOCK_ADDR 0x15
-#define LOCK_VALUE_IDX (RSP_DATA_IDX + 2)
-#define LOCK_CONFIG_IDX (RSP_DATA_IDX + 3)
-
-/*
- * Wake High delay to data communication (microseconds). SDA should be stable
- * high for this entire duration.
- */
-#define TWHI_MIN 1500
-#define TWHI_MAX 1550
-
-/* Wake Low duration */
-#define TWLO_USEC 60
-
-/* Command execution time (milliseconds) */
-#define MAX_EXEC_TIME_ECDH 58
-#define MAX_EXEC_TIME_GENKEY 115
-#define MAX_EXEC_TIME_READ 1
-
-/* Command opcode */
-#define OPCODE_ECDH 0x43
-#define OPCODE_GENKEY 0x40
-#define OPCODE_READ 0x02
-
-/* Definitions for the READ Command */
-#define READ_COUNT 7
-
-/* Definitions for the GenKey Command */
-#define GENKEY_COUNT 7
-#define GENKEY_MODE_PRIVATE 0x04
-
-/* Definitions for the ECDH Command */
-#define ECDH_COUNT 71
-#define ECDH_PREFIX_MODE 0x00
-
-#endif /* __ATMEL_ECC_H__ */
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
new file mode 100644
index 000000000000..dc876fab2882
--- /dev/null
+++ b/drivers/crypto/atmel-i2c.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip / Atmel ECC (I2C) driver.
+ *
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ */
+
+#include <linux/bitrev.h>
+#include <linux/crc16.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include "atmel-i2c.h"
+
+/**
+ * atmel_i2c_checksum() - Generate 16-bit CRC as required by ATMEL ECC.
+ * CRC16 verification of the count, opcode, param1, param2 and data bytes.
+ * The checksum is stored in little-endian format in the last two bytes
+ * of the command. CRC polynomial is 0x8005 and the initial register
+ * value should be zero.
+ *
+ * @cmd : structure used for communicating with the device.
+ */
+static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd)
+{
+ u8 *data = &cmd->count;
+ size_t len = cmd->count - CRC_SIZE;
+ __le16 *__crc16 = (__le16 *)(data + len);
+
+ *__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len)));
+}
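
The bitrev16(crc16()) pairing deserves a closer look: the kernel's crc16() implements the reflected CRC-16 variant (polynomial 0xA001, i.e. 0x8005 bit-reversed), while the device expects a non-reflected CRC register, hence the final bitrev16(). A minimal, self-contained sketch of the equivalent direct computation (illustration only, not the kernel implementation):

	#include <stdint.h>
	#include <stddef.h>

	/* Poly 0x8005, data bits fed LSB first, register not reflected,
	 * init 0 -- the same value bitrev16(crc16(0, data, len)) yields. */
	static uint16_t atmel_crc16_sketch(const uint8_t *data, size_t len)
	{
		uint16_t crc = 0;
		size_t i;
		int bit;

		for (i = 0; i < len; i++) {
			for (bit = 0; bit < 8; bit++) {
				uint8_t in = (data[i] >> bit) & 1;
				uint8_t msb = crc >> 15;

				crc <<= 1;
				if (in ^ msb)
					crc ^= 0x8005;
			}
		}
		return crc;
	}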
+
+void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd)
+{
+ cmd->word_addr = COMMAND;
+ cmd->opcode = OPCODE_READ;
+ /*
+ * Read the word from Configuration zone that contains the lock bytes
+ * (UserExtra, Selector, LockValue, LockConfig).
+ */
+ cmd->param1 = CONFIG_ZONE;
+ cmd->param2 = cpu_to_le16(DEVICE_LOCK_ADDR);
+ cmd->count = READ_COUNT;
+
+ atmel_i2c_checksum(cmd);
+
+ cmd->msecs = MAX_EXEC_TIME_READ;
+ cmd->rxsize = READ_RSP_SIZE;
+}
+EXPORT_SYMBOL(atmel_i2c_init_read_cmd);
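
As a worked example of the size constants, this is the on-the-wire layout the READ command above produces (values taken from the defines in atmel-i2c.h further down):

	/*
	 * word_addr = 0x03 (COMMAND)           -- WORD_ADDR_SIZE (1 byte)
	 * count     = 0x07 (READ_COUNT)        -- counts itself, opcode,
	 *                                         param1, param2 and the CRC
	 * opcode    = 0x02 (OPCODE_READ)
	 * param1    = 0x00 (CONFIG_ZONE)
	 * param2    = 0x15 0x00 (DEVICE_LOCK_ADDR, little endian)
	 * crc[2]    = CRC16 over count..param2 (count - CRC_SIZE = 5 bytes)
	 *
	 * atmel_i2c_send_receive() transfers count + WORD_ADDR_SIZE = 8 bytes
	 * and expects READ_RSP_SIZE = 4 + CMD_OVERHEAD_SIZE = 7 bytes back.
	 */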
+
+void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd)
+{
+ cmd->word_addr = COMMAND;
+ cmd->opcode = OPCODE_RANDOM;
+ cmd->param1 = 0;
+ cmd->param2 = 0;
+ cmd->count = RANDOM_COUNT;
+
+ atmel_i2c_checksum(cmd);
+
+ cmd->msecs = MAX_EXEC_TIME_RANDOM;
+ cmd->rxsize = RANDOM_RSP_SIZE;
+}
+EXPORT_SYMBOL(atmel_i2c_init_random_cmd);
+
+void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid)
+{
+ cmd->word_addr = COMMAND;
+ cmd->count = GENKEY_COUNT;
+ cmd->opcode = OPCODE_GENKEY;
+ cmd->param1 = GENKEY_MODE_PRIVATE;
+ /* a random private key will be generated and stored in slot keyID */
+ cmd->param2 = cpu_to_le16(keyid);
+
+ atmel_i2c_checksum(cmd);
+
+ cmd->msecs = MAX_EXEC_TIME_GENKEY;
+ cmd->rxsize = GENKEY_RSP_SIZE;
+}
+EXPORT_SYMBOL(atmel_i2c_init_genkey_cmd);
+
+int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
+ struct scatterlist *pubkey)
+{
+ size_t copied;
+
+ cmd->word_addr = COMMAND;
+ cmd->count = ECDH_COUNT;
+ cmd->opcode = OPCODE_ECDH;
+ cmd->param1 = ECDH_PREFIX_MODE;
+ /* private key slot */
+ cmd->param2 = cpu_to_le16(DATA_SLOT_2);
+
+ /*
+ * The device only supports NIST P256 ECC keys. The public key size will
+ * always be the same. Use a macro for the key size to avoid unnecessary
+ * computations.
+ */
+ copied = sg_copy_to_buffer(pubkey,
+ sg_nents_for_len(pubkey,
+ ATMEL_ECC_PUBKEY_SIZE),
+ cmd->data, ATMEL_ECC_PUBKEY_SIZE);
+ if (copied != ATMEL_ECC_PUBKEY_SIZE)
+ return -EINVAL;
+
+ atmel_i2c_checksum(cmd);
+
+ cmd->msecs = MAX_EXEC_TIME_ECDH;
+ cmd->rxsize = ECDH_RSP_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(atmel_i2c_init_ecdh_cmd);
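
ECDH_COUNT can be sanity-checked the same way against the fields filled in above (a sketch of the arithmetic; the 64-byte public key is ATMEL_ECC_PUBKEY_SIZE):

	/*
	 * ECDH_COUNT = 71 = COUNT_SIZE (1) + opcode (1) + param1 (1)
	 *                 + param2 (2) + ATMEL_ECC_PUBKEY_SIZE (64)
	 *                 + CRC_SIZE (2)
	 */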
+
+/*
+ * After wake and after execution of a command, there will be error, status, or
+ * result bytes in the device's output register that can be retrieved by the
+ * system. When the length of that group is four bytes, the codes returned are
+ * detailed in error_list.
+ */
+static int atmel_i2c_status(struct device *dev, u8 *status)
+{
+ size_t err_list_len = ARRAY_SIZE(error_list);
+ int i;
+ u8 err_id = status[1];
+
+ if (*status != STATUS_SIZE)
+ return 0;
+
+ if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR)
+ return 0;
+
+ for (i = 0; i < err_list_len; i++)
+ if (error_list[i].value == err_id)
+ break;
+
+ /* if err_id is in the error_list, report it; otherwise ignore it */
+ if (i != err_list_len) {
+ dev_err(dev, "%02x: %s\n", err_id, error_list[i].error_text);
+ return err_id;
+ }
+
+ return 0;
+}
+
+static int atmel_i2c_wakeup(struct i2c_client *client)
+{
+ struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+ u8 status[STATUS_RSP_SIZE];
+ int ret;
+
+ /*
+ * The device ignores any levels or transitions on the SCL pin when the
+ * device is idle, asleep or during waking up. Don't check for error
+ * when waking up the device.
+ */
+ i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
+
+ /*
+ * Wait for the device to wake up. Typical command execution times for
+ * ecdh and genkey are around tens of milliseconds anyway, so the 50
+ * microsecond delta of this usleep_range() is negligible.
+ */
+ usleep_range(TWHI_MIN, TWHI_MAX);
+
+ ret = i2c_master_recv(client, status, STATUS_SIZE);
+ if (ret < 0)
+ return ret;
+
+ return atmel_i2c_status(&client->dev, status);
+}
+
+static int atmel_i2c_sleep(struct i2c_client *client)
+{
+ u8 sleep = SLEEP_TOKEN;
+
+ return i2c_master_send(client, &sleep, 1);
+}
+
+/*
+ * atmel_i2c_send_receive() - send a command to the device and receive its
+ * response.
+ * @client: i2c client device
+ * @cmd : structure used to communicate with the device
+ *
+ * After the device receives a Wake token, a watchdog counter starts within the
+ * device. After the watchdog timer expires, the device enters sleep mode
+ * regardless of whether some I/O transmission or command execution is in
+ * progress. If a command is attempted when insufficient time remains prior to
+ * watchdog timer expiration, the device will return the watchdog timeout error
+ * code without attempting to execute the command. There is no way to reset the
+ * counter other than to put the device into sleep or idle mode and then
+ * wake it up again.
+ */
+int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd)
+{
+ struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+ int ret;
+
+ mutex_lock(&i2c_priv->lock);
+
+ ret = atmel_i2c_wakeup(client);
+ if (ret)
+ goto err;
+
+ /* send the command */
+ ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE);
+ if (ret < 0)
+ goto err;
+
+ /* delay the appropriate amount of time for command to execute */
+ msleep(cmd->msecs);
+
+ /* receive the response */
+ ret = i2c_master_recv(client, cmd->data, cmd->rxsize);
+ if (ret < 0)
+ goto err;
+
+ /* put the device into low-power mode */
+ ret = atmel_i2c_sleep(client);
+ if (ret < 0)
+ goto err;
+
+ mutex_unlock(&i2c_priv->lock);
+ return atmel_i2c_status(&client->dev, cmd->data);
+err:
+ mutex_unlock(&i2c_priv->lock);
+ return ret;
+}
+EXPORT_SYMBOL(atmel_i2c_send_receive);
+
+static void atmel_i2c_work_handler(struct work_struct *work)
+{
+ struct atmel_i2c_work_data *work_data =
+ container_of(work, struct atmel_i2c_work_data, work);
+ struct atmel_i2c_cmd *cmd = &work_data->cmd;
+ struct i2c_client *client = work_data->client;
+ int status;
+
+ status = atmel_i2c_send_receive(client, cmd);
+ work_data->cbk(work_data, work_data->areq, status);
+}
+
+void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
+ void (*cbk)(struct atmel_i2c_work_data *work_data,
+ void *areq, int status),
+ void *areq)
+{
+ work_data->cbk = (void *)cbk;
+ work_data->areq = areq;
+
+ INIT_WORK(&work_data->work, atmel_i2c_work_handler);
+ schedule_work(&work_data->work);
+}
+EXPORT_SYMBOL(atmel_i2c_enqueue);
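
A caller that needs a synchronous result on top of this asynchronous API could pair the callback with a completion. The sketch below is hypothetical (my_req_ctx, my_cbk and the wiring are illustrative names; the caller is assumed to have filled work_data.ctx, work_data.client and work_data.cmd first):

	#include <linux/completion.h>

	struct my_req_ctx {
		struct atmel_i2c_work_data work_data;
		struct completion done;
		int status;
	};

	static void my_cbk(struct atmel_i2c_work_data *work_data, void *areq,
			   int status)
	{
		struct my_req_ctx *req = areq;

		req->status = status;
		complete(&req->done);		/* wake the submitter */
	}

	/* submitter side:
	 *	init_completion(&req->done);
	 *	atmel_i2c_enqueue(&req->work_data, my_cbk, req);
	 *	wait_for_completion(&req->done);
	 *	... use req->status ...
	 */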
+
+static inline size_t atmel_i2c_wake_token_sz(u32 bus_clk_rate)
+{
+ u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
+
+ /* return the size of the wake_token in bytes */
+ return DIV_ROUND_UP(no_of_bits, 8);
+}
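
A worked example of this arithmetic, using the constants from atmel-i2c.h:

	/*
	 * At the 1 MHz ceiling enforced in atmel_i2c_probe():
	 *	no_of_bits    = DIV_ROUND_UP(60 * 1000000, 1000000) = 60
	 *	wake_token_sz = DIV_ROUND_UP(60, 8) = 8 = WAKE_TOKEN_MAX_SIZE
	 * At 100 kHz:
	 *	no_of_bits    = DIV_ROUND_UP(60 * 100000, 1000000) = 6
	 *	wake_token_sz = DIV_ROUND_UP(6, 8) = 1
	 * i.e. just enough all-zero bytes to hold SDA low for TWLO_USEC.
	 */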
+
+static int device_sanity_check(struct i2c_client *client)
+{
+ struct atmel_i2c_cmd *cmd;
+ int ret;
+
+ cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ atmel_i2c_init_read_cmd(cmd);
+
+ ret = atmel_i2c_send_receive(client, cmd);
+ if (ret)
+ goto free_cmd;
+
+ /*
+ * It is vital that the Configuration, Data and OTP zones be locked
+ * prior to release into the field of the system containing the device.
+ * Failure to lock these zones may permit modification of any secret
+ * keys and may lead to other security problems.
+ */
+ if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) {
+ dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n");
+ ret = -ENOTSUPP;
+ }
+
+ /* fall through */
+free_cmd:
+ kfree(cmd);
+ return ret;
+}
+
+int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+ struct atmel_i2c_client_priv *i2c_priv;
+ struct device *dev = &client->dev;
+ int ret;
+ u32 bus_clk_rate;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ dev_err(dev, "I2C_FUNC_I2C not supported\n");
+ return -ENODEV;
+ }
+
+ bus_clk_rate = i2c_acpi_find_bus_speed(&client->adapter->dev);
+ if (!bus_clk_rate) {
+ ret = device_property_read_u32(&client->adapter->dev,
+ "clock-frequency", &bus_clk_rate);
+ if (ret) {
+ dev_err(dev, "failed to read clock-frequency property\n");
+ return ret;
+ }
+ }
+
+ if (bus_clk_rate > 1000000L) {
+ dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n",
+ bus_clk_rate);
+ return -EINVAL;
+ }
+
+ i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL);
+ if (!i2c_priv)
+ return -ENOMEM;
+
+ i2c_priv->client = client;
+ mutex_init(&i2c_priv->lock);
+
+ /*
+ * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate of
+ * 1 MHz. The previous bus_clk_rate check ensures that wake_token_sz
+ * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE.
+ */
+ i2c_priv->wake_token_sz = atmel_i2c_wake_token_sz(bus_clk_rate);
+
+ memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token));
+
+ atomic_set(&i2c_priv->tfm_count, 0);
+
+ i2c_set_clientdata(client, i2c_priv);
+
+ ret = device_sanity_check(client);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL(atmel_i2c_probe);
+
+MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
new file mode 100644
index 000000000000..21860b99c3e3
--- /dev/null
+++ b/drivers/crypto/atmel-i2c.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ */
+
+#ifndef __ATMEL_I2C_H__
+#define __ATMEL_I2C_H__
+
+#include <linux/hw_random.h>
+#include <linux/types.h>
+
+#define ATMEL_ECC_PRIORITY 300
+
+#define COMMAND 0x03 /* packet function */
+#define SLEEP_TOKEN 0x01
+#define WAKE_TOKEN_MAX_SIZE 8
+
+/* Definitions of Data and Command sizes */
+#define WORD_ADDR_SIZE 1
+#define COUNT_SIZE 1
+#define CRC_SIZE 2
+#define CMD_OVERHEAD_SIZE (COUNT_SIZE + CRC_SIZE)
+
+/* size in bytes of the n prime */
+#define ATMEL_ECC_NIST_P256_N_SIZE 32
+#define ATMEL_ECC_PUBKEY_SIZE (2 * ATMEL_ECC_NIST_P256_N_SIZE)
+
+#define STATUS_RSP_SIZE 4
+#define ECDH_RSP_SIZE (32 + CMD_OVERHEAD_SIZE)
+#define GENKEY_RSP_SIZE (ATMEL_ECC_PUBKEY_SIZE + \
+ CMD_OVERHEAD_SIZE)
+#define READ_RSP_SIZE (4 + CMD_OVERHEAD_SIZE)
+#define RANDOM_RSP_SIZE (32 + CMD_OVERHEAD_SIZE)
+#define MAX_RSP_SIZE GENKEY_RSP_SIZE
+
+/**
+ * atmel_i2c_cmd - structure used for communicating with the device.
+ * @word_addr: indicates the function of the packet sent to the device. This
+ * byte should have a value of COMMAND for normal operation.
+ * @count : number of bytes to be transferred to (or from) the device.
+ * @opcode : the command code.
+ * @param1 : the first parameter; always present.
+ * @param2 : the second parameter; always present.
+ * @data : optional remaining input data. Includes a 2-byte CRC.
+ * @rxsize : size of the data received from i2c client.
+ * @msecs : command execution time in milliseconds
+ */
+struct atmel_i2c_cmd {
+ u8 word_addr;
+ u8 count;
+ u8 opcode;
+ u8 param1;
+ __le16 param2;
+ u8 data[MAX_RSP_SIZE];
+ u8 msecs;
+ u16 rxsize;
+} __packed;
+
+/* Status/Error codes */
+#define STATUS_SIZE 0x04
+#define STATUS_NOERR 0x00
+#define STATUS_WAKE_SUCCESSFUL 0x11
+
+static const struct {
+ u8 value;
+ const char *error_text;
+} error_list[] = {
+ { 0x01, "CheckMac or Verify miscompare" },
+ { 0x03, "Parse Error" },
+ { 0x05, "ECC Fault" },
+ { 0x0F, "Execution Error" },
+ { 0xEE, "Watchdog about to expire" },
+ { 0xFF, "CRC or other communication error" },
+};
+
+/* Definitions for eeprom organization */
+#define CONFIG_ZONE 0
+
+/* Definitions for Indexes common to all commands */
+#define RSP_DATA_IDX 1 /* buffer index of data in response */
+#define DATA_SLOT_2 2 /* used for ECDH private key */
+
+/* Definitions for the device lock state */
+#define DEVICE_LOCK_ADDR 0x15
+#define LOCK_VALUE_IDX (RSP_DATA_IDX + 2)
+#define LOCK_CONFIG_IDX (RSP_DATA_IDX + 3)
+
+/*
+ * Wake High delay to data communication (microseconds). SDA should be stable
+ * high for this entire duration.
+ */
+#define TWHI_MIN 1500
+#define TWHI_MAX 1550
+
+/* Wake Low duration */
+#define TWLO_USEC 60
+
+/* Command execution time (milliseconds) */
+#define MAX_EXEC_TIME_ECDH 58
+#define MAX_EXEC_TIME_GENKEY 115
+#define MAX_EXEC_TIME_READ 1
+#define MAX_EXEC_TIME_RANDOM 50
+
+/* Command opcode */
+#define OPCODE_ECDH 0x43
+#define OPCODE_GENKEY 0x40
+#define OPCODE_READ 0x02
+#define OPCODE_RANDOM 0x1b
+
+/* Definitions for the READ Command */
+#define READ_COUNT 7
+
+/* Definitions for the RANDOM Command */
+#define RANDOM_COUNT 7
+
+/* Definitions for the GenKey Command */
+#define GENKEY_COUNT 7
+#define GENKEY_MODE_PRIVATE 0x04
+
+/* Definitions for the ECDH Command */
+#define ECDH_COUNT 71
+#define ECDH_PREFIX_MODE 0x00
+
+/* Used for binding tfm objects to i2c clients. */
+struct atmel_ecc_driver_data {
+ struct list_head i2c_client_list;
+ spinlock_t i2c_list_lock;
+} ____cacheline_aligned;
+
+/**
+ * atmel_i2c_client_priv - i2c_client private data
+ * @client : pointer to i2c client device
+ * @i2c_client_list_node: part of i2c_client_list
+ * @lock : lock for sending i2c commands
+ * @wake_token : wake token array of zeros
+ * @wake_token_sz : size in bytes of the wake_token
+ * @tfm_count : number of active crypto transformations on i2c client
+ *
+ * Reads and writes from/to the i2c client are sequential. The first byte
+ * transmitted to the device is treated as the byte size. Any attempt to send
+ * more than this number of bytes will cause the device to not ACK those bytes.
+ * After the host writes a single command byte to the input buffer, reads are
+ * prohibited until after the device completes command execution. Use a mutex
+ * when sending i2c commands.
+ */
+struct atmel_i2c_client_priv {
+ struct i2c_client *client;
+ struct list_head i2c_client_list_node;
+ struct mutex lock;
+ u8 wake_token[WAKE_TOKEN_MAX_SIZE];
+ size_t wake_token_sz;
+ atomic_t tfm_count ____cacheline_aligned;
+ struct hwrng hwrng;
+};
+
+/**
+ * atmel_i2c_work_data - data structure representing the work
+ * @ctx : transformation context.
+ * @client : i2c client device to communicate with.
+ * @cbk : pointer to a callback function to be invoked upon completion of this
+ * request. This has the form:
+ * callback(struct atmel_i2c_work_data *work_data, void *areq, int status)
+ * where:
+ * @work_data: data structure representing the work
+ * @areq : optional pointer to an argument passed with the original
+ * request.
+ * @status : status returned from the i2c client device or i2c error.
+ * @areq: optional pointer to a user argument for use at callback time.
+ * @work: describes the task to be executed.
+ * @cmd : structure used for communicating with the device.
+ */
+struct atmel_i2c_work_data {
+ void *ctx;
+ struct i2c_client *client;
+ void (*cbk)(struct atmel_i2c_work_data *work_data, void *areq,
+ int status);
+ void *areq;
+ struct work_struct work;
+ struct atmel_i2c_cmd cmd;
+};
+
+int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id);
+
+void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data,
+ void (*cbk)(struct atmel_i2c_work_data *work_data,
+ void *areq, int status),
+ void *areq);
+
+int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd);
+
+void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd);
+void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd);
+void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid);
+int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
+ struct scatterlist *pubkey);
+
+#endif /* __ATMEL_I2C_H__ */
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
new file mode 100644
index 000000000000..ea0d2068ea4f
--- /dev/null
+++ b/drivers/crypto/atmel-sha204a.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip / Atmel SHA204A (I2C) driver.
+ *
+ * Copyright (c) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include "atmel-i2c.h"
+
+static void atmel_sha204a_rng_done(struct atmel_i2c_work_data *work_data,
+ void *areq, int status)
+{
+ struct atmel_i2c_client_priv *i2c_priv = work_data->ctx;
+ struct hwrng *rng = areq;
+
+ if (status)
+ dev_warn_ratelimited(&i2c_priv->client->dev,
+ "i2c transaction failed (%d)\n",
+ status);
+
+ rng->priv = (unsigned long)work_data;
+ atomic_dec(&i2c_priv->tfm_count);
+}
+
+static int atmel_sha204a_rng_read_nonblocking(struct hwrng *rng, void *data,
+ size_t max)
+{
+ struct atmel_i2c_client_priv *i2c_priv;
+ struct atmel_i2c_work_data *work_data;
+
+ i2c_priv = container_of(rng, struct atmel_i2c_client_priv, hwrng);
+
+ /* keep at most one asynchronous read in flight at any time */
+ if (!atomic_add_unless(&i2c_priv->tfm_count, 1, 1))
+ return 0;
+
+ if (rng->priv) {
+ work_data = (struct atmel_i2c_work_data *)rng->priv;
+ max = min(sizeof(work_data->cmd.data), max);
+ memcpy(data, &work_data->cmd.data, max);
+ rng->priv = 0;
+ } else {
+ work_data = kmalloc(sizeof(*work_data), GFP_ATOMIC);
+ if (!work_data)
+ return -ENOMEM;
+
+ work_data->ctx = i2c_priv;
+ work_data->client = i2c_priv->client;
+
+ max = 0;
+ }
+
+ atmel_i2c_init_random_cmd(&work_data->cmd);
+ atmel_i2c_enqueue(work_data, atmel_sha204a_rng_done, rng);
+
+ return max;
+}
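
The interplay between tfm_count and rng->priv above forms a one-deep pipeline. A sketch of the intended sequence (my reading of the code, not wording from the patch):

	/*
	 * read #1: tfm_count 0 -> 1; rng->priv == 0, so allocate work_data,
	 *          queue a RANDOM command and return 0 bytes for now.
	 * done:    atmel_sha204a_rng_done() caches work_data in rng->priv
	 *          and drops tfm_count back to 0.
	 * read #2: tfm_count 0 -> 1; rng->priv != 0, so copy the cached
	 *          random bytes out, then reuse work_data to queue the next
	 *          RANDOM command before returning.
	 */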
+
+static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max,
+ bool wait)
+{
+ struct atmel_i2c_client_priv *i2c_priv;
+ struct atmel_i2c_cmd cmd;
+ int ret;
+
+ if (!wait)
+ return atmel_sha204a_rng_read_nonblocking(rng, data, max);
+
+ i2c_priv = container_of(rng, struct atmel_i2c_client_priv, hwrng);
+
+ atmel_i2c_init_random_cmd(&cmd);
+
+ ret = atmel_i2c_send_receive(i2c_priv->client, &cmd);
+ if (ret)
+ return ret;
+
+ max = min(sizeof(cmd.data), max);
+ memcpy(data, cmd.data, max);
+
+ return max;
+}
+
+static int atmel_sha204a_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct atmel_i2c_client_priv *i2c_priv;
+ int ret;
+
+ ret = atmel_i2c_probe(client, id);
+ if (ret)
+ return ret;
+
+ i2c_priv = i2c_get_clientdata(client);
+
+ memset(&i2c_priv->hwrng, 0, sizeof(i2c_priv->hwrng));
+
+ i2c_priv->hwrng.name = dev_name(&client->dev);
+ i2c_priv->hwrng.read = atmel_sha204a_rng_read;
+ /* bits of entropy per 1024 bits read; 1024 means full entropy */
+ i2c_priv->hwrng.quality = 1024;
+
+ ret = hwrng_register(&i2c_priv->hwrng);
+ if (ret)
+ dev_warn(&client->dev, "failed to register RNG (%d)\n", ret);
+
+ return ret;
+}
+
+static int atmel_sha204a_remove(struct i2c_client *client)
+{
+ struct atmel_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+ if (atomic_read(&i2c_priv->tfm_count)) {
+ dev_err(&client->dev, "Device is busy\n");
+ return -EBUSY;
+ }
+
+ if (i2c_priv->hwrng.priv)
+ kfree((void *)i2c_priv->hwrng.priv);
+ hwrng_unregister(&i2c_priv->hwrng);
+
+ return 0;
+}
+
+static const struct of_device_id atmel_sha204a_dt_ids[] = {
+ { .compatible = "atmel,atsha204a", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, atmel_sha204a_dt_ids);
+
+static const struct i2c_device_id atmel_sha204a_id[] = {
+ { "atsha204a", 0 },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, atmel_sha204a_id);
+
+static struct i2c_driver atmel_sha204a_driver = {
+ .probe = atmel_sha204a_probe,
+ .remove = atmel_sha204a_remove,
+ .id_table = atmel_sha204a_id,
+
+ .driver.name = "atmel-sha204a",
+ .driver.of_match_table = of_match_ptr(atmel_sha204a_dt_ids),
+};
+
+static int __init atmel_sha204a_init(void)
+{
+ return i2c_add_driver(&atmel_sha204a_driver);
+}
+
+static void __exit atmel_sha204a_exit(void)
+{
+ flush_scheduled_work();
+ i2c_del_driver(&atmel_sha204a_driver);
+}
+
+module_init(atmel_sha204a_init);
+module_exit(atmel_sha204a_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 18410c9e7b29..869602fcfd96 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -85,7 +85,7 @@ MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos");
* 0x70 - ring 2
* 0x78 - ring 3
*/
-char BCMHEADER[] = { 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28 };
+static char BCMHEADER[] = { 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28 };
/*
* Some SPU hw does not use BCM header on SPU messages. So BCM_HDR_LEN
* is set dynamically after reading SPU type from device tree.
@@ -2083,7 +2083,7 @@ static int __ahash_init(struct ahash_request *req)
* Return: true if incremental hashing is not supported
* false otherwise
*/
-bool spu_no_incr_hash(struct iproc_ctx_s *ctx)
+static bool spu_no_incr_hash(struct iproc_ctx_s *ctx)
{
struct spu_hw *spu = &iproc_priv.spu;
@@ -4809,7 +4809,7 @@ static int spu_dt_read(struct platform_device *pdev)
return 0;
}
-int bcm_spu_probe(struct platform_device *pdev)
+static int bcm_spu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct spu_hw *spu = &iproc_priv.spu;
@@ -4853,7 +4853,7 @@ failure:
return err;
}
-int bcm_spu_remove(struct platform_device *pdev)
+static int bcm_spu_remove(struct platform_device *pdev)
{
int i;
struct device *dev = &pdev->dev;
diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c
index cb477259a2e2..2add51024575 100644
--- a/drivers/crypto/bcm/spu2.c
+++ b/drivers/crypto/bcm/spu2.c
@@ -38,21 +38,21 @@ enum spu2_proto_sel {
SPU2_DTLS_AEAD = 10
};
-char *spu2_cipher_type_names[] = { "None", "AES128", "AES192", "AES256",
+static char *spu2_cipher_type_names[] = { "None", "AES128", "AES192", "AES256",
"DES", "3DES"
};
-char *spu2_cipher_mode_names[] = { "ECB", "CBC", "CTR", "CFB", "OFB", "XTS",
- "CCM", "GCM"
+static char *spu2_cipher_mode_names[] = { "ECB", "CBC", "CTR", "CFB", "OFB",
+ "XTS", "CCM", "GCM"
};
-char *spu2_hash_type_names[] = { "None", "AES128", "AES192", "AES256",
+static char *spu2_hash_type_names[] = { "None", "AES128", "AES192", "AES256",
"Reserved", "Reserved", "MD5", "SHA1", "SHA224", "SHA256", "SHA384",
"SHA512", "SHA512/224", "SHA512/256", "SHA3-224", "SHA3-256",
"SHA3-384", "SHA3-512"
};
-char *spu2_hash_mode_names[] = { "CMAC", "CBC-MAC", "XCBC-MAC", "HMAC",
+static char *spu2_hash_mode_names[] = { "CMAC", "CBC-MAC", "XCBC-MAC", "HMAC",
"Rabin", "CCM", "GCM", "Reserved"
};
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 577c9844b322..3720ddabb507 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -2,6 +2,12 @@
config CRYPTO_DEV_FSL_CAAM_COMMON
tristate
+config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
+ tristate
+
+config CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
+ tristate
+
config CRYPTO_DEV_FSL_CAAM
tristate "Freescale CAAM-Multicore platform driver backend"
depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
@@ -25,7 +31,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG
Selecting this will enable printing of various debug
information in the CAAM driver.
-config CRYPTO_DEV_FSL_CAAM_JR
+menuconfig CRYPTO_DEV_FSL_CAAM_JR
tristate "Freescale CAAM Job Ring driver backend"
default y
help
@@ -86,8 +92,9 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
threshold. Range is 1-65535.
config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
- tristate "Register algorithm implementations with the Crypto API"
+ bool "Register algorithm implementations with the Crypto API"
default y
+ select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
select CRYPTO_AEAD
select CRYPTO_AUTHENC
select CRYPTO_BLKCIPHER
@@ -97,13 +104,11 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
scatterlist crypto API (such as the linux native IPSec
stack) to the SEC4 via job ring.
- To compile this as a module, choose M here: the module
- will be called caamalg.
-
config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI
- tristate "Queue Interface as Crypto API backend"
+ bool "Queue Interface as Crypto API backend"
depends on FSL_DPAA && NET
default y
+ select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
select CRYPTO_AUTHENC
select CRYPTO_BLKCIPHER
help
@@ -114,33 +119,26 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI
assigned to the kernel should also be more than the number of
job rings.
- To compile this as a module, choose M here: the module
- will be called caamalg_qi.
-
config CRYPTO_DEV_FSL_CAAM_AHASH_API
- tristate "Register hash algorithm implementations with Crypto API"
+ bool "Register hash algorithm implementations with Crypto API"
default y
+ select CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
select CRYPTO_HASH
help
Selecting this will offload ahash for users of the
scatterlist crypto API to the SEC4 via job ring.
- To compile this as a module, choose M here: the module
- will be called caamhash.
-
config CRYPTO_DEV_FSL_CAAM_PKC_API
- tristate "Register public key cryptography implementations with Crypto API"
+ bool "Register public key cryptography implementations with Crypto API"
default y
select CRYPTO_RSA
help
Selecting this will allow SEC Public key support for RSA.
Supported cryptographic primitives: encryption, decryption,
signature and verification.
- To compile this as a module, choose M here: the module
- will be called caam_pkc.
config CRYPTO_DEV_FSL_CAAM_RNG_API
- tristate "Register caam device for hwrng API"
+ bool "Register caam device for hwrng API"
default y
select CRYPTO_RNG
select HW_RANDOM
@@ -148,9 +146,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
Selecting this will register the SEC4 hardware rng to
 the hw_random API for supplying the kernel entropy pool.
- To compile this as a module, choose M here: the module
- will be called caamrng.
-
endif # CRYPTO_DEV_FSL_CAAM_JR
endif # CRYPTO_DEV_FSL_CAAM
@@ -160,6 +155,8 @@ config CRYPTO_DEV_FSL_DPAA2_CAAM
depends on FSL_MC_DPIO
depends on NETDEVICES
select CRYPTO_DEV_FSL_CAAM_COMMON
+ select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
+ select CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
select CRYPTO_BLKCIPHER
select CRYPTO_AUTHENC
select CRYPTO_AEAD
@@ -171,12 +168,3 @@ config CRYPTO_DEV_FSL_DPAA2_CAAM
To compile this as a module, choose M here: the module
will be called dpaa2_caam.
-
-config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
- def_tristate (CRYPTO_DEV_FSL_CAAM_CRYPTO_API || \
- CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI || \
- CRYPTO_DEV_FSL_DPAA2_CAAM)
-
-config CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC
- def_tristate (CRYPTO_DEV_FSL_CAAM_AHASH_API || \
- CRYPTO_DEV_FSL_DPAA2_CAAM)
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index 7bbfd06a11ff..9ab4e81ea21e 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -11,20 +11,20 @@ ccflags-y += -DVERSION=\"\"
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_COMMON) += error.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API_DESC) += caamhash_desc.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
-obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o
-caam-objs := ctrl.o
-caam_jr-objs := jr.o key_gen.o
-caam_pkc-y := caampkc.o pkc_desc.o
+caam-y := ctrl.o
+caam_jr-y := jr.o key_gen.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += caamalg_qi.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
+caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o
+
+caam-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += qi.o
ifneq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI),)
ccflags-y += -DCONFIG_CAAM_QI
- caam-objs += qi.o
endif
obj-$(CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM) += dpaa2_caam.o
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index c0ece44f303b..43f18253e5b6 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -77,13 +77,6 @@
#define DESC_MAX_USED_BYTES (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN)
#define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ)
-#ifdef DEBUG
-/* for print_hex_dumps with line references */
-#define debug(format, arg...) printk(format, arg)
-#else
-#define debug(format, arg...)
-#endif
-
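
This hunk sets the pattern for the rest of the file: open-coded #ifdef DEBUG dumps give way to the debug helpers from <linux/printk.h>. For reference, a sketch of what the replacements expand to (paraphrased, not part of the patch):

	/*
	 * CONFIG_DYNAMIC_DEBUG: print_hex_dump_debug() -> dynamic_hex_dump(),
	 *                       togglable at runtime via dynamic debug
	 * #define DEBUG:        print_hex_dump(KERN_DEBUG, ...)
	 * otherwise:            a static inline no-op
	 * dev_dbg() follows the same three-way scheme.
	 */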
struct caam_alg_entry {
int class1_alg_type;
int class2_alg_type;
@@ -583,13 +576,11 @@ static int aead_setkey(struct crypto_aead *aead,
if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
goto badkey;
-#ifdef DEBUG
- printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n",
+ dev_dbg(jrdev, "keylen %d enckeylen %d authkeylen %d\n",
keys.authkeylen + keys.enckeylen, keys.enckeylen,
keys.authkeylen);
- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
/*
* If DKP is supported, use it in the shared descriptor to generate
@@ -623,11 +614,10 @@ static int aead_setkey(struct crypto_aead *aead,
memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad +
keys.enckeylen, ctx->dir);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
- ctx->adata.keylen_pad + keys.enckeylen, 1);
-#endif
+
+ print_hex_dump_debug("ctx.key@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+ ctx->adata.keylen_pad + keys.enckeylen, 1);
skip_split_key:
ctx->cdata.keylen = keys.enckeylen;
@@ -678,10 +668,8 @@ static int gcm_setkey(struct crypto_aead *aead,
struct caam_ctx *ctx = crypto_aead_ctx(aead);
struct device *jrdev = ctx->jrdev;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
@@ -699,10 +687,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
if (keylen < 4)
return -EINVAL;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
@@ -725,10 +711,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
if (keylen < 4)
return -EINVAL;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
@@ -757,10 +741,8 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
OP_ALG_AAI_CTR_MOD128);
const bool is_rfc3686 = alg->caam.rfc3686;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
/*
* AES-CTR needs to load IV in CONTEXT1 reg
* at an offset of 128bits (16bytes)
@@ -916,7 +898,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
}
if (iv_dma)
- dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
+ dma_unmap_single(dev, iv_dma, ivsize, DMA_BIDIRECTIONAL);
if (sec4_sg_bytes)
dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
DMA_TO_DEVICE);
@@ -949,9 +931,7 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
struct aead_request *req = context;
struct aead_edesc *edesc;
-#ifdef DEBUG
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
@@ -971,9 +951,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
struct aead_request *req = context;
struct aead_edesc *edesc;
-#ifdef DEBUG
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
@@ -1001,33 +979,32 @@ static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
int ivsize = crypto_skcipher_ivsize(skcipher);
-#ifdef DEBUG
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
- edesc->src_nents > 1 ? 100 : ivsize, 1);
-#endif
- caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
- edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
-
skcipher_unmap(jrdev, edesc, req);
/*
* The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block. This is used e.g. by the CTS mode.
+ * ciphertext block (CBC mode) or last counter (CTR mode).
+ * This is used e.g. by the CTS mode.
*/
- if (ivsize)
- scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen -
- ivsize, ivsize, 0);
+ if (ivsize) {
+ memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
+ ivsize);
+
+ print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
+ edesc->src_nents > 1 ? 100 : ivsize, 1);
+ }
+
+ caam_dump_sg("dst @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+ edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
kfree(edesc);
@@ -1039,26 +1016,35 @@ static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
{
struct skcipher_request *req = context;
struct skcipher_edesc *edesc;
-#ifdef DEBUG
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
int ivsize = crypto_skcipher_ivsize(skcipher);
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "dstiv @"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1);
-#endif
- caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ",
+ skcipher_unmap(jrdev, edesc, req);
+
+ /*
+ * The crypto API expects us to set the IV (req->iv) to the last
+ * ciphertext block (CBC mode) or last counter (CTR mode).
+ * This is used e.g. by the CTS mode.
+ */
+ if (ivsize) {
+ memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
+ ivsize);
+
+ print_hex_dump_debug("dstiv @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
+ ivsize, 1);
+ }
+
+ caam_dump_sg("dst @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
- skcipher_unmap(jrdev, edesc, req);
kfree(edesc);
skcipher_request_complete(req, err);
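
The memcpy above relies on the edesc layout set up in skcipher_edesc_alloc() later in this patch (IV placed right after the S/G table and mapped bidirectionally). A sketch of that layout:

	/*
	 * skcipher_edesc, one allocation:
	 *	hw_desc[]   job descriptor
	 *	sec4_sg[]   HW S/G tables: [IV, src] for input, [dst, IV] for
	 *	            output (entries reused when src == dst)
	 *	iv          at (u8 *)edesc->sec4_sg + sec4_sg_bytes, mapped
	 *	            DMA_BIDIRECTIONAL so the CAAM writes the next IV
	 *	            (last ciphertext block or counter) back into it
	 */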
@@ -1106,6 +1092,7 @@ static void init_aead_job(struct aead_request *req,
if (unlikely(req->src != req->dst)) {
if (!edesc->mapped_dst_nents) {
dst_dma = 0;
+ out_options = 0;
} else if (edesc->mapped_dst_nents == 1) {
dst_dma = sg_dma_address(req->dst);
out_options = 0;
@@ -1249,6 +1236,7 @@ static void init_skcipher_job(struct skcipher_request *req,
{
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+ struct device *jrdev = ctx->jrdev;
int ivsize = crypto_skcipher_ivsize(skcipher);
u32 *desc = edesc->hw_desc;
u32 *sh_desc;
@@ -1256,13 +1244,12 @@ static void init_skcipher_job(struct skcipher_request *req,
dma_addr_t src_dma, dst_dma, ptr;
int len, sec4_sg_index = 0;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1);
- pr_err("asked=%d, cryptlen%d\n",
+ print_hex_dump_debug("presciv@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->iv, ivsize, 1);
+ dev_dbg(jrdev, "asked=%d, cryptlen%d\n",
(int)edesc->src_nents > 1 ? 100 : req->cryptlen, req->cryptlen);
-#endif
- caam_dump_sg(KERN_ERR, "src @" __stringify(__LINE__)": ",
+
+ caam_dump_sg("src @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->src,
edesc->src_nents > 1 ? 100 : req->cryptlen, 1);
@@ -1285,7 +1272,7 @@ static void init_skcipher_job(struct skcipher_request *req,
if (likely(req->src == req->dst)) {
dst_dma = src_dma + !!ivsize * sizeof(struct sec4_sg_entry);
out_options = in_options;
- } else if (edesc->mapped_dst_nents == 1) {
+ } else if (!ivsize && edesc->mapped_dst_nents == 1) {
dst_dma = sg_dma_address(req->dst);
} else {
dst_dma = edesc->sec4_sg_dma + sec4_sg_index *
@@ -1293,7 +1280,7 @@ static void init_skcipher_job(struct skcipher_request *req,
out_options = LDST_SGF;
}
- append_seq_out_ptr(desc, dst_dma, req->cryptlen, out_options);
+ append_seq_out_ptr(desc, dst_dma, req->cryptlen + ivsize, out_options);
}
/*
@@ -1309,37 +1296,36 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
+ int src_len, dst_len = 0;
struct aead_edesc *edesc;
int sec4_sg_index, sec4_sg_len, sec4_sg_bytes;
unsigned int authsize = ctx->authsize;
if (unlikely(req->dst != req->src)) {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen);
+ src_len = req->assoclen + req->cryptlen;
+ dst_len = src_len + (encrypt ? authsize : (-authsize));
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen);
+ src_len);
return ERR_PTR(src_nents);
}
- dst_nents = sg_nents_for_len(req->dst, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize :
- (-authsize)));
+ dst_nents = sg_nents_for_len(req->dst, dst_len);
if (unlikely(dst_nents < 0)) {
dev_err(jrdev, "Insufficient bytes (%d) in dst S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : (-authsize)));
+ dst_len);
return ERR_PTR(dst_nents);
}
} else {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len = req->assoclen + req->cryptlen +
+ (encrypt ? authsize : 0);
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(jrdev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len);
return ERR_PTR(src_nents);
}
}
@@ -1380,8 +1366,16 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
}
}
+ /*
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries.
+ */
sec4_sg_len = mapped_src_nents > 1 ? mapped_src_nents : 0;
- sec4_sg_len += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+ if (mapped_dst_nents > 1)
+ sec4_sg_len += pad_sg_nents(mapped_dst_nents);
+ else
+ sec4_sg_len = pad_sg_nents(sec4_sg_len);
+
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
@@ -1403,12 +1397,12 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
sec4_sg_index = 0;
if (mapped_src_nents > 1) {
- sg_to_sec4_sg_last(req->src, mapped_src_nents,
+ sg_to_sec4_sg_last(req->src, src_len,
edesc->sec4_sg + sec4_sg_index, 0);
sec4_sg_index += mapped_src_nents;
}
if (mapped_dst_nents > 1) {
- sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
+ sg_to_sec4_sg_last(req->dst, dst_len,
edesc->sec4_sg + sec4_sg_index, 0);
}
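
Assuming pad_sg_nents() rounds up to a multiple of 4 (the burst size named in the comment; the helper itself lies outside this hunk), a quick example of the effect:

	/*
	 * mapped_src_nents = 3, mapped_dst_nents = 3, src != dst:
	 *	before: sec4_sg_len = 3 + 3 = 6 entries
	 *	after:  sec4_sg_len = 3 + pad_sg_nents(3) = 3 + 4 = 7 entries
	 * so a 4-entry HW burst starting at the last dst entry stays inside
	 * the allocated table.
	 */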
@@ -1446,11 +1440,10 @@ static int gcm_encrypt(struct aead_request *req)
/* Create and submit job descriptor */
init_gcm_job(req, edesc, all_contig, true);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
desc = edesc->hw_desc;
ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
@@ -1556,11 +1549,10 @@ static int aead_encrypt(struct aead_request *req)
/* Create and submit job descriptor */
init_authenc_job(req, edesc, all_contig, true);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
desc = edesc->hw_desc;
ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
@@ -1591,11 +1583,10 @@ static int gcm_decrypt(struct aead_request *req)
/* Create and submit job descriptor*/
init_gcm_job(req, edesc, all_contig, false);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
desc = edesc->hw_desc;
ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
@@ -1627,7 +1618,7 @@ static int aead_decrypt(struct aead_request *req)
u32 *desc;
int ret = 0;
- caam_dump_sg(KERN_ERR, "dec src@" __stringify(__LINE__)": ",
+ caam_dump_sg("dec src@" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->src,
req->assoclen + req->cryptlen, 1);
@@ -1639,11 +1630,10 @@ static int aead_decrypt(struct aead_request *req)
/* Create and submit job descriptor*/
init_authenc_job(req, edesc, all_contig, false);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
desc = edesc->hw_desc;
ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
@@ -1719,7 +1709,29 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
else
sec4_sg_ents = mapped_src_nents + !!ivsize;
dst_sg_idx = sec4_sg_ents;
- sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+
+ /*
+ * Input, output HW S/G tables: [IV, src][dst, IV]
+ * IV entries point to the same buffer
+ * If src == dst, S/G entries are reused (S/G tables overlap)
+ *
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries. Logic:
+ * if (output S/G)
+ * pad output S/G, if needed
+ * else if (input S/G) ...
+ * pad input S/G, if needed
+ */
+ if (ivsize || mapped_dst_nents > 1) {
+ if (req->src == req->dst)
+ sec4_sg_ents = !!ivsize + pad_sg_nents(sec4_sg_ents);
+ else
+ sec4_sg_ents += pad_sg_nents(mapped_dst_nents +
+ !!ivsize);
+ } else {
+ sec4_sg_ents = pad_sg_nents(sec4_sg_ents);
+ }
+
sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
/*
@@ -1744,10 +1756,10 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
/* Make sure IV is located in a DMAable area */
if (ivsize) {
- iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
+ iv = (u8 *)edesc->sec4_sg + sec4_sg_bytes;
memcpy(iv, req->iv, ivsize);
- iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_TO_DEVICE);
+ iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
caam_unmap(jrdev, req->src, req->dst, src_nents,
@@ -1759,13 +1771,20 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
}
if (dst_sg_idx)
- sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg +
- !!ivsize, 0);
+ sg_to_sec4_sg(req->src, req->cryptlen, edesc->sec4_sg +
+ !!ivsize, 0);
- if (mapped_dst_nents > 1) {
- sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
- edesc->sec4_sg + dst_sg_idx, 0);
- }
+ if (req->src != req->dst && (ivsize || mapped_dst_nents > 1))
+ sg_to_sec4_sg(req->dst, req->cryptlen, edesc->sec4_sg +
+ dst_sg_idx, 0);
+
+ if (ivsize)
+ dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx +
+ mapped_dst_nents, iv_dma, ivsize, 0);
+
+ if (ivsize || mapped_dst_nents > 1)
+ sg_to_sec4_set_last(edesc->sec4_sg + dst_sg_idx +
+ mapped_dst_nents);
if (sec4_sg_bytes) {
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
@@ -1782,11 +1801,9 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
edesc->iv_dma = iv_dma;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "skcipher sec4_sg@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
- sec4_sg_bytes, 1);
-#endif
+ print_hex_dump_debug("skcipher sec4_sg@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
+ sec4_sg_bytes, 1);
return edesc;
}
@@ -1807,11 +1824,11 @@ static int skcipher_encrypt(struct skcipher_request *req)
/* Create and submit job descriptor*/
init_skcipher_job(req, edesc, true);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "skcipher jobdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
+
desc = edesc->hw_desc;
ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req);
@@ -1830,7 +1847,6 @@ static int skcipher_decrypt(struct skcipher_request *req)
struct skcipher_edesc *edesc;
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
- int ivsize = crypto_skcipher_ivsize(skcipher);
struct device *jrdev = ctx->jrdev;
u32 *desc;
int ret = 0;
@@ -1840,22 +1856,13 @@ static int skcipher_decrypt(struct skcipher_request *req)
if (IS_ERR(edesc))
return PTR_ERR(edesc);
- /*
- * The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block.
- */
- if (ivsize)
- scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen -
- ivsize, ivsize, 0);
-
/* Create and submit job descriptor*/
init_skcipher_job(req, edesc, false);
desc = edesc->hw_desc;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "skcipher jobdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
- desc_bytes(edesc->hw_desc), 1);
-#endif
+
+ print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+ desc_bytes(edesc->hw_desc), 1);
ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req);
if (!ret) {
@@ -3444,7 +3451,7 @@ static void caam_aead_exit(struct crypto_aead *tfm)
caam_exit_common(crypto_aead_ctx(tfm));
}
-static void __exit caam_algapi_exit(void)
+void caam_algapi_exit(void)
{
int i;
@@ -3489,43 +3496,15 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
alg->exit = caam_aead_exit;
}
-static int __init caam_algapi_init(void)
+int caam_algapi_init(struct device *ctrldev)
{
- struct device_node *dev_node;
- struct platform_device *pdev;
- struct caam_drv_private *priv;
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
int i = 0, err = 0;
u32 aes_vid, aes_inst, des_inst, md_vid, md_inst, ccha_inst, ptha_inst;
u32 arc4_inst;
unsigned int md_limit = SHA512_DIGEST_SIZE;
bool registered = false, gcm_support;
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
- if (!dev_node) {
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
- if (!dev_node)
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(dev_node);
- if (!pdev) {
- of_node_put(dev_node);
- return -ENODEV;
- }
-
- priv = dev_get_drvdata(&pdev->dev);
- of_node_put(dev_node);
-
- /*
- * If priv is NULL, it's probably because the caam driver wasn't
- * properly initialized (e.g. RNG4 init failed). Thus, bail out here.
- */
- if (!priv) {
- err = -ENODEV;
- goto out_put_dev;
- }
-
-
/*
* Register crypto algorithms the device supports.
* First, detect presence and attributes of DES, AES, and MD blocks.
@@ -3668,14 +3647,5 @@ static int __init caam_algapi_init(void)
if (registered)
pr_info("caam algorithms registered in /proc/crypto\n");
-out_put_dev:
- put_device(&pdev->dev);
return err;
}
-
-module_init(caam_algapi_init);
-module_exit(caam_algapi_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FSL CAAM support for crypto API");
-MODULE_AUTHOR("Freescale Semiconductor - NMG/STC");
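
The hunks above replace caamalg.c's compile-time "#ifdef DEBUG" dumps with
print_hex_dump_debug(), which is always compiled in and gated at run time by
dynamic debug, and drop the module boilerplate now that registration happens
from the controller driver. A minimal userspace sketch of the compile-time
vs. run-time gating trade-off (a plain flag stands in for the dynamic-debug
machinery; nothing below is kernel code):

#include <stdio.h>
#include <stdbool.h>

static bool debug_enabled;	/* stands in for the dynamic-debug switch */

static void print_hex_dump_dbg(const char *prefix, const void *buf, int len)
{
	const unsigned char *p = buf;
	int i;

	if (!debug_enabled)	/* run-time gate: no rebuild needed */
		return;

	printf("%s", prefix);
	for (i = 0; i < len; i++)
		printf("%02x ", p[i]);
	printf("\n");
}

int main(void)
{
	unsigned char desc[8] = { 0xb0, 0x80, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00 };

	print_hex_dump_dbg("jobdesc: ", desc, sizeof(desc));	/* silent */
	debug_enabled = true;
	print_hex_dump_dbg("jobdesc: ", desc, sizeof(desc));	/* prints */
	return 0;
}

With "#ifdef DEBUG" the dump calls vanish from the binary entirely; the
converted form keeps them and lets an administrator toggle them per call
site without rebuilding.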
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index 1e1a376edc2f..72531837571e 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -33,12 +33,11 @@ static inline void append_dec_op1(u32 *desc, u32 type)
}
jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD);
- append_operation(desc, type | OP_ALG_AS_INITFINAL |
- OP_ALG_DECRYPT);
+ append_operation(desc, type | OP_ALG_AS_INIT | OP_ALG_DECRYPT);
uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL);
set_jump_tgt_here(desc, jump_cmd);
- append_operation(desc, type | OP_ALG_AS_INITFINAL |
- OP_ALG_DECRYPT | OP_ALG_AAI_DK);
+ append_operation(desc, type | OP_ALG_AS_INIT | OP_ALG_DECRYPT |
+ OP_ALG_AAI_DK);
set_jump_tgt_here(desc, uncond_jump_cmd);
}
@@ -115,11 +114,9 @@ void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "aead null enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("aead null enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap);
@@ -204,11 +201,9 @@ void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 |
FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "aead null dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("aead null dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap);
@@ -358,10 +353,9 @@ void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("aead enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_aead_encap);
@@ -475,10 +469,9 @@ void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 |
FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "aead dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("aead dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_aead_decap);
@@ -613,11 +606,9 @@ copy_iv:
append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "aead givenc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("aead givenc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_aead_givencap);
@@ -742,10 +733,9 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "gcm enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("gcm enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_gcm_encap);
@@ -838,10 +828,9 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "gcm dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("gcm dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_gcm_decap);
@@ -933,11 +922,9 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "rfc4106 enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("rfc4106 enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap);
@@ -1030,11 +1017,9 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "rfc4106 dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("rfc4106 dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap);
@@ -1115,11 +1100,9 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "rfc4543 enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("rfc4543 enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap);
@@ -1205,11 +1188,9 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "rfc4543 dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("rfc4543 dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap);
@@ -1410,17 +1391,21 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata,
LDST_OFFSET_SHIFT));
/* Load operation */
- append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+ append_operation(desc, cdata->algtype | OP_ALG_AS_INIT |
OP_ALG_ENCRYPT);
/* Perform operation */
skcipher_append_src_dst(desc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "skcipher enc shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ /* Store IV */
+ if (ivsize)
+ append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
+ LDST_CLASS_1_CCB | (ctx1_iv_off <<
+ LDST_OFFSET_SHIFT));
+
+ print_hex_dump_debug("skcipher enc shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_skcipher_encap);
@@ -1479,7 +1464,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
/* Choose operation */
if (ctx1_iv_off)
- append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+ append_operation(desc, cdata->algtype | OP_ALG_AS_INIT |
OP_ALG_DECRYPT);
else
append_dec_op1(desc, cdata->algtype);
@@ -1487,11 +1472,15 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata,
/* Perform operation */
skcipher_append_src_dst(desc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "skcipher dec shdesc@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ /* Store IV */
+ if (ivsize)
+ append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
+ LDST_CLASS_1_CCB | (ctx1_iv_off <<
+ LDST_OFFSET_SHIFT));
+
+ print_hex_dump_debug("skcipher dec shdesc@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
}
EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap);
@@ -1538,11 +1527,13 @@ void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata)
/* Perform operation */
skcipher_append_src_dst(desc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "xts skcipher enc shdesc@" __stringify(__LINE__) ": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ /* Store upper 8B of IV */
+ append_seq_store(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+ (0x20 << LDST_OFFSET_SHIFT));
+
+ print_hex_dump_debug("xts skcipher enc shdesc@" __stringify(__LINE__)
+ ": ", DUMP_PREFIX_ADDRESS, 16, 4,
+ desc, desc_bytes(desc), 1);
}
EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap);
@@ -1588,11 +1579,13 @@ void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata)
/* Perform operation */
skcipher_append_src_dst(desc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "xts skcipher dec shdesc@" __stringify(__LINE__) ": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ /* Store upper 8B of IV */
+ append_seq_store(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+ (0x20 << LDST_OFFSET_SHIFT));
+
+ print_hex_dump_debug("xts skcipher dec shdesc@" __stringify(__LINE__)
+ ": ", DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
}
EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_decap);
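
The caamalg_desc.c hunks above append a SEQ STORE of the class 1 context
(the updated IV) to the skcipher shared descriptors, so the completion
paths can hand the chaining value back without copying the last ciphertext
block out of req->dst in software. A toy demonstration of why, for CBC,
the context value after encryption equals the last ciphertext block (a
4-byte XOR "cipher" stands in for AES; this is illustration, not CAAM
code):

#include <stdio.h>
#include <string.h>

#define BS 4	/* toy block size */

static void xor_block(unsigned char *d, const unsigned char *a,
		      const unsigned char *b)
{
	int i;

	for (i = 0; i < BS; i++)
		d[i] = a[i] ^ b[i];
}

int main(void)
{
	unsigned char key[BS] = "KEY!";
	unsigned char iv[BS]  = { 1, 2, 3, 4 };	/* "context register" */
	unsigned char pt[2 * BS] = "ABCDEFGH";
	unsigned char ct[2 * BS];
	int i;

	for (i = 0; i < 2; i++) {
		unsigned char tmp[BS];

		xor_block(tmp, pt + i * BS, iv);	/* CBC chaining */
		xor_block(ct + i * BS, tmp, key);	/* "encrypt" */
		memcpy(iv, ct + i * BS, BS);	/* context holds next IV */
	}

	/* iv == last ciphertext block: what the new SEQ STORE writes back */
	printf("iv matches last ct block: %d\n", !memcmp(iv, ct + BS, BS));
	return 0;
}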
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
index d5ca42ff961a..da4a4ee60c80 100644
--- a/drivers/crypto/caam/caamalg_desc.h
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -44,9 +44,9 @@
#define DESC_SKCIPHER_BASE (3 * CAAM_CMD_SZ)
#define DESC_SKCIPHER_ENC_LEN (DESC_SKCIPHER_BASE + \
- 20 * CAAM_CMD_SZ)
+ 21 * CAAM_CMD_SZ)
#define DESC_SKCIPHER_DEC_LEN (DESC_SKCIPHER_BASE + \
- 15 * CAAM_CMD_SZ)
+ 16 * CAAM_CMD_SZ)
void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
unsigned int icvsize, int era);
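
The one-command bump above covers the SEQ STORE added to the encap and
decap shared descriptors. A quick check of the arithmetic, assuming each
appended store occupies exactly one CAAM_CMD_SZ word (that assumption is
what the +1 encodes):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CAAM_CMD_SZ		sizeof(uint32_t)
#define DESC_SKCIPHER_BASE	(3 * CAAM_CMD_SZ)
#define OLD_ENC_LEN		(DESC_SKCIPHER_BASE + 20 * CAAM_CMD_SZ)
#define NEW_ENC_LEN		(DESC_SKCIPHER_BASE + 21 * CAAM_CMD_SZ)

int main(void)
{
	/* one extra command word for the IV write-back */
	assert(NEW_ENC_LEN - OLD_ENC_LEN == CAAM_CMD_SZ);
	printf("enc shdesc grows by %zu bytes\n",
	       (size_t)(NEW_ENC_LEN - OLD_ENC_LEN));
	return 0;
}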
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index d290d6b41825..32f0f8a72067 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -4,7 +4,7 @@
* Based on caamalg.c
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2018 NXP
+ * Copyright 2016-2019 NXP
*/
#include "compat.h"
@@ -214,13 +214,11 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
goto badkey;
-#ifdef DEBUG
- dev_err(jrdev, "keylen %d enckeylen %d authkeylen %d\n",
+ dev_dbg(jrdev, "keylen %d enckeylen %d authkeylen %d\n",
keys.authkeylen + keys.enckeylen, keys.enckeylen,
keys.authkeylen);
- print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
/*
* If DKP is supported, use it in the shared descriptor to generate
@@ -237,7 +235,7 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
memcpy(ctx->key, keys.authkey, keys.authkeylen);
memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey,
keys.enckeylen);
- dma_sync_single_for_device(jrdev, ctx->key_dma,
+ dma_sync_single_for_device(jrdev->parent, ctx->key_dma,
ctx->adata.keylen_pad +
keys.enckeylen, ctx->dir);
goto skip_split_key;
@@ -251,8 +249,9 @@ static int aead_setkey(struct crypto_aead *aead, const u8 *key,
/* append encryption key to auth split key */
memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
- dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->adata.keylen_pad +
- keys.enckeylen, ctx->dir);
+ dma_sync_single_for_device(jrdev->parent, ctx->key_dma,
+ ctx->adata.keylen_pad + keys.enckeylen,
+ ctx->dir);
#ifdef DEBUG
print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
@@ -386,13 +385,12 @@ static int gcm_setkey(struct crypto_aead *aead,
struct device *jrdev = ctx->jrdev;
int ret;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
- dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
+ dma_sync_single_for_device(jrdev->parent, ctx->key_dma, keylen,
+ ctx->dir);
ctx->cdata.keylen = keylen;
ret = gcm_set_sh_desc(aead);
@@ -485,10 +483,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
if (keylen < 4)
return -EINVAL;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
/*
@@ -496,8 +492,8 @@ static int rfc4106_setkey(struct crypto_aead *aead,
* in the nonce. Update the AES key length.
*/
ctx->cdata.keylen = keylen - 4;
- dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
- ctx->dir);
+ dma_sync_single_for_device(jrdev->parent, ctx->key_dma,
+ ctx->cdata.keylen, ctx->dir);
ret = rfc4106_set_sh_desc(aead);
if (ret)
@@ -589,10 +585,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
if (keylen < 4)
return -EINVAL;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
memcpy(ctx->key, key, keylen);
/*
@@ -600,8 +594,8 @@ static int rfc4543_setkey(struct crypto_aead *aead,
* in the nonce. Update the AES key length.
*/
ctx->cdata.keylen = keylen - 4;
- dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
- ctx->dir);
+ dma_sync_single_for_device(jrdev->parent, ctx->key_dma,
+ ctx->cdata.keylen, ctx->dir);
ret = rfc4543_set_sh_desc(aead);
if (ret)
@@ -644,10 +638,9 @@ static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
const bool is_rfc3686 = alg->caam.rfc3686;
int ret = 0;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
+ print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+
/*
* AES-CTR needs to load IV in CONTEXT1 reg
* at an offset of 128 bits (16 bytes)
@@ -838,7 +831,8 @@ static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx,
static void caam_unmap(struct device *dev, struct scatterlist *src,
struct scatterlist *dst, int src_nents,
int dst_nents, dma_addr_t iv_dma, int ivsize,
- dma_addr_t qm_sg_dma, int qm_sg_bytes)
+ enum dma_data_direction iv_dir, dma_addr_t qm_sg_dma,
+ int qm_sg_bytes)
{
if (dst != src) {
if (src_nents)
@@ -850,7 +844,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
}
if (iv_dma)
- dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
+ dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
if (qm_sg_bytes)
dma_unmap_single(dev, qm_sg_dma, qm_sg_bytes, DMA_TO_DEVICE);
}
@@ -863,7 +857,8 @@ static void aead_unmap(struct device *dev,
int ivsize = crypto_aead_ivsize(aead);
caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
- edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+ edesc->iv_dma, ivsize, DMA_TO_DEVICE, edesc->qm_sg_dma,
+ edesc->qm_sg_bytes);
dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
}
@@ -874,7 +869,8 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
int ivsize = crypto_skcipher_ivsize(skcipher);
caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
- edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+ edesc->iv_dma, ivsize, DMA_BIDIRECTIONAL, edesc->qm_sg_dma,
+ edesc->qm_sg_bytes);
}
static void aead_done(struct caam_drv_req *drv_req, u32 status)
@@ -924,6 +920,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
+ int src_len, dst_len = 0;
struct aead_edesc *edesc;
dma_addr_t qm_sg_dma, iv_dma = 0;
int ivsize = 0;
@@ -945,13 +942,13 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
}
if (likely(req->src == req->dst)) {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len = req->assoclen + req->cryptlen +
+ (encrypt ? authsize : 0);
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(qidev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len);
qi_cache_free(edesc);
return ERR_PTR(src_nents);
}
@@ -964,23 +961,21 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
return ERR_PTR(-ENOMEM);
}
} else {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen);
+ src_len = req->assoclen + req->cryptlen;
+ dst_len = src_len + (encrypt ? authsize : (-authsize));
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(qidev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen);
+ src_len);
qi_cache_free(edesc);
return ERR_PTR(src_nents);
}
- dst_nents = sg_nents_for_len(req->dst, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize :
- (-authsize)));
+ dst_nents = sg_nents_for_len(req->dst, dst_len);
if (unlikely(dst_nents < 0)) {
dev_err(qidev, "Insufficient bytes (%d) in dst S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : (-authsize)));
+ dst_len);
qi_cache_free(edesc);
return ERR_PTR(dst_nents);
}
@@ -1019,9 +1014,24 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
/*
* Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
* Input is not contiguous.
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries. Logic:
+ * if (src != dst && output S/G)
+ * pad output S/G, if needed
+ * else if (src == dst && S/G)
+ * overlapping S/Gs; pad one of them
+ * else if (input S/G) ...
+ * pad input S/G, if needed
*/
- qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
- (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+ qm_sg_ents = 1 + !!ivsize + mapped_src_nents;
+ if (mapped_dst_nents > 1)
+ qm_sg_ents += pad_sg_nents(mapped_dst_nents);
+ else if ((req->src == req->dst) && (mapped_src_nents > 1))
+ qm_sg_ents = max(pad_sg_nents(qm_sg_ents),
+ 1 + !!ivsize + pad_sg_nents(mapped_src_nents));
+ else
+ qm_sg_ents = pad_sg_nents(qm_sg_ents);
+
sg_table = &edesc->sgt[0];
qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
@@ -1029,7 +1039,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1044,7 +1054,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents,
- dst_nents, 0, 0, 0, 0);
+ dst_nents, 0, 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1063,7 +1073,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
dev_err(qidev, "unable to map assoclen\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1074,19 +1084,18 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dma_to_qm_sg_one(sg_table + qm_sg_index, iv_dma, ivsize, 0);
qm_sg_index++;
}
- sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + qm_sg_index, 0);
+ sg_to_qm_sg_last(req->src, src_len, sg_table + qm_sg_index, 0);
qm_sg_index += mapped_src_nents;
if (mapped_dst_nents > 1)
- sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
- qm_sg_index, 0);
+ sg_to_qm_sg_last(req->dst, dst_len, sg_table + qm_sg_index, 0);
qm_sg_dma = dma_map_single(qidev, sg_table, qm_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(qidev, qm_sg_dma)) {
dev_err(qidev, "unable to map S/G table\n");
dma_unmap_single(qidev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1109,7 +1118,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dma_to_qm_sg_one_ext(&fd_sgt[0], qm_sg_dma +
(1 + !!ivsize) * sizeof(*sg_table),
out_len, 0);
- } else if (mapped_dst_nents == 1) {
+ } else if (mapped_dst_nents <= 1) {
dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst), out_len,
0);
} else {
@@ -1182,33 +1191,28 @@ static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
struct device *qidev = caam_ctx->qidev;
int ivsize = crypto_skcipher_ivsize(skcipher);
-#ifdef DEBUG
- dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
-#endif
+ dev_dbg(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
edesc = container_of(drv_req, typeof(*edesc), drv_req);
if (status)
caam_jr_strstatus(qidev, status);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "dstiv @" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
- edesc->src_nents > 1 ? 100 : ivsize, 1);
- caam_dump_sg(KERN_ERR, "dst @" __stringify(__LINE__)": ",
+ print_hex_dump_debug("dstiv @" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
+ edesc->src_nents > 1 ? 100 : ivsize, 1);
+ caam_dump_sg("dst @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
-#endif
skcipher_unmap(qidev, edesc, req);
/*
* The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block. This is used e.g. by the CTS mode.
+ * ciphertext block (CBC mode) or last counter (CTR mode).
+ * This is used e.g. by the CTS mode.
*/
- if (edesc->drv_req.drv_ctx->op_type == ENCRYPT)
- scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen -
- ivsize, ivsize, 0);
+ memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
qi_cache_free(edesc);
skcipher_request_complete(req, status);
@@ -1276,14 +1280,26 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
qm_sg_ents = 1 + mapped_src_nents;
dst_sg_idx = qm_sg_ents;
- qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+ /*
+ * Input, output HW S/G tables: [IV, src][dst, IV]
+ * IV entries point to the same buffer
+ * If src == dst, S/G entries are reused (S/G tables overlap)
+ *
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries.
+ */
+ if (req->src != req->dst)
+ qm_sg_ents += pad_sg_nents(mapped_dst_nents + 1);
+ else
+ qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);
+
qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes +
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -1292,7 +1308,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
if (unlikely(!edesc)) {
dev_err(qidev, "could not allocate extended descriptor\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -1301,11 +1317,11 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
iv = (u8 *)(sg_table + qm_sg_ents);
memcpy(iv, req->iv, ivsize);
- iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
+ iv_dma = dma_map_single(qidev, iv, ivsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(qidev, iv_dma)) {
dev_err(qidev, "unable to map IV\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1319,18 +1335,20 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
edesc->drv_req.drv_ctx = drv_ctx;
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
- sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
+ sg_to_qm_sg(req->src, req->cryptlen, sg_table + 1, 0);
- if (mapped_dst_nents > 1)
- sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
- dst_sg_idx, 0);
+ if (req->src != req->dst)
+ sg_to_qm_sg(req->dst, req->cryptlen, sg_table + dst_sg_idx, 0);
+
+ dma_to_qm_sg_one(sg_table + dst_sg_idx + mapped_dst_nents, iv_dma,
+ ivsize, 0);
edesc->qm_sg_dma = dma_map_single(qidev, sg_table, edesc->qm_sg_bytes,
DMA_TO_DEVICE);
if (dma_mapping_error(qidev, edesc->qm_sg_dma)) {
dev_err(qidev, "unable to map S/G table\n");
caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_BIDIRECTIONAL, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1340,16 +1358,14 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
ivsize + req->cryptlen, 0);
- if (req->src == req->dst) {
+ if (req->src == req->dst)
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
- sizeof(*sg_table), req->cryptlen, 0);
- } else if (mapped_dst_nents > 1) {
+ sizeof(*sg_table), req->cryptlen + ivsize,
+ 0);
+ else
dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
- sizeof(*sg_table), req->cryptlen, 0);
- } else {
- dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst),
- req->cryptlen, 0);
- }
+ sizeof(*sg_table), req->cryptlen + ivsize,
+ 0);
return edesc;
}
@@ -1359,7 +1375,6 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
struct skcipher_edesc *edesc;
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
- int ivsize = crypto_skcipher_ivsize(skcipher);
int ret;
if (unlikely(caam_congested))
@@ -1370,14 +1385,6 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
if (IS_ERR(edesc))
return PTR_ERR(edesc);
- /*
- * The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block.
- */
- if (!encrypt)
- scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen -
- ivsize, ivsize, 0);
-
ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
if (!ret) {
ret = -EINPROGRESS;
@@ -2382,6 +2389,7 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
bool uses_dkp)
{
struct caam_drv_private *priv;
+ struct device *dev;
/*
* distribute tfms across job rings to ensure in-order
@@ -2393,16 +2401,17 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
return PTR_ERR(ctx->jrdev);
}
- priv = dev_get_drvdata(ctx->jrdev->parent);
+ dev = ctx->jrdev->parent;
+ priv = dev_get_drvdata(dev);
if (priv->era >= 6 && uses_dkp)
ctx->dir = DMA_BIDIRECTIONAL;
else
ctx->dir = DMA_TO_DEVICE;
- ctx->key_dma = dma_map_single(ctx->jrdev, ctx->key, sizeof(ctx->key),
+ ctx->key_dma = dma_map_single(dev, ctx->key, sizeof(ctx->key),
ctx->dir);
- if (dma_mapping_error(ctx->jrdev, ctx->key_dma)) {
- dev_err(ctx->jrdev, "unable to map key\n");
+ if (dma_mapping_error(dev, ctx->key_dma)) {
+ dev_err(dev, "unable to map key\n");
caam_jr_free(ctx->jrdev);
return -ENOMEM;
}
@@ -2411,7 +2420,7 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam,
ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
- ctx->qidev = priv->qidev;
+ ctx->qidev = dev;
spin_lock_init(&ctx->lock);
ctx->drv_ctx[ENCRYPT] = NULL;
@@ -2445,7 +2454,8 @@ static void caam_exit_common(struct caam_ctx *ctx)
caam_drv_ctx_rel(ctx->drv_ctx[ENCRYPT]);
caam_drv_ctx_rel(ctx->drv_ctx[DECRYPT]);
- dma_unmap_single(ctx->jrdev, ctx->key_dma, sizeof(ctx->key), ctx->dir);
+ dma_unmap_single(ctx->jrdev->parent, ctx->key_dma, sizeof(ctx->key),
+ ctx->dir);
caam_jr_free(ctx->jrdev);
}
@@ -2460,7 +2470,7 @@ static void caam_aead_exit(struct crypto_aead *tfm)
caam_exit_common(crypto_aead_ctx(tfm));
}
-static void __exit caam_qi_algapi_exit(void)
+void caam_qi_algapi_exit(void)
{
int i;
@@ -2505,45 +2515,17 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
alg->exit = caam_aead_exit;
}
-static int __init caam_qi_algapi_init(void)
+int caam_qi_algapi_init(struct device *ctrldev)
{
- struct device_node *dev_node;
- struct platform_device *pdev;
- struct device *ctrldev;
- struct caam_drv_private *priv;
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
int i = 0, err = 0;
u32 aes_vid, aes_inst, des_inst, md_vid, md_inst;
unsigned int md_limit = SHA512_DIGEST_SIZE;
bool registered = false;
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
- if (!dev_node) {
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
- if (!dev_node)
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(dev_node);
- of_node_put(dev_node);
- if (!pdev)
- return -ENODEV;
-
- ctrldev = &pdev->dev;
- priv = dev_get_drvdata(ctrldev);
-
- /*
- * If priv is NULL, it's probably because the caam driver wasn't
- * properly initialized (e.g. RNG4 init failed). Thus, bail out here.
- */
- if (!priv || !priv->qi_present) {
- err = -ENODEV;
- goto out_put_dev;
- }
-
if (caam_dpaa2) {
dev_info(ctrldev, "caam/qi frontend driver not suitable for DPAA 2.x, aborting...\n");
- err = -ENODEV;
- goto out_put_dev;
+ return -ENODEV;
}
/*
@@ -2598,7 +2580,7 @@ static int __init caam_qi_algapi_init(void)
err = crypto_register_skcipher(&t_alg->skcipher);
if (err) {
- dev_warn(priv->qidev, "%s alg registration failed\n",
+ dev_warn(ctrldev, "%s alg registration failed\n",
t_alg->skcipher.base.cra_driver_name);
continue;
}
@@ -2654,16 +2636,7 @@ static int __init caam_qi_algapi_init(void)
}
if (registered)
- dev_info(priv->qidev, "algorithms registered in /proc/crypto\n");
+ dev_info(ctrldev, "algorithms registered in /proc/crypto\n");
-out_put_dev:
- put_device(ctrldev);
return err;
}
-
-module_init(caam_qi_algapi_init);
-module_exit(caam_qi_algapi_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Support for crypto API using CAAM-QI backend");
-MODULE_AUTHOR("Freescale Semiconductor");
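
Because the hardware prefetches S/G entries four at a time, the
caamalg_qi.c hunks above pad the qman S/G table with pad_sg_nents()
following the three-way logic spelled out in the comment. A userspace
model of that sizing, under the assumption that pad_sg_nents() rounds up
to the next multiple of 4:

#include <stdio.h>

static int pad_sg_nents(int nents)
{
	return (nents + 3) & ~3;	/* round up to a multiple of 4 */
}

static int max_int(int a, int b)
{
	return a > b ? a : b;
}

static int aead_qm_sg_ents(int mapped_src, int mapped_dst, int has_iv,
			   int src_is_dst)
{
	int ents = 1 + !!has_iv + mapped_src;	/* assoclen + [IV] + src */

	if (mapped_dst > 1)			/* pad output S/G */
		ents += pad_sg_nents(mapped_dst);
	else if (src_is_dst && mapped_src > 1)	/* overlapping S/Gs */
		ents = max_int(pad_sg_nents(ents),
			       1 + !!has_iv + pad_sg_nents(mapped_src));
	else					/* pad input S/G */
		ents = pad_sg_nents(ents);

	return ents;
}

int main(void)
{
	/* src == dst, 3 src entries, with IV: 1 + 1 + 3 = 5 -> 8 */
	printf("%d\n", aead_qm_sg_ents(3, 0, 1, 1));
	/* src != dst, 2 src and 3 dst entries: 1 + 1 + 2 + pad(3) = 8 */
	printf("%d\n", aead_qm_sg_ents(2, 3, 1, 0));
	return 0;
}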
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 2b2980a8a9b9..06bf32c32cbd 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/*
* Copyright 2015-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2019 NXP
*/
#include "compat.h"
@@ -140,7 +140,8 @@ static struct caam_request *to_caam_req(struct crypto_async_request *areq)
static void caam_unmap(struct device *dev, struct scatterlist *src,
struct scatterlist *dst, int src_nents,
int dst_nents, dma_addr_t iv_dma, int ivsize,
- dma_addr_t qm_sg_dma, int qm_sg_bytes)
+ enum dma_data_direction iv_dir, dma_addr_t qm_sg_dma,
+ int qm_sg_bytes)
{
if (dst != src) {
if (src_nents)
@@ -152,7 +153,7 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
}
if (iv_dma)
- dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
+ dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
if (qm_sg_bytes)
dma_unmap_single(dev, qm_sg_dma, qm_sg_bytes, DMA_TO_DEVICE);
@@ -371,6 +372,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
+ int src_len, dst_len = 0;
struct aead_edesc *edesc;
dma_addr_t qm_sg_dma, iv_dma = 0;
int ivsize = 0;
@@ -387,23 +389,21 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
}
if (unlikely(req->dst != req->src)) {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen);
+ src_len = req->assoclen + req->cryptlen;
+ dst_len = src_len + (encrypt ? authsize : (-authsize));
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(dev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen);
+ src_len);
qi_cache_free(edesc);
return ERR_PTR(src_nents);
}
- dst_nents = sg_nents_for_len(req->dst, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize :
- (-authsize)));
+ dst_nents = sg_nents_for_len(req->dst, dst_len);
if (unlikely(dst_nents < 0)) {
dev_err(dev, "Insufficient bytes (%d) in dst S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : (-authsize)));
+ dst_len);
qi_cache_free(edesc);
return ERR_PTR(dst_nents);
}
@@ -434,13 +434,13 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
mapped_dst_nents = 0;
}
} else {
- src_nents = sg_nents_for_len(req->src, req->assoclen +
- req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len = req->assoclen + req->cryptlen +
+ (encrypt ? authsize : 0);
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (unlikely(src_nents < 0)) {
dev_err(dev, "Insufficient bytes (%d) in src S/G\n",
- req->assoclen + req->cryptlen +
- (encrypt ? authsize : 0));
+ src_len);
qi_cache_free(edesc);
return ERR_PTR(src_nents);
}
@@ -460,9 +460,25 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
/*
* Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
* Input is not contiguous.
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries. Logic:
+ * if (src != dst && output S/G)
+ * pad output S/G, if needed
+ * else if (src == dst && S/G)
+ * overlapping S/Gs; pad one of them
+ * else if (input S/G) ...
+ * pad input S/G, if needed
*/
- qm_sg_nents = 1 + !!ivsize + mapped_src_nents +
- (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+ qm_sg_nents = 1 + !!ivsize + mapped_src_nents;
+ if (mapped_dst_nents > 1)
+ qm_sg_nents += pad_sg_nents(mapped_dst_nents);
+ else if ((req->src == req->dst) && (mapped_src_nents > 1))
+ qm_sg_nents = max(pad_sg_nents(qm_sg_nents),
+ 1 + !!ivsize +
+ pad_sg_nents(mapped_src_nents));
+ else
+ qm_sg_nents = pad_sg_nents(qm_sg_nents);
+
sg_table = &edesc->sgt[0];
qm_sg_bytes = qm_sg_nents * sizeof(*sg_table);
if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
@@ -470,7 +486,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dev_err(dev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_nents, ivsize);
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -485,7 +501,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
if (dma_mapping_error(dev, iv_dma)) {
dev_err(dev, "unable to map IV\n");
caam_unmap(dev, req->src, req->dst, src_nents,
- dst_nents, 0, 0, 0, 0);
+ dst_nents, 0, 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -509,7 +525,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
if (dma_mapping_error(dev, edesc->assoclen_dma)) {
dev_err(dev, "unable to map assoclen\n");
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -520,19 +536,18 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dma_to_qm_sg_one(sg_table + qm_sg_index, iv_dma, ivsize, 0);
qm_sg_index++;
}
- sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + qm_sg_index, 0);
+ sg_to_qm_sg_last(req->src, src_len, sg_table + qm_sg_index, 0);
qm_sg_index += mapped_src_nents;
if (mapped_dst_nents > 1)
- sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
- qm_sg_index, 0);
+ sg_to_qm_sg_last(req->dst, dst_len, sg_table + qm_sg_index, 0);
qm_sg_dma = dma_map_single(dev, sg_table, qm_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(dev, qm_sg_dma)) {
dev_err(dev, "unable to map S/G table\n");
dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -559,6 +574,14 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dpaa2_fl_set_addr(out_fle, qm_sg_dma +
(1 + !!ivsize) * sizeof(*sg_table));
}
+ } else if (!mapped_dst_nents) {
+ /*
+ * The crypto engine requires the output entry to be present when
+ * a "frame list" FD is used.
+ * Since the engine does not support FMT=2'b11 (unused entry type),
+ * leaving out_fle zeroized is the best option.
+ */
+ goto skip_out_fle;
} else if (mapped_dst_nents == 1) {
dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
dpaa2_fl_set_addr(out_fle, sg_dma_address(req->dst));
@@ -570,6 +593,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
dpaa2_fl_set_len(out_fle, out_len);
+skip_out_fle:
return edesc;
}
@@ -1077,14 +1101,26 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
qm_sg_ents = 1 + mapped_src_nents;
dst_sg_idx = qm_sg_ents;
- qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+ /*
+ * Input, output HW S/G tables: [IV, src][dst, IV]
+ * IV entries point to the same buffer
+ * If src == dst, S/G entries are reused (S/G tables overlap)
+ *
+ * HW reads 4 S/G entries at a time; make sure the reads don't go beyond
+ * the end of the table by allocating more S/G entries.
+ */
+ if (req->src != req->dst)
+ qm_sg_ents += pad_sg_nents(mapped_dst_nents + 1);
+ else
+ qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);
+
qm_sg_bytes = qm_sg_ents * sizeof(struct dpaa2_sg_entry);
if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes +
ivsize > CAAM_QI_MEMCACHE_SIZE)) {
dev_err(dev, "No space for %d S/G entries and/or %dB IV\n",
qm_sg_ents, ivsize);
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -1093,7 +1129,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
if (unlikely(!edesc)) {
dev_err(dev, "could not allocate extended descriptor\n");
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -1102,11 +1138,11 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
iv = (u8 *)(sg_table + qm_sg_ents);
memcpy(iv, req->iv, ivsize);
- iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE);
+ iv_dma = dma_map_single(dev, iv, ivsize, DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, iv_dma)) {
dev_err(dev, "unable to map IV\n");
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0,
- 0, 0, 0);
+ 0, DMA_NONE, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1117,18 +1153,20 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
edesc->qm_sg_bytes = qm_sg_bytes;
dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
- sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
+ sg_to_qm_sg(req->src, req->cryptlen, sg_table + 1, 0);
- if (mapped_dst_nents > 1)
- sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
- dst_sg_idx, 0);
+ if (req->src != req->dst)
+ sg_to_qm_sg(req->dst, req->cryptlen, sg_table + dst_sg_idx, 0);
+
+ dma_to_qm_sg_one(sg_table + dst_sg_idx + mapped_dst_nents, iv_dma,
+ ivsize, 0);
edesc->qm_sg_dma = dma_map_single(dev, sg_table, edesc->qm_sg_bytes,
DMA_TO_DEVICE);
if (dma_mapping_error(dev, edesc->qm_sg_dma)) {
dev_err(dev, "unable to map S/G table\n");
caam_unmap(dev, req->src, req->dst, src_nents, dst_nents,
- iv_dma, ivsize, 0, 0);
+ iv_dma, ivsize, DMA_BIDIRECTIONAL, 0, 0);
qi_cache_free(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -1136,23 +1174,19 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
memset(&req_ctx->fd_flt, 0, sizeof(req_ctx->fd_flt));
dpaa2_fl_set_final(in_fle, true);
dpaa2_fl_set_len(in_fle, req->cryptlen + ivsize);
- dpaa2_fl_set_len(out_fle, req->cryptlen);
+ dpaa2_fl_set_len(out_fle, req->cryptlen + ivsize);
dpaa2_fl_set_format(in_fle, dpaa2_fl_sg);
dpaa2_fl_set_addr(in_fle, edesc->qm_sg_dma);
- if (req->src == req->dst) {
- dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+ dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+
+ if (req->src == req->dst)
dpaa2_fl_set_addr(out_fle, edesc->qm_sg_dma +
sizeof(*sg_table));
- } else if (mapped_dst_nents > 1) {
- dpaa2_fl_set_format(out_fle, dpaa2_fl_sg);
+ else
dpaa2_fl_set_addr(out_fle, edesc->qm_sg_dma + dst_sg_idx *
sizeof(*sg_table));
- } else {
- dpaa2_fl_set_format(out_fle, dpaa2_fl_single);
- dpaa2_fl_set_addr(out_fle, sg_dma_address(req->dst));
- }
return edesc;
}
@@ -1164,7 +1198,8 @@ static void aead_unmap(struct device *dev, struct aead_edesc *edesc,
int ivsize = crypto_aead_ivsize(aead);
caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
- edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+ edesc->iv_dma, ivsize, DMA_TO_DEVICE, edesc->qm_sg_dma,
+ edesc->qm_sg_bytes);
dma_unmap_single(dev, edesc->assoclen_dma, 4, DMA_TO_DEVICE);
}
@@ -1175,7 +1210,8 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc,
int ivsize = crypto_skcipher_ivsize(skcipher);
caam_unmap(dev, req->src, req->dst, edesc->src_nents, edesc->dst_nents,
- edesc->iv_dma, ivsize, edesc->qm_sg_dma, edesc->qm_sg_bytes);
+ edesc->iv_dma, ivsize, DMA_BIDIRECTIONAL, edesc->qm_sg_dma,
+ edesc->qm_sg_bytes);
}
static void aead_encrypt_done(void *cbk_ctx, u32 status)
@@ -1324,7 +1360,7 @@ static void skcipher_encrypt_done(void *cbk_ctx, u32 status)
print_hex_dump_debug("dstiv @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
edesc->src_nents > 1 ? 100 : ivsize, 1);
- caam_dump_sg(KERN_DEBUG, "dst @" __stringify(__LINE__)": ",
+ caam_dump_sg("dst @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
@@ -1332,10 +1368,10 @@ static void skcipher_encrypt_done(void *cbk_ctx, u32 status)
/*
* The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block. This is used e.g. by the CTS mode.
+ * ciphertext block (CBC mode) or last counter (CTR mode).
+ * This is used e.g. by the CTS mode.
*/
- scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen - ivsize,
- ivsize, 0);
+ memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
qi_cache_free(edesc);
skcipher_request_complete(req, ecode);
@@ -1362,11 +1398,19 @@ static void skcipher_decrypt_done(void *cbk_ctx, u32 status)
print_hex_dump_debug("dstiv @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
edesc->src_nents > 1 ? 100 : ivsize, 1);
- caam_dump_sg(KERN_DEBUG, "dst @" __stringify(__LINE__)": ",
+ caam_dump_sg("dst @" __stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
edesc->dst_nents > 1 ? 100 : req->cryptlen, 1);
skcipher_unmap(ctx->dev, edesc, req);
+
+ /*
+ * The crypto API expects us to set the IV (req->iv) to the last
+ * ciphertext block (CBC mode) or last counter (CTR mode).
+ * This is used e.g. by the CTS mode.
+ */
+ memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
+
qi_cache_free(edesc);
skcipher_request_complete(req, ecode);
}
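
The memcpy above reads the updated IV from just past the S/G table,
relying on the extended-descriptor layout where the IV buffer is carved
out of the same allocation, immediately after qm_sg_bytes of entries. A
sketch of that layout (struct names and fields are simplified stand-ins,
not the driver's real types):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sg_entry {	/* stand-in for struct dpaa2_sg_entry */
	unsigned long long addr;
	unsigned int len, flags;
};

struct edesc {
	int qm_sg_bytes;
	struct sg_entry sgt[];	/* S/G table, then IV, one allocation */
};

int main(void)
{
	int qm_sg_ents = 4, ivsize = 16;
	int qm_sg_bytes = qm_sg_ents * sizeof(struct sg_entry);
	struct edesc *edesc;
	unsigned char *iv, req_iv[16];

	edesc = calloc(1, sizeof(*edesc) + qm_sg_bytes + ivsize);
	if (!edesc)
		return 1;
	edesc->qm_sg_bytes = qm_sg_bytes;

	iv = (unsigned char *)&edesc->sgt[0] + qm_sg_bytes;
	memset(iv, 0xab, ivsize);	/* "hardware" stored the new IV */

	/* what skcipher_{en,de}crypt_done() does to update req->iv */
	memcpy(req_iv, (unsigned char *)&edesc->sgt[0] + edesc->qm_sg_bytes,
	       ivsize);
	printf("req->iv[0] = 0x%02x\n", req_iv[0]);
	free(edesc);
	return 0;
}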
@@ -1405,7 +1449,6 @@ static int skcipher_decrypt(struct skcipher_request *req)
struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
struct caam_request *caam_req = skcipher_request_ctx(req);
- int ivsize = crypto_skcipher_ivsize(skcipher);
int ret;
/* allocate extended descriptor */
@@ -1413,13 +1456,6 @@ static int skcipher_decrypt(struct skcipher_request *req)
if (IS_ERR(edesc))
return PTR_ERR(edesc);
- /*
- * The crypto API expects us to set the IV (req->iv) to the last
- * ciphertext block.
- */
- scatterwalk_map_and_copy(req->iv, req->src, req->cryptlen - ivsize,
- ivsize, 0);
-
caam_req->flc = &ctx->flc[DECRYPT];
caam_req->flc_dma = ctx->flc_dma[DECRYPT];
caam_req->cbk = skcipher_decrypt_done;
@@ -3380,9 +3416,9 @@ static int ahash_update_ctx(struct ahash_request *req)
if (to_hash) {
struct dpaa2_sg_entry *sg_table;
+ int src_len = req->nbytes - *next_buflen;
- src_nents = sg_nents_for_len(req->src,
- req->nbytes - (*next_buflen));
+ src_nents = sg_nents_for_len(req->src, src_len);
if (src_nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return src_nents;
@@ -3409,7 +3445,7 @@ static int ahash_update_ctx(struct ahash_request *req)
edesc->src_nents = src_nents;
qm_sg_src_index = 1 + (*buflen ? 1 : 0);
- qm_sg_bytes = (qm_sg_src_index + mapped_nents) *
+ qm_sg_bytes = pad_sg_nents(qm_sg_src_index + mapped_nents) *
sizeof(*sg_table);
sg_table = &edesc->sgt[0];
@@ -3423,7 +3459,7 @@ static int ahash_update_ctx(struct ahash_request *req)
goto unmap_ctx;
if (mapped_nents) {
- sg_to_qm_sg_last(req->src, mapped_nents,
+ sg_to_qm_sg_last(req->src, src_len,
sg_table + qm_sg_src_index, 0);
if (*next_buflen)
scatterwalk_map_and_copy(next_buf, req->src,
@@ -3494,7 +3530,7 @@ static int ahash_final_ctx(struct ahash_request *req)
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
int buflen = *current_buflen(state);
- int qm_sg_bytes, qm_sg_src_index;
+ int qm_sg_bytes;
int digestsize = crypto_ahash_digestsize(ahash);
struct ahash_edesc *edesc;
struct dpaa2_sg_entry *sg_table;
@@ -3505,8 +3541,7 @@ static int ahash_final_ctx(struct ahash_request *req)
if (!edesc)
return -ENOMEM;
- qm_sg_src_index = 1 + (buflen ? 1 : 0);
- qm_sg_bytes = qm_sg_src_index * sizeof(*sg_table);
+ qm_sg_bytes = pad_sg_nents(1 + (buflen ? 1 : 0)) * sizeof(*sg_table);
sg_table = &edesc->sgt[0];
ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table,
@@ -3518,7 +3553,7 @@ static int ahash_final_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- dpaa2_sg_set_final(sg_table + qm_sg_src_index - 1, true);
+ dpaa2_sg_set_final(sg_table + (buflen ? 1 : 0), true);
edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
DMA_TO_DEVICE);
@@ -3599,7 +3634,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
edesc->src_nents = src_nents;
qm_sg_src_index = 1 + (buflen ? 1 : 0);
- qm_sg_bytes = (qm_sg_src_index + mapped_nents) * sizeof(*sg_table);
+ qm_sg_bytes = pad_sg_nents(qm_sg_src_index + mapped_nents) *
+ sizeof(*sg_table);
sg_table = &edesc->sgt[0];
ret = ctx_map_to_qm_sg(ctx->dev, state, ctx->ctx_len, sg_table,
@@ -3611,7 +3647,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- sg_to_qm_sg_last(req->src, mapped_nents, sg_table + qm_sg_src_index, 0);
+ sg_to_qm_sg_last(req->src, req->nbytes, sg_table + qm_sg_src_index, 0);
edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
DMA_TO_DEVICE);
@@ -3696,8 +3732,8 @@ static int ahash_digest(struct ahash_request *req)
int qm_sg_bytes;
struct dpaa2_sg_entry *sg_table = &edesc->sgt[0];
- qm_sg_bytes = mapped_nents * sizeof(*sg_table);
- sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0);
+ qm_sg_bytes = pad_sg_nents(mapped_nents) * sizeof(*sg_table);
+ sg_to_qm_sg_last(req->src, req->nbytes, sg_table, 0);
edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
qm_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(ctx->dev, edesc->qm_sg_dma)) {
@@ -3840,9 +3876,9 @@ static int ahash_update_no_ctx(struct ahash_request *req)
if (to_hash) {
struct dpaa2_sg_entry *sg_table;
+ int src_len = req->nbytes - *next_buflen;
- src_nents = sg_nents_for_len(req->src,
- req->nbytes - *next_buflen);
+ src_nents = sg_nents_for_len(req->src, src_len);
if (src_nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return src_nents;
@@ -3868,14 +3904,15 @@ static int ahash_update_no_ctx(struct ahash_request *req)
}
edesc->src_nents = src_nents;
- qm_sg_bytes = (1 + mapped_nents) * sizeof(*sg_table);
+ qm_sg_bytes = pad_sg_nents(1 + mapped_nents) *
+ sizeof(*sg_table);
sg_table = &edesc->sgt[0];
ret = buf_map_to_qm_sg(ctx->dev, sg_table, state);
if (ret)
goto unmap_ctx;
- sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0);
+ sg_to_qm_sg_last(req->src, src_len, sg_table + 1, 0);
if (*next_buflen)
scatterwalk_map_and_copy(next_buf, req->src,
@@ -3987,14 +4024,14 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
}
edesc->src_nents = src_nents;
- qm_sg_bytes = (2 + mapped_nents) * sizeof(*sg_table);
+ qm_sg_bytes = pad_sg_nents(2 + mapped_nents) * sizeof(*sg_table);
sg_table = &edesc->sgt[0];
ret = buf_map_to_qm_sg(ctx->dev, sg_table, state);
if (ret)
goto unmap;
- sg_to_qm_sg_last(req->src, mapped_nents, sg_table + 1, 0);
+ sg_to_qm_sg_last(req->src, req->nbytes, sg_table + 1, 0);
edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table, qm_sg_bytes,
DMA_TO_DEVICE);
@@ -4064,9 +4101,9 @@ static int ahash_update_first(struct ahash_request *req)
if (to_hash) {
struct dpaa2_sg_entry *sg_table;
+ int src_len = req->nbytes - *next_buflen;
- src_nents = sg_nents_for_len(req->src,
- req->nbytes - (*next_buflen));
+ src_nents = sg_nents_for_len(req->src, src_len);
if (src_nents < 0) {
dev_err(ctx->dev, "Invalid number of src SG.\n");
return src_nents;
@@ -4101,8 +4138,9 @@ static int ahash_update_first(struct ahash_request *req)
if (mapped_nents > 1) {
int qm_sg_bytes;
- sg_to_qm_sg_last(req->src, mapped_nents, sg_table, 0);
- qm_sg_bytes = mapped_nents * sizeof(*sg_table);
+ sg_to_qm_sg_last(req->src, src_len, sg_table, 0);
+ qm_sg_bytes = pad_sg_nents(mapped_nents) *
+ sizeof(*sg_table);
edesc->qm_sg_dma = dma_map_single(ctx->dev, sg_table,
qm_sg_bytes,
DMA_TO_DEVICE);
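
The caamalg_qi2.c skcipher hunks above build the input table as [IV, src]
and the output table as [dst, IV], both IV entries pointing at one
DMA_BIDIRECTIONAL buffer, with the two tables overlapping when src == dst.
A sketch of the index math only (same multiple-of-4 padding assumption as
before; no DMA here):

#include <stdio.h>

static int pad_sg_nents(int nents)
{
	return (nents + 3) & ~3;	/* HW reads 4 entries at a time */
}

int main(void)
{
	int mapped_src_nents = 3, mapped_dst_nents = 2;
	int src_is_dst = 0;

	int qm_sg_ents = 1 + mapped_src_nents;	/* [IV, src] */
	int dst_sg_idx = qm_sg_ents;		/* output table starts here */

	if (!src_is_dst)	/* [dst, IV], padded */
		qm_sg_ents += pad_sg_nents(mapped_dst_nents + 1);
	else			/* tables overlap, pad the shared one */
		qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents);

	/* the trailing IV entry sits right after the dst entries */
	printf("total entries %d, output IV entry at index %d\n",
	       qm_sg_ents, dst_sg_idx + mapped_dst_nents);
	return 0;
}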
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 7205d9f4029e..e4ac5d591ad6 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -82,14 +82,6 @@
#define HASH_MSG_LEN 8
#define MAX_CTX_LEN (HASH_MSG_LEN + SHA512_DIGEST_SIZE)
-#ifdef DEBUG
-/* for print_hex_dumps with line references */
-#define debug(format, arg...) printk(format, arg)
-#else
-#define debug(format, arg...)
-#endif
-
-
static struct list_head hash_list;
/* ahash per-session context */
@@ -243,11 +235,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, true, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma,
desc_bytes(desc), ctx->dir);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "ahash update shdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+
+ print_hex_dump_debug("ahash update shdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
/* ahash_update_first shared descriptor */
desc = ctx->sh_desc_update_first;
@@ -255,11 +246,9 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, false, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
desc_bytes(desc), ctx->dir);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "ahash update first shdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("ahash update first shdesc@"__stringify(__LINE__)
+ ": ", DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
/* ahash_final shared descriptor */
desc = ctx->sh_desc_fin;
@@ -267,11 +256,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, true, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma,
desc_bytes(desc), ctx->dir);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ahash final shdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
+
+ print_hex_dump_debug("ahash final shdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
/* ahash_digest shared descriptor */
desc = ctx->sh_desc_digest;
@@ -279,12 +267,10 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, false, ctrlpriv->era);
dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma,
desc_bytes(desc), ctx->dir);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "ahash digest shdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
+
+ print_hex_dump_debug("ahash digest shdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
return 0;
}
@@ -328,9 +314,9 @@ static int axcbc_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, ctx->key_dma);
dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
desc_bytes(desc), ctx->dir);
- print_hex_dump_debug("axcbc update first shdesc@" __stringify(__LINE__)" : ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
- 1);
+ print_hex_dump_debug("axcbc update first shdesc@" __stringify(__LINE__)
+ " : ", DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
/* shared descriptor for ahash_digest */
desc = ctx->sh_desc_digest;
@@ -377,8 +363,8 @@ static int acmac_set_sh_desc(struct crypto_ahash *ahash)
ctx->ctx_len, 0);
dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
desc_bytes(desc), ctx->dir);
- print_hex_dump_debug("acmac update first shdesc@" __stringify(__LINE__)" : ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ print_hex_dump_debug("acmac update first shdesc@" __stringify(__LINE__)
+ " : ", DUMP_PREFIX_ADDRESS, 16, 4, desc,
desc_bytes(desc), 1);
/* shared descriptor for ahash_digest */
@@ -429,12 +415,11 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
LDST_SRCDST_BYTE_CONTEXT);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "key_in@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, *keylen, 1);
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("key_in@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key, *keylen, 1);
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
result.err = 0;
init_completion(&result.completion);
@@ -444,11 +429,10 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
/* in progress */
wait_for_completion(&result.completion);
ret = result.err;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR,
- "digested key@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key, digestsize, 1);
-#endif
+
+ print_hex_dump_debug("digested key@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key,
+ digestsize, 1);
}
dma_unmap_single(jrdev, key_dma, *keylen, DMA_BIDIRECTIONAL);
@@ -463,15 +447,14 @@ static int ahash_setkey(struct crypto_ahash *ahash,
const u8 *key, unsigned int keylen)
{
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+ struct device *jrdev = ctx->jrdev;
int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
int digestsize = crypto_ahash_digestsize(ahash);
struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
int ret;
u8 *hashed_key = NULL;
-#ifdef DEBUG
- printk(KERN_ERR "keylen %d\n", keylen);
-#endif
+ dev_dbg(jrdev, "keylen %d\n", keylen);
if (keylen > blocksize) {
hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
@@ -600,11 +583,9 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
int digestsize = crypto_ahash_digestsize(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
-#ifdef DEBUG
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
@@ -614,11 +595,9 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
memcpy(req->result, state->caam_ctx, digestsize);
kfree(edesc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
-#endif
+ print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+ ctx->ctx_len, 1);
req->base.complete(&req->base, err);
}
@@ -631,11 +610,9 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
-#ifdef DEBUG
int digestsize = crypto_ahash_digestsize(ahash);
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
@@ -645,15 +622,13 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
switch_buf(state);
kfree(edesc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
+ print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+ ctx->ctx_len, 1);
if (req->result)
- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->result,
- digestsize, 1);
-#endif
+ print_hex_dump_debug("result@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+ digestsize, 1);
req->base.complete(&req->base, err);
}
@@ -666,11 +641,9 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
int digestsize = crypto_ahash_digestsize(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
-#ifdef DEBUG
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
@@ -680,11 +653,9 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
memcpy(req->result, state->caam_ctx, digestsize);
kfree(edesc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
-#endif
+ print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+ ctx->ctx_len, 1);
req->base.complete(&req->base, err);
}
@@ -697,11 +668,9 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
struct caam_hash_state *state = ahash_request_ctx(req);
-#ifdef DEBUG
int digestsize = crypto_ahash_digestsize(ahash);
- dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
@@ -711,15 +680,13 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
switch_buf(state);
kfree(edesc);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
- ctx->ctx_len, 1);
+ print_hex_dump_debug("ctx@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
+ ctx->ctx_len, 1);
if (req->result)
- print_hex_dump(KERN_ERR, "result@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, req->result,
- digestsize, 1);
-#endif
+ print_hex_dump_debug("result@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, req->result,
+ digestsize, 1);
req->base.complete(&req->base, err);
}
@@ -759,9 +726,10 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
if (nents > 1 || first_sg) {
struct sec4_sg_entry *sg = edesc->sec4_sg;
- unsigned int sgsize = sizeof(*sg) * (first_sg + nents);
+ unsigned int sgsize = sizeof(*sg) *
+ pad_sg_nents(first_sg + nents);
- sg_to_sec4_sg_last(req->src, nents, sg + first_sg, 0);
+ sg_to_sec4_sg_last(req->src, to_hash, sg + first_sg, 0);
src_dma = dma_map_single(ctx->jrdev, sg, sgsize, DMA_TO_DEVICE);
if (dma_mapping_error(ctx->jrdev, src_dma)) {
@@ -819,8 +787,10 @@ static int ahash_update_ctx(struct ahash_request *req)
}
if (to_hash) {
- src_nents = sg_nents_for_len(req->src,
- req->nbytes - (*next_buflen));
+ int pad_nents;
+ int src_len = req->nbytes - *next_buflen;
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (src_nents < 0) {
dev_err(jrdev, "Invalid number of src SG.\n");
return src_nents;
@@ -838,15 +808,14 @@ static int ahash_update_ctx(struct ahash_request *req)
}
sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
- sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
- sizeof(struct sec4_sg_entry);
+ pad_nents = pad_sg_nents(sec4_sg_src_index + mapped_nents);
+ sec4_sg_bytes = pad_nents * sizeof(struct sec4_sg_entry);
/*
* allocate space for base edesc and hw desc commands,
* link tables
*/
- edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
- ctx->sh_desc_update,
+ edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update,
ctx->sh_desc_update_dma, flags);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
@@ -866,7 +835,7 @@ static int ahash_update_ctx(struct ahash_request *req)
goto unmap_ctx;
if (mapped_nents)
- sg_to_sec4_sg_last(req->src, mapped_nents,
+ sg_to_sec4_sg_last(req->src, src_len,
edesc->sec4_sg + sec4_sg_src_index,
0);
else
@@ -893,11 +862,9 @@ static int ahash_update_ctx(struct ahash_request *req)
append_seq_out_ptr(desc, state->ctx_dma, ctx->ctx_len, 0);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
if (ret)
@@ -910,13 +877,12 @@ static int ahash_update_ctx(struct ahash_request *req)
*buflen = *next_buflen;
*next_buflen = last_buflen;
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "buf@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
- *next_buflen, 1);
-#endif
+
+ print_hex_dump_debug("buf@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+ print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
+ *next_buflen, 1);
return ret;
unmap_ctx:
@@ -935,18 +901,17 @@ static int ahash_final_ctx(struct ahash_request *req)
GFP_KERNEL : GFP_ATOMIC;
int buflen = *current_buflen(state);
u32 *desc;
- int sec4_sg_bytes, sec4_sg_src_index;
+ int sec4_sg_bytes;
int digestsize = crypto_ahash_digestsize(ahash);
struct ahash_edesc *edesc;
int ret;
- sec4_sg_src_index = 1 + (buflen ? 1 : 0);
- sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
+ sec4_sg_bytes = pad_sg_nents(1 + (buflen ? 1 : 0)) *
+ sizeof(struct sec4_sg_entry);
/* allocate space for base edesc and hw desc commands, link tables */
- edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index,
- ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
- flags);
+ edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin,
+ ctx->sh_desc_fin_dma, flags);
if (!edesc)
return -ENOMEM;
@@ -963,7 +928,7 @@ static int ahash_final_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1);
+ sg_to_sec4_set_last(edesc->sec4_sg + (buflen ? 1 : 0));
edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
sec4_sg_bytes, DMA_TO_DEVICE);
@@ -977,10 +942,9 @@ static int ahash_final_ctx(struct ahash_request *req)
LDST_SGF);
append_seq_out_ptr(desc, state->ctx_dma, digestsize, 0);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
if (ret)
@@ -1058,10 +1022,9 @@ static int ahash_finup_ctx(struct ahash_request *req)
append_seq_out_ptr(desc, state->ctx_dma, digestsize, 0);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
if (ret)
@@ -1135,10 +1098,9 @@ static int ahash_digest(struct ahash_request *req)
return -ENOMEM;
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
if (!ret) {
@@ -1190,10 +1152,9 @@ static int ahash_final_no_ctx(struct ahash_request *req)
if (ret)
goto unmap;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
if (!ret) {
@@ -1246,8 +1207,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
}
if (to_hash) {
- src_nents = sg_nents_for_len(req->src,
- req->nbytes - *next_buflen);
+ int pad_nents;
+ int src_len = req->nbytes - *next_buflen;
+
+ src_nents = sg_nents_for_len(req->src, src_len);
if (src_nents < 0) {
dev_err(jrdev, "Invalid number of src SG.\n");
return src_nents;
@@ -1264,14 +1227,14 @@ static int ahash_update_no_ctx(struct ahash_request *req)
mapped_nents = 0;
}
- sec4_sg_bytes = (1 + mapped_nents) *
- sizeof(struct sec4_sg_entry);
+ pad_nents = pad_sg_nents(1 + mapped_nents);
+ sec4_sg_bytes = pad_nents * sizeof(struct sec4_sg_entry);
/*
* allocate space for base edesc and hw desc commands,
* link tables
*/
- edesc = ahash_edesc_alloc(ctx, 1 + mapped_nents,
+ edesc = ahash_edesc_alloc(ctx, pad_nents,
ctx->sh_desc_update_first,
ctx->sh_desc_update_first_dma,
flags);
@@ -1287,8 +1250,7 @@ static int ahash_update_no_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
- sg_to_sec4_sg_last(req->src, mapped_nents,
- edesc->sec4_sg + 1, 0);
+ sg_to_sec4_sg_last(req->src, src_len, edesc->sec4_sg + 1, 0);
if (*next_buflen) {
scatterwalk_map_and_copy(next_buf, req->src,
@@ -1313,11 +1275,9 @@ static int ahash_update_no_ctx(struct ahash_request *req)
if (ret)
goto unmap_ctx;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
if (ret)
@@ -1333,13 +1293,12 @@ static int ahash_update_no_ctx(struct ahash_request *req)
*buflen = *next_buflen;
*next_buflen = 0;
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "buf@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
- *next_buflen, 1);
-#endif
+
+ print_hex_dump_debug("buf@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, buf, *buflen, 1);
+ print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
+ 1);
return ret;
unmap_ctx:
@@ -1414,10 +1373,9 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
if (ret)
goto unmap;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done, req);
if (!ret) {
@@ -1517,11 +1475,9 @@ static int ahash_update_first(struct ahash_request *req)
if (ret)
goto unmap_ctx;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc,
+ desc_bytes(desc), 1);
ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
if (ret)
@@ -1539,11 +1495,10 @@ static int ahash_update_first(struct ahash_request *req)
req->nbytes, 0);
switch_buf(state);
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "next buf@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, next_buf,
- *next_buflen, 1);
-#endif
+
+ print_hex_dump_debug("next buf@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, next_buf, *next_buflen,
+ 1);
return ret;
unmap_ctx:
@@ -1930,7 +1885,7 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
caam_jr_free(ctx->jrdev);
}
-static void __exit caam_algapi_hash_exit(void)
+void caam_algapi_hash_exit(void)
{
struct caam_hash_alg *t_alg, *n;
@@ -1988,40 +1943,13 @@ caam_hash_alloc(struct caam_hash_template *template,
return t_alg;
}
-static int __init caam_algapi_hash_init(void)
+int caam_algapi_hash_init(struct device *ctrldev)
{
- struct device_node *dev_node;
- struct platform_device *pdev;
int i = 0, err = 0;
- struct caam_drv_private *priv;
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
unsigned int md_limit = SHA512_DIGEST_SIZE;
u32 md_inst, md_vid;
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
- if (!dev_node) {
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
- if (!dev_node)
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(dev_node);
- if (!pdev) {
- of_node_put(dev_node);
- return -ENODEV;
- }
-
- priv = dev_get_drvdata(&pdev->dev);
- of_node_put(dev_node);
-
- /*
- * If priv is NULL, it's probably because the caam driver wasn't
- * properly initialized (e.g. RNG4 init failed). Thus, bail out here.
- */
- if (!priv) {
- err = -ENODEV;
- goto out_put_dev;
- }
-
/*
* Register crypto algorithms the device supports. First, identify
* presence and attributes of MD block.
@@ -2042,10 +1970,8 @@ static int __init caam_algapi_hash_init(void)
* Skip registration of any hashing algorithms if MD block
* is not present.
*/
- if (!md_inst) {
- err = -ENODEV;
- goto out_put_dev;
- }
+ if (!md_inst)
+ return -ENODEV;
/* Limit digest size based on LP256 */
if (md_vid == CHA_VER_VID_MD_LP256)
@@ -2102,14 +2028,5 @@ static int __init caam_algapi_hash_init(void)
list_add_tail(&t_alg->entry, &hash_list);
}
-out_put_dev:
- put_device(&pdev->dev);
return err;
}
-
-module_init(caam_algapi_hash_init);
-module_exit(caam_algapi_hash_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FSL CAAM support for ahash functions of crypto API");
-MODULE_AUTHOR("Freescale Semiconductor - NMG");
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index fe24485274e1..80574106af29 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -3,7 +3,7 @@
* caam - Freescale FSL CAAM support for Public Key Cryptography
*
* Copyright 2016 Freescale Semiconductor, Inc.
- * Copyright 2018 NXP
+ * Copyright 2018-2019 NXP
*
 * There is no Shared Descriptor for PKC, so the Job Descriptor must carry
 * all the desired key parameters, input and output pointers.
@@ -24,12 +24,18 @@
sizeof(struct rsa_priv_f2_pdb))
#define DESC_RSA_PRIV_F3_LEN (2 * CAAM_CMD_SZ + \
sizeof(struct rsa_priv_f3_pdb))
+#define CAAM_RSA_MAX_INPUT_SIZE 512 /* for a 4096-bit modulus */
+
+/* buffer filled with zeros, used for padding */
+static u8 *zero_buffer;
static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc,
struct akcipher_request *req)
{
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+
dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE);
- dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
+ dma_unmap_sg(dev, req_ctx->fixup_src, edesc->src_nents, DMA_TO_DEVICE);
if (edesc->sec4_sg_bytes)
dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes,
@@ -168,6 +174,13 @@ static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
akcipher_request_complete(req, err);
}
+/**
+ * caam_rsa_count_leading_zeros - count the leading zeros that need to be
+ *                                stripped from a given scatterlist
+ *
+ * @sgl : scatterlist to count zeros from
+ * @nbytes: maximum number of zero bytes to strip
+ * @flags : operation flags
+ */
static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
unsigned int nbytes,
unsigned int flags)
@@ -187,7 +200,8 @@ static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
lzeros = 0;
len = 0;
while (nbytes > 0) {
- while (len && !*buff) {
+ /* do not strip more than given bytes */
+ while (len && !*buff && lzeros < nbytes) {
lzeros++;
len--;
buff++;
@@ -218,6 +232,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct device *dev = ctx->dev;
struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+ struct caam_rsa_key *key = &ctx->key;
struct rsa_edesc *edesc;
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
GFP_KERNEL : GFP_ATOMIC;
@@ -225,22 +240,45 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
int sgc;
int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
int src_nents, dst_nents;
+ unsigned int diff_size = 0;
int lzeros;
- lzeros = caam_rsa_count_leading_zeros(req->src, req->src_len, sg_flags);
- if (lzeros < 0)
- return ERR_PTR(lzeros);
-
- req->src_len -= lzeros;
- req->src = scatterwalk_ffwd(req_ctx->src, req->src, lzeros);
+ if (req->src_len > key->n_sz) {
+ /*
+ * strip leading zeros and
+ * return the number of zeros to skip
+ */
+ lzeros = caam_rsa_count_leading_zeros(req->src, req->src_len -
+ key->n_sz, sg_flags);
+ if (lzeros < 0)
+ return ERR_PTR(lzeros);
+
+ req_ctx->fixup_src = scatterwalk_ffwd(req_ctx->src, req->src,
+ lzeros);
+ req_ctx->fixup_src_len = req->src_len - lzeros;
+ } else {
+ /*
+ * input src is less than the key modulus (n),
+ * so there will be zero padding
+ */
+ diff_size = key->n_sz - req->src_len;
+ req_ctx->fixup_src = req->src;
+ req_ctx->fixup_src_len = req->src_len;
+ }
- src_nents = sg_nents_for_len(req->src, req->src_len);
+ src_nents = sg_nents_for_len(req_ctx->fixup_src,
+ req_ctx->fixup_src_len);
dst_nents = sg_nents_for_len(req->dst, req->dst_len);
- if (src_nents > 1)
- sec4_sg_len = src_nents;
+ if (!diff_size && src_nents == 1)
+ sec4_sg_len = 0; /* no need for an input hw s/g table */
+ else
+ sec4_sg_len = src_nents + !!diff_size;
+ sec4_sg_index = sec4_sg_len;
if (dst_nents > 1)
- sec4_sg_len += dst_nents;
+ sec4_sg_len += pad_sg_nents(dst_nents);
+ else
+ sec4_sg_len = pad_sg_nents(sec4_sg_len);
sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
@@ -250,7 +288,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
if (!edesc)
return ERR_PTR(-ENOMEM);
- sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE);
+ sgc = dma_map_sg(dev, req_ctx->fixup_src, src_nents, DMA_TO_DEVICE);
if (unlikely(!sgc)) {
dev_err(dev, "unable to map source\n");
goto src_fail;
@@ -263,14 +301,16 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
}
edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen;
+ if (diff_size)
+ dma_to_sec4_sg_one(edesc->sec4_sg, ctx->padding_dma, diff_size,
+ 0);
+
+ if (sec4_sg_index)
+ sg_to_sec4_sg_last(req_ctx->fixup_src, req_ctx->fixup_src_len,
+ edesc->sec4_sg + !!diff_size, 0);
- sec4_sg_index = 0;
- if (src_nents > 1) {
- sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
- sec4_sg_index += src_nents;
- }
if (dst_nents > 1)
- sg_to_sec4_sg_last(req->dst, dst_nents,
+ sg_to_sec4_sg_last(req->dst, req->dst_len,
edesc->sec4_sg + sec4_sg_index, 0);
/* Save nents for later use in Job Descriptor */
@@ -289,12 +329,16 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
edesc->sec4_sg_bytes = sec4_sg_bytes;
+ print_hex_dump_debug("caampkc sec4_sg@" __stringify(__LINE__) ": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, edesc->sec4_sg,
+ edesc->sec4_sg_bytes, 1);
+
return edesc;
sec4_sg_fail:
dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE);
dst_fail:
- dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE);
+ dma_unmap_sg(dev, req_ctx->fixup_src, src_nents, DMA_TO_DEVICE);
src_fail:
kfree(edesc);
return ERR_PTR(-ENOMEM);
@@ -304,6 +348,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req,
struct rsa_edesc *edesc)
{
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct caam_rsa_key *key = &ctx->key;
struct device *dev = ctx->dev;
@@ -328,7 +373,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req,
pdb->f_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->src_nents;
} else {
- pdb->f_dma = sg_dma_address(req->src);
+ pdb->f_dma = sg_dma_address(req_ctx->fixup_src);
}
if (edesc->dst_nents > 1) {
@@ -340,7 +385,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req,
}
pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz;
- pdb->f_len = req->src_len;
+ pdb->f_len = req_ctx->fixup_src_len;
return 0;
}
@@ -373,7 +418,9 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
pdb->g_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->src_nents;
} else {
- pdb->g_dma = sg_dma_address(req->src);
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+
+ pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
}
if (edesc->dst_nents > 1) {
@@ -436,7 +483,9 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
pdb->g_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->src_nents;
} else {
- pdb->g_dma = sg_dma_address(req->src);
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+
+ pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
}
if (edesc->dst_nents > 1) {
@@ -523,7 +572,9 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
pdb->g_dma = edesc->sec4_sg_dma;
sec4_sg_index += edesc->src_nents;
} else {
- pdb->g_dma = sg_dma_address(req->src);
+ struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+
+ pdb->g_dma = sg_dma_address(req_ctx->fixup_src);
}
if (edesc->dst_nents > 1) {
@@ -978,6 +1029,15 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
return PTR_ERR(ctx->dev);
}
+ ctx->padding_dma = dma_map_single(ctx->dev, zero_buffer,
+ CAAM_RSA_MAX_INPUT_SIZE - 1,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ctx->dev, ctx->padding_dma)) {
+ dev_err(ctx->dev, "unable to map padding\n");
+ caam_jr_free(ctx->dev);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -987,6 +1047,8 @@ static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm)
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct caam_rsa_key *key = &ctx->key;
+ dma_unmap_single(ctx->dev, ctx->padding_dma, CAAM_RSA_MAX_INPUT_SIZE -
+ 1, DMA_TO_DEVICE);
caam_rsa_free_key(key);
caam_jr_free(ctx->dev);
}
@@ -1010,41 +1072,12 @@ static struct akcipher_alg caam_rsa = {
};
/* Public Key Cryptography module initialization handler */
-static int __init caam_pkc_init(void)
+int caam_pkc_init(struct device *ctrldev)
{
- struct device_node *dev_node;
- struct platform_device *pdev;
- struct device *ctrldev;
- struct caam_drv_private *priv;
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
u32 pk_inst;
int err;
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
- if (!dev_node) {
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
- if (!dev_node)
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(dev_node);
- if (!pdev) {
- of_node_put(dev_node);
- return -ENODEV;
- }
-
- ctrldev = &pdev->dev;
- priv = dev_get_drvdata(ctrldev);
- of_node_put(dev_node);
-
- /*
- * If priv is NULL, it's probably because the caam driver wasn't
- * properly initialized (e.g. RNG4 init failed). Thus, bail out here.
- */
- if (!priv) {
- err = -ENODEV;
- goto out_put_dev;
- }
-
/* Determine public key hardware accelerator presence. */
if (priv->era < 10)
pk_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) &
@@ -1053,31 +1086,29 @@ static int __init caam_pkc_init(void)
pk_inst = rd_reg32(&priv->ctrl->vreg.pkha) & CHA_VER_NUM_MASK;
/* Do not register algorithms if PKHA is not present. */
- if (!pk_inst) {
- err = -ENODEV;
- goto out_put_dev;
- }
+ if (!pk_inst)
+ return 0;
+
+ /* allocate zero buffer, used for padding input */
+ zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_DMA |
+ GFP_KERNEL);
+ if (!zero_buffer)
+ return -ENOMEM;
err = crypto_register_akcipher(&caam_rsa);
- if (err)
+ if (err) {
+ kfree(zero_buffer);
dev_warn(ctrldev, "%s alg registration failed\n",
caam_rsa.base.cra_driver_name);
- else
+ } else {
dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n");
+ }
-out_put_dev:
- put_device(ctrldev);
return err;
}
-static void __exit caam_pkc_exit(void)
+void caam_pkc_exit(void)
{
+ kfree(zero_buffer);
crypto_unregister_akcipher(&caam_rsa);
}
-
-module_init(caam_pkc_init);
-module_exit(caam_pkc_exit);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API");
-MODULE_AUTHOR("Freescale Semiconductor");
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index 82645bcf8b27..2c488c9a3812 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -89,18 +89,25 @@ struct caam_rsa_key {
* caam_rsa_ctx - per session context.
* @key : RSA key in DMA zone
* @dev : device structure
+ * @padding_dma : DMA address of the zero padding prepended to short inputs
*/
struct caam_rsa_ctx {
struct caam_rsa_key key;
struct device *dev;
+	dma_addr_t padding_dma;
};
/**
* caam_rsa_req_ctx - per request context.
- * @src: input scatterlist (stripped of leading zeros)
+ * @src : input scatterlist (stripped of leading zeros)
+ * @fixup_src : input scatterlist (that might be stripped of leading zeros)
+ * @fixup_src_len : length of the fixup_src input scatterlist
*/
struct caam_rsa_req_ctx {
struct scatterlist src[2];
+ struct scatterlist *fixup_src;
+ unsigned int fixup_src_len;
};
/**
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 95eb5402c59f..561bcb535184 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -3,7 +3,7 @@
* caam - Freescale FSL CAAM support for hw_random
*
* Copyright 2011 Freescale Semiconductor, Inc.
- * Copyright 2018 NXP
+ * Copyright 2018-2019 NXP
*
* Based on caamalg.c crypto API driver.
*
@@ -113,10 +113,8 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
/* Buffer refilled, invalidate cache */
dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "rng refreshed buf@: ",
- DUMP_PREFIX_ADDRESS, 16, 4, bd->buf, RN_BUF_SIZE, 1);
-#endif
+ print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ bd->buf, RN_BUF_SIZE, 1);
}
static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
@@ -209,10 +207,10 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
dev_err(jrdev, "unable to map shared descriptor\n");
return -ENOMEM;
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
-#endif
+
+ print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ desc, desc_bytes(desc), 1);
+
return 0;
}
@@ -233,10 +231,10 @@ static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
}
append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
-#endif
+
+ print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ desc, desc_bytes(desc), 1);
+
return 0;
}
@@ -296,47 +294,20 @@ static struct hwrng caam_rng = {
.read = caam_read,
};
-static void __exit caam_rng_exit(void)
+void caam_rng_exit(void)
{
caam_jr_free(rng_ctx->jrdev);
hwrng_unregister(&caam_rng);
kfree(rng_ctx);
}
-static int __init caam_rng_init(void)
+int caam_rng_init(struct device *ctrldev)
{
struct device *dev;
- struct device_node *dev_node;
- struct platform_device *pdev;
- struct caam_drv_private *priv;
u32 rng_inst;
+ struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
int err;
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
- if (!dev_node) {
- dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
- if (!dev_node)
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(dev_node);
- if (!pdev) {
- of_node_put(dev_node);
- return -ENODEV;
- }
-
- priv = dev_get_drvdata(&pdev->dev);
- of_node_put(dev_node);
-
- /*
- * If priv is NULL, it's probably because the caam driver wasn't
- * properly initialized (e.g. RNG4 init failed). Thus, bail out here.
- */
- if (!priv) {
- err = -ENODEV;
- goto out_put_dev;
- }
-
/* Check for an instantiated RNG before registration */
if (priv->era < 10)
rng_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) &
@@ -344,16 +315,13 @@ static int __init caam_rng_init(void)
else
rng_inst = rd_reg32(&priv->ctrl->vreg.rng) & CHA_VER_NUM_MASK;
- if (!rng_inst) {
- err = -ENODEV;
- goto out_put_dev;
- }
+ if (!rng_inst)
+ return 0;
dev = caam_jr_alloc();
if (IS_ERR(dev)) {
pr_err("Job Ring Device allocation for transform failed\n");
- err = PTR_ERR(dev);
- goto out_put_dev;
+ return PTR_ERR(dev);
}
rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
if (!rng_ctx) {
@@ -364,7 +332,6 @@ static int __init caam_rng_init(void)
if (err)
goto free_rng_ctx;
- put_device(&pdev->dev);
dev_info(dev, "registering rng-caam\n");
return hwrng_register(&caam_rng);
@@ -372,14 +339,5 @@ free_rng_ctx:
kfree(rng_ctx);
free_caam_alloc:
caam_jr_free(dev);
-out_put_dev:
- put_device(&pdev->dev);
return err;
}
-
-module_init(caam_rng_init);
-module_exit(caam_rng_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FSL CAAM support for hw_random API");
-MODULE_AUTHOR("Freescale Semiconductor - NMG");
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index fec39c35c877..4e43ca4d3656 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -3,7 +3,7 @@
* Controller-level driver, kernel property detection, initialization
*
* Copyright 2008-2012 Freescale Semiconductor, Inc.
- * Copyright 2018 NXP
+ * Copyright 2018-2019 NXP
*/
#include <linux/device.h>
@@ -323,8 +323,8 @@ static int caam_remove(struct platform_device *pdev)
of_platform_depopulate(ctrldev);
#ifdef CONFIG_CAAM_QI
- if (ctrlpriv->qidev)
- caam_qi_shutdown(ctrlpriv->qidev);
+ if (ctrlpriv->qi_init)
+ caam_qi_shutdown(ctrldev);
#endif
/*
@@ -540,7 +540,8 @@ static int caam_probe(struct platform_device *pdev)
ctrlpriv->caam_ipg = clk;
if (!of_machine_is_compatible("fsl,imx7d") &&
- !of_machine_is_compatible("fsl,imx7s")) {
+ !of_machine_is_compatible("fsl,imx7s") &&
+ !of_machine_is_compatible("fsl,imx7ulp")) {
clk = caam_drv_identify_clk(&pdev->dev, "mem");
if (IS_ERR(clk)) {
ret = PTR_ERR(clk);
@@ -562,7 +563,8 @@ static int caam_probe(struct platform_device *pdev)
if (!of_machine_is_compatible("fsl,imx6ul") &&
!of_machine_is_compatible("fsl,imx7d") &&
- !of_machine_is_compatible("fsl,imx7s")) {
+ !of_machine_is_compatible("fsl,imx7s") &&
+ !of_machine_is_compatible("fsl,imx7ulp")) {
clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
if (IS_ERR(clk)) {
ret = PTR_ERR(clk);
@@ -702,12 +704,7 @@ static int caam_probe(struct platform_device *pdev)
}
ctrlpriv->era = caam_get_era(ctrl);
-
- ret = of_platform_populate(nprop, caam_match, NULL, dev);
- if (ret) {
- dev_err(dev, "JR platform devices creation error\n");
- goto iounmap_ctrl;
- }
+ ctrlpriv->domain = iommu_get_domain_for_dev(dev);
#ifdef CONFIG_DEBUG_FS
/*
@@ -721,19 +718,6 @@ static int caam_probe(struct platform_device *pdev)
ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root);
#endif
- ring = 0;
- for_each_available_child_of_node(nprop, np)
- if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
- of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
- ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
- ((__force uint8_t *)ctrl +
- (ring + JR_BLOCK_NUMBER) *
- BLOCK_OFFSET
- );
- ctrlpriv->total_jobrs++;
- ring++;
- }
-
/* Check to see if (DPAA 1.x) QI present. If so, enable */
ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
if (ctrlpriv->qi_present && !caam_dpaa2) {
@@ -752,6 +736,25 @@ static int caam_probe(struct platform_device *pdev)
#endif
}
+ ret = of_platform_populate(nprop, caam_match, NULL, dev);
+ if (ret) {
+ dev_err(dev, "JR platform devices creation error\n");
+ goto shutdown_qi;
+ }
+
+ ring = 0;
+ for_each_available_child_of_node(nprop, np)
+ if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
+ of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
+ ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
+ ((__force uint8_t *)ctrl +
+ (ring + JR_BLOCK_NUMBER) *
+ BLOCK_OFFSET
+ );
+ ctrlpriv->total_jobrs++;
+ ring++;
+ }
+
/* If no QI and no rings specified, quit and go home */
if ((!ctrlpriv->qi_present) && (!ctrlpriv->total_jobrs)) {
dev_err(dev, "no queues configured, terminating\n");
@@ -898,6 +901,11 @@ caam_remove:
caam_remove(pdev);
return ret;
+shutdown_qi:
+#ifdef CONFIG_CAAM_QI
+ if (ctrlpriv->qi_init)
+ caam_qi_shutdown(dev);
+#endif
iounmap_ctrl:
iounmap(ctrl);
disable_caam_emi_slow:
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index 2980b8ef1fb1..5988a26a2441 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -3,6 +3,7 @@
* caam descriptor construction helper functions
*
* Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2019 NXP
*/
#ifndef DESC_CONSTR_H
@@ -37,6 +38,16 @@
extern bool caam_little_end;
+/*
+ * HW fetches 4 S/G table entries at a time, irrespective of how many entries
+ * are in the table. It's SW's responsibility to make sure these accesses
+ * do not have side effects.
+ */
+static inline int pad_sg_nents(int sg_nents)
+{
+ return ALIGN(sg_nents, 4);
+}
+
static inline int desc_len(u32 * const desc)
{
return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK;
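
Since the engine fetches S/G entries in bursts of four, every table sized in
the earlier hunks is rounded up through the new pad_sg_nents() helper. A
self-contained example of the rounding (userspace stand-in for the kernel's
ALIGN() macro):

#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static int pad_sg_nents(int sg_nents)
{
	return ALIGN(sg_nents, 4);
}

int main(void)
{
	/* 1..4 entries fit in one burst, 5..8 in two, and so on. */
	printf("%d %d %d %d\n", pad_sg_nents(1), pad_sg_nents(4),
	       pad_sg_nents(5), pad_sg_nents(9));	/* prints: 4 4 8 12 */
	return 0;
}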
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 4da844e4b61d..4f0d45865aa2 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -13,7 +13,7 @@
#ifdef DEBUG
#include <linux/highmem.h>
-void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+void caam_dump_sg(const char *prefix_str, int prefix_type,
int rowsize, int groupsize, struct scatterlist *sg,
size_t tlen, bool ascii)
{
@@ -35,15 +35,15 @@ void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
buf = it_page + it->offset;
len = min_t(size_t, tlen, it->length);
- print_hex_dump(level, prefix_str, prefix_type, rowsize,
- groupsize, buf, len, ascii);
+ print_hex_dump_debug(prefix_str, prefix_type, rowsize,
+ groupsize, buf, len, ascii);
tlen -= len;
kunmap_atomic(it_page);
}
}
#else
-void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+void caam_dump_sg(const char *prefix_str, int prefix_type,
int rowsize, int groupsize, struct scatterlist *sg,
size_t tlen, bool ascii)
{}
diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h
index 8c6b83e02a70..d9726e66edbf 100644
--- a/drivers/crypto/caam/error.h
+++ b/drivers/crypto/caam/error.h
@@ -17,7 +17,7 @@ void caam_strstatus(struct device *dev, u32 status, bool qi_v2);
#define caam_jr_strstatus(jrdev, status) caam_strstatus(jrdev, status, false)
#define caam_qi2_strstatus(qidev, status) caam_strstatus(qidev, status, true)
-void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+void caam_dump_sg(const char *prefix_str, int prefix_type,
int rowsize, int groupsize, struct scatterlist *sg,
size_t tlen, bool ascii);
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 3392615dc91b..6af84bbc612c 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -4,7 +4,7 @@
* Private/internal definitions between modules
*
* Copyright 2008-2011 Freescale Semiconductor, Inc.
- *
+ * Copyright 2019 NXP
*/
#ifndef INTERN_H
@@ -63,10 +63,6 @@ struct caam_drv_private_jr {
* Driver-private storage for a single CAAM block instance
*/
struct caam_drv_private {
-#ifdef CONFIG_CAAM_QI
- struct device *qidev;
-#endif
-
/* Physical-presence section */
struct caam_ctrl __iomem *ctrl; /* controller region */
struct caam_deco __iomem *deco; /* DECO/CCB views */
@@ -74,12 +70,17 @@ struct caam_drv_private {
struct caam_queue_if __iomem *qi; /* QI control region */
struct caam_job_ring __iomem *jr[4]; /* JobR's register space */
+ struct iommu_domain *domain;
+
/*
* Detected geometry block. Filled in from device tree if powerpc,
* or from register-based version detection code
*/
u8 total_jobrs; /* Total Job Rings in device */
u8 qi_present; /* Nonzero if QI present in device */
+#ifdef CONFIG_CAAM_QI
+ u8 qi_init; /* Nonzero if QI has been initialized */
+#endif
u8 mc_en; /* Nonzero if MC f/w is active */
int secvio_irq; /* Security violation interrupt number */
int virt_en; /* Virtualization enabled in CAAM */
@@ -107,8 +108,95 @@ struct caam_drv_private {
#endif
};
-void caam_jr_algapi_init(struct device *dev);
-void caam_jr_algapi_remove(struct device *dev);
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API
+
+int caam_algapi_init(struct device *dev);
+void caam_algapi_exit(void);
+
+#else
+
+static inline int caam_algapi_init(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_algapi_exit(void)
+{
+}
+
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API */
+
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API
+
+int caam_algapi_hash_init(struct device *dev);
+void caam_algapi_hash_exit(void);
+
+#else
+
+static inline int caam_algapi_hash_init(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_algapi_hash_exit(void)
+{
+}
+
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API */
+
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API
+
+int caam_pkc_init(struct device *dev);
+void caam_pkc_exit(void);
+
+#else
+
+static inline int caam_pkc_init(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_pkc_exit(void)
+{
+}
+
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API */
+
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API
+
+int caam_rng_init(struct device *dev);
+void caam_rng_exit(void);
+
+#else
+
+static inline int caam_rng_init(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_rng_exit(void)
+{
+}
+
+#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */
+
+#ifdef CONFIG_CAAM_QI
+
+int caam_qi_algapi_init(struct device *dev);
+void caam_qi_algapi_exit(void);
+
+#else
+
+static inline int caam_qi_algapi_init(struct device *dev)
+{
+ return 0;
+}
+
+static inline void caam_qi_algapi_exit(void)
+{
+}
+
+#endif /* CONFIG_CAAM_QI */
#ifdef CONFIG_DEBUG_FS
static int caam_debugfs_u64_get(void *data, u64 *val)
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 1de2562d0982..cea811fed320 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -4,6 +4,7 @@
* JobR backend functionality
*
* Copyright 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright 2019 NXP
*/
#include <linux/of_irq.h>
@@ -23,6 +24,43 @@ struct jr_driver_data {
} ____cacheline_aligned;
static struct jr_driver_data driver_data;
+static DEFINE_MUTEX(algs_lock);
+static unsigned int active_devs;
+
+static void register_algs(struct device *dev)
+{
+ mutex_lock(&algs_lock);
+
+ if (++active_devs != 1)
+ goto algs_unlock;
+
+ caam_algapi_init(dev);
+ caam_algapi_hash_init(dev);
+ caam_pkc_init(dev);
+ caam_rng_init(dev);
+ caam_qi_algapi_init(dev);
+
+algs_unlock:
+ mutex_unlock(&algs_lock);
+}
+
+static void unregister_algs(void)
+{
+ mutex_lock(&algs_lock);
+
+ if (--active_devs != 0)
+ goto algs_unlock;
+
+ caam_qi_algapi_exit();
+
+ caam_rng_exit();
+ caam_pkc_exit();
+ caam_algapi_hash_exit();
+ caam_algapi_exit();
+
+algs_unlock:
+ mutex_unlock(&algs_lock);
+}
static int caam_reset_hw_jr(struct device *dev)
{
@@ -109,6 +147,9 @@ static int caam_jr_remove(struct platform_device *pdev)
return -EBUSY;
}
+ /* Unregister JR-based RNG & crypto algorithms */
+ unregister_algs();
+
/* Remove the node from Physical JobR list maintained by driver */
spin_lock(&driver_data.jr_alloc_lock);
list_del(&jrpriv->list_node);
@@ -541,6 +582,8 @@ static int caam_jr_probe(struct platform_device *pdev)
atomic_set(&jrpriv->tfm_count, 0);
+ register_algs(jrdev->parent);
+
return 0;
}
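
register_algs()/unregister_algs() above implement a once-only pattern: only
the 0 -> 1 transition registers the algorithms and only the 1 -> 0 transition
unregisters them, so the crypto API sees the interfaces exactly once no
matter how many job rings probe. A minimal sketch of the same idiom, with
hypothetical subsys_init()/subsys_exit() hooks standing in for the
caam_*_init()/_exit() calls:

static DEFINE_MUTEX(refs_lock);
static unsigned int refs;

static void get_ref(struct device *dev)
{
	mutex_lock(&refs_lock);
	if (++refs == 1)		/* first user brings the subsystem up */
		subsys_init(dev);
	mutex_unlock(&refs_lock);
}

static void put_ref(void)
{
	mutex_lock(&refs_lock);
	if (--refs == 0)		/* last user tears it down */
		subsys_exit();
	mutex_unlock(&refs_lock);
}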
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index 8d0713fae6ac..48dd3536060d 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -16,9 +16,7 @@ void split_key_done(struct device *dev, u32 *desc, u32 err,
{
struct split_key_result *res = context;
-#ifdef DEBUG
- dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
-#endif
+ dev_dbg(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
if (err)
caam_jr_strstatus(dev, err);
@@ -55,12 +53,10 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
adata->keylen_pad = split_key_pad_len(adata->algtype &
OP_ALG_ALGSEL_MASK);
-#ifdef DEBUG
- dev_err(jrdev, "split keylen %d split keylen padded %d\n",
+ dev_dbg(jrdev, "split keylen %d split keylen padded %d\n",
adata->keylen, adata->keylen_pad);
- print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
-#endif
+ print_hex_dump_debug("ctx.key@" __stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
if (adata->keylen_pad > max_keylen)
return -EINVAL;
@@ -102,10 +98,9 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
append_fifo_store(desc, dma_addr, adata->keylen,
LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
+ print_hex_dump_debug("jobdesc@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc),
+ 1);
result.err = 0;
init_completion(&result.completion);
@@ -115,11 +110,10 @@ int gen_split_key(struct device *jrdev, u8 *key_out,
/* in progress */
wait_for_completion(&result.completion);
ret = result.err;
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, key_out,
- adata->keylen_pad, 1);
-#endif
+
+ print_hex_dump_debug("ctx.key@"__stringify(__LINE__)": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, key_out,
+ adata->keylen_pad, 1);
}
dma_unmap_single(jrdev, dma_addr, adata->keylen_pad, DMA_BIDIRECTIONAL);
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 9f08f84cca59..0fe618e3804a 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -4,7 +4,7 @@
* Queue Interface backend functionality
*
* Copyright 2013-2016 Freescale Semiconductor, Inc.
- * Copyright 2016-2017 NXP
+ * Copyright 2016-2017, 2019 NXP
*/
#include <linux/cpumask.h>
@@ -18,6 +18,7 @@
#include "desc_constr.h"
#define PREHDR_RSLS_SHIFT 31
+#define PREHDR_ABS BIT(25)
/*
* Use a reasonable backlog of frames (per CPU) as congestion threshold,
@@ -58,11 +59,9 @@ static DEFINE_PER_CPU(int, last_cpu);
/*
* caam_qi_priv - CAAM QI backend private params
* @cgr: QMan congestion group
- * @qi_pdev: platform device for QI backend
*/
struct caam_qi_priv {
struct qman_cgr cgr;
- struct platform_device *qi_pdev;
};
static struct caam_qi_priv qipriv ____cacheline_aligned;
@@ -95,6 +94,16 @@ static u64 times_congested;
*/
static struct kmem_cache *qi_cache;
+static void *caam_iova_to_virt(struct iommu_domain *domain,
+ dma_addr_t iova_addr)
+{
+ phys_addr_t phys_addr;
+
+ phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+ return phys_to_virt(phys_addr);
+}
+
int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
{
struct qm_fd fd;
@@ -135,6 +144,7 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
const struct qm_fd *fd;
struct caam_drv_req *drv_req;
struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+ struct caam_drv_private *priv = dev_get_drvdata(qidev);
fd = &msg->ern.fd;
@@ -143,7 +153,7 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq,
return;
}
- drv_req = (struct caam_drv_req *)phys_to_virt(qm_fd_addr_get64(fd));
+ drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
if (!drv_req) {
dev_err(qidev,
"Can't find original request for CAAM response\n");
@@ -346,6 +356,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
*/
drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
num_words);
+ drv_ctx->prehdr[1] = cpu_to_caam32(PREHDR_ABS);
memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
dma_sync_single_for_device(qidev, drv_ctx->context_a,
sizeof(drv_ctx->sh_desc) +
@@ -401,6 +412,7 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
*/
drv_ctx->prehdr[0] = cpu_to_caam32((1 << PREHDR_RSLS_SHIFT) |
num_words);
+ drv_ctx->prehdr[1] = cpu_to_caam32(PREHDR_ABS);
memcpy(drv_ctx->sh_desc, sh_desc, desc_bytes(sh_desc));
size = sizeof(drv_ctx->prehdr) + sizeof(drv_ctx->sh_desc);
hwdesc = dma_map_single(qidev, drv_ctx->prehdr, size,
@@ -488,7 +500,7 @@ EXPORT_SYMBOL(caam_drv_ctx_rel);
void caam_qi_shutdown(struct device *qidev)
{
int i;
- struct caam_qi_priv *priv = dev_get_drvdata(qidev);
+ struct caam_qi_priv *priv = &qipriv;
const cpumask_t *cpus = qman_affine_cpus();
for_each_cpu(i, cpus) {
@@ -506,8 +518,6 @@ void caam_qi_shutdown(struct device *qidev)
qman_release_cgrid(priv->cgr.cgrid);
kmem_cache_destroy(qi_cache);
-
- platform_device_unregister(priv->qi_pdev);
}
static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
@@ -550,6 +560,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
struct caam_drv_req *drv_req;
const struct qm_fd *fd;
struct device *qidev = &(raw_cpu_ptr(&pcpu_qipriv)->net_dev.dev);
+ struct caam_drv_private *priv = dev_get_drvdata(qidev);
u32 status;
if (caam_qi_napi_schedule(p, caam_napi))
@@ -572,7 +583,7 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
return qman_cb_dqrr_consume;
}
- drv_req = (struct caam_drv_req *)phys_to_virt(qm_fd_addr_get64(fd));
+ drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd));
if (unlikely(!drv_req)) {
dev_err(qidev,
"Can't find original request for caam response\n");
@@ -692,33 +703,17 @@ static void free_rsp_fqs(void)
int caam_qi_init(struct platform_device *caam_pdev)
{
int err, i;
- struct platform_device *qi_pdev;
struct device *ctrldev = &caam_pdev->dev, *qidev;
struct caam_drv_private *ctrlpriv;
const cpumask_t *cpus = qman_affine_cpus();
- static struct platform_device_info qi_pdev_info = {
- .name = "caam_qi",
- .id = PLATFORM_DEVID_NONE
- };
-
- qi_pdev_info.parent = ctrldev;
- qi_pdev_info.dma_mask = dma_get_mask(ctrldev);
- qi_pdev = platform_device_register_full(&qi_pdev_info);
- if (IS_ERR(qi_pdev))
- return PTR_ERR(qi_pdev);
- set_dma_ops(&qi_pdev->dev, get_dma_ops(ctrldev));
ctrlpriv = dev_get_drvdata(ctrldev);
- qidev = &qi_pdev->dev;
-
- qipriv.qi_pdev = qi_pdev;
- dev_set_drvdata(qidev, &qipriv);
+ qidev = ctrldev;
/* Initialize the congestion detection */
err = init_cgr(qidev);
if (err) {
dev_err(qidev, "CGR initialization failed: %d\n", err);
- platform_device_unregister(qi_pdev);
return err;
}
@@ -727,7 +722,6 @@ int caam_qi_init(struct platform_device *caam_pdev)
if (err) {
dev_err(qidev, "Can't allocate CAAM response FQs: %d\n", err);
free_rsp_fqs();
- platform_device_unregister(qi_pdev);
return err;
}
@@ -750,15 +744,11 @@ int caam_qi_init(struct platform_device *caam_pdev)
napi_enable(irqtask);
}
- /* Hook up QI device to parent controlling caam device */
- ctrlpriv->qidev = qidev;
-
qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0,
SLAB_CACHE_DMA, NULL);
if (!qi_cache) {
dev_err(qidev, "Can't allocate CAAM cache\n");
free_rsp_fqs();
- platform_device_unregister(qi_pdev);
return -ENOMEM;
}
@@ -766,6 +756,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
&times_congested, &caam_fops_u64_ro);
#endif
+
+ ctrlpriv->qi_init = 1;
dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
return 0;
}
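
caam_iova_to_virt() exists because, behind an IOMMU, the address the hardware
returns in a frame descriptor is an I/O virtual address rather than a CPU
physical address, so the old phys_to_virt() call computed garbage. An
annotated restatement of the helper's logic, assuming a linear-mapped
(lowmem) buffer as in the dequeue paths above:

static void *iova_to_virt(struct iommu_domain *domain, dma_addr_t iova)
{
	phys_addr_t phys;

	/* No domain attached: bus addresses are already physical. */
	phys = domain ? iommu_iova_to_phys(domain, iova) : iova;

	/* Valid only for linear-mapped buffers, as used by these hunks. */
	return phys_to_virt(phys);
}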
diff --git a/drivers/crypto/caam/sg_sw_qm.h b/drivers/crypto/caam/sg_sw_qm.h
index b3e1aaaeffea..d56cc7efbc13 100644
--- a/drivers/crypto/caam/sg_sw_qm.h
+++ b/drivers/crypto/caam/sg_sw_qm.h
@@ -54,15 +54,19 @@ static inline void dma_to_qm_sg_one_last_ext(struct qm_sg_entry *qm_sg_ptr,
* but does not have final bit; instead, returns last entry
*/
static inline struct qm_sg_entry *
-sg_to_qm_sg(struct scatterlist *sg, int sg_count,
+sg_to_qm_sg(struct scatterlist *sg, int len,
struct qm_sg_entry *qm_sg_ptr, u16 offset)
{
- while (sg_count && sg) {
- dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg),
- sg_dma_len(sg), offset);
+ int ent_len;
+
+ while (len) {
+ ent_len = min_t(int, sg_dma_len(sg), len);
+
+ dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg), ent_len,
+ offset);
qm_sg_ptr++;
sg = sg_next(sg);
- sg_count--;
+ len -= ent_len;
}
return qm_sg_ptr - 1;
}
@@ -71,10 +75,10 @@ sg_to_qm_sg(struct scatterlist *sg, int sg_count,
* convert scatterlist to h/w link table format
* scatterlist must have been previously dma mapped
*/
-static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count,
+static inline void sg_to_qm_sg_last(struct scatterlist *sg, int len,
struct qm_sg_entry *qm_sg_ptr, u16 offset)
{
- qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset);
+ qm_sg_ptr = sg_to_qm_sg(sg, len, qm_sg_ptr, offset);
qm_sg_entry_set_f(qm_sg_ptr, qm_sg_entry_get_len(qm_sg_ptr));
}
diff --git a/drivers/crypto/caam/sg_sw_qm2.h b/drivers/crypto/caam/sg_sw_qm2.h
index c9378402a5f8..b8b737d2b0ea 100644
--- a/drivers/crypto/caam/sg_sw_qm2.h
+++ b/drivers/crypto/caam/sg_sw_qm2.h
@@ -25,15 +25,19 @@ static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr,
* but does not have final bit; instead, returns last entry
*/
static inline struct dpaa2_sg_entry *
-sg_to_qm_sg(struct scatterlist *sg, int sg_count,
+sg_to_qm_sg(struct scatterlist *sg, int len,
struct dpaa2_sg_entry *qm_sg_ptr, u16 offset)
{
- while (sg_count && sg) {
- dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg),
- sg_dma_len(sg), offset);
+ int ent_len;
+
+ while (len) {
+ ent_len = min_t(int, sg_dma_len(sg), len);
+
+ dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg), ent_len,
+ offset);
qm_sg_ptr++;
sg = sg_next(sg);
- sg_count--;
+ len -= ent_len;
}
return qm_sg_ptr - 1;
}
@@ -42,11 +46,11 @@ sg_to_qm_sg(struct scatterlist *sg, int sg_count,
* convert scatterlist to h/w link table format
* scatterlist must have been previously dma mapped
*/
-static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count,
+static inline void sg_to_qm_sg_last(struct scatterlist *sg, int len,
struct dpaa2_sg_entry *qm_sg_ptr,
u16 offset)
{
- qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset);
+ qm_sg_ptr = sg_to_qm_sg(sg, len, qm_sg_ptr, offset);
dpaa2_sg_set_final(qm_sg_ptr, true);
}
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index dbfa9fce33e0..07e1ee99273b 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -35,11 +35,9 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset &
SEC4_SG_OFFSET_MASK);
}
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
- DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
- sizeof(struct sec4_sg_entry), 1);
-#endif
+
+ print_hex_dump_debug("sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4,
+ sec4_sg_ptr, sizeof(struct sec4_sg_entry), 1);
}
/*
@@ -47,15 +45,19 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
* but does not have final bit; instead, returns last entry
*/
static inline struct sec4_sg_entry *
-sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
+sg_to_sec4_sg(struct scatterlist *sg, int len,
struct sec4_sg_entry *sec4_sg_ptr, u16 offset)
{
- while (sg_count) {
- dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg),
- sg_dma_len(sg), offset);
+ int ent_len;
+
+ while (len) {
+ ent_len = min_t(int, sg_dma_len(sg), len);
+
+ dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), ent_len,
+ offset);
sec4_sg_ptr++;
sg = sg_next(sg);
- sg_count--;
+ len -= ent_len;
}
return sec4_sg_ptr - 1;
}
@@ -72,11 +74,11 @@ static inline void sg_to_sec4_set_last(struct sec4_sg_entry *sec4_sg_ptr)
* convert scatterlist to h/w link table format
* scatterlist must have been previously dma mapped
*/
-static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
+static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int len,
struct sec4_sg_entry *sec4_sg_ptr,
u16 offset)
{
- sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
+ sec4_sg_ptr = sg_to_sec4_sg(sg, len, sec4_sg_ptr, offset);
sg_to_sec4_set_last(sec4_sg_ptr);
}
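
All three S/G walkers (sg_sw_qm.h, sg_sw_qm2.h, sg_sw_sec4.h) switch from
counting entries to counting bytes, clamping the final entry with min_t() so
the hardware table never claims more data than the request covers. A
standalone sketch of the length-driven walk over a plain array of segments,
with hypothetical types standing in for the DMA-mapped scatterlist:

struct seg {		/* stand-in for a mapped scatterlist entry */
	unsigned long addr;
	unsigned int len;
};

struct hw_entry {	/* stand-in for a sec4/qm S/G table entry */
	unsigned long addr;
	unsigned int len;
};

/* Emit entries until 'len' bytes are covered; returns the last entry. */
static struct hw_entry *fill_hw_sg(const struct seg *sg, int len,
				   struct hw_entry *out)
{
	int ent_len;

	while (len) {
		/* clamp: the final entry may use only part of a segment */
		ent_len = sg->len < (unsigned int)len ? (int)sg->len : len;
		out->addr = sg->addr;
		out->len = ent_len;
		out++;
		sg++;
		len -= ent_len;
	}
	return out - 1;		/* caller marks this one as final */
}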
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index e9f4704494fb..ff3cb1f8f2b6 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -7,7 +7,6 @@
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <crypto/authenc.h>
-#include <crypto/crypto_wq.h>
#include <crypto/des.h>
#include <crypto/xts.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.h b/drivers/crypto/cavium/nitrox/nitrox_debugfs.h
index f177b79bbab0..09c4cf2513fb 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_debugfs.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NITROX_DEBUGFS_H
#define __NITROX_DEBUGFS_H
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.h b/drivers/crypto/cavium/nitrox/nitrox_mbx.h
index 5008399775a9..7c93d0282174 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NITROX_MBX_H
#define __NITROX_MBX_H
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index ea3d6de55ff6..58c6dddfc5e1 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -2,7 +2,7 @@
/*
* AMD Cryptographic Coprocessor (CCP) AES crypto API support
*
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
@@ -76,8 +76,7 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
return -EINVAL;
if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) ||
- (ctx->u.aes.mode == CCP_AES_MODE_CBC) ||
- (ctx->u.aes.mode == CCP_AES_MODE_CFB)) &&
+ (ctx->u.aes.mode == CCP_AES_MODE_CBC)) &&
(req->nbytes & (AES_BLOCK_SIZE - 1)))
return -EINVAL;
@@ -288,7 +287,7 @@ static struct ccp_aes_def aes_algs[] = {
.version = CCP_VERSION(3, 0),
.name = "cfb(aes)",
.driver_name = "cfb-aes-ccp",
- .blocksize = AES_BLOCK_SIZE,
+ .blocksize = 1,
.ivsize = AES_BLOCK_SIZE,
.alg_defaults = &ccp_aes_defaults,
},
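
The two ccp AES hunks are consistent with each other: CFB turns the block
cipher into a stream cipher, so the advertised crypto API blocksize drops to
1 and the whole-block length check is kept only for ECB and CBC. A minimal
sketch of the mode-dependent validation, with a hypothetical helper name:

#include <stdbool.h>

#define AES_BLOCK_SIZE	16

enum aes_mode { AES_MODE_ECB, AES_MODE_CBC, AES_MODE_CFB, AES_MODE_CTR };

/* Hypothetical helper: only block modes require whole-block requests. */
static bool aes_request_len_ok(enum aes_mode mode, unsigned int nbytes)
{
	if ((mode == AES_MODE_ECB || mode == AES_MODE_CBC) &&
	    (nbytes & (AES_BLOCK_SIZE - 1)))
		return false;	/* ECB/CBC: must be a multiple of 16 */
	return true;		/* CFB/CTR: any length is acceptable */
}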
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index cc3e96c4f5fb..f79eede71c62 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -32,56 +32,62 @@ struct ccp_tasklet_data {
};
/* Human-readable error strings */
+#define CCP_MAX_ERROR_CODE 64
static char *ccp_error_codes[] = {
"",
- "ERR 01: ILLEGAL_ENGINE",
- "ERR 02: ILLEGAL_KEY_ID",
- "ERR 03: ILLEGAL_FUNCTION_TYPE",
- "ERR 04: ILLEGAL_FUNCTION_MODE",
- "ERR 05: ILLEGAL_FUNCTION_ENCRYPT",
- "ERR 06: ILLEGAL_FUNCTION_SIZE",
- "ERR 07: Zlib_MISSING_INIT_EOM",
- "ERR 08: ILLEGAL_FUNCTION_RSVD",
- "ERR 09: ILLEGAL_BUFFER_LENGTH",
- "ERR 10: VLSB_FAULT",
- "ERR 11: ILLEGAL_MEM_ADDR",
- "ERR 12: ILLEGAL_MEM_SEL",
- "ERR 13: ILLEGAL_CONTEXT_ID",
- "ERR 14: ILLEGAL_KEY_ADDR",
- "ERR 15: 0xF Reserved",
- "ERR 16: Zlib_ILLEGAL_MULTI_QUEUE",
- "ERR 17: Zlib_ILLEGAL_JOBID_CHANGE",
- "ERR 18: CMD_TIMEOUT",
- "ERR 19: IDMA0_AXI_SLVERR",
- "ERR 20: IDMA0_AXI_DECERR",
- "ERR 21: 0x15 Reserved",
- "ERR 22: IDMA1_AXI_SLAVE_FAULT",
- "ERR 23: IDMA1_AIXI_DECERR",
- "ERR 24: 0x18 Reserved",
- "ERR 25: ZLIBVHB_AXI_SLVERR",
- "ERR 26: ZLIBVHB_AXI_DECERR",
- "ERR 27: 0x1B Reserved",
- "ERR 27: ZLIB_UNEXPECTED_EOM",
- "ERR 27: ZLIB_EXTRA_DATA",
- "ERR 30: ZLIB_BTYPE",
- "ERR 31: ZLIB_UNDEFINED_SYMBOL",
- "ERR 32: ZLIB_UNDEFINED_DISTANCE_S",
- "ERR 33: ZLIB_CODE_LENGTH_SYMBOL",
- "ERR 34: ZLIB _VHB_ILLEGAL_FETCH",
- "ERR 35: ZLIB_UNCOMPRESSED_LEN",
- "ERR 36: ZLIB_LIMIT_REACHED",
- "ERR 37: ZLIB_CHECKSUM_MISMATCH0",
- "ERR 38: ODMA0_AXI_SLVERR",
- "ERR 39: ODMA0_AXI_DECERR",
- "ERR 40: 0x28 Reserved",
- "ERR 41: ODMA1_AXI_SLVERR",
- "ERR 42: ODMA1_AXI_DECERR",
- "ERR 43: LSB_PARITY_ERR",
+ "ILLEGAL_ENGINE",
+ "ILLEGAL_KEY_ID",
+ "ILLEGAL_FUNCTION_TYPE",
+ "ILLEGAL_FUNCTION_MODE",
+ "ILLEGAL_FUNCTION_ENCRYPT",
+ "ILLEGAL_FUNCTION_SIZE",
+ "Zlib_MISSING_INIT_EOM",
+ "ILLEGAL_FUNCTION_RSVD",
+ "ILLEGAL_BUFFER_LENGTH",
+ "VLSB_FAULT",
+ "ILLEGAL_MEM_ADDR",
+ "ILLEGAL_MEM_SEL",
+ "ILLEGAL_CONTEXT_ID",
+ "ILLEGAL_KEY_ADDR",
+ "0xF Reserved",
+ "Zlib_ILLEGAL_MULTI_QUEUE",
+ "Zlib_ILLEGAL_JOBID_CHANGE",
+ "CMD_TIMEOUT",
+ "IDMA0_AXI_SLVERR",
+ "IDMA0_AXI_DECERR",
+ "0x15 Reserved",
+ "IDMA1_AXI_SLAVE_FAULT",
+ "IDMA1_AIXI_DECERR",
+ "0x18 Reserved",
+ "ZLIBVHB_AXI_SLVERR",
+ "ZLIBVHB_AXI_DECERR",
+ "0x1B Reserved",
+ "ZLIB_UNEXPECTED_EOM",
+ "ZLIB_EXTRA_DATA",
+ "ZLIB_BTYPE",
+ "ZLIB_UNDEFINED_SYMBOL",
+ "ZLIB_UNDEFINED_DISTANCE_S",
+ "ZLIB_CODE_LENGTH_SYMBOL",
+ "ZLIB _VHB_ILLEGAL_FETCH",
+ "ZLIB_UNCOMPRESSED_LEN",
+ "ZLIB_LIMIT_REACHED",
+ "ZLIB_CHECKSUM_MISMATCH0",
+ "ODMA0_AXI_SLVERR",
+ "ODMA0_AXI_DECERR",
+ "0x28 Reserved",
+ "ODMA1_AXI_SLVERR",
+ "ODMA1_AXI_DECERR",
};
-void ccp_log_error(struct ccp_device *d, int e)
+void ccp_log_error(struct ccp_device *d, unsigned int e)
{
- dev_err(d->dev, "CCP error: %s (0x%x)\n", ccp_error_codes[e], e);
+ if (WARN_ON(e >= CCP_MAX_ERROR_CODE))
+ return;
+
+ if (e < ARRAY_SIZE(ccp_error_codes))
+ dev_err(d->dev, "CCP error %d: %s\n", e, ccp_error_codes[e]);
+ else
+ dev_err(d->dev, "CCP error %d: Unknown Error\n", e);
}
/* List of CCPs, CCP count, read-write access lock, and access functions
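The ccp_log_error() rework above reduces to a bounds-checked table lookup: reject codes the hardware can never report, and fall back to a generic string for in-range codes that have no name yet. A minimal userspace sketch of that pattern, with an abridged, illustrative table rather than the driver's:

    #include <stdio.h>

    #define MAX_ERROR_CODE 64  /* hypothetical HW limit, mirrors CCP_MAX_ERROR_CODE */
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const char *error_codes[] = {
        "",
        "ILLEGAL_ENGINE",
        "ILLEGAL_KEY_ID",
        /* ... the table may be shorter than MAX_ERROR_CODE ... */
    };

    static void log_error(unsigned int e)
    {
        if (e >= MAX_ERROR_CODE)           /* value the HW can never report */
            return;
        if (e < ARRAY_SIZE(error_codes))
            printf("error %u: %s\n", e, error_codes[e]);
        else                               /* valid code, no string yet */
            printf("error %u: Unknown Error\n", e);
    }

    int main(void)
    {
        log_error(1);   /* known string */
        log_error(50);  /* in range, unnamed */
        log_error(99);  /* out of range, silently dropped */
        return 0;
    }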
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 90523a069bff..5e624920fd99 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -629,7 +629,7 @@ struct ccp5_desc {
void ccp_add_device(struct ccp_device *ccp);
void ccp_del_device(struct ccp_device *ccp);
-extern void ccp_log_error(struct ccp_device *, int);
+extern void ccp_log_error(struct ccp_device *, unsigned int);
struct ccp_device *ccp_alloc_struct(struct sp_device *sp);
bool ccp_queues_suspended(struct ccp_device *ccp);
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index db8de89d990f..866b2e05ca77 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -2,7 +2,7 @@
/*
* AMD Cryptographic Coprocessor (CCP) driver
*
- * Copyright (C) 2013,2018 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
* Author: Gary R Hook <gary.hook@amd.com>
@@ -890,8 +890,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
return -EINVAL;
if (((aes->mode == CCP_AES_MODE_ECB) ||
- (aes->mode == CCP_AES_MODE_CBC) ||
- (aes->mode == CCP_AES_MODE_CFB)) &&
+ (aes->mode == CCP_AES_MODE_CBC)) &&
(aes->src_len & (AES_BLOCK_SIZE - 1)))
return -EINVAL;
@@ -1264,6 +1263,9 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
int ret;
/* Error checks */
+ if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
+ return -EINVAL;
+
if (!cmd_q->ccp->vdata->perform->des3)
return -EINVAL;
@@ -1346,8 +1348,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
* passthru option to convert from big endian to little endian.
*/
if (des3->mode != CCP_DES3_MODE_ECB) {
- u32 load_mode;
-
op.sb_ctx = cmd_q->sb_ctx;
ret = ccp_init_dm_workarea(&ctx, cmd_q,
@@ -1363,12 +1363,8 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
if (ret)
goto e_ctx;
- if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
- load_mode = CCP_PASSTHRU_BYTESWAP_NOOP;
- else
- load_mode = CCP_PASSTHRU_BYTESWAP_256BIT;
ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
- load_mode);
+ CCP_PASSTHRU_BYTESWAP_256BIT);
if (ret) {
cmd->engine_error = cmd_q->cmd_error;
goto e_ctx;
@@ -1430,10 +1426,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
}
/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
- if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
- dm_offset = CCP_SB_BYTES - des3->iv_len;
- else
- dm_offset = 0;
ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
DES3_EDE_BLOCK_SIZE);
}
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 86ac7b443355..980aa04b655b 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -48,6 +48,7 @@ struct cc_hw_data {
};
#define CC_NUM_IDRS 4
+#define CC_HW_RESET_LOOP_COUNT 10
/* Note: PIDR3 holds CMOD/Rev so ignored for HW identification purposes */
static const u32 pidr_0124_offsets[CC_NUM_IDRS] = {
@@ -133,6 +134,9 @@ static irqreturn_t cc_isr(int irq, void *dev_id)
u32 imr;
/* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */
+ /* if the driver is suspended, return; probably a shared interrupt */
+ if (cc_pm_is_dev_suspended(dev))
+ return IRQ_NONE;
/* read the interrupt status */
irr = cc_ioread(drvdata, CC_REG(HOST_IRR));
@@ -188,6 +192,31 @@ static irqreturn_t cc_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
+bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata)
+{
+ unsigned int val;
+ unsigned int i;
+
+ /* 712/710/63 have no reset completion indication; always return true */
+ if (drvdata->hw_rev <= CC_HW_REV_712)
+ return true;
+
+ for (i = 0; i < CC_HW_RESET_LOOP_COUNT; i++) {
+ /* in cc7x3, NVM_IS_IDLE indicates that the CC reset has
+ * completed and the device is fully functional
+ */
+ val = cc_ioread(drvdata, CC_REG(NVM_IS_IDLE));
+ if (val & CC_NVM_IS_IDLE_MASK) {
+ /* hw indicates reset completed */
+ return true;
+ }
+ /* allow another process to be scheduled on the processor */
+ schedule();
+ }
+ /* reset not completed */
+ return false;
+}
+
int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe)
{
unsigned int val, cache_params;
@@ -315,15 +344,6 @@ static int init_cc_resources(struct platform_device *plat_dev)
return new_drvdata->irq;
}
- rc = devm_request_irq(dev, new_drvdata->irq, cc_isr,
- IRQF_SHARED, "ccree", new_drvdata);
- if (rc) {
- dev_err(dev, "Could not register to interrupt %d\n",
- new_drvdata->irq);
- return rc;
- }
- dev_dbg(dev, "Registered to IRQ: %d\n", new_drvdata->irq);
-
init_completion(&new_drvdata->hw_queue_avail);
if (!plat_dev->dev.dma_mask)
@@ -352,6 +372,11 @@ static int init_cc_resources(struct platform_device *plat_dev)
new_drvdata->sec_disabled = cc_sec_disable;
+ /* wait for Cryptocell reset completion */
+ if (!cc_wait_for_reset_completion(new_drvdata)) {
+ dev_err(dev, "Cryptocell reset not completed\n");
+ }
+
if (hw_rev->rev <= CC_HW_REV_712) {
/* Verify correct mapping */
val = cc_ioread(new_drvdata, new_drvdata->sig_offset);
@@ -383,6 +408,24 @@ static int init_cc_resources(struct platform_device *plat_dev)
}
sig_cidr = val;
+ /* Check HW engine configuration */
+ val = cc_ioread(new_drvdata, CC_REG(HOST_REMOVE_INPUT_PINS));
+ switch (val) {
+ case CC_PINS_FULL:
+ /* This is fine */
+ break;
+ case CC_PINS_SLIM:
+ if (new_drvdata->std_bodies & CC_STD_NIST) {
+ dev_warn(dev, "703 mode forced due to HW configuration.\n");
+ new_drvdata->std_bodies = CC_STD_OSCCA;
+ }
+ break;
+ default:
+ dev_err(dev, "Unsupported engines configration.\n");
+ rc = -EINVAL;
+ goto post_clk_err;
+ }
+
/* Check security disable state */
val = cc_ioread(new_drvdata, CC_REG(SECURITY_DISABLED));
val &= CC_SECURITY_DISABLED_MASK;
@@ -401,6 +444,15 @@ static int init_cc_resources(struct platform_device *plat_dev)
/* Display HW versions */
dev_info(dev, "ARM CryptoCell %s Driver: HW version 0x%08X/0x%8X, Driver version %s\n",
hw_rev->name, hw_rev_pidr, sig_cidr, DRV_MODULE_VERSION);
+ /* register the driver ISR */
+ rc = devm_request_irq(dev, new_drvdata->irq, cc_isr,
+ IRQF_SHARED, "ccree", new_drvdata);
+ if (rc) {
+ dev_err(dev, "Could not register to interrupt %d\n",
+ new_drvdata->irq);
+ goto post_clk_err;
+ }
+ dev_dbg(dev, "Registered to IRQ: %d\n", new_drvdata->irq);
rc = init_cc_regs(new_drvdata, true);
if (rc) {
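cc_wait_for_reset_completion() above is a bounded poll: read an idle flag a fixed number of times, yield between reads, and report failure rather than spin forever. A rough userspace sketch of that shape, where read_idle_flag() and sched_yield() are stand-ins for the register read and schedule():

    #include <sched.h>
    #include <stdbool.h>

    #define RESET_LOOP_COUNT 10  /* mirrors CC_HW_RESET_LOOP_COUNT */

    /* Stand-in for cc_ioread(..., CC_REG(NVM_IS_IDLE)) & CC_NVM_IS_IDLE_MASK. */
    static bool read_idle_flag(void)
    {
        static int reads;
        return ++reads >= 3;  /* pretend the HW goes idle on the third read */
    }

    static bool wait_for_reset_completion(void)
    {
        for (int i = 0; i < RESET_LOOP_COUNT; i++) {
            if (read_idle_flag())
                return true;   /* hardware signalled completion */
            sched_yield();     /* like schedule(): let others run */
        }
        return false;          /* bounded: give up, let the caller decide */
    }

    int main(void)
    {
        return wait_for_reset_completion() ? 0 : 1;
    }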
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index b76181335c08..7cd99380bf1f 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -53,6 +53,9 @@ enum cc_std_body {
#define CC_COHERENT_CACHE_PARAMS 0xEEE
+#define CC_PINS_FULL 0x0
+#define CC_PINS_SLIM 0x9F
+
/* Maximum DMA mask supported by IP */
#define DMA_BIT_MASK_LEN 48
@@ -67,6 +70,8 @@ enum cc_std_body {
#define CC_SECURITY_DISABLED_MASK BIT(CC_SECURITY_DISABLED_VALUE_BIT_SHIFT)
+#define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT)
+
#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \
CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \
CC_AXIM_MON_COMP_VALUE_BIT_SHIFT)
@@ -216,6 +221,7 @@ static inline void dump_byte_array(const char *name, const u8 *the_array,
__dump_byte_array(name, the_array, size);
}
+bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata);
int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe);
void fini_cc_regs(struct cc_drvdata *drvdata);
int cc_clk_on(struct cc_drvdata *drvdata);
diff --git a/drivers/crypto/ccree/cc_host_regs.h b/drivers/crypto/ccree/cc_host_regs.h
index d0764147573f..efe3e1d8b87b 100644
--- a/drivers/crypto/ccree/cc_host_regs.h
+++ b/drivers/crypto/ccree/cc_host_regs.h
@@ -114,6 +114,9 @@
#define CC_HOST_ICR_DSCRPTR_WATERMARK_QUEUE0_CLEAR_BIT_SIZE 0x1UL
#define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SHIFT 0x17UL
#define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SIZE 0x1UL
+#define CC_NVM_IS_IDLE_REG_OFFSET 0x0A10UL
+#define CC_NVM_IS_IDLE_VALUE_BIT_SHIFT 0x0UL
+#define CC_NVM_IS_IDLE_VALUE_BIT_SIZE 0x1UL
#define CC_SECURITY_DISABLED_REG_OFFSET 0x0A1CUL
#define CC_SECURITY_DISABLED_VALUE_BIT_SHIFT 0x0UL
#define CC_SECURITY_DISABLED_VALUE_BIT_SIZE 0x1UL
@@ -203,6 +206,23 @@
#define CC_HOST_POWER_DOWN_EN_REG_OFFSET 0xA78UL
#define CC_HOST_POWER_DOWN_EN_VALUE_BIT_SHIFT 0x0UL
#define CC_HOST_POWER_DOWN_EN_VALUE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REG_OFFSET 0x0A7CUL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_AES_ENGINE_BIT_SHIFT 0x0UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_AES_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_AES_MAC_ENGINE_BIT_SHIFT 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_AES_MAC_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_GHASH_ENGINE_BIT_SHIFT 0x2UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_GHASH_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_DES_ENGINE_BIT_SHIFT 0x3UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_DES_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_HASH_ENGINE_BIT_SHIFT 0x4UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_HASH_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_SM3_ENGINE_BIT_SHIFT 0x5UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_SM3_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_SM4_ENGINE_BIT_SHIFT 0x6UL
+#define CC_HOST_REMOVE_INPUT_PINS_REMOVE_SM4_ENGINE_BIT_SIZE 0x1UL
+#define CC_HOST_REMOVE_INPUT_PINS_OTP_DISCONNECTED_BIT_SHIFT 0x7UL
+#define CC_HOST_REMOVE_INPUT_PINS_OTP_DISCONNECTED_BIT_SIZE 0x1UL
// --------------------------------------
// BLOCK: ID_REGISTERS
// --------------------------------------
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
index 2dad9c9543c6..899a52f05b7a 100644
--- a/drivers/crypto/ccree/cc_pm.c
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -49,6 +49,11 @@ int cc_pm_resume(struct device *dev)
dev_err(dev, "failed getting clock back on. We're toast.\n");
return rc;
}
+ /* wait for Cryptocell reset completion */
+ if (!cc_wait_for_reset_completion(drvdata)) {
+ dev_err(dev, "Cryptocell reset not completed\n");
+ return -EBUSY;
+ }
cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE);
rc = init_cc_regs(drvdata, false);
@@ -101,6 +106,12 @@ int cc_pm_put_suspend(struct device *dev)
return rc;
}
+bool cc_pm_is_dev_suspended(struct device *dev)
+{
+ /* check device state using the runtime PM API */
+ return pm_runtime_suspended(dev);
+}
+
int cc_pm_init(struct cc_drvdata *drvdata)
{
struct device *dev = drvdata_to_dev(drvdata);
diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h
index 6190cdba5dad..a7d98a5da2e1 100644
--- a/drivers/crypto/ccree/cc_pm.h
+++ b/drivers/crypto/ccree/cc_pm.h
@@ -22,6 +22,7 @@ int cc_pm_suspend(struct device *dev);
int cc_pm_resume(struct device *dev);
int cc_pm_get(struct device *dev);
int cc_pm_put_suspend(struct device *dev);
+bool cc_pm_is_dev_suspended(struct device *dev);
#else
@@ -54,6 +55,12 @@ static inline int cc_pm_put_suspend(struct device *dev)
return 0;
}
+static inline bool cc_pm_is_dev_suspended(struct device *dev)
+{
+ /* if PM is not supported, the device is never suspended */
+ return false;
+}
+
#endif
#endif /*__POWER_MGR_H__*/
diff --git a/drivers/crypto/hisilicon/sec/sec_drv.h b/drivers/crypto/hisilicon/sec/sec_drv.h
index 2d2f186674ba..4d9063a8b10b 100644
--- a/drivers/crypto/hisilicon/sec/sec_drv.h
+++ b/drivers/crypto/hisilicon/sec/sec_drv.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2016-2017 Hisilicon Limited. */
#ifndef _SEC_DRV_H_
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 86c699c14f84..df43a2c6933b 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -398,6 +398,12 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
/* Processing Engine configuration */
+ /* Token & context configuration */
+ val = EIP197_PE_EIP96_TOKEN_CTRL_CTX_UPDATES |
+ EIP197_PE_EIP96_TOKEN_CTRL_REUSE_CTX |
+ EIP197_PE_EIP96_TOKEN_CTRL_POST_REUSE_CTX;
+ writel(val, EIP197_PE(priv) + EIP197_PE_EIP96_TOKEN_CTRL(pe));
+
/* H/W capabilities selection */
val = EIP197_FUNCTION_RSVD;
val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
@@ -589,9 +595,9 @@ inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
if (rdesc->result_data.error_code & 0x407f) {
/* Fatal error (bits 0-7, 14) */
dev_err(priv->dev,
- "cipher: result: result descriptor error (%d)\n",
+ "cipher: result: result descriptor error (0x%x)\n",
rdesc->result_data.error_code);
- return -EIO;
+ return -EINVAL;
} else if (rdesc->result_data.error_code == BIT(9)) {
/* Authentication failed */
return -EBADMSG;
@@ -720,11 +726,10 @@ handle_results:
}
acknowledge:
- if (i) {
+ if (i)
writel(EIP197_xDR_PROC_xD_PKT(i) |
EIP197_xDR_PROC_xD_COUNT(tot_descs * priv->config.rd_offset),
EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_PROC_COUNT);
- }
/* If the number of requests overflowed the counter, try to proceed more
* requests.
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 65624a81f0fd..e0c202f33674 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -118,6 +118,7 @@
#define EIP197_PE_ICE_SCRATCH_CTRL(n) (0x0d04 + (0x2000 * (n)))
#define EIP197_PE_ICE_FPP_CTRL(n) (0x0d80 + (0x2000 * (n)))
#define EIP197_PE_ICE_RAM_CTRL(n) (0x0ff0 + (0x2000 * (n)))
+#define EIP197_PE_EIP96_TOKEN_CTRL(n) (0x1000 + (0x2000 * (n)))
#define EIP197_PE_EIP96_FUNCTION_EN(n) (0x1004 + (0x2000 * (n)))
#define EIP197_PE_EIP96_CONTEXT_CTRL(n) (0x1008 + (0x2000 * (n)))
#define EIP197_PE_EIP96_CONTEXT_STAT(n) (0x100c + (0x2000 * (n)))
@@ -249,6 +250,11 @@
#define EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN BIT(0)
#define EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN BIT(1)
+/* EIP197_PE_EIP96_TOKEN_CTRL */
+#define EIP197_PE_EIP96_TOKEN_CTRL_CTX_UPDATES BIT(16)
+#define EIP197_PE_EIP96_TOKEN_CTRL_REUSE_CTX BIT(19)
+#define EIP197_PE_EIP96_TOKEN_CTRL_POST_REUSE_CTX BIT(20)
+
/* EIP197_PE_EIP96_FUNCTION_EN */
#define EIP197_FUNCTION_RSVD (BIT(6) | BIT(15) | BIT(20) | BIT(23))
#define EIP197_PROTOCOL_HASH_ONLY BIT(0)
@@ -333,6 +339,7 @@ struct safexcel_context_record {
#define CONTEXT_CONTROL_IV3 BIT(8)
#define CONTEXT_CONTROL_DIGEST_CNT BIT(9)
#define CONTEXT_CONTROL_COUNTER_MODE BIT(10)
+#define CONTEXT_CONTROL_CRYPTO_STORE BIT(12)
#define CONTEXT_CONTROL_HASH_STORE BIT(19)
/* The hash counter given to the engine in the context has a granularity of
@@ -425,6 +432,10 @@ struct safexcel_token {
#define EIP197_TOKEN_HASH_RESULT_VERIFY BIT(16)
+#define EIP197_TOKEN_CTX_OFFSET(x) (x)
+#define EIP197_TOKEN_DIRECTION_EXTERNAL BIT(11)
+#define EIP197_TOKEN_EXEC_IF_SUCCESSFUL (0x1 << 12)
+
#define EIP197_TOKEN_STAT_LAST_HASH BIT(0)
#define EIP197_TOKEN_STAT_LAST_PACKET BIT(1)
#define EIP197_TOKEN_OPCODE_DIRECTION 0x0
@@ -432,6 +443,7 @@ struct safexcel_token {
#define EIP197_TOKEN_OPCODE_NOOP EIP197_TOKEN_OPCODE_INSERT
#define EIP197_TOKEN_OPCODE_RETRIEVE 0x4
#define EIP197_TOKEN_OPCODE_VERIFY 0xd
+#define EIP197_TOKEN_OPCODE_CTX_ACCESS 0xe
#define EIP197_TOKEN_OPCODE_BYPASS GENMASK(3, 0)
static inline void eip197_noop_token(struct safexcel_token *token)
@@ -442,6 +454,8 @@ static inline void eip197_noop_token(struct safexcel_token *token)
/* Instructions */
#define EIP197_TOKEN_INS_INSERT_HASH_DIGEST 0x1c
+#define EIP197_TOKEN_INS_ORIGIN_IV0 0x14
+#define EIP197_TOKEN_INS_ORIGIN_LEN(x) ((x) << 5)
#define EIP197_TOKEN_INS_TYPE_OUTPUT BIT(5)
#define EIP197_TOKEN_INS_TYPE_HASH BIT(6)
#define EIP197_TOKEN_INS_TYPE_CRYTO BIT(7)
@@ -468,6 +482,7 @@ struct safexcel_control_data_desc {
#define EIP197_OPTION_MAGIC_VALUE BIT(0)
#define EIP197_OPTION_64BIT_CTX BIT(1)
+#define EIP197_OPTION_RC_AUTO (0x2 << 3)
#define EIP197_OPTION_CTX_CTRL_IN_CMD BIT(8)
#define EIP197_OPTION_2_TOKEN_IV_CMD GENMASK(11, 10)
#define EIP197_OPTION_4_TOKEN_IV_CMD GENMASK(11, 9)
@@ -629,7 +644,7 @@ struct safexcel_ahash_export_state {
u32 digest;
u32 state[SHA512_DIGEST_SIZE / sizeof(u32)];
- u8 cache[SHA512_BLOCK_SIZE];
+ u8 cache[SHA512_BLOCK_SIZE << 1];
};
/*
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index de4be10b172f..8cdbdbe35681 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -51,6 +51,8 @@ struct safexcel_cipher_ctx {
struct safexcel_cipher_req {
enum safexcel_cipher_direction direction;
+ /* Number of result descriptors associated with the request */
+ unsigned int rdescs;
bool needs_inv;
};
@@ -59,27 +61,26 @@ static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
u32 length)
{
struct safexcel_token *token;
- unsigned offset = 0;
+ u32 offset = 0, block_sz = 0;
if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
switch (ctx->alg) {
case SAFEXCEL_DES:
- offset = DES_BLOCK_SIZE / sizeof(u32);
- memcpy(cdesc->control_data.token, iv, DES_BLOCK_SIZE);
+ block_sz = DES_BLOCK_SIZE;
cdesc->control_data.options |= EIP197_OPTION_2_TOKEN_IV_CMD;
break;
case SAFEXCEL_3DES:
- offset = DES3_EDE_BLOCK_SIZE / sizeof(u32);
- memcpy(cdesc->control_data.token, iv, DES3_EDE_BLOCK_SIZE);
+ block_sz = DES3_EDE_BLOCK_SIZE;
cdesc->control_data.options |= EIP197_OPTION_2_TOKEN_IV_CMD;
break;
-
case SAFEXCEL_AES:
- offset = AES_BLOCK_SIZE / sizeof(u32);
- memcpy(cdesc->control_data.token, iv, AES_BLOCK_SIZE);
+ block_sz = AES_BLOCK_SIZE;
cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
break;
}
+
+ offset = block_sz / sizeof(u32);
+ memcpy(cdesc->control_data.token, iv, block_sz);
}
token = (struct safexcel_token *)(cdesc->control_data.token + offset);
@@ -91,6 +92,25 @@ static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
token[0].instructions = EIP197_TOKEN_INS_LAST |
EIP197_TOKEN_INS_TYPE_CRYTO |
EIP197_TOKEN_INS_TYPE_OUTPUT;
+
+ if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+ u32 last = (EIP197_MAX_TOKENS - 1) - offset;
+
+ token[last].opcode = EIP197_TOKEN_OPCODE_CTX_ACCESS;
+ token[last].packet_length = EIP197_TOKEN_DIRECTION_EXTERNAL |
+ EIP197_TOKEN_EXEC_IF_SUCCESSFUL|
+ EIP197_TOKEN_CTX_OFFSET(0x2);
+ token[last].stat = EIP197_TOKEN_STAT_LAST_HASH |
+ EIP197_TOKEN_STAT_LAST_PACKET;
+ token[last].instructions =
+ EIP197_TOKEN_INS_ORIGIN_LEN(block_sz / sizeof(u32)) |
+ EIP197_TOKEN_INS_ORIGIN_IV0;
+
+ /* Store the updated IV values back in the internal context
+ * registers.
+ */
+ cdesc->control_data.control1 |= CONTEXT_CONTROL_CRYPTO_STORE;
+ }
}
static void safexcel_aead_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
@@ -333,7 +353,10 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin
*ret = 0;
- do {
+ if (unlikely(!sreq->rdescs))
+ return 0;
+
+ while (sreq->rdescs--) {
rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
if (IS_ERR(rdesc)) {
dev_err(priv->dev,
@@ -346,21 +369,15 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin
*ret = safexcel_rdesc_check_errors(priv, rdesc);
ndesc++;
- } while (!rdesc->last_seg);
+ }
safexcel_complete(priv, ring);
if (src == dst) {
- dma_unmap_sg(priv->dev, src,
- sg_nents_for_len(src, cryptlen),
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(priv->dev, src, sg_nents(src), DMA_BIDIRECTIONAL);
} else {
- dma_unmap_sg(priv->dev, src,
- sg_nents_for_len(src, cryptlen),
- DMA_TO_DEVICE);
- dma_unmap_sg(priv->dev, dst,
- sg_nents_for_len(dst, cryptlen),
- DMA_FROM_DEVICE);
+ dma_unmap_sg(priv->dev, src, sg_nents(src), DMA_TO_DEVICE);
+ dma_unmap_sg(priv->dev, dst, sg_nents(dst), DMA_FROM_DEVICE);
}
*should_complete = true;
@@ -385,26 +402,21 @@ static int safexcel_send_req(struct crypto_async_request *base, int ring,
int i, ret = 0;
if (src == dst) {
- nr_src = dma_map_sg(priv->dev, src,
- sg_nents_for_len(src, totlen),
+ nr_src = dma_map_sg(priv->dev, src, sg_nents(src),
DMA_BIDIRECTIONAL);
nr_dst = nr_src;
if (!nr_src)
return -EINVAL;
} else {
- nr_src = dma_map_sg(priv->dev, src,
- sg_nents_for_len(src, totlen),
+ nr_src = dma_map_sg(priv->dev, src, sg_nents(src),
DMA_TO_DEVICE);
if (!nr_src)
return -EINVAL;
- nr_dst = dma_map_sg(priv->dev, dst,
- sg_nents_for_len(dst, totlen),
+ nr_dst = dma_map_sg(priv->dev, dst, sg_nents(dst),
DMA_FROM_DEVICE);
if (!nr_dst) {
- dma_unmap_sg(priv->dev, src,
- sg_nents_for_len(src, totlen),
- DMA_TO_DEVICE);
+ dma_unmap_sg(priv->dev, src, nr_src, DMA_TO_DEVICE);
return -EINVAL;
}
}
@@ -454,7 +466,7 @@ static int safexcel_send_req(struct crypto_async_request *base, int ring,
/* result descriptors */
for_each_sg(dst, sg, nr_dst, i) {
- bool first = !i, last = (i == nr_dst - 1);
+ bool first = !i, last = sg_is_last(sg);
u32 len = sg_dma_len(sg);
rdesc = safexcel_add_rdesc(priv, ring, first, last,
@@ -483,16 +495,10 @@ cdesc_rollback:
safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
if (src == dst) {
- dma_unmap_sg(priv->dev, src,
- sg_nents_for_len(src, totlen),
- DMA_BIDIRECTIONAL);
+ dma_unmap_sg(priv->dev, src, nr_src, DMA_BIDIRECTIONAL);
} else {
- dma_unmap_sg(priv->dev, src,
- sg_nents_for_len(src, totlen),
- DMA_TO_DEVICE);
- dma_unmap_sg(priv->dev, dst,
- sg_nents_for_len(dst, totlen),
- DMA_FROM_DEVICE);
+ dma_unmap_sg(priv->dev, src, nr_src, DMA_TO_DEVICE);
+ dma_unmap_sg(priv->dev, dst, nr_dst, DMA_FROM_DEVICE);
}
return ret;
@@ -501,6 +507,7 @@ cdesc_rollback:
static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
int ring,
struct crypto_async_request *base,
+ struct safexcel_cipher_req *sreq,
bool *should_complete, int *ret)
{
struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
@@ -509,7 +516,10 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
*ret = 0;
- do {
+ if (unlikely(!sreq->rdescs))
+ return 0;
+
+ while (sreq->rdescs--) {
rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
if (IS_ERR(rdesc)) {
dev_err(priv->dev,
@@ -522,7 +532,7 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
*ret = safexcel_rdesc_check_errors(priv, rdesc);
ndesc++;
- } while (!rdesc->last_seg);
+ }
safexcel_complete(priv, ring);
@@ -560,16 +570,35 @@ static int safexcel_skcipher_handle_result(struct safexcel_crypto_priv *priv,
{
struct skcipher_request *req = skcipher_request_cast(async);
struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+ struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(async->tfm);
int err;
if (sreq->needs_inv) {
sreq->needs_inv = false;
- err = safexcel_handle_inv_result(priv, ring, async,
+ err = safexcel_handle_inv_result(priv, ring, async, sreq,
should_complete, ret);
} else {
err = safexcel_handle_req_result(priv, ring, async, req->src,
req->dst, req->cryptlen, sreq,
should_complete, ret);
+
+ if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+ u32 block_sz = 0;
+
+ switch (ctx->alg) {
+ case SAFEXCEL_DES:
+ block_sz = DES_BLOCK_SIZE;
+ break;
+ case SAFEXCEL_3DES:
+ block_sz = DES3_EDE_BLOCK_SIZE;
+ break;
+ case SAFEXCEL_AES:
+ block_sz = AES_BLOCK_SIZE;
+ break;
+ }
+
+ memcpy(req->iv, ctx->base.ctxr->data, block_sz);
+ }
}
return err;
@@ -587,7 +616,7 @@ static int safexcel_aead_handle_result(struct safexcel_crypto_priv *priv,
if (sreq->needs_inv) {
sreq->needs_inv = false;
- err = safexcel_handle_inv_result(priv, ring, async,
+ err = safexcel_handle_inv_result(priv, ring, async, sreq,
should_complete, ret);
} else {
err = safexcel_handle_req_result(priv, ring, async, req->src,
@@ -633,6 +662,8 @@ static int safexcel_skcipher_send(struct crypto_async_request *async, int ring,
ret = safexcel_send_req(async, ring, sreq, req->src,
req->dst, req->cryptlen, 0, 0, req->iv,
commands, results);
+
+ sreq->rdescs = *results;
return ret;
}
@@ -655,6 +686,7 @@ static int safexcel_aead_send(struct crypto_async_request *async, int ring,
req->cryptlen, req->assoclen,
crypto_aead_authsize(tfm), req->iv,
commands, results);
+ sreq->rdescs = *results;
return ret;
}
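The rdescs change above replaces scanning for a last-segment flag with explicit accounting: the send path records how many result descriptors it queued, and the completion path consumes exactly that many. A small runnable sketch of the idea, with structure and names simplified from the driver:

    #include <stdio.h>

    struct cipher_req {
        unsigned int rdescs;           /* set at send time */
    };

    static void send_req(struct cipher_req *req, unsigned int results)
    {
        /* ... descriptors pushed to the ring here ... */
        req->rdescs = results;         /* mirrors "sreq->rdescs = *results" */
    }

    static void complete_req(struct cipher_req *req)
    {
        if (!req->rdescs)
            return;                    /* nothing was queued for this request */
        while (req->rdescs--)
            printf("consume one result descriptor\n");
    }

    int main(void)
    {
        struct cipher_req req = { 0 };

        send_req(&req, 3);
        complete_req(&req);            /* prints three lines */
        return 0;
    }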
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index ac9282c1a5ec..a80a5e757b1f 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -41,19 +41,21 @@ struct safexcel_ahash_req {
u64 len[2];
u64 processed[2];
- u8 cache[SHA512_BLOCK_SIZE] __aligned(sizeof(u32));
+ u8 cache[SHA512_BLOCK_SIZE << 1] __aligned(sizeof(u32));
dma_addr_t cache_dma;
unsigned int cache_sz;
- u8 cache_next[SHA512_BLOCK_SIZE] __aligned(sizeof(u32));
+ u8 cache_next[SHA512_BLOCK_SIZE << 1] __aligned(sizeof(u32));
};
static inline u64 safexcel_queued_len(struct safexcel_ahash_req *req)
{
- if (req->len[1] > req->processed[1])
- return 0xffffffff - (req->len[0] - req->processed[0]);
+ u64 len, processed;
- return req->len[0] - req->processed[0];
+ len = (0xffffffff * req->len[1]) + req->len[0];
+ processed = (0xffffffff * req->processed[1]) + req->processed[0];
+
+ return len - processed;
}
static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
@@ -87,6 +89,9 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
cdesc->control_data.control0 |= ctx->alg;
cdesc->control_data.control0 |= req->digest;
+ if (!req->finish)
+ cdesc->control_data.control0 |= CONTEXT_CONTROL_NO_FINISH_HASH;
+
if (req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
if (req->processed[0] || req->processed[1]) {
if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_MD5)
@@ -105,9 +110,6 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
cdesc->control_data.control0 |= CONTEXT_CONTROL_RESTART_HASH;
}
- if (!req->finish)
- cdesc->control_data.control0 |= CONTEXT_CONTROL_NO_FINISH_HASH;
-
/*
* Copy the input digest if needed, and setup the context
* fields. Do this now as we need it to setup the first command
@@ -183,6 +185,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin
dma_unmap_single(priv->dev, sreq->cache_dma, sreq->cache_sz,
DMA_TO_DEVICE);
sreq->cache_dma = 0;
+ sreq->cache_sz = 0;
}
if (sreq->finish)
@@ -209,11 +212,15 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
struct safexcel_result_desc *rdesc;
struct scatterlist *sg;
- int i, extra, n_cdesc = 0, ret = 0;
- u64 queued, len, cache_len;
+ int i, extra = 0, n_cdesc = 0, ret = 0;
+ u64 queued, len, cache_len, cache_max;
+
+ cache_max = crypto_ahash_blocksize(ahash);
+ if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+ cache_max <<= 1;
queued = len = safexcel_queued_len(req);
- if (queued <= crypto_ahash_blocksize(ahash))
+ if (queued <= cache_max)
cache_len = queued;
else
cache_len = queued - areq->nbytes;
@@ -223,26 +230,23 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
* fit into full blocks, cache it for the next send() call.
*/
extra = queued & (crypto_ahash_blocksize(ahash) - 1);
+
+ if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC &&
+ extra < crypto_ahash_blocksize(ahash))
+ extra += crypto_ahash_blocksize(ahash);
+
+ /* If this is not the last request and the queued data
+ * is a multiple of a block, cache the last one for now.
+ */
if (!extra)
- /* If this is not the last request and the queued data
- * is a multiple of a block, cache the last one for now.
- */
extra = crypto_ahash_blocksize(ahash);
- if (extra) {
- sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
- req->cache_next, extra,
- areq->nbytes - extra);
-
- queued -= extra;
- len -= extra;
+ sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+ req->cache_next, extra,
+ areq->nbytes - extra);
- if (!queued) {
- *commands = 0;
- *results = 0;
- return 0;
- }
- }
+ queued -= extra;
+ len -= extra;
}
/* Add a command descriptor for the cached data, if any */
@@ -269,8 +273,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
}
/* Now handle the current ahash request buffer(s) */
- req->nents = dma_map_sg(priv->dev, areq->src,
- sg_nents_for_len(areq->src, areq->nbytes),
+ req->nents = dma_map_sg(priv->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
if (!req->nents) {
ret = -ENOMEM;
@@ -345,6 +348,7 @@ unmap_cache:
if (req->cache_dma) {
dma_unmap_single(priv->dev, req->cache_dma, req->cache_sz,
DMA_TO_DEVICE);
+ req->cache_dma = 0;
req->cache_sz = 0;
}
@@ -486,7 +490,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
struct safexcel_inv_result result = {};
int ring = ctx->base.ring;
- memset(req, 0, sizeof(struct ahash_request));
+ memset(req, 0, EIP197_AHASH_REQ_SIZE);
/* create invalidation request */
init_completion(&result.completion);
@@ -519,10 +523,9 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
/* safexcel_ahash_cache: cache data until at least one request can be sent to
* the engine, i.e. when there is at least one block size in the pipe.
*/
-static int safexcel_ahash_cache(struct ahash_request *areq)
+static int safexcel_ahash_cache(struct ahash_request *areq, u32 cache_max)
{
struct safexcel_ahash_req *req = ahash_request_ctx(areq);
- struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
u64 queued, cache_len;
/* queued: everything accepted by the driver which will be handled by
@@ -539,7 +542,7 @@ static int safexcel_ahash_cache(struct ahash_request *areq)
* In case there isn't enough bytes to proceed (less than a
* block size), cache the data until we have enough.
*/
- if (cache_len + areq->nbytes <= crypto_ahash_blocksize(ahash)) {
+ if (cache_len + areq->nbytes <= cache_max) {
sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
req->cache + cache_len,
areq->nbytes, 0);
@@ -599,6 +602,7 @@ static int safexcel_ahash_update(struct ahash_request *areq)
{
struct safexcel_ahash_req *req = ahash_request_ctx(areq);
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+ u32 cache_max;
/* If the request is 0 length, do nothing */
if (!areq->nbytes)
@@ -608,7 +612,11 @@ static int safexcel_ahash_update(struct ahash_request *areq)
if (req->len[0] < areq->nbytes)
req->len[1]++;
- safexcel_ahash_cache(areq);
+ cache_max = crypto_ahash_blocksize(ahash);
+ if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+ cache_max <<= 1;
+
+ safexcel_ahash_cache(areq, cache_max);
/*
* We're not doing partial updates when performing an hmac request.
@@ -621,7 +629,7 @@ static int safexcel_ahash_update(struct ahash_request *areq)
return safexcel_ahash_enqueue(areq);
if (!req->last_req &&
- safexcel_queued_len(req) > crypto_ahash_blocksize(ahash))
+ safexcel_queued_len(req) > cache_max)
return safexcel_ahash_enqueue(areq);
return 0;
@@ -678,6 +686,11 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out)
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
struct safexcel_ahash_req *req = ahash_request_ctx(areq);
struct safexcel_ahash_export_state *export = out;
+ u32 cache_sz;
+
+ cache_sz = crypto_ahash_blocksize(ahash);
+ if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+ cache_sz <<= 1;
export->len[0] = req->len[0];
export->len[1] = req->len[1];
@@ -687,7 +700,7 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out)
export->digest = req->digest;
memcpy(export->state, req->state, req->state_sz);
- memcpy(export->cache, req->cache, crypto_ahash_blocksize(ahash));
+ memcpy(export->cache, req->cache, cache_sz);
return 0;
}
@@ -697,12 +710,17 @@ static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
struct safexcel_ahash_req *req = ahash_request_ctx(areq);
const struct safexcel_ahash_export_state *export = in;
+ u32 cache_sz;
int ret;
ret = crypto_ahash_init(areq);
if (ret)
return ret;
+ cache_sz = crypto_ahash_blocksize(ahash);
+ if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+ cache_sz <<= 1;
+
req->len[0] = export->len[0];
req->len[1] = export->len[1];
req->processed[0] = export->processed[0];
@@ -710,7 +728,7 @@ static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
req->digest = export->digest;
- memcpy(req->cache, export->cache, crypto_ahash_blocksize(ahash));
+ memcpy(req->cache, export->cache, cache_sz);
memcpy(req->state, export->state, req->state_sz);
return 0;
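The fixed safexcel_queued_len() reconstructs each 64-bit total from its (hi, lo) pair before subtracting, using the driver's own carry base of 0xffffffff rather than 2^32. A runnable sketch of the arithmetic:

    #include <inttypes.h>
    #include <stdio.h>

    /* Both totals carry at 0xffffffff, so each is rebuilt as
     * hi * 0xffffffff + lo before the subtraction. */
    static uint64_t queued_len(const uint64_t len[2], const uint64_t processed[2])
    {
        uint64_t l = 0xffffffffULL * len[1] + len[0];
        uint64_t p = 0xffffffffULL * processed[1] + processed[0];

        return l - p;
    }

    int main(void)
    {
        uint64_t len[2]       = { 100, 1 };  /* lo = 100, one carry */
        uint64_t processed[2] = { 50, 1 };

        printf("queued = %" PRIu64 "\n", queued_len(len, processed)); /* 50 */
        return 0;
    }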
diff --git a/drivers/crypto/inside-secure/safexcel_ring.c b/drivers/crypto/inside-secure/safexcel_ring.c
index eb75fa684876..142bc3f5c45c 100644
--- a/drivers/crypto/inside-secure/safexcel_ring.c
+++ b/drivers/crypto/inside-secure/safexcel_ring.c
@@ -145,6 +145,9 @@ struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *pr
(lower_32_bits(context) & GENMASK(31, 2)) >> 2;
cdesc->control_data.context_hi = upper_32_bits(context);
+ if (priv->version == EIP197B || priv->version == EIP197D)
+ cdesc->control_data.options |= EIP197_OPTION_RC_AUTO;
+
/* TODO: large xform HMAC with SHA-384/512 uses refresh = 3 */
cdesc->control_data.refresh = 2;
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index e5cf3a59c420..acedafe3fa98 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -100,7 +100,7 @@ struct buffer_desc {
u16 pkt_len;
u16 buf_len;
#endif
- u32 phys_addr;
+ dma_addr_t phys_addr;
u32 __reserved[4];
struct buffer_desc *next;
enum dma_data_direction dir;
@@ -117,9 +117,9 @@ struct crypt_ctl {
u8 mode; /* NPE_OP_* operation mode */
#endif
u8 iv[MAX_IVLEN]; /* IV for CBC mode or CTR IV for CTR mode */
- u32 icv_rev_aes; /* icv or rev aes */
- u32 src_buf;
- u32 dst_buf;
+ dma_addr_t icv_rev_aes; /* icv or rev aes */
+ dma_addr_t src_buf;
+ dma_addr_t dst_buf;
#ifdef __ARMEB__
u16 auth_offs; /* Authentication start offset */
u16 auth_len; /* Authentication data length */
@@ -320,7 +320,8 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
}
}
-static void free_buf_chain(struct device *dev, struct buffer_desc *buf,u32 phys)
+static void free_buf_chain(struct device *dev, struct buffer_desc *buf,
+ dma_addr_t phys)
{
while (buf) {
struct buffer_desc *buf1;
@@ -602,7 +603,7 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
struct buffer_desc *buf;
int i;
u8 *pad;
- u32 pad_phys, buf_phys;
+ dma_addr_t pad_phys, buf_phys;
BUILD_BUG_ON(NPE_CTX_LEN < HMAC_PAD_BLOCKLEN);
pad = dma_pool_alloc(ctx_pool, GFP_KERNEL, &pad_phys);
@@ -787,7 +788,7 @@ static struct buffer_desc *chainup_buffers(struct device *dev,
for (; nbytes > 0; sg = sg_next(sg)) {
unsigned len = min(nbytes, sg->length);
struct buffer_desc *next_buf;
- u32 next_buf_phys;
+ dma_addr_t next_buf_phys;
void *ptr;
nbytes -= len;
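The ixp4xx hunks swap u32 fields holding DMA addresses for dma_addr_t. A tiny illustration of why, using a fake 64-bit dma_addr_t (the kernel typedefs the real one per configuration):

    #include <inttypes.h>
    #include <stdio.h>

    /* Illustrative stand-in: shows why parking a DMA address in u32 is unsafe. */
    typedef uint64_t dma_addr_t;

    int main(void)
    {
        dma_addr_t phys = 0x100001000ULL;   /* a buffer above 4 GiB */
        uint32_t as_u32 = (uint32_t)phys;   /* what the old u32 fields kept */

        printf("dma_addr_t: %#" PRIx64 ", truncated u32: %#" PRIx32 "\n",
               phys, as_u32);               /* the high 0x1 is gone */
        return 0;
    }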
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index bdc4c42d3ac8..f1fa637cb029 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -986,8 +986,6 @@ static int mxs_dcp_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct dcp *sdcp = NULL;
int i, ret;
-
- struct resource *iores;
int dcp_vmi_irq, dcp_irq;
if (global_sdcp) {
@@ -995,7 +993,6 @@ static int mxs_dcp_probe(struct platform_device *pdev)
return -ENODEV;
}
- iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
dcp_vmi_irq = platform_get_irq(pdev, 0);
if (dcp_vmi_irq < 0) {
dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_vmi_irq);
@@ -1013,7 +1010,7 @@ static int mxs_dcp_probe(struct platform_device *pdev)
return -ENOMEM;
sdcp->dev = dev;
- sdcp->base = devm_ioremap_resource(dev, iores);
+ sdcp->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(sdcp->base))
return PTR_ERR(sdcp->base);
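The mxs-dcp (and, below, sahara) probe changes use devm_platform_ioremap_resource(), which folds the platform_get_resource() plus devm_ioremap_resource() pair into one call. From memory, the helper is approximately the wrapper below; treat it as a sketch of the equivalence, not the exact in-tree source:

    /* Kernel-style sketch (not standalone): one call instead of two. */
    void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
                                                 unsigned int index)
    {
        struct resource *res;

        res = platform_get_resource(pdev, IORESOURCE_MEM, index);
        return devm_ioremap_resource(&pdev->dev, res);
    }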
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 4acbc47973e9..e78ff5c65ed6 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -27,8 +27,6 @@ MODULE_ALIAS_CRYPTO("842-nx");
#define WORKMEM_ALIGN (CRB_ALIGN)
#define CSB_WAIT_MAX (5000) /* ms */
#define VAS_RETRIES (10)
-/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
-#define MAX_CREDITS_PER_RXFIFO (1024)
struct nx842_workmem {
/* Below fields must be properly aligned */
@@ -812,7 +810,11 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
rxattr.lnotify_lpid = lpid;
rxattr.lnotify_pid = pid;
rxattr.lnotify_tid = tid;
- rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
+ /*
+ * The maximum number of RX window credits cannot exceed the
+ * number of CRBs in the RxFIFO; otherwise an RxFIFO overrun
+ * can cause a checkstop.
+ */
+ rxattr.wcreds_max = fifo_size / CRB_SIZE;
/*
* Open a VAS receive window which is used to configure the RxFIFO
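Sizing wcreds_max from the FIFO means one credit per CRB slot, so the window can never hand out more requests than the FIFO can hold. With hypothetical numbers, a 32 KiB RxFIFO of 128-byte CRBs yields 256 credits:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical sizes: a 32 KiB RxFIFO of 128-byte CRBs. */
        unsigned int fifo_size = 32 * 1024;
        unsigned int crb_size = 128;

        /* One credit per CRB slot, so the FIFO cannot overrun. */
        printf("wcreds_max = %u\n", fifo_size / crb_size); /* 256 */
        return 0;
    }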
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 5c4aa606208c..2de5e3672e42 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -856,7 +856,7 @@ static ssize_t nx842_##_name##_show(struct device *dev, \
rcu_read_lock(); \
local_devdata = rcu_dereference(devdata); \
if (local_devdata) \
- p = snprintf(buf, PAGE_SIZE, "%ld\n", \
+ p = snprintf(buf, PAGE_SIZE, "%lld\n", \
atomic64_read(&local_devdata->counters->_name)); \
rcu_read_unlock(); \
return p; \
@@ -909,7 +909,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
}
for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
- bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
+ bytes = snprintf(p, bytes_remain, "%u-%uus:\t%lld\n",
i ? (2<<(i-1)) : 0, (2<<i)-1,
atomic64_read(&times[i]));
bytes_remain -= bytes;
@@ -917,7 +917,7 @@ static ssize_t nx842_timehist_show(struct device *dev,
}
/* The last bucket holds everything over
* 2<<(NX842_HIST_SLOTS - 2) us */
- bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
+ bytes = snprintf(p, bytes_remain, "%uus - :\t%lld\n",
2<<(NX842_HIST_SLOTS - 2),
atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
p += bytes;
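The %ld to %lld changes above fix the format specifier for 64-bit counters: "%ld" only matches where long is 64-bit, while "%lld" matches the 64-bit type everywhere. A small userspace demonstration of the pairing, using PRId64 for portability:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        /* A 64-bit counter printed with "%ld" walks the varargs wrongly
         * on 32-bit builds; a 64-bit specifier matches on every arch. */
        int64_t counter = 5000000000LL;  /* > 2^32 */

        printf("count:\t%" PRId64 "\n", counter);
        return 0;
    }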
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 428c273a1ab6..28817880c76d 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -569,9 +569,7 @@ static int nx_register_algs(void)
memset(&nx_driver.stats, 0, sizeof(struct nx_stats));
- rc = NX_DEBUGFS_INIT(&nx_driver);
- if (rc)
- goto out;
+ NX_DEBUGFS_INIT(&nx_driver);
nx_driver.of.status = NX_OKAY;
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index c3e54af18645..c6b5a3be02be 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -76,20 +76,12 @@ struct nx_stats {
atomic_t last_error_pid;
};
-struct nx_debugfs {
- struct dentry *dfs_root;
- struct dentry *dfs_aes_ops, *dfs_aes_bytes;
- struct dentry *dfs_sha256_ops, *dfs_sha256_bytes;
- struct dentry *dfs_sha512_ops, *dfs_sha512_bytes;
- struct dentry *dfs_errors, *dfs_last_error, *dfs_last_error_pid;
-};
-
struct nx_crypto_driver {
struct nx_stats stats;
struct nx_of of;
struct vio_dev *viodev;
struct vio_driver viodriver;
- struct nx_debugfs dfs;
+ struct dentry *dfs_root;
};
#define NX_GCM4106_NONCE_LEN (4)
@@ -177,7 +169,7 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int,
#define NX_DEBUGFS_INIT(drv) nx_debugfs_init(drv)
#define NX_DEBUGFS_FINI(drv) nx_debugfs_fini(drv)
-int nx_debugfs_init(struct nx_crypto_driver *);
+void nx_debugfs_init(struct nx_crypto_driver *);
void nx_debugfs_fini(struct nx_crypto_driver *);
#else
#define NX_DEBUGFS_INIT(drv) (0)
diff --git a/drivers/crypto/nx/nx_debugfs.c b/drivers/crypto/nx/nx_debugfs.c
index 03e4f0363c6a..e0d44a5512ab 100644
--- a/drivers/crypto/nx/nx_debugfs.c
+++ b/drivers/crypto/nx/nx_debugfs.c
@@ -30,62 +30,37 @@
* Documentation/ABI/testing/debugfs-pfo-nx-crypto
*/
-int nx_debugfs_init(struct nx_crypto_driver *drv)
+void nx_debugfs_init(struct nx_crypto_driver *drv)
{
- struct nx_debugfs *dfs = &drv->dfs;
+ struct dentry *root;
- dfs->dfs_root = debugfs_create_dir(NX_NAME, NULL);
+ root = debugfs_create_dir(NX_NAME, NULL);
+ drv->dfs_root = root;
- dfs->dfs_aes_ops =
- debugfs_create_u32("aes_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.aes_ops);
- dfs->dfs_sha256_ops =
- debugfs_create_u32("sha256_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha256_ops);
- dfs->dfs_sha512_ops =
- debugfs_create_u32("sha512_ops",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.sha512_ops);
- dfs->dfs_aes_bytes =
- debugfs_create_u64("aes_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.aes_bytes);
- dfs->dfs_sha256_bytes =
- debugfs_create_u64("sha256_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha256_bytes);
- dfs->dfs_sha512_bytes =
- debugfs_create_u64("sha512_bytes",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u64 *)&drv->stats.sha512_bytes);
- dfs->dfs_errors =
- debugfs_create_u32("errors",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root, (u32 *)&drv->stats.errors);
- dfs->dfs_last_error =
- debugfs_create_u32("last_error",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error);
- dfs->dfs_last_error_pid =
- debugfs_create_u32("last_error_pid",
- S_IRUSR | S_IRGRP | S_IROTH,
- dfs->dfs_root,
- (u32 *)&drv->stats.last_error_pid);
- return 0;
+ debugfs_create_u32("aes_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.aes_ops);
+ debugfs_create_u32("sha256_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha256_ops);
+ debugfs_create_u32("sha512_ops", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.sha512_ops);
+ debugfs_create_u64("aes_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.aes_bytes);
+ debugfs_create_u64("sha256_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha256_bytes);
+ debugfs_create_u64("sha512_bytes", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u64 *)&drv->stats.sha512_bytes);
+ debugfs_create_u32("errors", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.errors);
+ debugfs_create_u32("last_error", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.last_error);
+ debugfs_create_u32("last_error_pid", S_IRUSR | S_IRGRP | S_IROTH,
+ root, (u32 *)&drv->stats.last_error_pid);
}
void
nx_debugfs_fini(struct nx_crypto_driver *drv)
{
- debugfs_remove_recursive(drv->dfs.dfs_root);
+ debugfs_remove_recursive(drv->dfs_root);
}
#endif
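The nx debugfs rewrite above keeps only the root dentry: debugfs file creation failures are not actionable, so the files are created fire-and-forget and torn down recursively. A kernel-style sketch of the slimmed-down pattern, with illustrative names:

    /* Kernel-style sketch (not standalone): only the root is retained. */
    static struct dentry *dfs_root;

    static void stats_debugfs_init(u32 *ops, u64 *bytes)
    {
        dfs_root = debugfs_create_dir("mydrv", NULL);
        debugfs_create_u32("ops", 0444, dfs_root, ops);
        debugfs_create_u64("bytes", 0444, dfs_root, bytes);
    }

    static void stats_debugfs_fini(void)
    {
        debugfs_remove_recursive(dfs_root);
    }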
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index c8d401646902..b50eb55f8f57 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -131,7 +131,6 @@ struct qat_alg_ablkcipher_ctx {
struct icp_qat_fw_la_bulk_req dec_fw_req;
struct qat_crypto_instance *inst;
struct crypto_tfm *tfm;
- spinlock_t lock; /* protects qat_alg_ablkcipher_ctx struct */
};
static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg)
@@ -223,6 +222,9 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
return -EFAULT;
offset = round_up(qat_get_inter_state_size(ctx->qat_hash_alg), 8);
+ if (offset < 0)
+ return -EFAULT;
+
hash_state_out = (__be32 *)(hash->sha.state1 + offset);
hash512_state_out = (__be64 *)hash_state_out;
@@ -253,7 +255,24 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
return 0;
}
-static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header)
+static void qat_alg_init_hdr_iv_updt(struct icp_qat_fw_comn_req_hdr *header)
+{
+ ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags,
+ ICP_QAT_FW_CIPH_IV_64BIT_PTR);
+ ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags,
+ ICP_QAT_FW_LA_UPDATE_STATE);
+}
+
+static void qat_alg_init_hdr_no_iv_updt(struct icp_qat_fw_comn_req_hdr *header)
+{
+ ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags,
+ ICP_QAT_FW_CIPH_IV_16BYTE_DATA);
+ ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags,
+ ICP_QAT_FW_LA_NO_UPDATE_STATE);
+}
+
+static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header,
+ int aead)
{
header->hdr_flags =
ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
@@ -263,12 +282,12 @@ static void qat_alg_init_common_hdr(struct icp_qat_fw_comn_req_hdr *header)
QAT_COMN_PTR_TYPE_SGL);
ICP_QAT_FW_LA_PARTIAL_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_PARTIAL_NONE);
- ICP_QAT_FW_LA_CIPH_IV_FLD_FLAG_SET(header->serv_specif_flags,
- ICP_QAT_FW_CIPH_IV_16BYTE_DATA);
+ if (aead)
+ qat_alg_init_hdr_no_iv_updt(header);
+ else
+ qat_alg_init_hdr_iv_updt(header);
ICP_QAT_FW_LA_PROTO_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_NO_PROTO);
- ICP_QAT_FW_LA_UPDATE_STATE_SET(header->serv_specif_flags,
- ICP_QAT_FW_LA_NO_UPDATE_STATE);
}
static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm,
@@ -303,7 +322,7 @@ static int qat_alg_aead_init_enc_session(struct crypto_aead *aead_tfm,
return -EFAULT;
/* Request setup */
- qat_alg_init_common_hdr(header);
+ qat_alg_init_common_hdr(header, 1);
header->service_cmd_id = ICP_QAT_FW_LA_CMD_CIPHER_HASH;
ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_DIGEST_IN_BUFFER);
@@ -390,7 +409,7 @@ static int qat_alg_aead_init_dec_session(struct crypto_aead *aead_tfm,
return -EFAULT;
/* Request setup */
- qat_alg_init_common_hdr(header);
+ qat_alg_init_common_hdr(header, 1);
header->service_cmd_id = ICP_QAT_FW_LA_CMD_HASH_CIPHER;
ICP_QAT_FW_LA_DIGEST_IN_BUFFER_SET(header->serv_specif_flags,
ICP_QAT_FW_LA_DIGEST_IN_BUFFER);
@@ -454,7 +473,7 @@ static void qat_alg_ablkcipher_init_com(struct qat_alg_ablkcipher_ctx *ctx,
struct icp_qat_fw_cipher_cd_ctrl_hdr *cd_ctrl = (void *)&req->cd_ctrl;
memcpy(cd->aes.key, key, keylen);
- qat_alg_init_common_hdr(header);
+ qat_alg_init_common_hdr(header, 0);
header->service_cmd_id = ICP_QAT_FW_LA_CMD_CIPHER;
cd_pars->u.s.content_desc_params_sz =
sizeof(struct icp_qat_hw_cipher_algo_blk) >> 3;
@@ -576,45 +595,52 @@ bad_key:
return -EINVAL;
}
-static int qat_alg_aead_setkey(struct crypto_aead *tfm, const uint8_t *key,
+static int qat_alg_aead_rekey(struct crypto_aead *tfm, const uint8_t *key,
+ unsigned int keylen)
+{
+ struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd));
+ memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd));
+ memset(&ctx->enc_fw_req, 0, sizeof(ctx->enc_fw_req));
+ memset(&ctx->dec_fw_req, 0, sizeof(ctx->dec_fw_req));
+
+ return qat_alg_aead_init_sessions(tfm, key, keylen,
+ ICP_QAT_HW_CIPHER_CBC_MODE);
+}
+
+static int qat_alg_aead_newkey(struct crypto_aead *tfm, const uint8_t *key,
unsigned int keylen)
{
struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct qat_crypto_instance *inst = NULL;
+ int node = get_current_node();
struct device *dev;
+ int ret;
- if (ctx->enc_cd) {
- /* rekeying */
- dev = &GET_DEV(ctx->inst->accel_dev);
- memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd));
- memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd));
- memset(&ctx->enc_fw_req, 0, sizeof(ctx->enc_fw_req));
- memset(&ctx->dec_fw_req, 0, sizeof(ctx->dec_fw_req));
- } else {
- /* new key */
- int node = get_current_node();
- struct qat_crypto_instance *inst =
- qat_crypto_get_instance_node(node);
- if (!inst) {
- return -EINVAL;
- }
-
- dev = &GET_DEV(inst->accel_dev);
- ctx->inst = inst;
- ctx->enc_cd = dma_alloc_coherent(dev, sizeof(*ctx->enc_cd),
- &ctx->enc_cd_paddr,
- GFP_ATOMIC);
- if (!ctx->enc_cd) {
- return -ENOMEM;
- }
- ctx->dec_cd = dma_alloc_coherent(dev, sizeof(*ctx->dec_cd),
- &ctx->dec_cd_paddr,
- GFP_ATOMIC);
- if (!ctx->dec_cd) {
- goto out_free_enc;
- }
+ inst = qat_crypto_get_instance_node(node);
+ if (!inst)
+ return -EINVAL;
+ dev = &GET_DEV(inst->accel_dev);
+ ctx->inst = inst;
+ ctx->enc_cd = dma_alloc_coherent(dev, sizeof(*ctx->enc_cd),
+ &ctx->enc_cd_paddr,
+ GFP_ATOMIC);
+ if (!ctx->enc_cd) {
+ ret = -ENOMEM;
+ goto out_free_inst;
+ }
+ ctx->dec_cd = dma_alloc_coherent(dev, sizeof(*ctx->dec_cd),
+ &ctx->dec_cd_paddr,
+ GFP_ATOMIC);
+ if (!ctx->dec_cd) {
+ ret = -ENOMEM;
+ goto out_free_enc;
}
- if (qat_alg_aead_init_sessions(tfm, key, keylen,
- ICP_QAT_HW_CIPHER_CBC_MODE))
+
+ ret = qat_alg_aead_init_sessions(tfm, key, keylen,
+ ICP_QAT_HW_CIPHER_CBC_MODE);
+ if (ret)
goto out_free_all;
return 0;
@@ -629,7 +655,21 @@ out_free_enc:
dma_free_coherent(dev, sizeof(struct qat_alg_cd),
ctx->enc_cd, ctx->enc_cd_paddr);
ctx->enc_cd = NULL;
- return -ENOMEM;
+out_free_inst:
+ ctx->inst = NULL;
+ qat_crypto_put_instance(inst);
+ return ret;
+}
+
+static int qat_alg_aead_setkey(struct crypto_aead *tfm, const uint8_t *key,
+ unsigned int keylen)
+{
+ struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ if (ctx->enc_cd)
+ return qat_alg_aead_rekey(tfm, key, keylen);
+ else
+ return qat_alg_aead_newkey(tfm, key, keylen);
}
static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
@@ -677,8 +717,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
dma_addr_t blp;
dma_addr_t bloutp = 0;
struct scatterlist *sg;
- size_t sz_out, sz = sizeof(struct qat_alg_buf_list) +
- ((1 + n) * sizeof(struct qat_alg_buf));
+ size_t sz_out, sz = struct_size(bufl, bufers, n + 1);
if (unlikely(!n))
return -EINVAL;
@@ -715,8 +754,7 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
struct qat_alg_buf *bufers;
n = sg_nents(sglout);
- sz_out = sizeof(struct qat_alg_buf_list) +
- ((1 + n) * sizeof(struct qat_alg_buf));
+ sz_out = struct_size(buflout, bufers, n + 1);
sg_nctr = 0;
buflout = kzalloc_node(sz_out, GFP_ATOMIC,
dev_to_node(&GET_DEV(inst->accel_dev)));
@@ -801,11 +839,17 @@ static void qat_ablkcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
struct qat_crypto_instance *inst = ctx->inst;
struct ablkcipher_request *areq = qat_req->ablkcipher_req;
uint8_t stat_filed = qat_resp->comn_resp.comn_status;
+ struct device *dev = &GET_DEV(ctx->inst->accel_dev);
int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
qat_alg_free_bufl(inst, qat_req);
if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
res = -EINVAL;
+
+ memcpy(areq->info, qat_req->iv, AES_BLOCK_SIZE);
+ dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
+ qat_req->iv_paddr);
+
areq->base.complete(&areq->base, res);
}
@@ -905,50 +949,49 @@ static int qat_alg_aead_enc(struct aead_request *areq)
return -EINPROGRESS;
}
-static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
+static int qat_alg_ablkcipher_rekey(struct qat_alg_ablkcipher_ctx *ctx,
+ const u8 *key, unsigned int keylen,
+ int mode)
+{
+ memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd));
+ memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd));
+ memset(&ctx->enc_fw_req, 0, sizeof(ctx->enc_fw_req));
+ memset(&ctx->dec_fw_req, 0, sizeof(ctx->dec_fw_req));
+
+ return qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode);
+}
+
+static int qat_alg_ablkcipher_newkey(struct qat_alg_ablkcipher_ctx *ctx,
const u8 *key, unsigned int keylen,
int mode)
{
- struct qat_alg_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct qat_crypto_instance *inst = NULL;
struct device *dev;
+ int node = get_current_node();
+ int ret;
- spin_lock(&ctx->lock);
- if (ctx->enc_cd) {
- /* rekeying */
- dev = &GET_DEV(ctx->inst->accel_dev);
- memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd));
- memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd));
- memset(&ctx->enc_fw_req, 0, sizeof(ctx->enc_fw_req));
- memset(&ctx->dec_fw_req, 0, sizeof(ctx->dec_fw_req));
- } else {
- /* new key */
- int node = get_current_node();
- struct qat_crypto_instance *inst =
- qat_crypto_get_instance_node(node);
- if (!inst) {
- spin_unlock(&ctx->lock);
- return -EINVAL;
- }
-
- dev = &GET_DEV(inst->accel_dev);
- ctx->inst = inst;
- ctx->enc_cd = dma_alloc_coherent(dev, sizeof(*ctx->enc_cd),
- &ctx->enc_cd_paddr,
- GFP_ATOMIC);
- if (!ctx->enc_cd) {
- spin_unlock(&ctx->lock);
- return -ENOMEM;
- }
- ctx->dec_cd = dma_alloc_coherent(dev, sizeof(*ctx->dec_cd),
- &ctx->dec_cd_paddr,
- GFP_ATOMIC);
- if (!ctx->dec_cd) {
- spin_unlock(&ctx->lock);
- goto out_free_enc;
- }
+ inst = qat_crypto_get_instance_node(node);
+ if (!inst)
+ return -EINVAL;
+ dev = &GET_DEV(inst->accel_dev);
+ ctx->inst = inst;
+ ctx->enc_cd = dma_alloc_coherent(dev, sizeof(*ctx->enc_cd),
+ &ctx->enc_cd_paddr,
+ GFP_ATOMIC);
+ if (!ctx->enc_cd) {
+ ret = -ENOMEM;
+ goto out_free_instance;
+ }
+ ctx->dec_cd = dma_alloc_coherent(dev, sizeof(*ctx->dec_cd),
+ &ctx->dec_cd_paddr,
+ GFP_ATOMIC);
+ if (!ctx->dec_cd) {
+ ret = -ENOMEM;
+ goto out_free_enc;
}
- spin_unlock(&ctx->lock);
- if (qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode))
+
+ ret = qat_alg_ablkcipher_init_sessions(ctx, key, keylen, mode);
+ if (ret)
goto out_free_all;
return 0;
@@ -963,7 +1006,22 @@ out_free_enc:
dma_free_coherent(dev, sizeof(*ctx->enc_cd),
ctx->enc_cd, ctx->enc_cd_paddr);
ctx->enc_cd = NULL;
- return -ENOMEM;
+out_free_instance:
+ ctx->inst = NULL;
+ qat_crypto_put_instance(inst);
+ return ret;
+}
+
+static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm,
+ const u8 *key, unsigned int keylen,
+ int mode)
+{
+ struct qat_alg_ablkcipher_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (ctx->enc_cd)
+ return qat_alg_ablkcipher_rekey(ctx, key, keylen, mode);
+ else
+ return qat_alg_ablkcipher_newkey(ctx, key, keylen, mode);
}
static int qat_alg_ablkcipher_cbc_setkey(struct crypto_ablkcipher *tfm,
@@ -995,11 +1053,23 @@ static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
struct qat_crypto_request *qat_req = ablkcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_bulk_req *msg;
+ struct device *dev = &GET_DEV(ctx->inst->accel_dev);
int ret, ctr = 0;
+ if (req->nbytes == 0)
+ return 0;
+
+ qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE,
+ &qat_req->iv_paddr, GFP_ATOMIC);
+ if (!qat_req->iv)
+ return -ENOMEM;
+
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
+ qat_req->iv_paddr);
return ret;
+ }
msg = &qat_req->req;
*msg = ctx->enc_fw_req;
@@ -1012,18 +1082,29 @@ static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req)
cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
cipher_param->cipher_length = req->nbytes;
cipher_param->cipher_offset = 0;
- memcpy(cipher_param->u.cipher_IV_array, req->info, AES_BLOCK_SIZE);
+ cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
+ memcpy(qat_req->iv, req->info, AES_BLOCK_SIZE);
do {
ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
qat_alg_free_bufl(ctx->inst, qat_req);
+ dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
+ qat_req->iv_paddr);
return -EBUSY;
}
return -EINPROGRESS;
}
+static int qat_alg_ablkcipher_blk_encrypt(struct ablkcipher_request *req)
+{
+ if (req->nbytes % AES_BLOCK_SIZE != 0)
+ return -EINVAL;
+
+ return qat_alg_ablkcipher_encrypt(req);
+}
+
static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
{
struct crypto_ablkcipher *atfm = crypto_ablkcipher_reqtfm(req);
@@ -1032,11 +1113,23 @@ static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
struct qat_crypto_request *qat_req = ablkcipher_request_ctx(req);
struct icp_qat_fw_la_cipher_req_params *cipher_param;
struct icp_qat_fw_la_bulk_req *msg;
+ struct device *dev = &GET_DEV(ctx->inst->accel_dev);
int ret, ctr = 0;
+ if (req->nbytes == 0)
+ return 0;
+
+ qat_req->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE,
+ &qat_req->iv_paddr, GFP_ATOMIC);
+ if (!qat_req->iv)
+ return -ENOMEM;
+
ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
+ qat_req->iv_paddr);
return ret;
+ }
msg = &qat_req->req;
*msg = ctx->dec_fw_req;
@@ -1049,18 +1142,28 @@ static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req)
cipher_param = (void *)&qat_req->req.serv_specif_rqpars;
cipher_param->cipher_length = req->nbytes;
cipher_param->cipher_offset = 0;
- memcpy(cipher_param->u.cipher_IV_array, req->info, AES_BLOCK_SIZE);
+ cipher_param->u.s.cipher_IV_ptr = qat_req->iv_paddr;
+ memcpy(qat_req->iv, req->info, AES_BLOCK_SIZE);
do {
ret = adf_send_message(ctx->inst->sym_tx, (uint32_t *)msg);
} while (ret == -EAGAIN && ctr++ < 10);
if (ret == -EAGAIN) {
qat_alg_free_bufl(ctx->inst, qat_req);
+ dma_free_coherent(dev, AES_BLOCK_SIZE, qat_req->iv,
+ qat_req->iv_paddr);
return -EBUSY;
}
return -EINPROGRESS;
}
+static int qat_alg_ablkcipher_blk_decrypt(struct ablkcipher_request *req)
+{
+ if (req->nbytes % AES_BLOCK_SIZE != 0)
+ return -EINVAL;
+
+ return qat_alg_ablkcipher_decrypt(req);
+}
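
[Editor's note] Taken together, the two _blk_ wrappers above give the CBC and XTS entry points a strict whole-block length check, while CTR keeps the unwrapped handlers and drops to cra_blocksize = 1 later in this patch, as a stream cipher should. A standalone sketch of the same dispatch idea (names are illustrative, not the driver's):

	#include <errno.h>
	#include <stdio.h>

	#define AES_BLOCK_SIZE 16

	static int crypt_worker(size_t nbytes)
	{
		if (nbytes == 0)
			return 0;	/* nothing to do, succeed early */
		/* ... build and submit the firmware request here ... */
		return 0;
	}

	static int crypt_blk(size_t nbytes)
	{
		if (nbytes % AES_BLOCK_SIZE)	/* CBC/XTS: whole blocks only */
			return -EINVAL;
		return crypt_worker(nbytes);
	}

	int main(void)
	{
		printf("17 bytes, blk path: %d\n", crypt_blk(17));	/* -EINVAL */
		printf("32 bytes, blk path: %d\n", crypt_blk(32));	/* 0 */
		printf("17 bytes, ctr path: %d\n", crypt_worker(17));	/* 0 */
		return 0;
	}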
static int qat_alg_aead_init(struct crypto_aead *tfm,
enum icp_qat_hw_auth_algo hash,
const char *hash_name)
@@ -1119,7 +1222,6 @@ static int qat_alg_ablkcipher_init(struct crypto_tfm *tfm)
{
struct qat_alg_ablkcipher_ctx *ctx = crypto_tfm_ctx(tfm);
- spin_lock_init(&ctx->lock);
tfm->crt_ablkcipher.reqsize = sizeof(struct qat_crypto_request);
ctx->tfm = tfm;
return 0;
@@ -1221,8 +1323,8 @@ static struct crypto_alg qat_algs[] = { {
.cra_u = {
.ablkcipher = {
.setkey = qat_alg_ablkcipher_cbc_setkey,
- .decrypt = qat_alg_ablkcipher_decrypt,
- .encrypt = qat_alg_ablkcipher_encrypt,
+ .decrypt = qat_alg_ablkcipher_blk_decrypt,
+ .encrypt = qat_alg_ablkcipher_blk_encrypt,
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
@@ -1233,7 +1335,7 @@ static struct crypto_alg qat_algs[] = { {
.cra_driver_name = "qat_aes_ctr",
.cra_priority = 4001,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_ctxsize = sizeof(struct qat_alg_ablkcipher_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
@@ -1265,8 +1367,8 @@ static struct crypto_alg qat_algs[] = { {
.cra_u = {
.ablkcipher = {
.setkey = qat_alg_ablkcipher_xts_setkey,
- .decrypt = qat_alg_ablkcipher_decrypt,
- .encrypt = qat_alg_ablkcipher_encrypt,
+ .decrypt = qat_alg_ablkcipher_blk_decrypt,
+ .encrypt = qat_alg_ablkcipher_blk_encrypt,
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index dc0273fe3620..c77a80020cde 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -88,6 +88,8 @@ struct qat_crypto_request {
struct qat_crypto_request_buffs buf;
void (*cb)(struct icp_qat_fw_la_resp *resp,
struct qat_crypto_request *req);
+ void *iv;
+ dma_addr_t iv_paddr;
};
#endif
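
[Editor's note] The header change pairs with the encrypt/decrypt hunks above: the IV now lives in a per-request DMA-coherent buffer (iv/iv_paddr) referenced by physical address from the firmware descriptor, instead of being copied into the descriptor itself, so every exit path must release it. A kernel-style sketch of that lifetime, using the real dma_alloc_coherent()/dma_free_coherent() API but with hypothetical request and submit names:

	#include <crypto/aes.h>
	#include <linux/dma-mapping.h>

	/* Hypothetical request: mirrors the iv/iv_paddr pair added above. */
	struct sketch_req {
		void *iv;
		dma_addr_t iv_paddr;
	};

	static int hw_send(struct sketch_req *r); /* stand-in for the adf_send_message() retry loop */

	static int sketch_submit(struct device *dev, struct sketch_req *r,
				 const u8 *iv_in)
	{
		int ret;

		r->iv = dma_alloc_coherent(dev, AES_BLOCK_SIZE, &r->iv_paddr,
					   GFP_ATOMIC);
		if (!r->iv)
			return -ENOMEM;

		memcpy(r->iv, iv_in, AES_BLOCK_SIZE);	/* device reads via iv_paddr */

		ret = hw_send(r);
		if (ret) {
			/* every failure path must free the coherent buffer */
			dma_free_coherent(dev, AES_BLOCK_SIZE, r->iv, r->iv_paddr);
			return ret;
		}
		return -EINPROGRESS;	/* completion handler frees iv later */
	}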
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 6b498a90181e..b0b8e3d48aef 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1384,7 +1384,6 @@ MODULE_DEVICE_TABLE(of, sahara_dt_ids);
static int sahara_probe(struct platform_device *pdev)
{
struct sahara_dev *dev;
- struct resource *res;
u32 version;
int irq;
int err;
@@ -1398,8 +1397,7 @@ static int sahara_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, dev);
/* Get the base address */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- dev->regs_base = devm_ioremap_resource(&pdev->dev, res);
+ dev->regs_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(dev->regs_base))
return PTR_ERR(dev->regs_base);
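
[Editor's note] devm_platform_ioremap_resource() is a straight wrapper for the two calls it replaces, so the sahara change is purely a simplification. A kernel-style sketch of the equivalence:

	#include <linux/platform_device.h>

	static void __iomem *sketch_map_regs(struct platform_device *pdev)
	{
		/* Old form:
		 *   res  = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		 *   base = devm_ioremap_resource(&pdev->dev, res);
		 * New form, identical semantics and error reporting:
		 */
		return devm_platform_ioremap_resource(pdev, 0);
	}

Callers keep the same IS_ERR()/PTR_ERR() check, as sahara_probe() does above.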
diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
index ce77e38c77e0..518e0e0b11a9 100644
--- a/drivers/crypto/stm32/Makefile
+++ b/drivers/crypto/stm32/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CRYPTO_DEV_STM32_CRC) += stm32_crc32.o
+obj-$(CONFIG_CRYPTO_DEV_STM32_CRC) += stm32-crc32.o
obj-$(CONFIG_CRYPTO_DEV_STM32_HASH) += stm32-hash.o
obj-$(CONFIG_CRYPTO_DEV_STM32_CRYP) += stm32-cryp.o
diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 440c9f1bd006..440c9f1bd006 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 29519d1c403f..23061f2bc74b 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -349,7 +349,7 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev,
return -ETIMEDOUT;
if ((hdev->flags & HASH_FLAGS_HMAC) &&
- (hdev->flags & ~HASH_FLAGS_HMAC_KEY)) {
+ (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) {
hdev->flags |= HASH_FLAGS_HMAC_KEY;
stm32_hash_write_key(hdev);
if (stm32_hash_wait_busy(hdev))
@@ -447,8 +447,8 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
dma_async_issue_pending(hdev->dma_lch);
- if (!wait_for_completion_interruptible_timeout(&hdev->dma_completion,
- msecs_to_jiffies(100)))
+ if (!wait_for_completion_timeout(&hdev->dma_completion,
+ msecs_to_jiffies(100)))
err = -ETIMEDOUT;
if (dma_async_is_tx_complete(hdev->dma_lch, cookie,
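
[Editor's note] Two distinct fixes in the stm32-hash hunks. First, the HMAC-key test used `flags & ~HASH_FLAGS_HMAC_KEY`, which masks with the complement and is nonzero whenever any other flag is set, so the intended "key not yet written" condition never became false; the fix tests the single bit directly. Second, the DMA wait becomes non-interruptible: wait_for_completion_interruptible_timeout() returns -ERESTARTSYS when a signal arrives, which the `!ret` test would treat as success even though the DMA had not completed. A runnable illustration of the flag bug (bit positions here are illustrative):

	#include <stdio.h>

	#define HASH_FLAGS_HMAC		(1 << 0)
	#define HASH_FLAGS_HMAC_KEY	(1 << 1)
	#define HASH_FLAGS_OUTPUT	(1 << 2)

	int main(void)
	{
		unsigned int flags = HASH_FLAGS_HMAC | HASH_FLAGS_OUTPUT;

		/* Buggy test: true because OUTPUT is set, regardless of
		 * whether the key was already written. */
		printf("buggy: %d\n", !!(flags & ~HASH_FLAGS_HMAC_KEY));	/* 1 */
		/* Fixed test: true only while the key has not been sent. */
		printf("fixed: %d\n", !(flags & HASH_FLAGS_HMAC_KEY));		/* 1 */

		flags |= HASH_FLAGS_HMAC_KEY;
		printf("buggy after key: %d\n", !!(flags & ~HASH_FLAGS_HMAC_KEY)); /* still 1 */
		printf("fixed after key: %d\n", !(flags & HASH_FLAGS_HMAC_KEY));   /* 0 */
		return 0;
	}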
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index 7b0c42882830..4ab14d58e85b 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -12,7 +12,7 @@
*/
#include "sun4i-ss.h"
-static int sun4i_ss_opti_poll(struct skcipher_request *areq)
+static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
@@ -114,6 +114,29 @@ release_ss:
return err;
}
+
+static int noinline_for_stack sun4i_ss_cipher_poll_fallback(struct skcipher_request *areq)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
+ int err;
+
+ skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
+ skcipher_request_set_callback(subreq, areq->base.flags, NULL,
+ NULL);
+ skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+ areq->cryptlen, areq->iv);
+ if (ctx->mode & SS_DECRYPTION)
+ err = crypto_skcipher_decrypt(subreq);
+ else
+ err = crypto_skcipher_encrypt(subreq);
+ skcipher_request_zero(subreq);
+
+ return err;
+}
+
/* Generic function that supports SGs whose size is not a multiple of 4 */
static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
{
@@ -140,8 +163,6 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
unsigned int todo;
struct sg_mapping_iter mi, mo;
unsigned int oi, oo; /* offset for in and out */
- char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */
- char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */
unsigned int ob = 0; /* offset in buf */
unsigned int obo = 0; /* offset in bufo */
unsigned int obl = 0; /* length of data in bufo */
@@ -178,20 +199,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
if (no_chunk == 1 && !need_fallback)
return sun4i_ss_opti_poll(areq);
- if (need_fallback) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, op->fallback_tfm);
- skcipher_request_set_sync_tfm(subreq, op->fallback_tfm);
- skcipher_request_set_callback(subreq, areq->base.flags, NULL,
- NULL);
- skcipher_request_set_crypt(subreq, areq->src, areq->dst,
- areq->cryptlen, areq->iv);
- if (ctx->mode & SS_DECRYPTION)
- err = crypto_skcipher_decrypt(subreq);
- else
- err = crypto_skcipher_encrypt(subreq);
- skcipher_request_zero(subreq);
- return err;
- }
+ if (need_fallback)
+ return sun4i_ss_cipher_poll_fallback(areq);
spin_lock_irqsave(&ss->slock, flags);
@@ -224,6 +233,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
while (oleft) {
if (ileft) {
+ char buf[4 * SS_RX_MAX]; /* buffer for linearizing SG src */
+
/*
* todo is the number of consecutive 4-byte words that we
* can read from the current SG
@@ -281,6 +292,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq)
oo = 0;
}
} else {
+ char bufo[4 * SS_TX_MAX]; /* buffer for linearizing SG dst */
+
/*
* read obl bytes into bufo; we read as much as possible to
* empty the device
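
[Editor's note] The sun4i-ss rework is all about worst-case stack usage: the fallback path moves into its own noinline_for_stack helper so its on-stack subrequest is no longer accounted against sun4i_ss_cipher_poll()'s frame (and cannot be inlined back into it), and the two large linearization buffers move into the mutually exclusive branches that use them, letting the compiler overlap their stack slots. A compilable sketch of the scoping idea:

	#include <stddef.h>
	#include <string.h>

	#define SS_RX_MAX 32
	#define SS_TX_MAX 33

	/* With both arrays at function scope the frame must hold both;
	 * scoped to exclusive branches, the compiler may reuse the slot. */
	static void poll_step(int reading, char *data, size_t len)
	{
		if (reading) {
			char buf[4 * SS_RX_MAX];	/* live only in this branch */
			memcpy(buf, data, len < sizeof(buf) ? len : sizeof(buf));
		} else {
			char bufo[4 * SS_TX_MAX];	/* may share buf's slot */
			memset(bufo, 0, sizeof(bufo));
			memcpy(data, bufo, len < sizeof(bufo) ? len : sizeof(bufo));
		}
	}

	int main(void)
	{
		char in[16] = "0123456789abcde", out[16];

		poll_step(1, in, sizeof(in));
		poll_step(0, out, sizeof(out));
		return 0;
	}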
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index fbc7bf9d7380..c9d686a0e805 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -265,11 +265,11 @@ static int init_device(struct device *dev)
* callback must check err and feedback in descriptor header
* for device processing status.
*/
-int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
- void (*callback)(struct device *dev,
- struct talitos_desc *desc,
- void *context, int error),
- void *context)
+static int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
+ void (*callback)(struct device *dev,
+ struct talitos_desc *desc,
+ void *context, int error),
+ void *context)
{
struct talitos_private *priv = dev_get_drvdata(dev);
struct talitos_request *request;
@@ -319,7 +319,21 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
return -EINPROGRESS;
}
-EXPORT_SYMBOL(talitos_submit);
+
+static __be32 get_request_hdr(struct talitos_request *request, bool is_sec1)
+{
+ struct talitos_edesc *edesc;
+
+ if (!is_sec1)
+ return request->desc->hdr;
+
+ if (!request->desc->next_desc)
+ return request->desc->hdr1;
+
+ edesc = container_of(request->desc, struct talitos_edesc, desc);
+
+ return ((struct talitos_desc *)(edesc->buf + edesc->dma_len))->hdr1;
+}
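
[Editor's note] get_request_hdr() replaces the old `(request->desc + 1)` pointer arithmetic with container_of(), because the second SEC1 descriptor no longer sits immediately after the first in memory; it lives at edesc->buf + edesc->dma_len. A standalone demonstration of the container_of() idiom used here:

	#include <stddef.h>
	#include <stdio.h>

	/* Same definition the kernel uses, minus type-checking extras. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct desc { unsigned int hdr; };

	struct edesc {
		int dma_len;
		struct desc desc;	/* embedded, like talitos_edesc::desc */
		unsigned char buf[64];	/* link tables / second desc live here */
	};

	int main(void)
	{
		struct edesc e = { .dma_len = 16 };
		struct desc *d = &e.desc;

		/* Recover the enclosing edesc from the embedded desc pointer. */
		struct edesc *back = container_of(d, struct edesc, desc);

		printf("round trip ok: %d\n", back == &e);		/* 1 */
		printf("second desc at buf+%d\n", back->dma_len);
		return 0;
	}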
/*
* process what was done, notify callback of error if not
@@ -342,12 +356,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
/* descriptors with their done bits set don't get the error */
rmb();
- if (!is_sec1)
- hdr = request->desc->hdr;
- else if (request->desc->next_desc)
- hdr = (request->desc + 1)->hdr1;
- else
- hdr = request->desc->hdr1;
+ hdr = get_request_hdr(request, is_sec1);
if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
status = 0;
@@ -477,8 +486,14 @@ static u32 current_desc_hdr(struct device *dev, int ch)
}
}
- if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc)
- return (priv->chan[ch].fifo[iter].desc + 1)->hdr;
+ if (priv->chan[ch].fifo[iter].desc->next_desc == cur_desc) {
+ struct talitos_edesc *edesc;
+
+ edesc = container_of(priv->chan[ch].fifo[iter].desc,
+ struct talitos_edesc, desc);
+ return ((struct talitos_desc *)
+ (edesc->buf + edesc->dma_len))->hdr;
+ }
return priv->chan[ch].fifo[iter].desc->hdr;
}
@@ -824,7 +839,11 @@ static void talitos_unregister_rng(struct device *dev)
* HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP
*/
#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1)
+#ifdef CONFIG_CRYPTO_DEV_TALITOS2
#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA512_BLOCK_SIZE)
+#else
+#define TALITOS_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + SHA256_BLOCK_SIZE)
+#endif
#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
struct talitos_ctx {
@@ -948,36 +967,6 @@ badkey:
goto out;
}
-/*
- * talitos_edesc - s/w-extended descriptor
- * @src_nents: number of segments in input scatterlist
- * @dst_nents: number of segments in output scatterlist
- * @icv_ool: whether ICV is out-of-line
- * @iv_dma: dma address of iv for checking continuity and link table
- * @dma_len: length of dma mapped link_tbl space
- * @dma_link_tbl: bus physical address of link_tbl/buf
- * @desc: h/w descriptor
- * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
- * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1)
- *
- * if decrypting (with authcheck), or either one of src_nents or dst_nents
- * is greater than 1, an integrity check value is concatenated to the end
- * of link_tbl data
- */
-struct talitos_edesc {
- int src_nents;
- int dst_nents;
- bool icv_ool;
- dma_addr_t iv_dma;
- int dma_len;
- dma_addr_t dma_link_tbl;
- struct talitos_desc desc;
- union {
- struct talitos_ptr link_tbl[0];
- u8 buf[0];
- };
-};
-
static void talitos_sg_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct scatterlist *src,
@@ -1008,11 +997,13 @@ static void talitos_sg_unmap(struct device *dev,
static void ipsec_esp_unmap(struct device *dev,
struct talitos_edesc *edesc,
- struct aead_request *areq)
+ struct aead_request *areq, bool encrypt)
{
struct crypto_aead *aead = crypto_aead_reqtfm(areq);
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
unsigned int ivsize = crypto_aead_ivsize(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
bool is_ipsec_esp = edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP;
struct talitos_ptr *civ_ptr = &edesc->desc.ptr[is_ipsec_esp ? 2 : 3];
@@ -1021,8 +1012,8 @@ static void ipsec_esp_unmap(struct device *dev,
DMA_FROM_DEVICE);
unmap_single_talitos_ptr(dev, civ_ptr, DMA_TO_DEVICE);
- talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen,
- areq->assoclen);
+ talitos_sg_unmap(dev, edesc, areq->src, areq->dst,
+ cryptlen + authsize, areq->assoclen);
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
@@ -1032,7 +1023,7 @@ static void ipsec_esp_unmap(struct device *dev,
unsigned int dst_nents = edesc->dst_nents ? : 1;
sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize,
- areq->assoclen + areq->cryptlen - ivsize);
+ areq->assoclen + cryptlen - ivsize);
}
}
@@ -1043,31 +1034,14 @@ static void ipsec_esp_encrypt_done(struct device *dev,
struct talitos_desc *desc, void *context,
int err)
{
- struct talitos_private *priv = dev_get_drvdata(dev);
- bool is_sec1 = has_ftr_sec1(priv);
struct aead_request *areq = context;
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
- unsigned int authsize = crypto_aead_authsize(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
struct talitos_edesc *edesc;
- struct scatterlist *sg;
- void *icvdata;
edesc = container_of(desc, struct talitos_edesc, desc);
- ipsec_esp_unmap(dev, edesc, areq);
-
- /* copy the generated ICV to dst */
- if (edesc->icv_ool) {
- if (is_sec1)
- icvdata = edesc->buf + areq->assoclen + areq->cryptlen;
- else
- icvdata = &edesc->link_tbl[edesc->src_nents +
- edesc->dst_nents + 2];
- sg = sg_last(areq->dst, edesc->dst_nents);
- memcpy((char *)sg_virt(sg) + sg->length - authsize,
- icvdata, authsize);
- }
+ ipsec_esp_unmap(dev, edesc, areq, true);
dma_unmap_single(dev, edesc->iv_dma, ivsize, DMA_TO_DEVICE);
@@ -1084,32 +1058,16 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_edesc *edesc;
- struct scatterlist *sg;
char *oicv, *icv;
- struct talitos_private *priv = dev_get_drvdata(dev);
- bool is_sec1 = has_ftr_sec1(priv);
edesc = container_of(desc, struct talitos_edesc, desc);
- ipsec_esp_unmap(dev, edesc, req);
+ ipsec_esp_unmap(dev, edesc, req, false);
if (!err) {
/* auth check */
- sg = sg_last(req->dst, edesc->dst_nents ? : 1);
- icv = (char *)sg_virt(sg) + sg->length - authsize;
-
- if (edesc->dma_len) {
- if (is_sec1)
- oicv = (char *)&edesc->dma_link_tbl +
- req->assoclen + req->cryptlen;
- else
- oicv = (char *)
- &edesc->link_tbl[edesc->src_nents +
- edesc->dst_nents + 2];
- if (edesc->icv_ool)
- icv = oicv + authsize;
- } else
- oicv = (char *)&edesc->link_tbl[0];
+ oicv = edesc->buf + edesc->dma_len;
+ icv = oicv - authsize;
err = crypto_memneq(oicv, icv, authsize) ? -EBADMSG : 0;
}
@@ -1128,7 +1086,7 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
edesc = container_of(desc, struct talitos_edesc, desc);
- ipsec_esp_unmap(dev, edesc, req);
+ ipsec_esp_unmap(dev, edesc, req, false);
/* check ICV auth status */
if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
@@ -1145,11 +1103,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
* stop at cryptlen bytes
*/
static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
- unsigned int offset, int cryptlen,
+ unsigned int offset, int datalen, int elen,
struct talitos_ptr *link_tbl_ptr)
{
- int n_sg = sg_count;
+ int n_sg = elen ? sg_count + 1 : sg_count;
int count = 0;
+ int cryptlen = datalen + elen;
while (cryptlen && sg && n_sg--) {
unsigned int len = sg_dma_len(sg);
@@ -1164,11 +1123,20 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count,
if (len > cryptlen)
len = cryptlen;
+ if (datalen > 0 && len > datalen) {
+ to_talitos_ptr(link_tbl_ptr + count,
+ sg_dma_address(sg) + offset, datalen, 0);
+ to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
+ count++;
+ len -= datalen;
+ offset += datalen;
+ }
to_talitos_ptr(link_tbl_ptr + count,
sg_dma_address(sg) + offset, len, 0);
to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0);
count++;
cryptlen -= len;
+ datalen -= len;
offset = 0;
next:
@@ -1178,7 +1146,7 @@ next:
/* tag end of link table */
if (count > 0)
to_talitos_ptr_ext_set(link_tbl_ptr + count - 1,
- DESC_PTR_LNKTBL_RETURN, 0);
+ DESC_PTR_LNKTBL_RET, 0);
return count;
}
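
[Editor's note] sg_to_link_tbl_offset() now takes the payload length and the extent (ICV) length separately, and forces a link-table entry break at the datalen boundary even when a single scatterlist segment spans both, so the ICV always gets its own entry. A standalone sketch of that split for one segment covering payload plus ICV:

	#include <stdio.h>

	struct entry { unsigned long addr; int len; };

	/* Split one segment of sg_len bytes at 'datalen': payload entry
	 * first, then the remainder (the ICV/extent) as its own entry. */
	static int fill_entries(unsigned long addr, int sg_len,
				int datalen, struct entry *tbl)
	{
		int count = 0;

		if (datalen > 0 && sg_len > datalen) {
			tbl[count++] = (struct entry){ addr, datalen };
			addr += datalen;
			sg_len -= datalen;
		}
		tbl[count++] = (struct entry){ addr, sg_len };
		return count;
	}

	int main(void)
	{
		struct entry tbl[2];
		int n = fill_entries(0x1000, 64 + 16, 64, tbl); /* 64B data + 16B ICV */

		for (int i = 0; i < n; i++)
			printf("entry %d: addr=0x%lx len=%d\n",
			       i, tbl[i].addr, tbl[i].len);
		return 0;
	}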
@@ -1186,7 +1154,8 @@ next:
static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
unsigned int len, struct talitos_edesc *edesc,
struct talitos_ptr *ptr, int sg_count,
- unsigned int offset, int tbl_off, int elen)
+ unsigned int offset, int tbl_off, int elen,
+ bool force)
{
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
@@ -1196,7 +1165,7 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
return 1;
}
to_talitos_ptr_ext_set(ptr, elen, is_sec1);
- if (sg_count == 1) {
+ if (sg_count == 1 && !force) {
to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
return sg_count;
}
@@ -1204,9 +1173,9 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1);
return sg_count;
}
- sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len + elen,
+ sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen,
&edesc->link_tbl[tbl_off]);
- if (sg_count == 1) {
+ if (sg_count == 1 && !force) {
/* Only one segment now, so no link tbl needed */
copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1);
return sg_count;
@@ -1224,13 +1193,14 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
unsigned int offset, int tbl_off)
{
return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset,
- tbl_off, 0);
+ tbl_off, 0, false);
}
/*
* fill in and submit ipsec_esp descriptor
*/
static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
+ bool encrypt,
void (*callback)(struct device *dev,
struct talitos_desc *desc,
void *context, int error))
@@ -1240,7 +1210,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
struct talitos_ctx *ctx = crypto_aead_ctx(aead);
struct device *dev = ctx->dev;
struct talitos_desc *desc = &edesc->desc;
- unsigned int cryptlen = areq->cryptlen;
+ unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
unsigned int ivsize = crypto_aead_ivsize(aead);
int tbl_off = 0;
int sg_count, ret;
@@ -1251,6 +1221,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
bool is_ipsec_esp = desc->hdr & DESC_HDR_TYPE_IPSEC_ESP;
struct talitos_ptr *civ_ptr = &desc->ptr[is_ipsec_esp ? 2 : 3];
struct talitos_ptr *ckey_ptr = &desc->ptr[is_ipsec_esp ? 3 : 2];
+ dma_addr_t dma_icv = edesc->dma_link_tbl + edesc->dma_len - authsize;
/* hmac key */
to_talitos_ptr(&desc->ptr[0], ctx->dma_key, ctx->authkeylen, is_sec1);
@@ -1290,7 +1261,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
elen = authsize;
ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4],
- sg_count, areq->assoclen, tbl_off, elen);
+ sg_count, areq->assoclen, tbl_off, elen,
+ false);
if (ret > 1) {
tbl_off += ret;
@@ -1304,55 +1276,32 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE);
}
- ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
- sg_count, areq->assoclen, tbl_off);
-
- if (is_ipsec_esp)
- to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1);
-
- /* ICV data */
- if (ret > 1) {
- tbl_off += ret;
- edesc->icv_ool = true;
- sync_needed = true;
-
- if (is_ipsec_esp) {
- struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
- int offset = (edesc->src_nents + edesc->dst_nents + 2) *
- sizeof(struct talitos_ptr) + authsize;
-
- /* Add an entry to the link table for ICV data */
- to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1);
- to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN,
- is_sec1);
+ if (is_ipsec_esp && encrypt)
+ elen = authsize;
+ else
+ elen = 0;
+ ret = talitos_sg_map_ext(dev, areq->dst, cryptlen, edesc, &desc->ptr[5],
+ sg_count, areq->assoclen, tbl_off, elen,
+ is_ipsec_esp && !encrypt);
+ tbl_off += ret;
- /* icv data follows link tables */
- to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset,
- authsize, is_sec1);
- } else {
- dma_addr_t addr = edesc->dma_link_tbl;
+ if (!encrypt && is_ipsec_esp) {
+ struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
- if (is_sec1)
- addr += areq->assoclen + cryptlen;
- else
- addr += sizeof(struct talitos_ptr) * tbl_off;
+ /* Add an entry to the link table for ICV data */
+ to_talitos_ptr_ext_set(tbl_ptr - 1, 0, is_sec1);
+ to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RET, is_sec1);
- to_talitos_ptr(&desc->ptr[6], addr, authsize, is_sec1);
- }
+ /* icv data follows link tables */
+ to_talitos_ptr(tbl_ptr, dma_icv, authsize, is_sec1);
+ to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1);
+ sync_needed = true;
+ } else if (!encrypt) {
+ to_talitos_ptr(&desc->ptr[6], dma_icv, authsize, is_sec1);
+ sync_needed = true;
} else if (!is_ipsec_esp) {
- ret = talitos_sg_map(dev, areq->dst, authsize, edesc,
- &desc->ptr[6], sg_count, areq->assoclen +
- cryptlen,
- tbl_off);
- if (ret > 1) {
- tbl_off += ret;
- edesc->icv_ool = true;
- sync_needed = true;
- } else {
- edesc->icv_ool = false;
- }
- } else {
- edesc->icv_ool = false;
+ talitos_sg_map(dev, areq->dst, authsize, edesc, &desc->ptr[6],
+ sg_count, areq->assoclen + cryptlen, tbl_off);
}
/* iv out */
@@ -1367,7 +1316,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
- ipsec_esp_unmap(dev, edesc, areq);
+ ipsec_esp_unmap(dev, edesc, areq, encrypt);
kfree(edesc);
}
return ret;
@@ -1435,18 +1384,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
* and space for two sets of ICVs (stashed and generated)
*/
alloc_len = sizeof(struct talitos_edesc);
- if (src_nents || dst_nents) {
+ if (src_nents || dst_nents || !encrypt) {
if (is_sec1)
dma_len = (src_nents ? src_len : 0) +
- (dst_nents ? dst_len : 0);
+ (dst_nents ? dst_len : 0) + authsize;
else
dma_len = (src_nents + dst_nents + 2) *
- sizeof(struct talitos_ptr) + authsize * 2;
+ sizeof(struct talitos_ptr) + authsize;
alloc_len += dma_len;
} else {
dma_len = 0;
- alloc_len += icv_stashing ? authsize : 0;
}
+ alloc_len += icv_stashing ? authsize : 0;
/* if it's an ahash, add space for a second desc next to the first one */
if (is_sec1 && !dst)
@@ -1466,15 +1415,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
edesc->dst_nents = dst_nents;
edesc->iv_dma = iv_dma;
edesc->dma_len = dma_len;
- if (dma_len) {
- void *addr = &edesc->link_tbl[0];
-
- if (is_sec1 && !dst)
- addr += sizeof(struct talitos_desc);
- edesc->dma_link_tbl = dma_map_single(dev, addr,
+ if (dma_len)
+ edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
edesc->dma_len,
DMA_BIDIRECTIONAL);
- }
+
return edesc;
}
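
[Editor's note] talitos_edesc_alloc() now lays the two ICVs out at fixed, computable offsets, which is what lets the completion paths above drop the per-case arithmetic: the device-generated ICV occupies the last authsize bytes of the mapped region, and the stashed incoming ICV (when icv_stashing) sits immediately after it, at buf + dma_len. A standalone sketch of that tail layout:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define AUTHSIZE 16

	struct edesc {
		int dma_len;		/* mapped length, generated ICV at tail */
		unsigned char buf[];	/* link tables, then the two ICVs */
	};

	int main(void)
	{
		int tbl_len = 96;
		int dma_len = tbl_len + AUTHSIZE;	/* + generated ICV */
		struct edesc *e = malloc(sizeof(*e) + dma_len + AUTHSIZE);
		if (!e)
			return 1;
		e->dma_len = dma_len;

		unsigned char *generated = e->buf + e->dma_len - AUTHSIZE;
		unsigned char *stashed   = e->buf + e->dma_len;

		memset(generated, 0xaa, AUTHSIZE);	/* h/w would write this */
		memcpy(stashed, generated, AUTHSIZE);	/* s/w stashes incoming */
		printf("match: %d\n", !memcmp(stashed, generated, AUTHSIZE)); /* 1 */
		free(e);
		return 0;
	}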
@@ -1485,9 +1430,10 @@ static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, u8 *iv,
unsigned int authsize = crypto_aead_authsize(authenc);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
unsigned int ivsize = crypto_aead_ivsize(authenc);
+ unsigned int cryptlen = areq->cryptlen - (encrypt ? 0 : authsize);
return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
- iv, areq->assoclen, areq->cryptlen,
+ iv, areq->assoclen, cryptlen,
authsize, ivsize, icv_stashing,
areq->base.flags, encrypt);
}
@@ -1506,7 +1452,7 @@ static int aead_encrypt(struct aead_request *req)
/* set encrypt */
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
- return ipsec_esp(edesc, req, ipsec_esp_encrypt_done);
+ return ipsec_esp(edesc, req, true, ipsec_esp_encrypt_done);
}
static int aead_decrypt(struct aead_request *req)
@@ -1516,17 +1462,15 @@ static int aead_decrypt(struct aead_request *req)
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct talitos_private *priv = dev_get_drvdata(ctx->dev);
struct talitos_edesc *edesc;
- struct scatterlist *sg;
void *icvdata;
- req->cryptlen -= authsize;
-
/* allocate extended descriptor */
edesc = aead_edesc_alloc(req, req->iv, 1, false);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
- if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
+ if ((edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) &&
+ (priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
((!edesc->src_nents && !edesc->dst_nents) ||
priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) {
@@ -1537,24 +1481,20 @@ static int aead_decrypt(struct aead_request *req)
/* reset integrity check result bits */
- return ipsec_esp(edesc, req, ipsec_esp_decrypt_hwauth_done);
+ return ipsec_esp(edesc, req, false,
+ ipsec_esp_decrypt_hwauth_done);
}
/* Have to check the ICV with software */
edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
/* stash incoming ICV for later cmp with ICV generated by the h/w */
- if (edesc->dma_len)
- icvdata = (char *)&edesc->link_tbl[edesc->src_nents +
- edesc->dst_nents + 2];
- else
- icvdata = &edesc->link_tbl[0];
+ icvdata = edesc->buf + edesc->dma_len;
- sg = sg_last(req->src, edesc->src_nents ? : 1);
+ sg_pcopy_to_buffer(req->src, edesc->src_nents ? : 1, icvdata, authsize,
+ req->assoclen + req->cryptlen - authsize);
- memcpy(icvdata, (char *)sg_virt(sg) + sg->length - authsize, authsize);
-
- return ipsec_esp(edesc, req, ipsec_esp_decrypt_swauth_done);
+ return ipsec_esp(edesc, req, false, ipsec_esp_decrypt_swauth_done);
}
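
[Editor's note] aead_decrypt() no longer mutates req->cryptlen; the payload length is derived once as areq->cryptlen - authsize and threaded through, and the incoming ICV is stashed with sg_pcopy_to_buffer(), a copy-with-skip across scatterlist segments that replaces the sg_last()/sg_virt() walk. A standalone model of that skip-then-copy:

	#include <stdio.h>
	#include <string.h>

	struct seg { const unsigned char *data; size_t len; };

	/* Copy buflen bytes out of a segment list after skipping 'skip'
	 * bytes: the shape of sg_pcopy_to_buffer(src, nents, buf, buflen, skip). */
	static size_t pcopy(const struct seg *sg, int nents,
			    unsigned char *buf, size_t buflen, size_t skip)
	{
		size_t copied = 0;

		for (int i = 0; i < nents && copied < buflen; i++) {
			size_t len = sg[i].len;
			if (skip >= len) { skip -= len; continue; }
			size_t n = len - skip;
			if (n > buflen - copied)
				n = buflen - copied;
			memcpy(buf + copied, sg[i].data + skip, n);
			copied += n;
			skip = 0;
		}
		return copied;
	}

	int main(void)
	{
		unsigned char a[8] = "assoc+ct", icv[4];
		unsigned char b[4] = { 0xde, 0xad, 0xbe, 0xef }; /* trailing ICV */
		struct seg sg[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

		/* stash the last authsize bytes: skip = total - authsize */
		pcopy(sg, 2, icv, sizeof(icv),
		      sizeof(a) + sizeof(b) - sizeof(icv));
		printf("%02x%02x%02x%02x\n", icv[0], icv[1], icv[2], icv[3]);
		return 0;	/* prints deadbeef */
	}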
static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
@@ -1605,6 +1545,18 @@ static int ablkcipher_des3_setkey(struct crypto_ablkcipher *cipher,
return ablkcipher_setkey(cipher, key, keylen);
}
+static int ablkcipher_aes_setkey(struct crypto_ablkcipher *cipher,
+ const u8 *key, unsigned int keylen)
+{
+ if (keylen == AES_KEYSIZE_128 || keylen == AES_KEYSIZE_192 ||
+ keylen == AES_KEYSIZE_256)
+ return ablkcipher_setkey(cipher, key, keylen);
+
+ crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+ return -EINVAL;
+}
+
static void common_nonsnoop_unmap(struct device *dev,
struct talitos_edesc *edesc,
struct ablkcipher_request *areq)
@@ -1624,11 +1576,15 @@ static void ablkcipher_done(struct device *dev,
int err)
{
struct ablkcipher_request *areq = context;
+ struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
struct talitos_edesc *edesc;
edesc = container_of(desc, struct talitos_edesc, desc);
common_nonsnoop_unmap(dev, edesc, areq);
+ memcpy(areq->info, ctx->iv, ivsize);
kfree(edesc);
@@ -1723,6 +1679,14 @@ static int ablkcipher_encrypt(struct ablkcipher_request *areq)
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
+ unsigned int blocksize =
+ crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+
+ if (!areq->nbytes)
+ return 0;
+
+ if (areq->nbytes % blocksize)
+ return -EINVAL;
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(areq, true);
@@ -1740,6 +1704,14 @@ static int ablkcipher_decrypt(struct ablkcipher_request *areq)
struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
struct talitos_edesc *edesc;
+ unsigned int blocksize =
+ crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(cipher));
+
+ if (!areq->nbytes)
+ return 0;
+
+ if (areq->nbytes % blocksize)
+ return -EINVAL;
/* allocate extended descriptor */
edesc = ablkcipher_edesc_alloc(areq, false);
@@ -1759,14 +1731,16 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
struct talitos_desc *desc = &edesc->desc;
- struct talitos_desc *desc2 = desc + 1;
+ struct talitos_desc *desc2 = (struct talitos_desc *)
+ (edesc->buf + edesc->dma_len);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
if (desc->next_desc &&
desc->ptr[5].ptr != desc2->ptr[5].ptr)
unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
- talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
+ if (req_ctx->psrc)
+ talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
/* When using hashctx-in, must unmap it. */
if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
@@ -1833,7 +1807,6 @@ static void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
static int common_nonsnoop_hash(struct talitos_edesc *edesc,
struct ahash_request *areq, unsigned int length,
- unsigned int offset,
void (*callback) (struct device *dev,
struct talitos_desc *desc,
void *context, int error))
@@ -1872,9 +1845,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
sg_count = edesc->src_nents ?: 1;
if (is_sec1 && sg_count > 1)
- sg_pcopy_to_buffer(req_ctx->psrc, sg_count,
- edesc->buf + sizeof(struct talitos_desc),
- length, req_ctx->nbuf);
+ sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
else if (length)
sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
DMA_TO_DEVICE);
@@ -1887,7 +1858,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
DMA_TO_DEVICE);
} else {
sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
- &desc->ptr[3], sg_count, offset, 0);
+ &desc->ptr[3], sg_count, 0, 0);
if (sg_count > 1)
sync_needed = true;
}
@@ -1911,7 +1882,8 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
if (is_sec1 && req_ctx->nbuf && length) {
- struct talitos_desc *desc2 = desc + 1;
+ struct talitos_desc *desc2 = (struct talitos_desc *)
+ (edesc->buf + edesc->dma_len);
dma_addr_t next_desc;
memset(desc2, 0, sizeof(*desc2));
@@ -1932,7 +1904,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
DMA_TO_DEVICE);
copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
- &desc2->ptr[3], sg_count, offset, 0);
+ &desc2->ptr[3], sg_count, 0, 0);
if (sg_count > 1)
sync_needed = true;
copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
@@ -2043,7 +2015,6 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
struct device *dev = ctx->dev;
struct talitos_private *priv = dev_get_drvdata(dev);
bool is_sec1 = has_ftr_sec1(priv);
- int offset = 0;
u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
@@ -2083,6 +2054,8 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
sg_chain(req_ctx->bufsl, 2, areq->src);
req_ctx->psrc = req_ctx->bufsl;
} else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
+ int offset;
+
if (nbytes_to_hash > blocksize)
offset = blocksize - req_ctx->nbuf;
else
@@ -2095,7 +2068,8 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
sg_copy_to_buffer(areq->src, nents,
ctx_buf + req_ctx->nbuf, offset);
req_ctx->nbuf += offset;
- req_ctx->psrc = areq->src;
+ req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src,
+ offset);
} else
req_ctx->psrc = areq->src;
@@ -2135,8 +2109,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
if (ctx->keylen && (req_ctx->first || req_ctx->last))
edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
- return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, offset,
- ahash_done);
+ return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
}
static int ahash_update(struct ahash_request *areq)
@@ -2339,7 +2312,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha1),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha1-"
- "cbc-aes-talitos",
+ "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2384,7 +2357,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha1),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha1-"
- "cbc-3des-talitos",
+ "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2427,7 +2400,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha224),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha224-"
- "cbc-aes-talitos",
+ "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2472,7 +2445,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha224),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha224-"
- "cbc-3des-talitos",
+ "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2515,7 +2488,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(sha256),cbc(aes))",
.cra_driver_name = "authenc-hmac-sha256-"
- "cbc-aes-talitos",
+ "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2560,7 +2533,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_name = "authenc(hmac(sha256),"
"cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-sha256-"
- "cbc-3des-talitos",
+ "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2689,7 +2662,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(aes))",
.cra_driver_name = "authenc-hmac-md5-"
- "cbc-aes-talitos",
+ "cbc-aes-talitos-hsna",
.cra_blocksize = AES_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2732,7 +2705,7 @@ static struct talitos_alg_template driver_algs[] = {
.base = {
.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
.cra_driver_name = "authenc-hmac-md5-"
- "cbc-3des-talitos",
+ "cbc-3des-talitos-hsna",
.cra_blocksize = DES3_EDE_BLOCK_SIZE,
.cra_flags = CRYPTO_ALG_ASYNC,
},
@@ -2760,7 +2733,7 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2777,6 +2750,7 @@ static struct talitos_alg_template driver_algs[] = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
@@ -2787,13 +2761,14 @@ static struct talitos_alg_template driver_algs[] = {
.alg.crypto = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-talitos",
- .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_blocksize = 1,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
CRYPTO_ALG_ASYNC,
.cra_ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .setkey = ablkcipher_aes_setkey,
}
},
.desc_hdr_template = DESC_HDR_TYPE_AESU_CTR_NONSNOOP |
@@ -2810,7 +2785,6 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = DES_KEY_SIZE,
.max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
.setkey = ablkcipher_des_setkey,
}
},
@@ -2845,7 +2819,6 @@ static struct talitos_alg_template driver_algs[] = {
.cra_ablkcipher = {
.min_keysize = DES3_EDE_KEY_SIZE,
.max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
.setkey = ablkcipher_des3_setkey,
}
},
@@ -3270,7 +3243,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
alg->cra_priority = t_alg->algt.priority;
else
alg->cra_priority = TALITOS_CRA_PRIORITY;
- alg->cra_alignmask = 0;
+ if (has_ftr_sec1(priv))
+ alg->cra_alignmask = 3;
+ else
+ alg->cra_alignmask = 0;
alg->cra_ctxsize = sizeof(struct talitos_ctx);
alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY;
@@ -3418,7 +3394,7 @@ static int talitos_probe(struct platform_device *ofdev)
if (err)
goto err_out;
- if (of_device_is_compatible(np, "fsl,sec1.0")) {
+ if (has_ftr_sec1(priv)) {
if (priv->num_channels == 1)
tasklet_init(&priv->done_task[0], talitos1_done_ch0,
(unsigned long)dev);
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index a65a63e0d6c1..1469b956948a 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -1,31 +1,8 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
/*
* Freescale SEC (talitos) device register and descriptor header defines
*
* Copyright (c) 2006-2011 Freescale Semiconductor, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#define TALITOS_TIMEOUT 100000
@@ -65,6 +42,34 @@ struct talitos_desc {
#define TALITOS_DESC_SIZE (sizeof(struct talitos_desc) - sizeof(__be32))
+/*
+ * talitos_edesc - s/w-extended descriptor
+ * @src_nents: number of segments in input scatterlist
+ * @dst_nents: number of segments in output scatterlist
+ * @iv_dma: dma address of iv for checking continuity and link table
+ * @dma_len: length of dma mapped link_tbl space
+ * @dma_link_tbl: bus physical address of link_tbl/buf
+ * @desc: h/w descriptor
+ * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
+ * @buf: input and output buffer (if {src,dst}_nents > 1) (SEC1)
+ *
+ * if decrypting (with authcheck), or if either src_nents or dst_nents
+ * is greater than 1, an integrity check value is concatenated to the end
+ * of link_tbl data
+ */
+struct talitos_edesc {
+ int src_nents;
+ int dst_nents;
+ dma_addr_t iv_dma;
+ int dma_len;
+ dma_addr_t dma_link_tbl;
+ struct talitos_desc desc;
+ union {
+ struct talitos_ptr link_tbl[0];
+ u8 buf[0];
+ };
+};
+
/**
* talitos_request - descriptor submission request
* @desc: descriptor pointer (kernel virtual)
@@ -150,12 +155,6 @@ struct talitos_private {
bool rng_registered;
};
-extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
- void (*callback)(struct device *dev,
- struct talitos_desc *desc,
- void *context, int error),
- void *context);
-
/* .features flag */
#define TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT 0x00000001
#define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
@@ -170,13 +169,11 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
*/
static inline bool has_ftr_sec1(struct talitos_private *priv)
{
-#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2)
- return priv->features & TALITOS_FTR_SEC1 ? true : false;
-#elif defined(CONFIG_CRYPTO_DEV_TALITOS1)
- return true;
-#else
- return false;
-#endif
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS1) &&
+ IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS2))
+ return priv->features & TALITOS_FTR_SEC1;
+
+ return IS_ENABLED(CONFIG_CRYPTO_DEV_TALITOS1);
}
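
[Editor's note] Replacing the #ifdef ladder with IS_ENABLED() keeps every branch visible to the compiler in all configurations, so the dead branch is still type-checked before being constant-folded away. The same behavior, modeled standalone with plain macros in place of CONFIG_* symbols (values here are illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	#define CFG_TALITOS1 1	/* stand-in for IS_ENABLED(CONFIG_..._TALITOS1) */
	#define CFG_TALITOS2 1	/* stand-in for IS_ENABLED(CONFIG_..._TALITOS2) */
	#define FTR_SEC1 0x4	/* illustrative feature bit */

	static bool has_sec1(unsigned int features)
	{
		/* Both engines built in: decide at run time from the bit. */
		if (CFG_TALITOS1 && CFG_TALITOS2)
			return features & FTR_SEC1;

		/* One engine built in: the answer folds to a constant. */
		return CFG_TALITOS1;
	}

	int main(void)
	{
		printf("%d %d\n", has_sec1(FTR_SEC1), has_sec1(0)); /* 1 0 */
		return 0;
	}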
/*
@@ -412,5 +409,5 @@ static inline bool has_ftr_sec1(struct talitos_private *priv)
/* link table extent field bits */
#define DESC_PTR_LNKTBL_JUMP 0x80
-#define DESC_PTR_LNKTBL_RETURN 0x02
+#define DESC_PTR_LNKTBL_RET 0x02
#define DESC_PTR_LNKTBL_NEXT 0x01
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index c7e515a1bc97..d88084447f1c 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -7,64 +7,52 @@
* Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
*/
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
#include "aesp8-ppc.h"
struct p8_aes_cbc_ctx {
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
struct aes_key enc_key;
struct aes_key dec_key;
};
-static int p8_aes_cbc_init(struct crypto_tfm *tfm)
+static int p8_aes_cbc_init(struct crypto_skcipher *tfm)
{
- const char *alg = crypto_tfm_alg_name(tfm);
- struct crypto_sync_skcipher *fallback;
- struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
-
- fallback = crypto_alloc_sync_skcipher(alg, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
+ fallback = crypto_alloc_skcipher("cbc(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(fallback)) {
- printk(KERN_ERR
- "Failed to allocate transformation for '%s': %ld\n",
- alg, PTR_ERR(fallback));
+ pr_err("Failed to allocate cbc(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
return PTR_ERR(fallback);
}
- crypto_sync_skcipher_set_flags(
- fallback,
- crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback));
ctx->fallback = fallback;
-
return 0;
}
-static void p8_aes_cbc_exit(struct crypto_tfm *tfm)
+static void p8_aes_cbc_exit(struct crypto_skcipher *tfm)
{
- struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (ctx->fallback) {
- crypto_free_sync_skcipher(ctx->fallback);
- ctx->fallback = NULL;
- }
+ crypto_free_skcipher(ctx->fallback);
}
-static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
- struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
preempt_disable();
pagefault_disable();
@@ -75,108 +63,71 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
pagefault_enable();
preempt_enable();
- ret |= crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
return ret ? -EINVAL : 0;
}
-static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int ret;
- struct blkcipher_walk walk;
- struct p8_aes_cbc_ctx *ctx =
- crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
if (!crypto_simd_usable()) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
- skcipher_request_set_sync_tfm(req, ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- ret = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- } else {
- blkcipher_walk_init(&walk, dst, src, nbytes);
- ret = blkcipher_walk_virt(desc, &walk);
- while ((nbytes = walk.nbytes)) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_cbc_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- nbytes & AES_BLOCK_MASK,
- &ctx->enc_key, walk.iv, 1);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- nbytes &= AES_BLOCK_SIZE - 1;
- ret = blkcipher_walk_done(desc, &walk, nbytes);
- }
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, ctx->fallback);
+ return enc ? crypto_skcipher_encrypt(subreq) :
+ crypto_skcipher_decrypt(subreq);
}
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_cbc_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ enc ? &ctx->enc_key : &ctx->dec_key,
+ walk.iv, enc);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
+ }
return ret;
}
-static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int p8_aes_cbc_encrypt(struct skcipher_request *req)
{
- int ret;
- struct blkcipher_walk walk;
- struct p8_aes_cbc_ctx *ctx =
- crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
-
- if (!crypto_simd_usable()) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
- skcipher_request_set_sync_tfm(req, ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- ret = crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
- } else {
- blkcipher_walk_init(&walk, dst, src, nbytes);
- ret = blkcipher_walk_virt(desc, &walk);
- while ((nbytes = walk.nbytes)) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_cbc_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- nbytes & AES_BLOCK_MASK,
- &ctx->dec_key, walk.iv, 0);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- nbytes &= AES_BLOCK_SIZE - 1;
- ret = blkcipher_walk_done(desc, &walk, nbytes);
- }
- }
-
- return ret;
+ return p8_aes_cbc_crypt(req, 1);
}
+static int p8_aes_cbc_decrypt(struct skcipher_request *req)
+{
+ return p8_aes_cbc_crypt(req, 0);
+}
-struct crypto_alg p8_aes_cbc_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "p8_aes_cbc",
- .cra_module = THIS_MODULE,
- .cra_priority = 2000,
- .cra_type = &crypto_blkcipher_type,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
- .cra_alignmask = 0,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
- .cra_init = p8_aes_cbc_init,
- .cra_exit = p8_aes_cbc_exit,
- .cra_blkcipher = {
- .ivsize = AES_BLOCK_SIZE,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = p8_aes_cbc_setkey,
- .encrypt = p8_aes_cbc_encrypt,
- .decrypt = p8_aes_cbc_decrypt,
- },
+struct skcipher_alg p8_aes_cbc_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "p8_aes_cbc",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
+ .setkey = p8_aes_cbc_setkey,
+ .encrypt = p8_aes_cbc_encrypt,
+ .decrypt = p8_aes_cbc_decrypt,
+ .init = p8_aes_cbc_init,
+ .exit = p8_aes_cbc_exit,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
};
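
[Editor's note] This is the conversion pattern repeated across the vmx files: the blkcipher gives way to an skcipher_alg, and the sync-on-stack fallback becomes a full async skcipher whose subrequest is carved out of each request's own tail via crypto_skcipher_set_reqsize(). A kernel-style condensation of that fallback idiom, using real skcipher API calls but a hypothetical context type:

	#include <crypto/internal/skcipher.h>

	struct sketch_ctx {
		struct crypto_skcipher *fallback;
	};

	static int sketch_init(struct crypto_skcipher *tfm)
	{
		struct sketch_ctx *ctx = crypto_skcipher_ctx(tfm);

		ctx->fallback = crypto_alloc_skcipher("cbc(aes)", 0,
						      CRYPTO_ALG_NEED_FALLBACK |
						      CRYPTO_ALG_ASYNC);
		if (IS_ERR(ctx->fallback))
			return PTR_ERR(ctx->fallback);

		/* Reserve room after every request for the fallback's subrequest. */
		crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
					    crypto_skcipher_reqsize(ctx->fallback));
		return 0;
	}

	static int sketch_encrypt(struct skcipher_request *req)
	{
		struct sketch_ctx *ctx =
			crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
		struct skcipher_request *subreq = skcipher_request_ctx(req);

		*subreq = *req;	/* inherit src/dst/len/iv and callback */
		skcipher_request_set_tfm(subreq, ctx->fallback);
		return crypto_skcipher_encrypt(subreq);
	}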
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index dd017ef42fa9..79ba062ee1c1 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -7,62 +7,51 @@
* Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
*/
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
#include "aesp8-ppc.h"
struct p8_aes_ctr_ctx {
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
struct aes_key enc_key;
};
-static int p8_aes_ctr_init(struct crypto_tfm *tfm)
+static int p8_aes_ctr_init(struct crypto_skcipher *tfm)
{
- const char *alg = crypto_tfm_alg_name(tfm);
- struct crypto_sync_skcipher *fallback;
- struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
- fallback = crypto_alloc_sync_skcipher(alg, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ fallback = crypto_alloc_skcipher("ctr(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(fallback)) {
- printk(KERN_ERR
- "Failed to allocate transformation for '%s': %ld\n",
- alg, PTR_ERR(fallback));
+ pr_err("Failed to allocate ctr(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
return PTR_ERR(fallback);
}
- crypto_sync_skcipher_set_flags(
- fallback,
- crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback));
ctx->fallback = fallback;
-
return 0;
}
-static void p8_aes_ctr_exit(struct crypto_tfm *tfm)
+static void p8_aes_ctr_exit(struct crypto_skcipher *tfm)
{
- struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (ctx->fallback) {
- crypto_free_sync_skcipher(ctx->fallback);
- ctx->fallback = NULL;
- }
+ crypto_free_skcipher(ctx->fallback);
}
-static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
- struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
preempt_disable();
pagefault_disable();
@@ -72,13 +61,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
pagefault_enable();
preempt_enable();
- ret |= crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
return ret ? -EINVAL : 0;
}
-static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
- struct blkcipher_walk *walk)
+static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx,
+ struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[AES_BLOCK_SIZE];
@@ -98,77 +87,63 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
-static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int p8_aes_ctr_crypt(struct skcipher_request *req)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int ret;
- u64 inc;
- struct blkcipher_walk walk;
- struct p8_aes_ctr_ctx *ctx =
- crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
if (!crypto_simd_usable()) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
- skcipher_request_set_sync_tfm(req, ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- ret = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- } else {
- blkcipher_walk_init(&walk, dst, src, nbytes);
- ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
- walk.dst.virt.addr,
- (nbytes &
- AES_BLOCK_MASK) /
- AES_BLOCK_SIZE,
- &ctx->enc_key,
- walk.iv);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- /* We need to update IV mostly for last bytes/round */
- inc = (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE;
- if (inc > 0)
- while (inc--)
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
-
- nbytes &= AES_BLOCK_SIZE - 1;
- ret = blkcipher_walk_done(desc, &walk, nbytes);
- }
- if (walk.nbytes) {
- p8_aes_ctr_final(ctx, &walk);
- ret = blkcipher_walk_done(desc, &walk, 0);
- }
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, ctx->fallback);
+ return crypto_skcipher_encrypt(subreq);
}
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes / AES_BLOCK_SIZE,
+ &ctx->enc_key, walk.iv);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ do {
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE);
+
+ ret = skcipher_walk_done(&walk, nbytes);
+ }
+ if (nbytes) {
+ p8_aes_ctr_final(ctx, &walk);
+ ret = skcipher_walk_done(&walk, 0);
+ }
return ret;
}
-struct crypto_alg p8_aes_ctr_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "p8_aes_ctr",
- .cra_module = THIS_MODULE,
- .cra_priority = 2000,
- .cra_type = &crypto_blkcipher_type,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
- .cra_alignmask = 0,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
- .cra_init = p8_aes_ctr_init,
- .cra_exit = p8_aes_ctr_exit,
- .cra_blkcipher = {
- .ivsize = AES_BLOCK_SIZE,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = p8_aes_ctr_setkey,
- .encrypt = p8_aes_ctr_crypt,
- .decrypt = p8_aes_ctr_crypt,
- },
+struct skcipher_alg p8_aes_ctr_alg = {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "p8_aes_ctr",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
+ .setkey = p8_aes_ctr_setkey,
+ .encrypt = p8_aes_ctr_crypt,
+ .decrypt = p8_aes_ctr_crypt,
+ .init = p8_aes_ctr_init,
+ .exit = p8_aes_ctr_exit,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
};
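
[Editor's note] In the reworked CTR loop, the counter is advanced once per full block processed and any sub-block tail goes to p8_aes_ctr_final(), which XORs against a single keystream block. crypto_inc() itself is just a big-endian increment with carry over the IV; a runnable equivalent:

	#include <stdio.h>

	/* Big-endian increment with carry, as crypto_inc() does over the IV. */
	static void ctr_inc(unsigned char *ctr, int size)
	{
		for (int i = size - 1; i >= 0; i--)
			if (++ctr[i] != 0)	/* stop once a byte doesn't wrap */
				break;
	}

	int main(void)
	{
		unsigned char iv[16] = { [14] = 0xff, [15] = 0xff };

		ctr_inc(iv, sizeof(iv));	/* ...00ffff -> ...010000 */
		printf("%02x %02x %02x\n", iv[13], iv[14], iv[15]); /* 01 00 00 */
		return 0;
	}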
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index 536167e737a0..49f7258045fa 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -7,67 +7,56 @@
* Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
*/
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
#include <asm/simd.h>
#include <asm/switch_to.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
-#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
-#include <crypto/skcipher.h>
#include "aesp8-ppc.h"
struct p8_aes_xts_ctx {
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
struct aes_key enc_key;
struct aes_key dec_key;
struct aes_key tweak_key;
};
-static int p8_aes_xts_init(struct crypto_tfm *tfm)
+static int p8_aes_xts_init(struct crypto_skcipher *tfm)
{
- const char *alg = crypto_tfm_alg_name(tfm);
- struct crypto_sync_skcipher *fallback;
- struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
- fallback = crypto_alloc_sync_skcipher(alg, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ fallback = crypto_alloc_skcipher("xts(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
if (IS_ERR(fallback)) {
- printk(KERN_ERR
- "Failed to allocate transformation for '%s': %ld\n",
- alg, PTR_ERR(fallback));
+ pr_err("Failed to allocate xts(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
return PTR_ERR(fallback);
}
- crypto_sync_skcipher_set_flags(
- fallback,
- crypto_skcipher_get_flags((struct crypto_skcipher *)tfm));
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback));
ctx->fallback = fallback;
-
return 0;
}
-static void p8_aes_xts_exit(struct crypto_tfm *tfm)
+static void p8_aes_xts_exit(struct crypto_skcipher *tfm)
{
- struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (ctx->fallback) {
- crypto_free_sync_skcipher(ctx->fallback);
- ctx->fallback = NULL;
- }
+ crypto_free_skcipher(ctx->fallback);
}
-static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
- struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- ret = xts_check_key(tfm, key, keylen);
+ ret = xts_verify_key(tfm, key, keylen);
if (ret)
return ret;
@@ -81,100 +70,90 @@ static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key,
pagefault_enable();
preempt_enable();
- ret |= crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
return ret ? -EINVAL : 0;
}
-static int p8_aes_xts_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes, int enc)
+static int p8_aes_xts_crypt(struct skcipher_request *req, int enc)
{
- int ret;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
u8 tweak[AES_BLOCK_SIZE];
- u8 *iv;
- struct blkcipher_walk walk;
- struct p8_aes_xts_ctx *ctx =
- crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm));
+ int ret;
if (!crypto_simd_usable()) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback);
- skcipher_request_set_sync_tfm(req, ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
- } else {
- blkcipher_walk_init(&walk, dst, src, nbytes);
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, ctx->fallback);
+ return enc ? crypto_skcipher_encrypt(subreq) :
+ crypto_skcipher_decrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
- ret = blkcipher_walk_virt(desc, &walk);
+ aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key);
+
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+ while ((nbytes = walk.nbytes) != 0) {
preempt_disable();
pagefault_disable();
enable_kernel_vsx();
-
- iv = walk.iv;
- memset(tweak, 0, AES_BLOCK_SIZE);
- aes_p8_encrypt(iv, tweak, &ctx->tweak_key);
-
+ if (enc)
+ aes_p8_xts_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ &ctx->enc_key, NULL, tweak);
+ else
+ aes_p8_xts_decrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ &ctx->dec_key, NULL, tweak);
disable_kernel_vsx();
pagefault_enable();
preempt_enable();
- while ((nbytes = walk.nbytes)) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- if (enc)
- aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
- nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak);
- else
- aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
- nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- nbytes &= AES_BLOCK_SIZE - 1;
- ret = blkcipher_walk_done(desc, &walk, nbytes);
- }
+ ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
return ret;
}
-static int p8_aes_xts_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int p8_aes_xts_encrypt(struct skcipher_request *req)
{
- return p8_aes_xts_crypt(desc, dst, src, nbytes, 1);
+ return p8_aes_xts_crypt(req, 1);
}
-static int p8_aes_xts_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int p8_aes_xts_decrypt(struct skcipher_request *req)
{
- return p8_aes_xts_crypt(desc, dst, src, nbytes, 0);
+ return p8_aes_xts_crypt(req, 0);
}
-struct crypto_alg p8_aes_xts_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "p8_aes_xts",
- .cra_module = THIS_MODULE,
- .cra_priority = 2000,
- .cra_type = &crypto_blkcipher_type,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
- .cra_alignmask = 0,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_aes_xts_ctx),
- .cra_init = p8_aes_xts_init,
- .cra_exit = p8_aes_xts_exit,
- .cra_blkcipher = {
- .ivsize = AES_BLOCK_SIZE,
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .setkey = p8_aes_xts_setkey,
- .encrypt = p8_aes_xts_encrypt,
- .decrypt = p8_aes_xts_decrypt,
- }
+struct skcipher_alg p8_aes_xts_alg = {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "p8_aes_xts",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx),
+ .setkey = p8_aes_xts_setkey,
+ .encrypt = p8_aes_xts_encrypt,
+ .decrypt = p8_aes_xts_decrypt,
+ .init = p8_aes_xts_init,
+ .exit = p8_aes_xts_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
};
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
index 349646b73754..01774a4d26a2 100644
--- a/drivers/crypto/vmx/aesp8-ppc.h
+++ b/drivers/crypto/vmx/aesp8-ppc.h
@@ -2,8 +2,6 @@
#include <linux/types.h>
#include <crypto/aes.h>
-#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
-
struct aes_key {
u8 key[AES_MAX_KEYLENGTH];
int rounds;
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
index 9c6b5c1d6a1a..db874367b602 100644
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -1286,6 +1286,24 @@ ___
#########################################################################
{{{ # CTR procedure[s] #
+
+####################### WARNING: Here be dragons! #######################
+#
+# This code is written as 'ctr32', based on a 32-bit counter used
+# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
+# a 128-bit counter.
+#
+# This leads to subtle changes from the upstream code: the counter
+# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
+# both the bulk (8 blocks at a time) path, and in the individual block
+# path. Be aware of this when doing updates.
+#
+# See:
+# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
+# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
+# https://github.com/openssl/openssl/pull/8942
+#
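+# For example, with the counter's low word at 0xffffffff, a 32-bit
+# lane add (vaddu_w_m) wraps that word to zero and drops the carry,
+# while the quadword add (vaddu_q_m) carries into the upper 96 bits:
+#
+#   ...00000000_ffffffff + 1 --(word add)--> ...00000000_00000000
+#   ...00000000_ffffffff + 1 --(quad add)--> ...00000001_00000000
+#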
+#########################################################################
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
@@ -1357,7 +1375,7 @@ Loop_ctr32_enc:
addi $idx,$idx,16
bdnz Loop_ctr32_enc
- vadduqm $ivec,$ivec,$one
+ vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
vmr $dat,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
@@ -1501,7 +1519,7 @@ Load_ctr32_enc_key:
$SHL $len,$len,4
vadduqm $out1,$ivec,$one # counter values ...
- vadduqm $out2,$ivec,$two
+ vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
le?li $idx,8
vadduqm $out3,$out1,$two
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
index 6c4c77f4e159..3e0335fb406c 100644
--- a/drivers/crypto/vmx/vmx.c
+++ b/drivers/crypto/vmx/vmx.c
@@ -15,54 +15,58 @@
#include <linux/crypto.h>
#include <asm/cputable.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
extern struct shash_alg p8_ghash_alg;
extern struct crypto_alg p8_aes_alg;
-extern struct crypto_alg p8_aes_cbc_alg;
-extern struct crypto_alg p8_aes_ctr_alg;
-extern struct crypto_alg p8_aes_xts_alg;
-static struct crypto_alg *algs[] = {
- &p8_aes_alg,
- &p8_aes_cbc_alg,
- &p8_aes_ctr_alg,
- &p8_aes_xts_alg,
- NULL,
-};
+extern struct skcipher_alg p8_aes_cbc_alg;
+extern struct skcipher_alg p8_aes_ctr_alg;
+extern struct skcipher_alg p8_aes_xts_alg;
static int __init p8_init(void)
{
- int ret = 0;
- struct crypto_alg **alg_it;
+ int ret;
- for (alg_it = algs; *alg_it; alg_it++) {
- ret = crypto_register_alg(*alg_it);
- printk(KERN_INFO "crypto_register_alg '%s' = %d\n",
- (*alg_it)->cra_name, ret);
- if (ret) {
- for (alg_it--; alg_it >= algs; alg_it--)
- crypto_unregister_alg(*alg_it);
- break;
- }
- }
+ ret = crypto_register_shash(&p8_ghash_alg);
if (ret)
- return ret;
+ goto err;
- ret = crypto_register_shash(&p8_ghash_alg);
- if (ret) {
- for (alg_it = algs; *alg_it; alg_it++)
- crypto_unregister_alg(*alg_it);
- }
+ ret = crypto_register_alg(&p8_aes_alg);
+ if (ret)
+ goto err_unregister_ghash;
+
+ ret = crypto_register_skcipher(&p8_aes_cbc_alg);
+ if (ret)
+ goto err_unregister_aes;
+
+ ret = crypto_register_skcipher(&p8_aes_ctr_alg);
+ if (ret)
+ goto err_unregister_aes_cbc;
+
+ ret = crypto_register_skcipher(&p8_aes_xts_alg);
+ if (ret)
+ goto err_unregister_aes_ctr;
+
+ return 0;
+
+err_unregister_aes_ctr:
+ crypto_unregister_skcipher(&p8_aes_ctr_alg);
+err_unregister_aes_cbc:
+ crypto_unregister_skcipher(&p8_aes_cbc_alg);
+err_unregister_aes:
+ crypto_unregister_alg(&p8_aes_alg);
+err_unregister_ghash:
+ crypto_unregister_shash(&p8_ghash_alg);
+err:
return ret;
}
static void __exit p8_exit(void)
{
- struct crypto_alg **alg_it;
-
- for (alg_it = algs; *alg_it; alg_it++) {
- printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name);
- crypto_unregister_alg(*alg_it);
- }
+ crypto_unregister_skcipher(&p8_aes_xts_alg);
+ crypto_unregister_skcipher(&p8_aes_ctr_alg);
+ crypto_unregister_skcipher(&p8_aes_cbc_alg);
+ crypto_unregister_alg(&p8_aes_alg);
crypto_unregister_shash(&p8_ghash_alg);
}
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index 263bee76ef0d..6b8c4c458e8a 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -718,12 +718,13 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
{
struct jz4780_dma_dev *jzdma = data;
unsigned int nb_channels = jzdma->soc_data->nb_channels;
- uint32_t pending, dmac;
+ unsigned long pending;
+ uint32_t dmac;
int i;
pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
- for_each_set_bit(i, (unsigned long *)&pending, nb_channels) {
+ for_each_set_bit(i, &pending, nb_channels) {
if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]))
pending &= ~BIT(i);
}
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 99d9f431ae2c..4ec84a633bd3 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -703,7 +703,7 @@ static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
spin_lock_irqsave(&sdma->channel_0_lock, flags);
bd0->mode.command = C0_SETPM;
- bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
bd0->mode.count = size / 2;
bd0->buffer_addr = buf_phys;
bd0->ext_buffer_addr = address;
@@ -1025,7 +1025,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
context->gReg[7] = sdmac->watermark_level;
bd0->mode.command = C0_SETDM;
- bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
bd0->mode.count = sizeof(*context) / 4;
bd0->buffer_addr = sdma->context_phys;
bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
@@ -2096,27 +2096,6 @@ static int sdma_probe(struct platform_device *pdev)
if (pdata && pdata->script_addrs)
sdma_add_scripts(sdma, pdata->script_addrs);
- if (pdata) {
- ret = sdma_get_firmware(sdma, pdata->fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
- } else {
- /*
- * Because the device tree does not encode the ROM script address,
- * the RAM script in firmware is mandatory for device tree
- * probe; otherwise probing fails.
- */
- ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
- &fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware name\n");
- else {
- ret = sdma_get_firmware(sdma, fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
- }
- }
-
sdma->dma_device.dev = &pdev->dev;
sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
@@ -2161,6 +2140,33 @@ static int sdma_probe(struct platform_device *pdev)
of_node_put(spba_bus);
}
+ /*
+ * Kick off firmware loading as the very last step:
+ * attempt to load firmware only if we're not on the error path, because
+ * the firmware callback requires a fully functional and allocated sdma
+ * instance.
+ */
+ if (pdata) {
+ ret = sdma_get_firmware(sdma, pdata->fw_name);
+ if (ret)
+ dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
+ } else {
+ /*
+ * Because the device tree does not encode the ROM script address,
+ * the RAM script in firmware is mandatory for device tree
+ * probe; otherwise probing fails.
+ */
+ ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+ &fw_name);
+ if (ret) {
+ dev_warn(&pdev->dev, "failed to get firmware name\n");
+ } else {
+ ret = sdma_get_firmware(sdma, fw_name);
+ if (ret)
+ dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
+ }
+ }
+
return 0;
err_register:
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 4b43844f6af5..8e90a405939d 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -799,6 +799,9 @@ static u32 process_channel_irqs(struct bam_device *bdev)
/* Number of bytes available to read */
avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+ if (offset < bchan->head)
+ avail--;
+
list_for_each_entry_safe(async_desc, tmp,
&bchan->desc_list, desc_node) {
/* Not enough data to read */
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4b7cf7bc0ded..ad3b1f4866b3 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -52,6 +52,7 @@ struct efi __read_mostly efi = {
.mem_attr_table = EFI_INVALID_TABLE_ADDR,
.rng_seed = EFI_INVALID_TABLE_ADDR,
.tpm_log = EFI_INVALID_TABLE_ADDR,
+ .tpm_final_log = EFI_INVALID_TABLE_ADDR,
.mem_reserve = EFI_INVALID_TABLE_ADDR,
};
EXPORT_SYMBOL(efi);
@@ -484,6 +485,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
{EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
{LINUX_EFI_RANDOM_SEED_TABLE_GUID, "RNG", &efi.rng_seed},
{LINUX_EFI_TPM_EVENT_LOG_GUID, "TPMEventLog", &efi.tpm_log},
+ {LINUX_EFI_TPM_FINAL_LOG_GUID, "TPMFinalLog", &efi.tpm_final_log},
{LINUX_EFI_MEMRESERVE_TABLE_GUID, "MEMRESERVE", &efi.mem_reserve},
{NULL_GUID, NULL, NULL},
};
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index e4610e72b78f..1db780c0f07b 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -926,3 +926,18 @@ free_map:
fail:
return status;
}
+
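+/*
+ * Return the configuration table matching @guid from the EFI system
+ * table, or NULL if no such table is installed.
+ */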
+void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid)
+{
+ efi_config_table_t *tables = (efi_config_table_t *)sys_table->tables;
+ int i;
+
+ for (i = 0; i < sys_table->nr_tables; i++) {
+ if (efi_guidcmp(tables[i].guid, guid) != 0)
+ continue;
+
+ return (void *)tables[i].table;
+ }
+
+ return NULL;
+}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 1b1dfcaa6fb9..7f1556fd867d 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -65,6 +65,8 @@ efi_status_t check_platform_features(efi_system_table_t *sys_table_arg);
efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg);
+void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid);
+
/* Helper macros for the usual case of using simple C variables: */
#ifndef fdt_setprop_inplace_var
#define fdt_setprop_inplace_var(fdt, node_offset, name, var) \
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 5440ba17a1c5..0bf0190917e0 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -363,26 +363,17 @@ fail:
void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size)
{
- efi_guid_t fdt_guid = DEVICE_TREE_GUID;
- efi_config_table_t *tables;
- int i;
+ void *fdt;
- tables = (efi_config_table_t *)sys_table->tables;
+ fdt = get_efi_config_table(sys_table, DEVICE_TREE_GUID);
- for (i = 0; i < sys_table->nr_tables; i++) {
- void *fdt;
+ if (!fdt)
+ return NULL;
- if (efi_guidcmp(tables[i].guid, fdt_guid) != 0)
- continue;
-
- fdt = (void *)tables[i].table;
- if (fdt_check_header(fdt) != 0) {
- pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
- return NULL;
- }
- *fdt_size = fdt_totalsize(fdt);
- return fdt;
+ if (fdt_check_header(fdt) != 0) {
+ pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+ return NULL;
}
-
- return NULL;
+ *fdt_size = fdt_totalsize(fdt);
+ return fdt;
}
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index 5bd04f75d8d6..eb9af83e4d59 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -57,31 +57,40 @@ void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg)
#endif
-static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
+void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
{
efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
efi_status_t status;
efi_physical_addr_t log_location = 0, log_last_entry = 0;
struct linux_efi_tpm_eventlog *log_tbl = NULL;
+ struct efi_tcg2_final_events_table *final_events_table;
unsigned long first_entry_addr, last_entry_addr;
size_t log_size, last_entry_size;
efi_bool_t truncated;
+ int version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
void *tcg2_protocol = NULL;
+ int final_events_size = 0;
status = efi_call_early(locate_protocol, &tcg2_guid, NULL,
&tcg2_protocol);
if (status != EFI_SUCCESS)
return;
- status = efi_call_proto(efi_tcg2_protocol, get_event_log, tcg2_protocol,
- EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2,
- &log_location, &log_last_entry, &truncated);
- if (status != EFI_SUCCESS)
- return;
+ status = efi_call_proto(efi_tcg2_protocol, get_event_log,
+ tcg2_protocol, version, &log_location,
+ &log_last_entry, &truncated);
+
+ if (status != EFI_SUCCESS || !log_location) {
+ version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+ status = efi_call_proto(efi_tcg2_protocol, get_event_log,
+ tcg2_protocol, version, &log_location,
+ &log_last_entry, &truncated);
+ if (status != EFI_SUCCESS || !log_location)
+ return;
+ }
- if (!log_location)
- return;
first_entry_addr = (unsigned long) log_location;
/*
@@ -96,8 +105,23 @@ static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
* We need to calculate its size to deduce the full size of
* the logs.
*/
- last_entry_size = sizeof(struct tcpa_event) +
- ((struct tcpa_event *) last_entry_addr)->event_size;
+ if (version == EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
+ /*
+ * The TCG2 log format has variable length entries,
+ * and the information to decode the hash algorithms
+ * back into a size is contained in the first entry -
+ * pass a pointer to the final entry (to calculate its
+ * size) and the first entry (so we know how long each
+ * digest is)
+ */
+ last_entry_size =
+ __calc_tpm2_event_size((void *)last_entry_addr,
+ (void *)(long)log_location,
+ false);
+ } else {
+ last_entry_size = sizeof(struct tcpa_event) +
+ ((struct tcpa_event *) last_entry_addr)->event_size;
+ }
log_size = log_last_entry - log_location + last_entry_size;
}
@@ -112,9 +136,37 @@ static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
return;
}
+ /*
+ * Figure out whether any events have already been logged to the
+ * final events structure, and if so how much space they take up
+ */
+ final_events_table = get_efi_config_table(sys_table_arg,
+ LINUX_EFI_TPM_FINAL_LOG_GUID);
+ if (final_events_table && final_events_table->nr_events) {
+ struct tcg_pcr_event2_head *header;
+ int offset;
+ void *data;
+ int event_size;
+ int i = final_events_table->nr_events;
+
+ data = (void *)final_events_table;
+ offset = sizeof(final_events_table->version) +
+ sizeof(final_events_table->nr_events);
+
+ while (i > 0) {
+ header = data + offset + final_events_size;
+ event_size = __calc_tpm2_event_size(header,
+ (void *)(long)log_location,
+ false);
+ final_events_size += event_size;
+ i--;
+ }
+ }
+
memset(log_tbl, 0, sizeof(*log_tbl) + log_size);
log_tbl->size = log_size;
- log_tbl->version = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
+ log_tbl->final_events_preboot_size = final_events_size;
+ log_tbl->version = version;
memcpy(log_tbl->log, (void *) first_entry_addr, log_size);
status = efi_call_early(install_configuration_table,
@@ -126,9 +178,3 @@ static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
err_free:
efi_call_early(free_pool, log_tbl);
}
-
-void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table_arg)
-{
- /* Only try to retrieve the logs in 1.2 format. */
- efi_retrieve_tpm2_eventlog_1_2(sys_table_arg);
-}
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index 3a689b40ccc0..1d3f5ca3eaaf 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -4,11 +4,34 @@
* Thiebaud Weksteen <tweek@google.com>
*/
+#define TPM_MEMREMAP(start, size) early_memremap(start, size)
+#define TPM_MEMUNMAP(start, size) early_memunmap(start, size)
+
+#include <asm/early_ioremap.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/memblock.h>
+#include <linux/tpm_eventlog.h>
-#include <asm/early_ioremap.h>
+int efi_tpm_final_log_size;
+EXPORT_SYMBOL(efi_tpm_final_log_size);
+
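+/*
+ * Walk @count TCG2 event log entries starting at @data, using
+ * @size_info (the first log entry) to size each entry's digests, and
+ * return the total size in bytes, or -1 on a malformed entry.
+ */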
+static int tpm2_calc_event_log_size(void *data, int count, void *size_info)
+{
+ struct tcg_pcr_event2_head *header;
+ int event_size, size = 0;
+
+ while (count > 0) {
+ header = data + size;
+ event_size = __calc_tpm2_event_size(header, size_info, true);
+ if (event_size == 0)
+ return -1;
+ size += event_size;
+ count--;
+ }
+
+ return size;
+}
/*
* Reserve the memory associated with the TPM Event Log configuration table.
@@ -16,22 +39,54 @@
int __init efi_tpm_eventlog_init(void)
{
struct linux_efi_tpm_eventlog *log_tbl;
+ struct efi_tcg2_final_events_table *final_tbl;
unsigned int tbl_size;
+ int ret = 0;
- if (efi.tpm_log == EFI_INVALID_TABLE_ADDR)
+ if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) {
+ /*
+ * We can't calculate the size of the final events without the
+ * first entry in the TPM log, so bail here.
+ */
return 0;
+ }
log_tbl = early_memremap(efi.tpm_log, sizeof(*log_tbl));
if (!log_tbl) {
pr_err("Failed to map TPM Event Log table @ 0x%lx\n",
- efi.tpm_log);
+ efi.tpm_log);
efi.tpm_log = EFI_INVALID_TABLE_ADDR;
return -ENOMEM;
}
tbl_size = sizeof(*log_tbl) + log_tbl->size;
memblock_reserve(efi.tpm_log, tbl_size);
+
+ if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR)
+ goto out;
+
+ final_tbl = early_memremap(efi.tpm_final_log, sizeof(*final_tbl));
+
+ if (!final_tbl) {
+ pr_err("Failed to map TPM Final Event Log table @ 0x%lx\n",
+ efi.tpm_final_log);
+ efi.tpm_final_log = EFI_INVALID_TABLE_ADDR;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ tbl_size = tpm2_calc_event_log_size((void *)efi.tpm_final_log
+ + sizeof(final_tbl->version)
+ + sizeof(final_tbl->nr_events),
+ final_tbl->nr_events,
+ log_tbl->log);
+ memblock_reserve((unsigned long)final_tbl,
+ tbl_size + sizeof(*final_tbl));
+ early_memunmap(final_tbl, sizeof(*final_tbl));
+ efi_tpm_final_log_size = tbl_size;
+
+out:
early_memunmap(log_tbl, sizeof(*log_tbl));
- return 0;
+ return ret;
}
diff --git a/drivers/fmc/Kconfig b/drivers/fmc/Kconfig
deleted file mode 100644
index ae3d7f634932..000000000000
--- a/drivers/fmc/Kconfig
+++ /dev/null
@@ -1,52 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# FMC (ANSI-VITA 57.1) bus support
-#
-
-menuconfig FMC
- tristate "FMC support"
- help
-
- FMC (FPGA Mezzanine Carrier) is a mechanical and electrical
- standard for mezzanine cards that plug into a carrier board.
- This kernel subsystem supports the matching between carrier
- and mezzanine based on identifiers stored in the internal I2C
- EEPROM, as well as having carrier-independent drivers.
-
- The framework was born outside of the kernel and at this time
- the off-tree code base is more complete. Code and documentation
- is at git://ohwr.org/fmc-projects/fmc-bus.git .
-
-if FMC
-
-config FMC_FAKEDEV
- tristate "FMC fake device (software testing)"
- help
- This is a fake carrier, bringing a default EEPROM content
- that can be rewritten at run time and usef for matching
- mezzanines.
-
-config FMC_TRIVIAL
- tristate "FMC trivial mezzanine driver (software testing)"
- help
- This is a fake mezzanine driver, to show how FMC works and test it.
- The driver also handles interrupts (we used it with a real carrier
- before the mezzanines were produced)
-
-config FMC_WRITE_EEPROM
- tristate "FMC mezzanine driver to write I2C EEPROM"
- help
- This driver matches every mezzanine device and can write the
- internal EEPROM of the PCB, using the firmware loader to get
- its binary and the function carrier->reprogram to actually do it.
- It is useful when the mezzanines are produced.
-
-config FMC_CHARDEV
- tristate "FMC mezzanine driver that registers a char device"
- help
- This driver matches every mezzanine device and allows user
- space to read and write registers using a char device. It
- can be used to write user-space drivers, or just get
- acquainted with a mezzanine before writing its specific driver.
-
-endif # FMC
diff --git a/drivers/fmc/Makefile b/drivers/fmc/Makefile
deleted file mode 100644
index e3da6192cf39..000000000000
--- a/drivers/fmc/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_FMC) += fmc.o
-
-fmc-y = fmc-core.o
-fmc-y += fmc-match.o
-fmc-y += fmc-sdb.o
-fmc-y += fru-parse.o
-fmc-y += fmc-dump.o
-fmc-y += fmc-debug.o
-
-obj-$(CONFIG_FMC_FAKEDEV) += fmc-fakedev.o
-obj-$(CONFIG_FMC_TRIVIAL) += fmc-trivial.o
-obj-$(CONFIG_FMC_WRITE_EEPROM) += fmc-write-eeprom.o
-obj-$(CONFIG_FMC_CHARDEV) += fmc-chardev.o
diff --git a/drivers/fmc/fmc-chardev.c b/drivers/fmc/fmc-chardev.c
deleted file mode 100644
index 7d2091b5e978..000000000000
--- a/drivers/fmc/fmc-chardev.c
+++ /dev/null
@@ -1,199 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/fmc.h>
-#include <linux/uaccess.h>
-
-static LIST_HEAD(fc_devices);
-static DEFINE_SPINLOCK(fc_lock);
-
-struct fc_instance {
- struct list_head list;
- struct fmc_device *fmc;
- struct miscdevice misc;
-};
-
-/* at open time, we must identify our device */
-static int fc_open(struct inode *ino, struct file *f)
-{
- struct fmc_device *fmc;
- struct fc_instance *fc;
- int minor = iminor(ino);
-
- list_for_each_entry(fc, &fc_devices, list)
- if (fc->misc.minor == minor)
- break;
- if (fc->misc.minor != minor)
- return -ENODEV;
- fmc = fc->fmc;
- if (try_module_get(fmc->owner) == 0)
- return -ENODEV;
-
- f->private_data = fmc;
- return 0;
-}
-
-static int fc_release(struct inode *ino, struct file *f)
-{
- struct fmc_device *fmc = f->private_data;
- module_put(fmc->owner);
- return 0;
-}
-
-/* read and write are simple after the default llseek has been used */
-static ssize_t fc_read(struct file *f, char __user *buf, size_t count,
- loff_t *offp)
-{
- struct fmc_device *fmc = f->private_data;
- unsigned long addr;
- uint32_t val;
-
- if (count < sizeof(val))
- return -EINVAL;
- count = sizeof(val);
-
- addr = *offp;
- if (addr > fmc->memlen)
- return -ESPIPE; /* Illegal seek */
- val = fmc_readl(fmc, addr);
- if (copy_to_user(buf, &val, count))
- return -EFAULT;
- *offp += count;
- return count;
-}
-
-static ssize_t fc_write(struct file *f, const char __user *buf, size_t count,
- loff_t *offp)
-{
- struct fmc_device *fmc = f->private_data;
- unsigned long addr;
- uint32_t val;
-
- if (count < sizeof(val))
- return -EINVAL;
- count = sizeof(val);
-
- addr = *offp;
- if (addr > fmc->memlen)
- return -ESPIPE; /* Illegal seek */
- if (copy_from_user(&val, buf, count))
- return -EFAULT;
- fmc_writel(fmc, val, addr);
- *offp += count;
- return count;
-}
-
-static const struct file_operations fc_fops = {
- .owner = THIS_MODULE,
- .open = fc_open,
- .release = fc_release,
- .llseek = generic_file_llseek,
- .read = fc_read,
- .write = fc_write,
-};
-
-
-/* Device part .. */
-static int fc_probe(struct fmc_device *fmc);
-static int fc_remove(struct fmc_device *fmc);
-
-static struct fmc_driver fc_drv = {
- .version = FMC_VERSION,
- .driver.name = KBUILD_MODNAME,
- .probe = fc_probe,
- .remove = fc_remove,
- /* no table: we want to match everything */
-};
-
-/* We accept the generic busid parameter */
-FMC_PARAM_BUSID(fc_drv);
-
-/* probe and remove must allocate and release a misc device */
-static int fc_probe(struct fmc_device *fmc)
-{
- int ret;
- int index = 0;
-
- struct fc_instance *fc;
-
- index = fmc_validate(fmc, &fc_drv);
- if (index < 0)
- return -EINVAL; /* not our device: invalid */
-
- /* Create a char device: we want to create it anew */
- fc = kzalloc(sizeof(*fc), GFP_KERNEL);
- if (!fc)
- return -ENOMEM;
- fc->fmc = fmc;
- fc->misc.minor = MISC_DYNAMIC_MINOR;
- fc->misc.fops = &fc_fops;
- fc->misc.name = kstrdup(dev_name(&fmc->dev), GFP_KERNEL);
-
- ret = misc_register(&fc->misc);
- if (ret < 0)
- goto out;
- spin_lock(&fc_lock);
- list_add(&fc->list, &fc_devices);
- spin_unlock(&fc_lock);
- dev_info(&fc->fmc->dev, "Created misc device \"%s\"\n",
- fc->misc.name);
- return 0;
-
-out:
- kfree(fc->misc.name);
- kfree(fc);
- return ret;
-}
-
-static int fc_remove(struct fmc_device *fmc)
-{
- struct fc_instance *fc;
-
- list_for_each_entry(fc, &fc_devices, list)
- if (fc->fmc == fmc)
- break;
- if (fc->fmc != fmc) {
- dev_err(&fmc->dev, "remove called but not found\n");
- return -ENODEV;
- }
-
- spin_lock(&fc_lock);
- list_del(&fc->list);
- spin_unlock(&fc_lock);
- misc_deregister(&fc->misc);
- kfree(fc->misc.name);
- kfree(fc);
-
- return 0;
-}
-
-
-static int fc_init(void)
-{
- int ret;
-
- ret = fmc_driver_register(&fc_drv);
- return ret;
-}
-
-static void fc_exit(void)
-{
- fmc_driver_unregister(&fc_drv);
-}
-
-module_init(fc_init);
-module_exit(fc_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/fmc/fmc-core.c b/drivers/fmc/fmc-core.c
deleted file mode 100644
index 573f5471f680..000000000000
--- a/drivers/fmc/fmc-core.c
+++ /dev/null
@@ -1,388 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/fmc.h>
-#include <linux/fmc-sdb.h>
-
-#include "fmc-private.h"
-
-static int fmc_check_version(unsigned long version, const char *name)
-{
- if (__FMC_MAJOR(version) != FMC_MAJOR) {
- pr_err("%s: \"%s\" has wrong major (has %li, expected %i)\n",
- __func__, name, __FMC_MAJOR(version), FMC_MAJOR);
- return -EINVAL;
- }
-
- if (__FMC_MINOR(version) != FMC_MINOR)
- pr_info("%s: \"%s\" has wrong minor (has %li, expected %i)\n",
- __func__, name, __FMC_MINOR(version), FMC_MINOR);
- return 0;
-}
-
-static int fmc_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- /* struct fmc_device *fdev = to_fmc_device(dev); */
-
- /* FIXME: The MODALIAS */
- add_uevent_var(env, "MODALIAS=%s", "fmc");
- return 0;
-}
-
-static int fmc_probe(struct device *dev)
-{
- struct fmc_driver *fdrv = to_fmc_driver(dev->driver);
- struct fmc_device *fdev = to_fmc_device(dev);
-
- return fdrv->probe(fdev);
-}
-
-static int fmc_remove(struct device *dev)
-{
- struct fmc_driver *fdrv = to_fmc_driver(dev->driver);
- struct fmc_device *fdev = to_fmc_device(dev);
-
- return fdrv->remove(fdev);
-}
-
-static void fmc_shutdown(struct device *dev)
-{
- /* not implemented but mandatory */
-}
-
-static struct bus_type fmc_bus_type = {
- .name = "fmc",
- .match = fmc_match,
- .uevent = fmc_uevent,
- .probe = fmc_probe,
- .remove = fmc_remove,
- .shutdown = fmc_shutdown,
-};
-
-static void fmc_release(struct device *dev)
-{
- struct fmc_device *fmc = container_of(dev, struct fmc_device, dev);
-
- kfree(fmc);
-}
-
-/*
- * The eeprom is exported in sysfs, through a binary attribute
- */
-
-static ssize_t fmc_read_eeprom(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t count)
-{
- struct device *dev;
- struct fmc_device *fmc;
- int eelen;
-
- dev = container_of(kobj, struct device, kobj);
- fmc = container_of(dev, struct fmc_device, dev);
- eelen = fmc->eeprom_len;
- if (off > eelen)
- return -ESPIPE;
- if (off == eelen)
- return 0; /* EOF */
- if (off + count > eelen)
- count = eelen - off;
- memcpy(buf, fmc->eeprom + off, count);
- return count;
-}
-
-static ssize_t fmc_write_eeprom(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t off, size_t count)
-{
- struct device *dev;
- struct fmc_device *fmc;
-
- dev = container_of(kobj, struct device, kobj);
- fmc = container_of(dev, struct fmc_device, dev);
- return fmc->op->write_ee(fmc, off, buf, count);
-}
-
-static struct bin_attribute fmc_eeprom_attr = {
- .attr = { .name = "eeprom", .mode = S_IRUGO | S_IWUSR, },
- .size = 8192, /* more or less standard */
- .read = fmc_read_eeprom,
- .write = fmc_write_eeprom,
-};
-
-int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h,
- char *name, int flags)
-{
- if (fmc->op->irq_request)
- return fmc->op->irq_request(fmc, h, name, flags);
- return -EPERM;
-}
-EXPORT_SYMBOL(fmc_irq_request);
-
-void fmc_irq_free(struct fmc_device *fmc)
-{
- if (fmc->op->irq_free)
- fmc->op->irq_free(fmc);
-}
-EXPORT_SYMBOL(fmc_irq_free);
-
-void fmc_irq_ack(struct fmc_device *fmc)
-{
- if (likely(fmc->op->irq_ack))
- fmc->op->irq_ack(fmc);
-}
-EXPORT_SYMBOL(fmc_irq_ack);
-
-int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv)
-{
- if (fmc->op->validate)
- return fmc->op->validate(fmc, drv);
- return -EPERM;
-}
-EXPORT_SYMBOL(fmc_validate);
-
-int fmc_gpio_config(struct fmc_device *fmc, struct fmc_gpio *gpio, int ngpio)
-{
- if (fmc->op->gpio_config)
- return fmc->op->gpio_config(fmc, gpio, ngpio);
- return -EPERM;
-}
-EXPORT_SYMBOL(fmc_gpio_config);
-
-int fmc_read_ee(struct fmc_device *fmc, int pos, void *d, int l)
-{
- if (fmc->op->read_ee)
- return fmc->op->read_ee(fmc, pos, d, l);
- return -EPERM;
-}
-EXPORT_SYMBOL(fmc_read_ee);
-
-int fmc_write_ee(struct fmc_device *fmc, int pos, const void *d, int l)
-{
- if (fmc->op->write_ee)
- return fmc->op->write_ee(fmc, pos, d, l);
- return -EPERM;
-}
-EXPORT_SYMBOL(fmc_write_ee);
-
-/*
- * Functions for client modules follow
- */
-
-int fmc_driver_register(struct fmc_driver *drv)
-{
- if (fmc_check_version(drv->version, drv->driver.name))
- return -EINVAL;
- drv->driver.bus = &fmc_bus_type;
- return driver_register(&drv->driver);
-}
-EXPORT_SYMBOL(fmc_driver_register);
-
-void fmc_driver_unregister(struct fmc_driver *drv)
-{
- driver_unregister(&drv->driver);
-}
-EXPORT_SYMBOL(fmc_driver_unregister);
-
-/*
- * When a device set is registered, all eeproms must be read
- * and all FRUs must be parsed
- */
-int fmc_device_register_n_gw(struct fmc_device **devs, int n,
- struct fmc_gateware *gw)
-{
- struct fmc_device *fmc, **devarray;
- uint32_t device_id;
- int i, ret = 0;
-
- if (n < 1)
- return 0;
-
- /* Check the version of the first data structure (function prints) */
- if (fmc_check_version(devs[0]->version, devs[0]->carrier_name))
- return -EINVAL;
-
- devarray = kmemdup(devs, n * sizeof(*devs), GFP_KERNEL);
- if (!devarray)
- return -ENOMEM;
-
- /* Make all other checks before continuing, for all devices */
- for (i = 0; i < n; i++) {
- fmc = devarray[i];
- if (!fmc->hwdev) {
- pr_err("%s: device nr. %i has no hwdev pointer\n",
- __func__, i);
- ret = -EINVAL;
- break;
- }
- if (fmc->flags & FMC_DEVICE_NO_MEZZANINE) {
- dev_info(fmc->hwdev, "absent mezzanine in slot %d\n",
- fmc->slot_id);
- continue;
- }
- if (!fmc->eeprom) {
- dev_err(fmc->hwdev, "no eeprom provided for slot %i\n",
- fmc->slot_id);
- ret = -EINVAL;
- }
- if (!fmc->eeprom_addr) {
- dev_err(fmc->hwdev, "no eeprom_addr for slot %i\n",
- fmc->slot_id);
- ret = -EINVAL;
- }
- if (!fmc->carrier_name || !fmc->carrier_data ||
- !fmc->device_id) {
- dev_err(fmc->hwdev,
- "device nr %i: carrier name, "
- "data or dev_id not set\n", i);
- ret = -EINVAL;
- }
- if (ret)
- break;
-
- }
- if (ret) {
- kfree(devarray);
- return ret;
- }
-
- /* Validation is ok. Now init and register the devices */
- for (i = 0; i < n; i++) {
- fmc = devarray[i];
-
- fmc->nr_slots = n; /* each slot must know how many are there */
- fmc->devarray = devarray;
-
- device_initialize(&fmc->dev);
- fmc->dev.release = fmc_release;
- fmc->dev.parent = fmc->hwdev;
-
- /* Fill the identification stuff (may fail) */
- fmc_fill_id_info(fmc);
-
- fmc->dev.bus = &fmc_bus_type;
-
- /* Name from mezzanine info or carrier info. Or 0,1,2.. */
- device_id = fmc->device_id;
- if (!fmc->mezzanine_name)
- dev_set_name(&fmc->dev, "fmc-%04x", device_id);
- else
- dev_set_name(&fmc->dev, "%s-%04x", fmc->mezzanine_name,
- device_id);
-
- if (gw) {
- /*
- * The carrier already know the bitstream to load
- * for this set of FMC mezzanines.
- */
- ret = fmc->op->reprogram_raw(fmc, NULL,
- gw->bitstream, gw->len);
- if (ret) {
- dev_warn(fmc->hwdev,
- "Invalid gateware for FMC mezzanine\n");
- goto out;
- }
- }
-
- ret = device_add(&fmc->dev);
- if (ret < 0) {
- dev_err(fmc->hwdev, "Slot %i: Failed in registering "
- "\"%s\"\n", fmc->slot_id, fmc->dev.kobj.name);
- goto out;
- }
- ret = sysfs_create_bin_file(&fmc->dev.kobj, &fmc_eeprom_attr);
- if (ret < 0) {
- dev_err(&fmc->dev, "Failed in registering eeprom\n");
- goto out1;
- }
- /* This device went well, give information to the user */
- fmc_dump_eeprom(fmc);
- fmc_debug_init(fmc);
- }
- return 0;
-
-out1:
- device_del(&fmc->dev);
-out:
- kfree(devarray);
- for (i--; i >= 0; i--) {
- fmc_debug_exit(devs[i]);
- sysfs_remove_bin_file(&devs[i]->dev.kobj, &fmc_eeprom_attr);
- device_del(&devs[i]->dev);
- fmc_free_id_info(devs[i]);
- put_device(&devs[i]->dev);
- }
- return ret;
-
-}
-EXPORT_SYMBOL(fmc_device_register_n_gw);
-
-int fmc_device_register_n(struct fmc_device **devs, int n)
-{
- return fmc_device_register_n_gw(devs, n, NULL);
-}
-EXPORT_SYMBOL(fmc_device_register_n);
-
-int fmc_device_register_gw(struct fmc_device *fmc, struct fmc_gateware *gw)
-{
- return fmc_device_register_n_gw(&fmc, 1, gw);
-}
-EXPORT_SYMBOL(fmc_device_register_gw);
-
-int fmc_device_register(struct fmc_device *fmc)
-{
- return fmc_device_register_n(&fmc, 1);
-}
-EXPORT_SYMBOL(fmc_device_register);
-
-void fmc_device_unregister_n(struct fmc_device **devs, int n)
-{
- int i;
-
- if (n < 1)
- return;
-
- /* Free devarray first, not used by the later loop */
- kfree(devs[0]->devarray);
-
- for (i = 0; i < n; i++) {
- fmc_debug_exit(devs[i]);
- sysfs_remove_bin_file(&devs[i]->dev.kobj, &fmc_eeprom_attr);
- device_del(&devs[i]->dev);
- fmc_free_id_info(devs[i]);
- put_device(&devs[i]->dev);
- }
-}
-EXPORT_SYMBOL(fmc_device_unregister_n);
-
-void fmc_device_unregister(struct fmc_device *fmc)
-{
- fmc_device_unregister_n(&fmc, 1);
-}
-EXPORT_SYMBOL(fmc_device_unregister);
-
-/* Init and exit are trivial */
-static int fmc_init(void)
-{
- return bus_register(&fmc_bus_type);
-}
-
-static void fmc_exit(void)
-{
- bus_unregister(&fmc_bus_type);
-}
-
-module_init(fmc_init);
-module_exit(fmc_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/fmc/fmc-debug.c b/drivers/fmc/fmc-debug.c
deleted file mode 100644
index 1734c7cf0e76..000000000000
--- a/drivers/fmc/fmc-debug.c
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2015 CERN (www.cern.ch)
- * Author: Federico Vaga <federico.vaga@cern.ch>
- */
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <asm/byteorder.h>
-
-#include <linux/fmc.h>
-#include <linux/sdb.h>
-#include <linux/fmc-sdb.h>
-
-#define FMC_DBG_SDB_DUMP "dump_sdb"
-
-static char *__strip_trailing_space(char *buf, char *str, int len)
-{
- int i = len - 1;
-
- memcpy(buf, str, len);
- buf[len] = '\0';
- while (i >= 0 && buf[i] == ' ')
- buf[i--] = '\0';
- return buf;
-}
-
-#define __sdb_string(buf, field) ({ \
- BUILD_BUG_ON(sizeof(buf) < sizeof(field)); \
- __strip_trailing_space(buf, (void *)(field), sizeof(field)); \
- })
-
-/**
- * We do not check seq_printf() errors because we want to see things in any case
- */
-static void fmc_sdb_dump_recursive(struct fmc_device *fmc, struct seq_file *s,
- const struct sdb_array *arr)
-{
- unsigned long base = arr->baseaddr;
- int i, j, n = arr->len, level = arr->level;
- char tmp[64];
-
- for (i = 0; i < n; i++) {
- union sdb_record *r;
- struct sdb_product *p;
- struct sdb_component *c;
-
- r = &arr->record[i];
- c = &r->dev.sdb_component;
- p = &c->product;
-
- for (j = 0; j < level; j++)
- seq_printf(s, " ");
- switch (r->empty.record_type) {
- case sdb_type_interconnect:
- seq_printf(s, "%08llx:%08x %.19s\n",
- __be64_to_cpu(p->vendor_id),
- __be32_to_cpu(p->device_id),
- p->name);
- break;
- case sdb_type_device:
- seq_printf(s, "%08llx:%08x %.19s (%08llx-%08llx)\n",
- __be64_to_cpu(p->vendor_id),
- __be32_to_cpu(p->device_id),
- p->name,
- __be64_to_cpu(c->addr_first) + base,
- __be64_to_cpu(c->addr_last) + base);
- break;
- case sdb_type_bridge:
- seq_printf(s, "%08llx:%08x %.19s (bridge: %08llx)\n",
- __be64_to_cpu(p->vendor_id),
- __be32_to_cpu(p->device_id),
- p->name,
- __be64_to_cpu(c->addr_first) + base);
- if (IS_ERR(arr->subtree[i])) {
- seq_printf(s, "SDB: (bridge error %li)\n",
- PTR_ERR(arr->subtree[i]));
- break;
- }
- fmc_sdb_dump_recursive(fmc, s, arr->subtree[i]);
- break;
- case sdb_type_integration:
- seq_printf(s, "integration\n");
- break;
- case sdb_type_repo_url:
- seq_printf(s, "Synthesis repository: %s\n",
- __sdb_string(tmp, r->repo_url.repo_url));
- break;
- case sdb_type_synthesis:
- seq_printf(s, "Bitstream '%s' ",
- __sdb_string(tmp, r->synthesis.syn_name));
- seq_printf(s, "synthesized %08x by %s ",
- __be32_to_cpu(r->synthesis.date),
- __sdb_string(tmp, r->synthesis.user_name));
- seq_printf(s, "(%s version %x), ",
- __sdb_string(tmp, r->synthesis.tool_name),
- __be32_to_cpu(r->synthesis.tool_version));
- seq_printf(s, "commit %pm\n",
- r->synthesis.commit_id);
- break;
- case sdb_type_empty:
- seq_printf(s, "empty\n");
- break;
- default:
- seq_printf(s, "UNKNOWN TYPE 0x%02x\n",
- r->empty.record_type);
- break;
- }
- }
-}
-
-static int fmc_sdb_dump(struct seq_file *s, void *offset)
-{
- struct fmc_device *fmc = s->private;
-
- if (!fmc->sdb) {
- seq_printf(s, "no SDB information\n");
- return 0;
- }
-
- seq_printf(s, "FMC: %s (%s), slot %i, device %s\n", dev_name(fmc->hwdev),
- fmc->carrier_name, fmc->slot_id, dev_name(&fmc->dev));
- /* Dump SDB information */
- fmc_sdb_dump_recursive(fmc, s, fmc->sdb);
-
- return 0;
-}
-
-
-static int fmc_sdb_dump_open(struct inode *inode, struct file *file)
-{
- struct fmc_device *fmc = inode->i_private;
-
- return single_open(file, fmc_sdb_dump, fmc);
-}
-
-
-const struct file_operations fmc_dbgfs_sdb_dump = {
- .owner = THIS_MODULE,
- .open = fmc_sdb_dump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-int fmc_debug_init(struct fmc_device *fmc)
-{
- fmc->dbg_dir = debugfs_create_dir(dev_name(&fmc->dev), NULL);
- if (IS_ERR_OR_NULL(fmc->dbg_dir)) {
- pr_err("FMC: Cannot create debugfs\n");
- return PTR_ERR(fmc->dbg_dir);
- }
-
- fmc->dbg_sdb_dump = debugfs_create_file(FMC_DBG_SDB_DUMP, 0444,
- fmc->dbg_dir, fmc,
- &fmc_dbgfs_sdb_dump);
- if (IS_ERR_OR_NULL(fmc->dbg_sdb_dump))
- pr_err("FMC: Cannot create debugfs file %s\n",
- FMC_DBG_SDB_DUMP);
-
- return 0;
-}
-
-void fmc_debug_exit(struct fmc_device *fmc)
-{
- if (fmc->dbg_dir)
- debugfs_remove_recursive(fmc->dbg_dir);
-}
diff --git a/drivers/fmc/fmc-dump.c b/drivers/fmc/fmc-dump.c
deleted file mode 100644
index 6c81dbde1d16..000000000000
--- a/drivers/fmc/fmc-dump.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2013 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/fmc.h>
-#include <linux/fmc-sdb.h>
-
-static int fmc_must_dump_eeprom;
-module_param_named(dump_eeprom, fmc_must_dump_eeprom, int, 0644);
-
-#define LINELEN 16
-
-/* Dumping 8k takes oh so much: avoid duplicate lines */
-static const uint8_t *dump_line(int addr, const uint8_t *line,
- const uint8_t *prev)
-{
- int i;
-
- if (!prev || memcmp(line, prev, LINELEN)) {
- pr_info("%04x: ", addr);
- for (i = 0; i < LINELEN; ) {
- printk(KERN_CONT "%02x", line[i]);
- i++;
- printk(i & 3 ? " " : i & (LINELEN - 1) ? " " : "\n");
- }
- return line;
- }
- /* repeated line */
- if (line == prev + LINELEN)
- pr_info("[...]\n");
- return prev;
-}
-
-void fmc_dump_eeprom(const struct fmc_device *fmc)
-{
- const uint8_t *line, *prev;
- int i;
-
- if (!fmc_must_dump_eeprom)
- return;
-
- pr_info("FMC: %s (%s), slot %i, device %s\n", dev_name(fmc->hwdev),
- fmc->carrier_name, fmc->slot_id, dev_name(&fmc->dev));
- pr_info("FMC: dumping eeprom 0x%x (%i) bytes\n", fmc->eeprom_len,
- fmc->eeprom_len);
-
- line = fmc->eeprom;
- prev = NULL;
- for (i = 0; i < fmc->eeprom_len; i += LINELEN, line += LINELEN)
- prev = dump_line(i, line, prev);
-}
diff --git a/drivers/fmc/fmc-fakedev.c b/drivers/fmc/fmc-fakedev.c
deleted file mode 100644
index 941d0930969a..000000000000
--- a/drivers/fmc/fmc-fakedev.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * The software is provided "as is"; the copyright holders disclaim
- * all warranties and liabilities, to the extent permitted by
- * applicable law.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/firmware.h>
-#include <linux/workqueue.h>
-#include <linux/err.h>
-#include <linux/fmc.h>
-
-#define FF_EEPROM_SIZE 8192 /* The standard eeprom size */
-#define FF_MAX_MEZZANINES 4 /* Fakes a multi-mezzanine carrier */
-
-/* The user can pass up to 4 names of eeprom images to load */
-static char *ff_eeprom[FF_MAX_MEZZANINES];
-static int ff_nr_eeprom;
-module_param_array_named(eeprom, ff_eeprom, charp, &ff_nr_eeprom, 0444);
-
-/* The user can ask for a multi-mezzanine carrier, with the default eeprom */
-static int ff_nr_dev = 1;
-module_param_named(ndev, ff_nr_dev, int, 0444);
-
-
-/* Lazily, don't support the "standard" module parameters */
-
-/*
- * Eeprom built from these commands:
-
- ../fru-generator -v fake-vendor -n fake-design-for-testing \
- -s 01234 -p none > IPMI-FRU
-
- gensdbfs . ../fake-eeprom.bin
-*/
-static char ff_eeimg[FF_MAX_MEZZANINES][FF_EEPROM_SIZE] = {
- {
- 0x01, 0x00, 0x00, 0x01, 0x00, 0x0c, 0x00, 0xf2, 0x01, 0x0b, 0x00, 0xb2,
- 0x86, 0x87, 0xcb, 0x66, 0x61, 0x6b, 0x65, 0x2d, 0x76, 0x65, 0x6e, 0x64,
- 0x6f, 0x72, 0xd7, 0x66, 0x61, 0x6b, 0x65, 0x2d, 0x64, 0x65, 0x73, 0x69,
- 0x67, 0x6e, 0x2d, 0x66, 0x6f, 0x72, 0x2d, 0x74, 0x65, 0x73, 0x74, 0x69,
- 0x6e, 0x67, 0xc5, 0x30, 0x31, 0x32, 0x33, 0x34, 0xc4, 0x6e, 0x6f, 0x6e,
- 0x65, 0xda, 0x32, 0x30, 0x31, 0x32, 0x2d, 0x31, 0x31, 0x2d, 0x31, 0x39,
- 0x20, 0x32, 0x32, 0x3a, 0x34, 0x32, 0x3a, 0x33, 0x30, 0x2e, 0x30, 0x37,
- 0x34, 0x30, 0x35, 0x35, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87,
- 0x02, 0x02, 0x0d, 0xf7, 0xf8, 0x02, 0xb0, 0x04, 0x74, 0x04, 0xec, 0x04,
- 0x00, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x02, 0x02, 0x0d, 0x5c, 0x93, 0x01,
- 0x4a, 0x01, 0x39, 0x01, 0x5a, 0x01, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x0b,
- 0x02, 0x02, 0x0d, 0x63, 0x8c, 0x00, 0xfa, 0x00, 0xed, 0x00, 0x06, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0xa0, 0x0f, 0x01, 0x02, 0x0d, 0xfb, 0xf5, 0x05,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x01, 0x02, 0x0d, 0xfc, 0xf4, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x0d, 0xfd, 0xf3, 0x03,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xfa, 0x82, 0x0b, 0xea, 0x8f, 0xa2, 0x12, 0x00, 0x00, 0x1e, 0x44, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x53, 0x44, 0x42, 0x2d, 0x00, 0x03, 0x01, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0xc4, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x61, 0x74, 0x61,
- 0x2e, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x2e, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
- 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc0,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xc4, 0x46, 0x69, 0x6c, 0x65,
- 0x44, 0x61, 0x74, 0x61, 0x6e, 0x61, 0x6d, 0x65, 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x6e, 0x61, 0x6d, 0x65, 0x20, 0x20, 0x20, 0x20,
- 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x01,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdf,
- 0x46, 0x69, 0x6c, 0x65, 0x44, 0x61, 0x74, 0x61, 0x49, 0x50, 0x4d, 0x49,
- 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x49, 0x50, 0x4d, 0x49,
- 0x2d, 0x46, 0x52, 0x55, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
- 0x20, 0x20, 0x20, 0x01, 0x66, 0x61, 0x6b, 0x65, 0x0a,
- },
-};
-
-struct ff_dev {
- struct fmc_device *fmc[FF_MAX_MEZZANINES];
- struct device dev;
-};
-
-static struct ff_dev *ff_current_dev; /* We have 1 carrier, 1 slot */
-
-static int ff_reprogram(struct fmc_device *fmc, struct fmc_driver *drv,
- char *gw)
-{
- const struct firmware *fw;
- int ret;
-
- if (!gw) {
- /* program golden: success */
- fmc->flags &= ~FMC_DEVICE_HAS_CUSTOM;
- fmc->flags |= FMC_DEVICE_HAS_GOLDEN;
- return 0;
- }
-
- dev_info(&fmc->dev, "reprogramming with %s\n", gw);
- ret = request_firmware(&fw, gw, &fmc->dev);
- if (ret < 0) {
- dev_warn(&fmc->dev, "request firmware \"%s\": error %i\n",
- gw, ret);
- goto out;
- }
- fmc->flags &= ~FMC_DEVICE_HAS_GOLDEN;
- fmc->flags |= FMC_DEVICE_HAS_CUSTOM;
-
-out:
- release_firmware(fw);
- return ret;
-}
-
-static int ff_irq_request(struct fmc_device *fmc, irq_handler_t handler,
- char *name, int flags)
-{
- return -EOPNOTSUPP;
-}
-
-/* FIXME: should also have some fake FMC GPIO mapping */
-
-
-/*
- * This work function is called when we changed the eeprom. It removes the
- * current fmc device and registers a new one, with different identifiers.
- */
-static struct ff_dev *ff_dev_create(void); /* defined later */
-
-static void ff_work_fn(struct work_struct *work)
-{
- struct ff_dev *ff = ff_current_dev;
- int ret;
-
- fmc_device_unregister_n(ff->fmc, ff_nr_dev);
- device_unregister(&ff->dev);
- ff_current_dev = NULL;
-
- ff = ff_dev_create();
- if (IS_ERR(ff)) {
- pr_warning("%s: can't re-create FMC devices\n", __func__);
- return;
- }
- ret = fmc_device_register_n(ff->fmc, ff_nr_dev);
- if (ret < 0) {
- dev_warn(&ff->dev, "can't re-register FMC devices\n");
- device_unregister(&ff->dev);
- return;
- }
-
- ff_current_dev = ff;
-}
-
-static DECLARE_DELAYED_WORK(ff_work, ff_work_fn);
-
-
-/* low-level i2c */
-static int ff_eeprom_read(struct fmc_device *fmc, uint32_t offset,
- void *buf, size_t size)
-{
- if (offset > FF_EEPROM_SIZE)
- return -EINVAL;
- if (offset + size > FF_EEPROM_SIZE)
- size = FF_EEPROM_SIZE - offset;
- memcpy(buf, fmc->eeprom + offset, size);
- return size;
-}
-
-static int ff_eeprom_write(struct fmc_device *fmc, uint32_t offset,
- const void *buf, size_t size)
-{
- if (offset > FF_EEPROM_SIZE)
- return -EINVAL;
- if (offset + size > FF_EEPROM_SIZE)
- size = FF_EEPROM_SIZE - offset;
- dev_info(&fmc->dev, "write_eeprom: offset %i, size %zi\n",
- (int)offset, size);
- memcpy(fmc->eeprom + offset, buf, size);
- schedule_delayed_work(&ff_work, HZ * 2); /* remove, replug, in 2s */
- return size;
-}
-
-/* i2c operations for fmc */
-static int ff_read_ee(struct fmc_device *fmc, int pos, void *data, int len)
-{
- if (!(fmc->flags & FMC_DEVICE_HAS_GOLDEN))
- return -EOPNOTSUPP;
- return ff_eeprom_read(fmc, pos, data, len);
-}
-
-static int ff_write_ee(struct fmc_device *fmc, int pos,
- const void *data, int len)
-{
- if (!(fmc->flags & FMC_DEVICE_HAS_GOLDEN))
- return -EOPNOTSUPP;
- return ff_eeprom_write(fmc, pos, data, len);
-}
-
-/* readl and writel do not do anything. Don't waste RAM with "base" */
-static uint32_t ff_readl(struct fmc_device *fmc, int offset)
-{
- return 0;
-}
-
-static void ff_writel(struct fmc_device *fmc, uint32_t value, int offset)
-{
- return;
-}
-
-/* validate is useful so fmc-write-eeprom will not reprogram every 2 seconds */
-static int ff_validate(struct fmc_device *fmc, struct fmc_driver *drv)
-{
- int i;
-
- if (!drv->busid_n)
- return 0; /* everything is valid */
- for (i = 0; i < drv->busid_n; i++)
- if (drv->busid_val[i] == fmc->device_id)
- return i;
- return -ENOENT;
-}
-
-
-
-static struct fmc_operations ff_fmc_operations = {
- .read32 = ff_readl,
- .write32 = ff_writel,
- .reprogram = ff_reprogram,
- .irq_request = ff_irq_request,
- .read_ee = ff_read_ee,
- .write_ee = ff_write_ee,
- .validate = ff_validate,
-};
-
-/* This device is kmalloced: release it */
-static void ff_dev_release(struct device *dev)
-{
- struct ff_dev *ff = container_of(dev, struct ff_dev, dev);
- kfree(ff);
-}
-
-static struct fmc_device ff_template_fmc = {
- .version = FMC_VERSION,
- .owner = THIS_MODULE,
- .carrier_name = "fake-fmc-carrier",
- .device_id = 0xf001, /* fool */
- .eeprom_len = sizeof(ff_eeimg[0]),
- .memlen = 0x1000, /* 4k, to show something */
- .op = &ff_fmc_operations,
- .hwdev = NULL, /* filled at creation time */
- .flags = FMC_DEVICE_HAS_GOLDEN,
-};
-
-static struct ff_dev *ff_dev_create(void)
-{
- struct ff_dev *ff;
- struct fmc_device *fmc;
- int i, ret;
-
- ff = kzalloc(sizeof(*ff), GFP_KERNEL);
- if (!ff)
- return ERR_PTR(-ENOMEM);
- dev_set_name(&ff->dev, "fake-fmc-carrier");
- ff->dev.release = ff_dev_release;
-
- ret = device_register(&ff->dev);
- if (ret < 0) {
- put_device(&ff->dev);
- return ERR_PTR(ret);
- }
-
- /* Create fmc structures that refer to this new "hw" device */
- for (i = 0; i < ff_nr_dev; i++) {
- fmc = kmemdup(&ff_template_fmc, sizeof(ff_template_fmc),
- GFP_KERNEL);
- fmc->hwdev = &ff->dev;
- fmc->carrier_data = ff;
- fmc->nr_slots = ff_nr_dev;
- /* the following fields are different for each slot */
- fmc->eeprom = ff_eeimg[i];
- fmc->eeprom_addr = 0x50 + 2 * i;
- fmc->slot_id = i;
- ff->fmc[i] = fmc;
- /* increment the identifier, each must be different */
- ff_template_fmc.device_id++;
- }
- return ff;
-}
-
-/* init and exit */
-static int ff_init(void)
-{
- struct ff_dev *ff;
- const struct firmware *fw;
- int i, len, ret = 0;
-
- /* Replicate the default eeprom for the max number of mezzanines */
- for (i = 1; i < FF_MAX_MEZZANINES; i++)
- memcpy(ff_eeimg[i], ff_eeimg[0], sizeof(ff_eeimg[0]));
-
- if (ff_nr_eeprom > ff_nr_dev)
- ff_nr_dev = ff_nr_eeprom;
-
- ff = ff_dev_create();
- if (IS_ERR(ff))
- return PTR_ERR(ff);
-
- /* If the user passed "eeprom=" as a parameter, fetch them */
- for (i = 0; i < ff_nr_eeprom; i++) {
- if (!strlen(ff_eeprom[i]))
- continue;
- ret = request_firmware(&fw, ff_eeprom[i], &ff->dev);
- if (ret < 0) {
- dev_err(&ff->dev, "Mezzanine %i: can't load \"%s\" "
- "(error %i)\n", i, ff_eeprom[i], -ret);
- } else {
- len = min_t(size_t, fw->size, (size_t)FF_EEPROM_SIZE);
- memcpy(ff_eeimg[i], fw->data, len);
- release_firmware(fw);
- dev_info(&ff->dev, "Mezzanine %i: eeprom \"%s\"\n", i,
- ff_eeprom[i]);
- }
- }
-
- ret = fmc_device_register_n(ff->fmc, ff_nr_dev);
- if (ret) {
- device_unregister(&ff->dev);
- return ret;
- }
- ff_current_dev = ff;
- return ret;
-}
-
-static void ff_exit(void)
-{
- if (ff_current_dev) {
- fmc_device_unregister_n(ff_current_dev->fmc, ff_nr_dev);
- device_unregister(&ff_current_dev->dev);
- }
- cancel_delayed_work_sync(&ff_work);
-}
-
-module_init(ff_init);
-module_exit(ff_exit);
-
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/fmc/fmc-match.c b/drivers/fmc/fmc-match.c
deleted file mode 100644
index 995bd6041a67..000000000000
--- a/drivers/fmc/fmc-match.c
+++ /dev/null
@@ -1,113 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/fmc.h>
-#include <linux/ipmi-fru.h>
-
-/* The fru parser works in both user space and the kernel: it needs an allocator */
-void *fru_alloc(size_t size)
-{
- return kzalloc(size, GFP_KERNEL);
-}
-
-/* The actual match function */
-int fmc_match(struct device *dev, struct device_driver *drv)
-{
- struct fmc_driver *fdrv = to_fmc_driver(drv);
- struct fmc_device *fdev = to_fmc_device(dev);
- struct fmc_fru_id *fid;
- int i, matched = 0;
-
- /* This currently only matches the EEPROM (FRU id) */
- fid = fdrv->id_table.fru_id;
- if (!fid) {
- dev_warn(&fdev->dev, "Driver has no ID: matches all\n");
- matched = 1;
- } else {
- if (!fdev->id.manufacturer || !fdev->id.product_name)
- return 0; /* the device has no FRU information */
- for (i = 0; i < fdrv->id_table.fru_id_nr; i++, fid++) {
- if (fid->manufacturer &&
- strcmp(fid->manufacturer, fdev->id.manufacturer))
- continue;
- if (fid->product_name &&
- strcmp(fid->product_name, fdev->id.product_name))
- continue;
- matched = 1;
- break;
- }
- }
-
- /* FIXME: match SDB contents */
- return matched;
-}
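A hedged illustration (driver name and strings invented, not from this tree): for the strcmp() checks above to restrict a driver to a specific mezzanine, the driver declares a FRU ID table and points its id_table.fru_id at it, roughly:

	/* hypothetical driver-side table; "CERN"/"fmc-example" are made up */
	static struct fmc_fru_id my_fru_ids[] = {
		{
			.manufacturer	= "CERN",
			.product_name	= "fmc-example",
		},
	};

	static struct fmc_driver my_drv = {
		.version	= FMC_VERSION,
		.id_table	= {
			.fru_id		= my_fru_ids,
			.fru_id_nr	= ARRAY_SIZE(my_fru_ids),
		},
		/* .probe and .remove as in fmc-trivial.c below */
	};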
-
-/* This function creates ID info for a newly registered device */
-int fmc_fill_id_info(struct fmc_device *fmc)
-{
- struct fru_common_header *h;
- struct fru_board_info_area *bia;
- int ret, allocated = 0;
-
- /* If we know the eeprom length, try to read it off the device */
- if (fmc->eeprom_len && !fmc->eeprom) {
- fmc->eeprom = kzalloc(fmc->eeprom_len, GFP_KERNEL);
- if (!fmc->eeprom)
- return -ENOMEM;
- allocated = 1;
- ret = fmc_read_ee(fmc, 0, fmc->eeprom, fmc->eeprom_len);
- if (ret < 0)
- goto out;
- }
-
- /* If no eeprom, continue with other matches */
- if (!fmc->eeprom)
- return 0;
-
- dev_info(fmc->hwdev, "mezzanine %i\n", fmc->slot_id); /* header */
-
- /* So we have the eeprom: parse the FRU part (if any) */
- h = (void *)fmc->eeprom;
- if (h->format != 1) {
- pr_info(" EEPROM has no FRU information\n");
- goto out;
- }
- if (!fru_header_cksum_ok(h)) {
- pr_info(" FRU: wrong header checksum\n");
- goto out;
- }
- bia = fru_get_board_area(h);
- if (!fru_bia_cksum_ok(bia)) {
- pr_info(" FRU: wrong board area checksum\n");
- goto out;
- }
- fmc->id.manufacturer = fru_get_board_manufacturer(h);
- fmc->id.product_name = fru_get_product_name(h);
- pr_info(" Manufacturer: %s\n", fmc->id.manufacturer);
- pr_info(" Product name: %s\n", fmc->id.product_name);
-
- /* Create the short name (FIXME: look in sdb as well) */
- fmc->mezzanine_name = kstrdup(fmc->id.product_name, GFP_KERNEL);
-
-out:
- if (allocated) {
- kfree(fmc->eeprom);
- fmc->eeprom = NULL;
- }
- return 0; /* no error: let other identification work */
-}
-
-/* Some ID data is allocated using fru_alloc() above, so release it */
-void fmc_free_id_info(struct fmc_device *fmc)
-{
- kfree(fmc->mezzanine_name);
- kfree(fmc->id.manufacturer);
- kfree(fmc->id.product_name);
-}
diff --git a/drivers/fmc/fmc-private.h b/drivers/fmc/fmc-private.h
deleted file mode 100644
index 93cb8030f764..000000000000
--- a/drivers/fmc/fmc-private.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2015 CERN (www.cern.ch)
- * Author: Federico Vaga <federico.vaga@cern.ch>
- */
-
-extern int fmc_debug_init(struct fmc_device *fmc);
-extern void fmc_debug_exit(struct fmc_device *fmc);
diff --git a/drivers/fmc/fmc-sdb.c b/drivers/fmc/fmc-sdb.c
deleted file mode 100644
index 14758db1a5fb..000000000000
--- a/drivers/fmc/fmc-sdb.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/fmc.h>
-#include <linux/sdb.h>
-#include <linux/err.h>
-#include <linux/fmc-sdb.h>
-#include <asm/byteorder.h>
-
-static uint32_t __sdb_rd(struct fmc_device *fmc, unsigned long address,
- int convert)
-{
- uint32_t res = fmc_readl(fmc, address);
- if (convert)
- return __be32_to_cpu(res);
- return res;
-}
-
-static struct sdb_array *__fmc_scan_sdb_tree(struct fmc_device *fmc,
- unsigned long sdb_addr,
- unsigned long reg_base, int level)
-{
- uint32_t onew;
- int i, j, n, convert = 0;
- struct sdb_array *arr, *sub;
-
- onew = fmc_readl(fmc, sdb_addr);
- if (onew == SDB_MAGIC) {
- /* Uh! If we are little-endian, we must convert */
- if (SDB_MAGIC != __be32_to_cpu(SDB_MAGIC))
- convert = 1;
- } else if (onew == __be32_to_cpu(SDB_MAGIC)) {
- /* ok, don't convert */
- } else {
- return ERR_PTR(-ENOENT);
- }
- /* So, the magic was there: get the count from offset 4 */
- onew = __sdb_rd(fmc, sdb_addr + 4, convert);
- n = __be16_to_cpu(*(uint16_t *)&onew);
- arr = kzalloc(sizeof(*arr), GFP_KERNEL);
- if (!arr)
- return ERR_PTR(-ENOMEM);
- arr->record = kcalloc(n, sizeof(arr->record[0]), GFP_KERNEL);
- arr->subtree = kcalloc(n, sizeof(arr->subtree[0]), GFP_KERNEL);
- if (!arr->record || !arr->subtree) {
- kfree(arr->record);
- kfree(arr->subtree);
- kfree(arr);
- return ERR_PTR(-ENOMEM);
- }
-
- arr->len = n;
- arr->level = level;
- arr->fmc = fmc;
- for (i = 0; i < n; i++) {
- union sdb_record *r;
-
- for (j = 0; j < sizeof(arr->record[0]); j += 4) {
- *(uint32_t *)((void *)(arr->record + i) + j) =
- __sdb_rd(fmc, sdb_addr + (i * 64) + j, convert);
- }
- r = &arr->record[i];
- arr->subtree[i] = ERR_PTR(-ENODEV);
- if (r->empty.record_type == sdb_type_bridge) {
- struct sdb_component *c = &r->bridge.sdb_component;
- uint64_t subaddr = __be64_to_cpu(r->bridge.sdb_child);
- uint64_t newbase = __be64_to_cpu(c->addr_first);
-
- subaddr += reg_base;
- newbase += reg_base;
- sub = __fmc_scan_sdb_tree(fmc, subaddr, newbase,
- level + 1);
- arr->subtree[i] = sub; /* may be error */
- if (IS_ERR(sub))
- continue;
- sub->parent = arr;
- sub->baseaddr = newbase;
- }
- }
- return arr;
-}
-
-int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address)
-{
- struct sdb_array *ret;
- if (fmc->sdb)
- return -EBUSY;
- ret = __fmc_scan_sdb_tree(fmc, address, 0 /* regs */, 0);
- if (IS_ERR(ret))
- return PTR_ERR(ret);
- fmc->sdb = ret;
- return 0;
-}
-EXPORT_SYMBOL(fmc_scan_sdb_tree);
-
-static void __fmc_sdb_free(struct sdb_array *arr)
-{
- int i, n;
-
- if (!arr)
- return;
- n = arr->len;
- for (i = 0; i < n; i++) {
- if (IS_ERR(arr->subtree[i]))
- continue;
- __fmc_sdb_free(arr->subtree[i]);
- }
- kfree(arr->record);
- kfree(arr->subtree);
- kfree(arr);
-}
-
-int fmc_free_sdb_tree(struct fmc_device *fmc)
-{
- __fmc_sdb_free(fmc->sdb);
- fmc->sdb = NULL;
- return 0;
-}
-EXPORT_SYMBOL(fmc_free_sdb_tree);
-
-/* This helper calls reprogram and initializes SDB as well */
-int fmc_reprogram_raw(struct fmc_device *fmc, struct fmc_driver *d,
- void *gw, unsigned long len, int sdb_entry)
-{
- int ret;
-
- ret = fmc->op->reprogram_raw(fmc, d, gw, len);
- if (ret < 0)
- return ret;
- if (sdb_entry < 0)
- return ret;
-
- /* We are required to find SDB at a given offset */
- ret = fmc_scan_sdb_tree(fmc, sdb_entry);
- if (ret < 0) {
- dev_err(&fmc->dev, "Can't find SDB at address 0x%x\n",
- sdb_entry);
- return -ENODEV;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(fmc_reprogram_raw);
-
-/* This helper calls reprogram and initializes SDB as well */
-int fmc_reprogram(struct fmc_device *fmc, struct fmc_driver *d, char *gw,
- int sdb_entry)
-{
- int ret;
-
- ret = fmc->op->reprogram(fmc, d, gw);
- if (ret < 0)
- return ret;
- if (sdb_entry < 0)
- return ret;
-
- /* We are required to find SDB at a given offset */
- ret = fmc_scan_sdb_tree(fmc, sdb_entry);
- if (ret < 0) {
- dev_err(&fmc->dev, "Can't find SDB at address 0x%x\n",
- sdb_entry);
- return -ENODEV;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(fmc_reprogram);
-
-void fmc_show_sdb_tree(const struct fmc_device *fmc)
-{
- pr_err("%s: not supported anymore, use debugfs to dump SDB\n",
- __func__);
-}
-EXPORT_SYMBOL(fmc_show_sdb_tree);
-
-signed long fmc_find_sdb_device(struct sdb_array *tree,
- uint64_t vid, uint32_t did, unsigned long *sz)
-{
- signed long res = -ENODEV;
- union sdb_record *r;
- struct sdb_product *p;
- struct sdb_component *c;
- int i, n = tree->len;
- uint64_t last, first;
-
- /* FIXME: what if the first interconnect is not at zero? */
- for (i = 0; i < n; i++) {
- r = &tree->record[i];
- c = &r->dev.sdb_component;
- p = &c->product;
-
- if (!IS_ERR(tree->subtree[i]))
- res = fmc_find_sdb_device(tree->subtree[i],
- vid, did, sz);
- if (res >= 0)
- return res + tree->baseaddr;
- if (r->empty.record_type != sdb_type_device)
- continue;
- if (__be64_to_cpu(p->vendor_id) != vid)
- continue;
- if (__be32_to_cpu(p->device_id) != did)
- continue;
- /* found */
- last = __be64_to_cpu(c->addr_last);
- first = __be64_to_cpu(c->addr_first);
- if (sz)
- *sz = (typeof(*sz))(last + 1 - first);
- return first + tree->baseaddr;
- }
- return res;
-}
-EXPORT_SYMBOL(fmc_find_sdb_device);
diff --git a/drivers/fmc/fmc-trivial.c b/drivers/fmc/fmc-trivial.c
deleted file mode 100644
index 8defdee3e3a3..000000000000
--- a/drivers/fmc/fmc-trivial.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * The software is provided "as is"; the copyright holders disclaim
- * all warranties and liabilities, to the extent permitted by
- * applicable law.
- */
-
-/* A trivial fmc driver that can load a gateware file and reports interrupts */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-#include <linux/fmc.h>
-
-static struct fmc_driver t_drv; /* initialized later */
-
-static irqreturn_t t_handler(int irq, void *dev_id)
-{
- struct fmc_device *fmc = dev_id;
-
- fmc_irq_ack(fmc);
- dev_info(&fmc->dev, "received irq %i\n", irq);
- return IRQ_HANDLED;
-}
-
-static struct fmc_gpio t_gpio[] = {
- {
- .gpio = FMC_GPIO_IRQ(0),
- .mode = GPIOF_DIR_IN,
- .irqmode = IRQF_TRIGGER_RISING,
- }, {
- .gpio = FMC_GPIO_IRQ(1),
- .mode = GPIOF_DIR_IN,
- .irqmode = IRQF_TRIGGER_RISING,
- }
-};
-
-static int t_probe(struct fmc_device *fmc)
-{
- int ret;
- int index = 0;
-
- index = fmc_validate(fmc, &t_drv);
- if (index < 0)
- return -EINVAL; /* not our device: invalid */
-
- ret = fmc_irq_request(fmc, t_handler, "fmc-trivial", IRQF_SHARED);
- if (ret < 0)
- return ret;
- /* ignore error code of call below, we really don't care */
- fmc_gpio_config(fmc, t_gpio, ARRAY_SIZE(t_gpio));
-
- ret = fmc_reprogram(fmc, &t_drv, "", 0);
- if (ret == -EPERM) /* programming not supported */
- ret = 0;
- if (ret < 0)
- fmc_irq_free(fmc);
-
- /* FIXME: reprogram LM32 too */
- return ret;
-}
-
-static int t_remove(struct fmc_device *fmc)
-{
- fmc_irq_free(fmc);
- return 0;
-}
-
-static struct fmc_driver t_drv = {
- .version = FMC_VERSION,
- .driver.name = KBUILD_MODNAME,
- .probe = t_probe,
- .remove = t_remove,
- /* no table, as the current match just matches everything */
-};
-
- /* We accept the generic parameters */
-FMC_PARAM_BUSID(t_drv);
-FMC_PARAM_GATEWARE(t_drv);
-
-static int t_init(void)
-{
- int ret;
-
- ret = fmc_driver_register(&t_drv);
- return ret;
-}
-
-static void t_exit(void)
-{
- fmc_driver_unregister(&t_drv);
-}
-
-module_init(t_init);
-module_exit(t_exit);
-
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/fmc/fmc-write-eeprom.c b/drivers/fmc/fmc-write-eeprom.c
deleted file mode 100644
index 1c7826e3f526..000000000000
--- a/drivers/fmc/fmc-write-eeprom.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/firmware.h>
-#include <linux/init.h>
-#include <linux/fmc.h>
-#include <asm/unaligned.h>
-
-/*
- * This module uses the firmware loader to program the whole or part
- * of the FMC eeprom. The meat is in the _run functions. However, no
- * default file name is provided, to avoid accidental mishaps. Also,
- * you must pass the busid argument
- */
-static struct fmc_driver fwe_drv;
-
-FMC_PARAM_BUSID(fwe_drv);
-
-/* The "file=" is like the generic "gateware=" used elsewhere */
-static char *fwe_file[FMC_MAX_CARDS];
-static int fwe_file_n;
-module_param_array_named(file, fwe_file, charp, &fwe_file_n, 0444);
-
-static int fwe_run_tlv(struct fmc_device *fmc, const struct firmware *fw,
- int write)
-{
- const uint8_t *p = fw->data;
- int len = fw->size;
- uint16_t thislen, thisaddr;
- int err;
-
- /* format is: 'w' addr16 len16 data... */
- while (len > 5) {
- thisaddr = get_unaligned_le16(p+1);
- thislen = get_unaligned_le16(p+3);
- if (p[0] != 'w' || thislen + 5 > len) {
- dev_err(&fmc->dev, "invalid tlv at offset %ti\n",
- p - fw->data);
- return -EINVAL;
- }
- err = 0;
- if (write) {
- dev_info(&fmc->dev, "write %i bytes at 0x%04x\n",
- thislen, thisaddr);
- err = fmc_write_ee(fmc, thisaddr, p + 5, thislen);
- }
- if (err < 0) {
- dev_err(&fmc->dev, "write failure @0x%04x\n",
- thisaddr);
- return err;
- }
- p += 5 + thislen;
- len -= 5 + thislen;
- }
- if (write)
- dev_info(&fmc->dev, "write_eeprom: success\n");
- return 0;
-}
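The 'w' addr16 len16 record layout parsed above is easy to generate offline. A hedged userspace sketch (helper name and file handling invented; only the record layout comes from the format comment above) that appends one little-endian record:

	#include <stdint.h>
	#include <stdio.h>

	/* write one .tlv record: 'w', addr16 LE, len16 LE, then payload */
	static void emit_record(FILE *f, uint16_t addr,
				const void *data, uint16_t len)
	{
		uint8_t hdr[5] = {
			'w',
			addr & 0xff, addr >> 8,	/* addr16, little-endian */
			len & 0xff, len >> 8,	/* len16, little-endian */
		};

		fwrite(hdr, 1, sizeof(hdr), f);
		fwrite(data, 1, len, f);
	}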
-
-static int fwe_run_bin(struct fmc_device *fmc, const struct firmware *fw)
-{
- int ret;
-
- dev_info(&fmc->dev, "programming %zi bytes\n", fw->size);
- ret = fmc_write_ee(fmc, 0, (void *)fw->data, fw->size);
- if (ret < 0) {
- dev_info(&fmc->dev, "write_eeprom: error %i\n", ret);
- return ret;
- }
- dev_info(&fmc->dev, "write_eeprom: success\n");
- return 0;
-}
-
-static int fwe_run(struct fmc_device *fmc, const struct firmware *fw, char *s)
-{
- char *last4 = s + strlen(s) - 4;
- int err;
-
- if (!strcmp(last4, ".bin"))
- return fwe_run_bin(fmc, fw);
- if (!strcmp(last4, ".tlv")) {
- err = fwe_run_tlv(fmc, fw, 0);
- if (!err)
- err = fwe_run_tlv(fmc, fw, 1);
- return err;
- }
- dev_err(&fmc->dev, "invalid file name \"%s\"\n", s);
- return -EINVAL;
-}
-
-/*
- * Programming is done at probe time. Moreover, only those listed with
- * busid= are programmed; if more than one card is probed for, only one
- * is programmed. Unfortunately, it's difficult to know in advance when
- * probing the first card if others are there.
- */
-static int fwe_probe(struct fmc_device *fmc)
-{
- int err, index = 0;
- const struct firmware *fw;
- struct device *dev = &fmc->dev;
- char *s;
-
- if (!fwe_drv.busid_n) {
- dev_err(dev, "%s: no busid passed, refusing all cards\n",
- KBUILD_MODNAME);
- return -ENODEV;
- }
-
- index = fmc_validate(fmc, &fwe_drv);
- if (index < 0) {
- pr_err("%s: refusing device \"%s\"\n", KBUILD_MODNAME,
- dev_name(dev));
- return -ENODEV;
- }
- if (index >= fwe_file_n) {
- pr_err("%s: no filename for device index %i\n",
- KBUILD_MODNAME, index);
- return -ENODEV;
- }
- s = fwe_file[index];
- if (!s) {
- pr_err("%s: no filename for \"%s\" not programming\n",
- KBUILD_MODNAME, dev_name(dev));
- return -ENOENT;
- }
- err = request_firmware(&fw, s, dev);
- if (err < 0) {
- dev_err(&fmc->dev, "request firmware \"%s\": error %i\n",
- s, err);
- return err;
- }
- fwe_run(fmc, fw, s);
- release_firmware(fw);
- return 0;
-}
-
-static int fwe_remove(struct fmc_device *fmc)
-{
- return 0;
-}
-
-static struct fmc_driver fwe_drv = {
- .version = FMC_VERSION,
- .driver.name = KBUILD_MODNAME,
- .probe = fwe_probe,
- .remove = fwe_remove,
- /* no table, as the current match just matches everything */
-};
-
-static int fwe_init(void)
-{
- int ret;
-
- ret = fmc_driver_register(&fwe_drv);
- return ret;
-}
-
-static void fwe_exit(void)
-{
- fmc_driver_unregister(&fwe_drv);
-}
-
-module_init(fwe_init);
-module_exit(fwe_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/fmc/fru-parse.c b/drivers/fmc/fru-parse.c
deleted file mode 100644
index f551b81f4fd9..000000000000
--- a/drivers/fmc/fru-parse.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#include <linux/ipmi-fru.h>
-
-/* Some internal helpers */
-static struct fru_type_length *
-__fru_get_board_tl(struct fru_common_header *header, int nr)
-{
- struct fru_board_info_area *bia;
- struct fru_type_length *tl;
-
- bia = fru_get_board_area(header);
- tl = bia->tl;
- while (nr > 0 && !fru_is_eof(tl)) {
- tl = fru_next_tl(tl);
- nr--;
- }
- if (fru_is_eof(tl))
- return NULL;
- return tl;
-}
-
-static char *__fru_alloc_get_tl(struct fru_common_header *header, int nr)
-{
- struct fru_type_length *tl;
- char *res;
-
- tl = __fru_get_board_tl(header, nr);
- if (!tl)
- return NULL;
-
- res = fru_alloc(fru_strlen(tl) + 1);
- if (!res)
- return NULL;
- return fru_strcpy(res, tl);
-}
-
-/* Public checksum verifiers */
-int fru_header_cksum_ok(struct fru_common_header *header)
-{
- uint8_t *ptr = (void *)header;
- int i, sum;
-
- for (i = sum = 0; i < sizeof(*header); i++)
- sum += ptr[i];
- return (sum & 0xff) == 0;
-}
-int fru_bia_cksum_ok(struct fru_board_info_area *bia)
-{
- uint8_t *ptr = (void *)bia;
- int i, sum;
-
- for (i = sum = 0; i < 8 * bia->area_len; i++)
- sum += ptr[i];
- return (sum & 0xff) == 0;
-}
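A hedged aside (helper not part of the driver; assumes <stdint.h> and <stddef.h>): the two verifiers above implement the usual IPMI zero-sum rule, so the checksum byte a FRU writer must store is the two's complement of the sum of all preceding bytes:

	static uint8_t fru_cksum_byte(const uint8_t *buf, size_t len)
	{
		unsigned int sum = 0;
		size_t i;

		for (i = 0; i < len; i++)
			sum += buf[i];
		return (uint8_t)(0x100 - (sum & 0xff)); /* total sums to 0 mod 256 */
	}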
-
-/* Get various stuff, trivial */
-char *fru_get_board_manufacturer(struct fru_common_header *header)
-{
- return __fru_alloc_get_tl(header, 0);
-}
-char *fru_get_product_name(struct fru_common_header *header)
-{
- return __fru_alloc_get_tl(header, 1);
-}
-char *fru_get_serial_number(struct fru_common_header *header)
-{
- return __fru_alloc_get_tl(header, 2);
-}
-char *fru_get_part_number(struct fru_common_header *header)
-{
- return __fru_alloc_get_tl(header, 3);
-}
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index acd40eb51c46..e4fee216d5a4 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -62,16 +62,12 @@ config GPIO_SYSFS
bool "/sys/class/gpio/... (sysfs interface)"
depends on SYSFS
help
- Say Y here to add a sysfs interface for GPIOs.
+ Say Y here to add the legacy sysfs interface for GPIOs.
- This is mostly useful to work around omissions in a system's
- kernel support. Those are common in custom and semicustom
- hardware assembled using standard kernels with a minimum of
- custom patches. In those cases, userspace code may import
- a given GPIO from the kernel, if no kernel driver requested it.
-
- Kernel drivers may also request that a particular GPIO be
- exported to userspace; this can be useful when debugging.
+ This ABI is deprecated. If you want to use GPIO from userspace,
+ use the character device /dev/gpiochipN with the appropriate
+ ioctl() operations instead. The character device is always
+ available.
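The character-device ABI that the new help text points to can be driven from plain C. A minimal hedged sketch (chip path and line offset are illustrative; the ioctls are the v1 line-handle API from <linux/gpio.h>) that requests one line as an output and drives it high:

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/gpio.h>

	int main(void)
	{
		struct gpiohandle_request req;
		struct gpiohandle_data data;
		int fd = open("/dev/gpiochip0", O_RDWR);

		if (fd < 0)
			return 1;
		memset(&req, 0, sizeof(req));
		req.lineoffsets[0] = 4;			/* illustrative line */
		req.lines = 1;
		req.flags = GPIOHANDLE_REQUEST_OUTPUT;
		strcpy(req.consumer_label, "example");
		if (ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req) < 0)
			return 1;
		memset(&data, 0, sizeof(data));
		data.values[0] = 1;			/* drive high */
		ioctl(req.fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, &data);
		close(req.fd);
		close(fd);
		return 0;
	}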
config GPIO_GENERIC
depends on HAS_IOMEM # Only for IOMEM drivers
@@ -178,7 +174,7 @@ config GPIO_CLPS711X
config GPIO_DAVINCI
bool "TI Davinci/Keystone GPIO support"
default y if ARCH_DAVINCI
- depends on ARM && (ARCH_DAVINCI || ARCH_KEYSTONE)
+ depends on (ARM || ARM64) && (ARCH_DAVINCI || ARCH_KEYSTONE || ARCH_K3)
help
Say yes here to enable GPIO support for TI Davinci/Keystone SoCs.
@@ -493,7 +489,8 @@ config GPIO_STA2X11
config GPIO_STP_XWAY
bool "XWAY STP GPIOs"
- depends on SOC_XWAY
+ depends on SOC_XWAY || COMPILE_TEST
+ depends on OF_GPIO
help
This enables support for the Serial To Parallel (STP) unit found on
XWAY SoC. The STP allows the SoC to drive a shift registers cascade,
@@ -602,7 +599,6 @@ config GPIO_XGENE_SB
config GPIO_XILINX
tristate "Xilinx GPIO support"
- depends on OF_GPIO
help
Say yes here to support the Xilinx FPGA GPIO device
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 6700eee860b7..9e400e34e300 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -17,154 +17,154 @@ obj-$(CONFIG_GPIO_GENERIC) += gpio-generic.o
# directly supported by gpio-generic
gpio-generic-$(CONFIG_GPIO_GENERIC) += gpio-mmio.o
-obj-$(CONFIG_GPIO_104_DIO_48E) += gpio-104-dio-48e.o
-obj-$(CONFIG_GPIO_104_IDIO_16) += gpio-104-idio-16.o
-obj-$(CONFIG_GPIO_104_IDI_48) += gpio-104-idi-48.o
-obj-$(CONFIG_GPIO_74X164) += gpio-74x164.o
-obj-$(CONFIG_GPIO_74XX_MMIO) += gpio-74xx-mmio.o
-obj-$(CONFIG_GPIO_ADNP) += gpio-adnp.o
-obj-$(CONFIG_GPIO_ADP5520) += gpio-adp5520.o
-obj-$(CONFIG_GPIO_ADP5588) += gpio-adp5588.o
-obj-$(CONFIG_GPIO_ALTERA) += gpio-altera.o
-obj-$(CONFIG_GPIO_ALTERA_A10SR) += gpio-altera-a10sr.o
-obj-$(CONFIG_GPIO_AMD_FCH) += gpio-amd-fch.o
-obj-$(CONFIG_GPIO_AMD8111) += gpio-amd8111.o
-obj-$(CONFIG_GPIO_AMDPT) += gpio-amdpt.o
-obj-$(CONFIG_GPIO_ARIZONA) += gpio-arizona.o
-obj-$(CONFIG_GPIO_ATH79) += gpio-ath79.o
-obj-$(CONFIG_GPIO_ASPEED) += gpio-aspeed.o
-obj-$(CONFIG_GPIO_RASPBERRYPI_EXP) += gpio-raspberrypi-exp.o
-obj-$(CONFIG_GPIO_BCM_KONA) += gpio-bcm-kona.o
-obj-$(CONFIG_GPIO_BD9571MWV) += gpio-bd9571mwv.o
-obj-$(CONFIG_GPIO_BRCMSTB) += gpio-brcmstb.o
-obj-$(CONFIG_GPIO_BT8XX) += gpio-bt8xx.o
-obj-$(CONFIG_GPIO_CADENCE) += gpio-cadence.o
-obj-$(CONFIG_GPIO_CLPS711X) += gpio-clps711x.o
-obj-$(CONFIG_GPIO_CS5535) += gpio-cs5535.o
-obj-$(CONFIG_GPIO_CRYSTAL_COVE) += gpio-crystalcove.o
-obj-$(CONFIG_GPIO_DA9052) += gpio-da9052.o
-obj-$(CONFIG_GPIO_DA9055) += gpio-da9055.o
-obj-$(CONFIG_GPIO_DAVINCI) += gpio-davinci.o
-obj-$(CONFIG_GPIO_DLN2) += gpio-dln2.o
-obj-$(CONFIG_GPIO_DWAPB) += gpio-dwapb.o
-obj-$(CONFIG_GPIO_EIC_SPRD) += gpio-eic-sprd.o
-obj-$(CONFIG_GPIO_EM) += gpio-em.o
-obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o
-obj-$(CONFIG_GPIO_EXAR) += gpio-exar.o
-obj-$(CONFIG_GPIO_F7188X) += gpio-f7188x.o
-obj-$(CONFIG_GPIO_FTGPIO010) += gpio-ftgpio010.o
-obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o
-obj-$(CONFIG_GPIO_GPIO_MM) += gpio-gpio-mm.o
-obj-$(CONFIG_GPIO_GRGPIO) += gpio-grgpio.o
-obj-$(CONFIG_GPIO_GW_PLD) += gpio-gw-pld.o
-obj-$(CONFIG_GPIO_HLWD) += gpio-hlwd.o
-obj-$(CONFIG_HTC_EGPIO) += gpio-htc-egpio.o
-obj-$(CONFIG_GPIO_ICH) += gpio-ich.o
-obj-$(CONFIG_GPIO_IOP) += gpio-iop.o
-obj-$(CONFIG_GPIO_IXP4XX) += gpio-ixp4xx.o
-obj-$(CONFIG_GPIO_IT87) += gpio-it87.o
-obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o
-obj-$(CONFIG_GPIO_KEMPLD) += gpio-kempld.o
-obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o
-obj-$(CONFIG_GPIO_INTEL_MID) += gpio-intel-mid.o
-obj-$(CONFIG_GPIO_LOONGSON) += gpio-loongson.o
-obj-$(CONFIG_GPIO_LP3943) += gpio-lp3943.o
-obj-$(CONFIG_GPIO_LPC18XX) += gpio-lpc18xx.o
-obj-$(CONFIG_ARCH_LPC32XX) += gpio-lpc32xx.o
-obj-$(CONFIG_GPIO_LP873X) += gpio-lp873x.o
-obj-$(CONFIG_GPIO_LP87565) += gpio-lp87565.o
-obj-$(CONFIG_GPIO_LYNXPOINT) += gpio-lynxpoint.o
-obj-$(CONFIG_GPIO_MADERA) += gpio-madera.o
-obj-$(CONFIG_GPIO_MAX3191X) += gpio-max3191x.o
-obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o
-obj-$(CONFIG_GPIO_MAX7300) += gpio-max7300.o
-obj-$(CONFIG_GPIO_MAX7301) += gpio-max7301.o
-obj-$(CONFIG_GPIO_MAX732X) += gpio-max732x.o
-obj-$(CONFIG_GPIO_MAX77620) += gpio-max77620.o
-obj-$(CONFIG_GPIO_MAX77650) += gpio-max77650.o
-obj-$(CONFIG_GPIO_MB86S7X) += gpio-mb86s7x.o
-obj-$(CONFIG_GPIO_MENZ127) += gpio-menz127.o
-obj-$(CONFIG_GPIO_MERRIFIELD) += gpio-merrifield.o
-obj-$(CONFIG_GPIO_MC33880) += gpio-mc33880.o
-obj-$(CONFIG_GPIO_MC9S08DZ60) += gpio-mc9s08dz60.o
-obj-$(CONFIG_GPIO_MLXBF) += gpio-mlxbf.o
-obj-$(CONFIG_GPIO_ML_IOH) += gpio-ml-ioh.o
-obj-$(CONFIG_GPIO_MM_LANTIQ) += gpio-mm-lantiq.o
-obj-$(CONFIG_GPIO_MOCKUP) += gpio-mockup.o
-obj-$(CONFIG_GPIO_MPC5200) += gpio-mpc5200.o
-obj-$(CONFIG_GPIO_MPC8XXX) += gpio-mpc8xxx.o
-obj-$(CONFIG_GPIO_MSIC) += gpio-msic.o
+obj-$(CONFIG_GPIO_104_DIO_48E) += gpio-104-dio-48e.o
+obj-$(CONFIG_GPIO_104_IDI_48) += gpio-104-idi-48.o
+obj-$(CONFIG_GPIO_104_IDIO_16) += gpio-104-idio-16.o
+obj-$(CONFIG_GPIO_74X164) += gpio-74x164.o
+obj-$(CONFIG_GPIO_74XX_MMIO) += gpio-74xx-mmio.o
+obj-$(CONFIG_GPIO_ADNP) += gpio-adnp.o
+obj-$(CONFIG_GPIO_ADP5520) += gpio-adp5520.o
+obj-$(CONFIG_GPIO_ADP5588) += gpio-adp5588.o
+obj-$(CONFIG_GPIO_ALTERA_A10SR) += gpio-altera-a10sr.o
+obj-$(CONFIG_GPIO_ALTERA) += gpio-altera.o
+obj-$(CONFIG_GPIO_AMD8111) += gpio-amd8111.o
+obj-$(CONFIG_GPIO_AMD_FCH) += gpio-amd-fch.o
+obj-$(CONFIG_GPIO_AMDPT) += gpio-amdpt.o
+obj-$(CONFIG_GPIO_ARIZONA) += gpio-arizona.o
+obj-$(CONFIG_GPIO_ASPEED) += gpio-aspeed.o
+obj-$(CONFIG_GPIO_ATH79) += gpio-ath79.o
+obj-$(CONFIG_GPIO_BCM_KONA) += gpio-bcm-kona.o
+obj-$(CONFIG_GPIO_BD9571MWV) += gpio-bd9571mwv.o
+obj-$(CONFIG_GPIO_BRCMSTB) += gpio-brcmstb.o
+obj-$(CONFIG_GPIO_BT8XX) += gpio-bt8xx.o
+obj-$(CONFIG_GPIO_CADENCE) += gpio-cadence.o
+obj-$(CONFIG_GPIO_CLPS711X) += gpio-clps711x.o
+obj-$(CONFIG_GPIO_SNPS_CREG) += gpio-creg-snps.o
+obj-$(CONFIG_GPIO_CRYSTAL_COVE) += gpio-crystalcove.o
+obj-$(CONFIG_GPIO_CS5535) += gpio-cs5535.o
+obj-$(CONFIG_GPIO_DA9052) += gpio-da9052.o
+obj-$(CONFIG_GPIO_DA9055) += gpio-da9055.o
+obj-$(CONFIG_GPIO_DAVINCI) += gpio-davinci.o
+obj-$(CONFIG_GPIO_DLN2) += gpio-dln2.o
+obj-$(CONFIG_GPIO_DWAPB) += gpio-dwapb.o
+obj-$(CONFIG_GPIO_EIC_SPRD) += gpio-eic-sprd.o
+obj-$(CONFIG_GPIO_EM) += gpio-em.o
+obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o
+obj-$(CONFIG_GPIO_EXAR) += gpio-exar.o
+obj-$(CONFIG_GPIO_F7188X) += gpio-f7188x.o
+obj-$(CONFIG_GPIO_FTGPIO010) += gpio-ftgpio010.o
+obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o
+obj-$(CONFIG_GPIO_GPIO_MM) += gpio-gpio-mm.o
+obj-$(CONFIG_GPIO_GRGPIO) += gpio-grgpio.o
+obj-$(CONFIG_GPIO_GW_PLD) += gpio-gw-pld.o
+obj-$(CONFIG_GPIO_HLWD) += gpio-hlwd.o
+obj-$(CONFIG_HTC_EGPIO) += gpio-htc-egpio.o
+obj-$(CONFIG_GPIO_ICH) += gpio-ich.o
+obj-$(CONFIG_GPIO_INTEL_MID) += gpio-intel-mid.o
+obj-$(CONFIG_GPIO_IOP) += gpio-iop.o
+obj-$(CONFIG_GPIO_IT87) += gpio-it87.o
+obj-$(CONFIG_GPIO_IXP4XX) += gpio-ixp4xx.o
+obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o
+obj-$(CONFIG_GPIO_KEMPLD) += gpio-kempld.o
+obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o
+obj-$(CONFIG_GPIO_LOONGSON1) += gpio-loongson1.o
+obj-$(CONFIG_GPIO_LOONGSON) += gpio-loongson.o
+obj-$(CONFIG_GPIO_LP3943) += gpio-lp3943.o
+obj-$(CONFIG_GPIO_LP873X) += gpio-lp873x.o
+obj-$(CONFIG_GPIO_LP87565) += gpio-lp87565.o
+obj-$(CONFIG_GPIO_LPC18XX) += gpio-lpc18xx.o
+obj-$(CONFIG_ARCH_LPC32XX) += gpio-lpc32xx.o
+obj-$(CONFIG_GPIO_LYNXPOINT) += gpio-lynxpoint.o
+obj-$(CONFIG_GPIO_MADERA) += gpio-madera.o
+obj-$(CONFIG_GPIO_MAX3191X) += gpio-max3191x.o
+obj-$(CONFIG_GPIO_MAX7300) += gpio-max7300.o
+obj-$(CONFIG_GPIO_MAX7301) += gpio-max7301.o
+obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o
+obj-$(CONFIG_GPIO_MAX732X) += gpio-max732x.o
+obj-$(CONFIG_GPIO_MAX77620) += gpio-max77620.o
+obj-$(CONFIG_GPIO_MAX77650) += gpio-max77650.o
+obj-$(CONFIG_GPIO_MB86S7X) += gpio-mb86s7x.o
+obj-$(CONFIG_GPIO_MC33880) += gpio-mc33880.o
+obj-$(CONFIG_GPIO_MC9S08DZ60) += gpio-mc9s08dz60.o
+obj-$(CONFIG_GPIO_MENZ127) += gpio-menz127.o
+obj-$(CONFIG_GPIO_MERRIFIELD) += gpio-merrifield.o
+obj-$(CONFIG_GPIO_ML_IOH) += gpio-ml-ioh.o
+obj-$(CONFIG_GPIO_MLXBF) += gpio-mlxbf.o
+obj-$(CONFIG_GPIO_MM_LANTIQ) += gpio-mm-lantiq.o
+obj-$(CONFIG_GPIO_MOCKUP) += gpio-mockup.o
+obj-$(CONFIG_GPIO_MPC5200) += gpio-mpc5200.o
+obj-$(CONFIG_GPIO_MPC8XXX) += gpio-mpc8xxx.o
+obj-$(CONFIG_GPIO_MSIC) += gpio-msic.o
obj-$(CONFIG_GPIO_MT7621) += gpio-mt7621.o
-obj-$(CONFIG_GPIO_MVEBU) += gpio-mvebu.o
-obj-$(CONFIG_GPIO_MXC) += gpio-mxc.o
-obj-$(CONFIG_GPIO_MXS) += gpio-mxs.o
-obj-$(CONFIG_GPIO_OCTEON) += gpio-octeon.o
-obj-$(CONFIG_GPIO_OMAP) += gpio-omap.o
-obj-$(CONFIG_GPIO_PCA953X) += gpio-pca953x.o
-obj-$(CONFIG_GPIO_PCF857X) += gpio-pcf857x.o
-obj-$(CONFIG_GPIO_PCH) += gpio-pch.o
-obj-$(CONFIG_GPIO_PCI_IDIO_16) += gpio-pci-idio-16.o
-obj-$(CONFIG_GPIO_PCIE_IDIO_24) += gpio-pcie-idio-24.o
-obj-$(CONFIG_GPIO_PISOSR) += gpio-pisosr.o
-obj-$(CONFIG_GPIO_PL061) += gpio-pl061.o
+obj-$(CONFIG_GPIO_MVEBU) += gpio-mvebu.o
+obj-$(CONFIG_GPIO_MXC) += gpio-mxc.o
+obj-$(CONFIG_GPIO_MXS) += gpio-mxs.o
+obj-$(CONFIG_GPIO_OCTEON) += gpio-octeon.o
+obj-$(CONFIG_GPIO_OMAP) += gpio-omap.o
+obj-$(CONFIG_GPIO_PALMAS) += gpio-palmas.o
+obj-$(CONFIG_GPIO_PCA953X) += gpio-pca953x.o
+obj-$(CONFIG_GPIO_PCF857X) += gpio-pcf857x.o
+obj-$(CONFIG_GPIO_PCH) += gpio-pch.o
+obj-$(CONFIG_GPIO_PCIE_IDIO_24) += gpio-pcie-idio-24.o
+obj-$(CONFIG_GPIO_PCI_IDIO_16) += gpio-pci-idio-16.o
+obj-$(CONFIG_GPIO_PISOSR) += gpio-pisosr.o
+obj-$(CONFIG_GPIO_PL061) += gpio-pl061.o
obj-$(CONFIG_GPIO_PMIC_EIC_SPRD) += gpio-pmic-eic-sprd.o
-obj-$(CONFIG_GPIO_PXA) += gpio-pxa.o
-obj-$(CONFIG_GPIO_RC5T583) += gpio-rc5t583.o
-obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o
-obj-$(CONFIG_GPIO_RCAR) += gpio-rcar.o
-obj-$(CONFIG_GPIO_REG) += gpio-reg.o
-obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o
+obj-$(CONFIG_GPIO_PXA) += gpio-pxa.o
+obj-$(CONFIG_GPIO_RASPBERRYPI_EXP) += gpio-raspberrypi-exp.o
+obj-$(CONFIG_GPIO_RC5T583) += gpio-rc5t583.o
+obj-$(CONFIG_GPIO_RCAR) += gpio-rcar.o
+obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o
+obj-$(CONFIG_GPIO_REG) += gpio-reg.o
+obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o
obj-$(CONFIG_GPIO_SAMA5D2_PIOBU) += gpio-sama5d2-piobu.o
-obj-$(CONFIG_GPIO_SCH) += gpio-sch.o
-obj-$(CONFIG_GPIO_SCH311X) += gpio-sch311x.o
-obj-$(CONFIG_GPIO_SNPS_CREG) += gpio-creg-snps.o
-obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o
-obj-$(CONFIG_GPIO_SPEAR_SPICS) += gpio-spear-spics.o
-obj-$(CONFIG_GPIO_SPRD) += gpio-sprd.o
-obj-$(CONFIG_GPIO_STA2X11) += gpio-sta2x11.o
-obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o
-obj-$(CONFIG_GPIO_STP_XWAY) += gpio-stp-xway.o
-obj-$(CONFIG_GPIO_SYSCON) += gpio-syscon.o
-obj-$(CONFIG_GPIO_TB10X) += gpio-tb10x.o
-obj-$(CONFIG_GPIO_TC3589X) += gpio-tc3589x.o
-obj-$(CONFIG_GPIO_TEGRA) += gpio-tegra.o
-obj-$(CONFIG_GPIO_TEGRA186) += gpio-tegra186.o
-obj-$(CONFIG_GPIO_THUNDERX) += gpio-thunderx.o
-obj-$(CONFIG_GPIO_TIMBERDALE) += gpio-timberdale.o
-obj-$(CONFIG_GPIO_PALMAS) += gpio-palmas.o
-obj-$(CONFIG_GPIO_SIOX) += gpio-siox.o
-obj-$(CONFIG_GPIO_TPIC2810) += gpio-tpic2810.o
-obj-$(CONFIG_GPIO_TPS65086) += gpio-tps65086.o
-obj-$(CONFIG_GPIO_TPS65218) += gpio-tps65218.o
-obj-$(CONFIG_GPIO_TPS6586X) += gpio-tps6586x.o
-obj-$(CONFIG_GPIO_TPS65910) += gpio-tps65910.o
-obj-$(CONFIG_GPIO_TPS65912) += gpio-tps65912.o
-obj-$(CONFIG_GPIO_TPS68470) += gpio-tps68470.o
-obj-$(CONFIG_GPIO_TQMX86) += gpio-tqmx86.o
-obj-$(CONFIG_GPIO_TS4800) += gpio-ts4800.o
-obj-$(CONFIG_GPIO_TS4900) += gpio-ts4900.o
-obj-$(CONFIG_GPIO_TS5500) += gpio-ts5500.o
-obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o
-obj-$(CONFIG_GPIO_TWL6040) += gpio-twl6040.o
-obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
-obj-$(CONFIG_GPIO_UNIPHIER) += gpio-uniphier.o
-obj-$(CONFIG_GPIO_VF610) += gpio-vf610.o
-obj-$(CONFIG_GPIO_VIPERBOARD) += gpio-viperboard.o
-obj-$(CONFIG_GPIO_VR41XX) += gpio-vr41xx.o
-obj-$(CONFIG_GPIO_VX855) += gpio-vx855.o
-obj-$(CONFIG_GPIO_WHISKEY_COVE) += gpio-wcove.o
-obj-$(CONFIG_GPIO_WINBOND) += gpio-winbond.o
-obj-$(CONFIG_GPIO_WM831X) += gpio-wm831x.o
-obj-$(CONFIG_GPIO_WM8350) += gpio-wm8350.o
-obj-$(CONFIG_GPIO_WM8994) += gpio-wm8994.o
-obj-$(CONFIG_GPIO_WS16C48) += gpio-ws16c48.o
-obj-$(CONFIG_GPIO_XGENE) += gpio-xgene.o
-obj-$(CONFIG_GPIO_XGENE_SB) += gpio-xgene-sb.o
-obj-$(CONFIG_GPIO_XILINX) += gpio-xilinx.o
-obj-$(CONFIG_GPIO_XLP) += gpio-xlp.o
-obj-$(CONFIG_GPIO_XRA1403) += gpio-xra1403.o
-obj-$(CONFIG_GPIO_XTENSA) += gpio-xtensa.o
-obj-$(CONFIG_GPIO_ZEVIO) += gpio-zevio.o
-obj-$(CONFIG_GPIO_ZYNQ) += gpio-zynq.o
-obj-$(CONFIG_GPIO_ZX) += gpio-zx.o
-obj-$(CONFIG_GPIO_LOONGSON1) += gpio-loongson1.o
+obj-$(CONFIG_GPIO_SCH311X) += gpio-sch311x.o
+obj-$(CONFIG_GPIO_SCH) += gpio-sch.o
+obj-$(CONFIG_GPIO_SIOX) += gpio-siox.o
+obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o
+obj-$(CONFIG_GPIO_SPEAR_SPICS) += gpio-spear-spics.o
+obj-$(CONFIG_GPIO_SPRD) += gpio-sprd.o
+obj-$(CONFIG_GPIO_STA2X11) += gpio-sta2x11.o
+obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o
+obj-$(CONFIG_GPIO_STP_XWAY) += gpio-stp-xway.o
+obj-$(CONFIG_GPIO_SYSCON) += gpio-syscon.o
+obj-$(CONFIG_GPIO_TB10X) += gpio-tb10x.o
+obj-$(CONFIG_GPIO_TC3589X) += gpio-tc3589x.o
+obj-$(CONFIG_GPIO_TEGRA186) += gpio-tegra186.o
+obj-$(CONFIG_GPIO_TEGRA) += gpio-tegra.o
+obj-$(CONFIG_GPIO_THUNDERX) += gpio-thunderx.o
+obj-$(CONFIG_GPIO_TIMBERDALE) += gpio-timberdale.o
+obj-$(CONFIG_GPIO_TPIC2810) += gpio-tpic2810.o
+obj-$(CONFIG_GPIO_TPS65086) += gpio-tps65086.o
+obj-$(CONFIG_GPIO_TPS65218) += gpio-tps65218.o
+obj-$(CONFIG_GPIO_TPS6586X) += gpio-tps6586x.o
+obj-$(CONFIG_GPIO_TPS65910) += gpio-tps65910.o
+obj-$(CONFIG_GPIO_TPS65912) += gpio-tps65912.o
+obj-$(CONFIG_GPIO_TPS68470) += gpio-tps68470.o
+obj-$(CONFIG_GPIO_TQMX86) += gpio-tqmx86.o
+obj-$(CONFIG_GPIO_TS4800) += gpio-ts4800.o
+obj-$(CONFIG_GPIO_TS4900) += gpio-ts4900.o
+obj-$(CONFIG_GPIO_TS5500) += gpio-ts5500.o
+obj-$(CONFIG_GPIO_TWL4030) += gpio-twl4030.o
+obj-$(CONFIG_GPIO_TWL6040) += gpio-twl6040.o
+obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
+obj-$(CONFIG_GPIO_UNIPHIER) += gpio-uniphier.o
+obj-$(CONFIG_GPIO_VF610) += gpio-vf610.o
+obj-$(CONFIG_GPIO_VIPERBOARD) += gpio-viperboard.o
+obj-$(CONFIG_GPIO_VR41XX) += gpio-vr41xx.o
+obj-$(CONFIG_GPIO_VX855) += gpio-vx855.o
+obj-$(CONFIG_GPIO_WHISKEY_COVE) += gpio-wcove.o
+obj-$(CONFIG_GPIO_WINBOND) += gpio-winbond.o
+obj-$(CONFIG_GPIO_WM831X) += gpio-wm831x.o
+obj-$(CONFIG_GPIO_WM8350) += gpio-wm8350.o
+obj-$(CONFIG_GPIO_WM8994) += gpio-wm8994.o
+obj-$(CONFIG_GPIO_WS16C48) += gpio-ws16c48.o
+obj-$(CONFIG_GPIO_XGENE) += gpio-xgene.o
+obj-$(CONFIG_GPIO_XGENE_SB) += gpio-xgene-sb.o
+obj-$(CONFIG_GPIO_XILINX) += gpio-xilinx.o
+obj-$(CONFIG_GPIO_XLP) += gpio-xlp.o
+obj-$(CONFIG_GPIO_XRA1403) += gpio-xra1403.o
+obj-$(CONFIG_GPIO_XTENSA) += gpio-xtensa.o
+obj-$(CONFIG_GPIO_ZEVIO) += gpio-zevio.o
+obj-$(CONFIG_GPIO_ZX) += gpio-zx.o
+obj-$(CONFIG_GPIO_ZYNQ) += gpio-zynq.o
diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index 19d27c904916..9c048f10c9ad 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -90,6 +90,46 @@ GPIOLIB irqchip
The GPIOLIB irqchip is a helper irqchip for "simple cases" that should
try to cover any generic kind of irqchip cascaded from a GPIO.
+- Convert all the GPIOLIB_IRQCHIP users to pass an irqchip template,
+ parent and flags before calling [devm_]gpiochip_add[_data]().
+ Currently we set up the irqchip after setting up the gpiochip
+ using gpiochip_irqchip_add() and gpiochip_set_[chained|nested]_irqchip().
+ This is too complex, so convert all users over to just set up
+ the irqchip before registering the gpio_chip; a typical example:
+
+ /* Typical state container with dynamic irqchip */
+ struct my_gpio {
+ struct gpio_chip gc;
+ struct irq_chip irq;
+ };
+
+ int irq; /* from platform etc */
+ struct my_gpio *g;
+ struct gpio_irq_chip *girq;
+
+ /* Set up the irqchip dynamically */
+ g->irq.name = "my_gpio_irq";
+ g->irq.irq_ack = my_gpio_ack_irq;
+ g->irq.irq_mask = my_gpio_mask_irq;
+ g->irq.irq_unmask = my_gpio_unmask_irq;
+ g->irq.irq_set_type = my_gpio_set_irq_type;
+
+ /* Get a pointer to the gpio_irq_chip */
+ girq = &g->gc.irq;
+ girq->chip = &g->irq;
+ girq->parent_handler = ftgpio_gpio_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+ girq->parents[0] = irq;
+
+ When this is done, we will delete the old APIs for instantiating
+ GPIOLIB_IRQCHIP and simplify the code.
+
- Look over and identify any remaining easily converted drivers and
dry-code conversions to gpiolib irqchip for maintainers to test
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c
index e088b908c2c1..9f2e6b04c361 100644
--- a/drivers/gpio/gpio-altera.c
+++ b/drivers/gpio/gpio-altera.c
@@ -30,6 +30,7 @@ struct altera_gpio_chip {
raw_spinlock_t gpio_lock;
int interrupt_trigger;
int mapped_irq;
+ struct irq_chip irq_chip;
};
static void altera_gpio_irq_unmask(struct irq_data *d)
@@ -101,15 +102,6 @@ static unsigned int altera_gpio_irq_startup(struct irq_data *d)
return 0;
}
-static struct irq_chip altera_irq_chip = {
- .name = "altera-gpio",
- .irq_mask = altera_gpio_irq_mask,
- .irq_unmask = altera_gpio_irq_unmask,
- .irq_set_type = altera_gpio_irq_set_type,
- .irq_startup = altera_gpio_irq_startup,
- .irq_shutdown = altera_gpio_irq_mask,
-};
-
static int altera_gpio_get(struct gpio_chip *gc, unsigned offset)
{
struct of_mm_gpio_chip *mm_gc;
@@ -246,6 +238,7 @@ static int altera_gpio_probe(struct platform_device *pdev)
struct device_node *node = pdev->dev.of_node;
int reg, ret;
struct altera_gpio_chip *altera_gc;
+ struct gpio_irq_chip *girq;
altera_gc = devm_kzalloc(&pdev->dev, sizeof(*altera_gc), GFP_KERNEL);
if (!altera_gc)
@@ -273,50 +266,50 @@ static int altera_gpio_probe(struct platform_device *pdev)
altera_gc->mmchip.gc.owner = THIS_MODULE;
altera_gc->mmchip.gc.parent = &pdev->dev;
- ret = of_mm_gpiochip_add_data(node, &altera_gc->mmchip, altera_gc);
- if (ret) {
- dev_err(&pdev->dev, "Failed adding memory mapped gpiochip\n");
- return ret;
- }
-
- platform_set_drvdata(pdev, altera_gc);
-
altera_gc->mapped_irq = platform_get_irq(pdev, 0);
if (altera_gc->mapped_irq < 0)
goto skip_irq;
if (of_property_read_u32(node, "altr,interrupt-type", &reg)) {
- ret = -EINVAL;
dev_err(&pdev->dev,
"altr,interrupt-type value not set in device tree\n");
- goto teardown;
+ return -EINVAL;
}
altera_gc->interrupt_trigger = reg;
- ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0,
- handle_bad_irq, IRQ_TYPE_NONE);
+ altera_gc->irq_chip.name = "altera-gpio";
+ altera_gc->irq_chip.irq_mask = altera_gpio_irq_mask;
+ altera_gc->irq_chip.irq_unmask = altera_gpio_irq_unmask;
+ altera_gc->irq_chip.irq_set_type = altera_gpio_irq_set_type;
+ altera_gc->irq_chip.irq_startup = altera_gpio_irq_startup;
+ altera_gc->irq_chip.irq_shutdown = altera_gpio_irq_mask;
+
+ girq = &altera_gc->mmchip.gc.irq;
+ girq->chip = &altera_gc->irq_chip;
+ if (altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH)
+ girq->parent_handler = altera_gpio_irq_leveL_high_handler;
+ else
+ girq->parent_handler = altera_gpio_irq_edge_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(&pdev->dev, 1, sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+ girq->parents[0] = altera_gc->mapped_irq;
+skip_irq:
+ ret = of_mm_gpiochip_add_data(node, &altera_gc->mmchip, altera_gc);
if (ret) {
- dev_err(&pdev->dev, "could not add irqchip\n");
- goto teardown;
+ dev_err(&pdev->dev, "Failed adding memory mapped gpiochip\n");
+ return ret;
}
- gpiochip_set_chained_irqchip(&altera_gc->mmchip.gc,
- &altera_irq_chip,
- altera_gc->mapped_irq,
- altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH ?
- altera_gpio_irq_leveL_high_handler :
- altera_gpio_irq_edge_handler);
+ platform_set_drvdata(pdev, altera_gc);
-skip_irq:
return 0;
-teardown:
- of_mm_gpiochip_remove(&altera_gc->mmchip);
- pr_err("%pOF: registration failed with status %d\n",
- node, ret);
-
- return ret;
}
static int altera_gpio_remove(struct platform_device *pdev)
diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c
index 38c3f4a3d4aa..181df1581df5 100644
--- a/drivers/gpio/gpio-amd-fch.c
+++ b/drivers/gpio/gpio-amd-fch.c
@@ -25,14 +25,13 @@
#define AMD_FCH_GPIO_FLAG_WRITE BIT(22)
#define AMD_FCH_GPIO_FLAG_READ BIT(16)
-static struct resource amd_fch_gpio_iores =
+static const struct resource amd_fch_gpio_iores =
DEFINE_RES_MEM_NAMED(
AMD_FCH_MMIO_BASE + AMD_FCH_GPIO_BANK0_BASE,
AMD_FCH_GPIO_SIZE,
"amd-fch-gpio-iomem");
struct amd_fch_gpio_priv {
- struct platform_device *pdev;
struct gpio_chip gc;
void __iomem *base;
struct amd_fch_gpio_pdata *pdata;
@@ -153,7 +152,6 @@ static int amd_fch_gpio_probe(struct platform_device *pdev)
return -ENOMEM;
priv->pdata = pdata;
- priv->pdev = pdev;
priv->gc.owner = THIS_MODULE;
priv->gc.parent = &pdev->dev;
diff --git a/drivers/gpio/gpio-amdpt.c b/drivers/gpio/gpio-amdpt.c
index ad255ba7ece9..44398992ae15 100644
--- a/drivers/gpio/gpio-amdpt.c
+++ b/drivers/gpio/gpio-amdpt.c
@@ -88,7 +88,7 @@ static int pt_gpio_probe(struct platform_device *pdev)
pt_gpio->reg_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(pt_gpio->reg_base)) {
- dev_err(&pdev->dev, "Failed to map MMIO resource for PT GPIO.\n");
+ dev_err(dev, "Failed to map MMIO resource for PT GPIO.\n");
return PTR_ERR(pt_gpio->reg_base);
}
@@ -98,7 +98,7 @@ static int pt_gpio_probe(struct platform_device *pdev)
pt_gpio->reg_base + PT_DIRECTION_REG, NULL,
BGPIOF_READ_OUTPUT_REG_SET);
if (ret) {
- dev_err(&pdev->dev, "bgpio_init failed\n");
+ dev_err(dev, "bgpio_init failed\n");
return ret;
}
@@ -107,11 +107,11 @@ static int pt_gpio_probe(struct platform_device *pdev)
pt_gpio->gc.free = pt_gpio_free;
pt_gpio->gc.ngpio = PT_TOTAL_GPIO;
#if defined(CONFIG_OF_GPIO)
- pt_gpio->gc.of_node = pdev->dev.of_node;
+ pt_gpio->gc.of_node = dev->of_node;
#endif
ret = gpiochip_add_data(&pt_gpio->gc, pt_gpio);
if (ret) {
- dev_err(&pdev->dev, "Failed to register GPIO lib\n");
+ dev_err(dev, "Failed to register GPIO lib\n");
return ret;
}
@@ -121,7 +121,7 @@ static int pt_gpio_probe(struct platform_device *pdev)
writel(0, pt_gpio->reg_base + PT_SYNC_REG);
writel(0, pt_gpio->reg_base + PT_CLOCKRATE_REG);
- dev_dbg(&pdev->dev, "PT GPIO driver loaded\n");
+ dev_dbg(dev, "PT GPIO driver loaded\n");
return ret;
}
diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c
index 6c6dcda1100c..f1a5ea9b3de2 100644
--- a/drivers/gpio/gpio-ath79.c
+++ b/drivers/gpio/gpio-ath79.c
@@ -222,14 +222,16 @@ MODULE_DEVICE_TABLE(of, ath79_gpio_of_match);
static int ath79_gpio_probe(struct platform_device *pdev)
{
struct ath79_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev);
- struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
struct ath79_gpio_ctrl *ctrl;
+ struct gpio_irq_chip *girq;
struct resource *res;
u32 ath79_gpio_count;
bool oe_inverted;
int err;
- ctrl = devm_kzalloc(&pdev->dev, sizeof(*ctrl), GFP_KERNEL);
+ ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL);
if (!ctrl)
return -ENOMEM;
platform_set_drvdata(pdev, ctrl);
@@ -237,7 +239,7 @@ static int ath79_gpio_probe(struct platform_device *pdev)
if (np) {
err = of_property_read_u32(np, "ngpios", &ath79_gpio_count);
if (err) {
- dev_err(&pdev->dev, "ngpios property is not valid\n");
+ dev_err(dev, "ngpios property is not valid\n");
return err;
}
oe_inverted = of_device_is_compatible(np, "qca,ar9340-gpio");
@@ -245,25 +247,24 @@ static int ath79_gpio_probe(struct platform_device *pdev)
ath79_gpio_count = pdata->ngpios;
oe_inverted = pdata->oe_inverted;
} else {
- dev_err(&pdev->dev, "No DT node or platform data found\n");
+ dev_err(dev, "No DT node or platform data found\n");
return -EINVAL;
}
if (ath79_gpio_count >= 32) {
- dev_err(&pdev->dev, "ngpios must be less than 32\n");
+ dev_err(dev, "ngpios must be less than 32\n");
return -EINVAL;
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -EINVAL;
- ctrl->base = devm_ioremap_nocache(
- &pdev->dev, res->start, resource_size(res));
+ ctrl->base = devm_ioremap_nocache(dev, res->start, resource_size(res));
if (!ctrl->base)
return -ENOMEM;
raw_spin_lock_init(&ctrl->lock);
- err = bgpio_init(&ctrl->gc, &pdev->dev, 4,
+ err = bgpio_init(&ctrl->gc, dev, 4,
ctrl->base + AR71XX_GPIO_REG_IN,
ctrl->base + AR71XX_GPIO_REG_SET,
ctrl->base + AR71XX_GPIO_REG_CLEAR,
@@ -271,45 +272,33 @@ static int ath79_gpio_probe(struct platform_device *pdev)
oe_inverted ? ctrl->base + AR71XX_GPIO_REG_OE : NULL,
0);
if (err) {
- dev_err(&pdev->dev, "bgpio_init failed\n");
+ dev_err(dev, "bgpio_init failed\n");
return err;
}
/* Use base 0 to stay compatible with legacy platforms */
ctrl->gc.base = 0;
- err = gpiochip_add_data(&ctrl->gc, ctrl);
- if (err) {
- dev_err(&pdev->dev,
- "cannot add AR71xx GPIO chip, error=%d", err);
- return err;
+ /* Optional interrupt setup */
+ if (!np || of_property_read_bool(np, "interrupt-controller")) {
+ girq = &ctrl->gc.irq;
+ girq->chip = &ath79_gpio_irqchip;
+ girq->parent_handler = ath79_gpio_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->parents[0] = platform_get_irq(pdev, 0);
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_simple_irq;
}
- if (np && !of_property_read_bool(np, "interrupt-controller"))
- return 0;
-
- err = gpiochip_irqchip_add(&ctrl->gc, &ath79_gpio_irqchip, 0,
- handle_simple_irq, IRQ_TYPE_NONE);
+ err = devm_gpiochip_add_data(dev, &ctrl->gc, ctrl);
if (err) {
- dev_err(&pdev->dev, "failed to add gpiochip_irqchip\n");
- goto gpiochip_remove;
+ dev_err(dev,
+ "cannot add AR71xx GPIO chip, error=%d", err);
+ return err;
}
-
- gpiochip_set_chained_irqchip(&ctrl->gc, &ath79_gpio_irqchip,
- platform_get_irq(pdev, 0),
- ath79_gpio_irq_handler);
-
- return 0;
-
-gpiochip_remove:
- gpiochip_remove(&ctrl->gc);
- return err;
-}
-
-static int ath79_gpio_remove(struct platform_device *pdev)
-{
- struct ath79_gpio_ctrl *ctrl = platform_get_drvdata(pdev);
-
- gpiochip_remove(&ctrl->gc);
return 0;
}
@@ -319,7 +308,6 @@ static struct platform_driver ath79_gpio_driver = {
.of_match_table = ath79_gpio_of_match,
},
.probe = ath79_gpio_probe,
- .remove = ath79_gpio_remove,
};
module_platform_driver(ath79_gpio_driver);
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 3bbf5804bd11..fc494a84a29d 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -297,7 +297,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
static void gpio_irq_disable(struct irq_data *d)
{
struct davinci_gpio_regs __iomem *g = irq2regs(d);
- u32 mask = (u32) irq_data_get_irq_handler_data(d);
+ uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d);
writel_relaxed(mask, &g->clr_falling);
writel_relaxed(mask, &g->clr_rising);
@@ -306,7 +306,7 @@ static void gpio_irq_disable(struct irq_data *d)
static void gpio_irq_enable(struct irq_data *d)
{
struct davinci_gpio_regs __iomem *g = irq2regs(d);
- u32 mask = (u32) irq_data_get_irq_handler_data(d);
+ uintptr_t mask = (uintptr_t)irq_data_get_irq_handler_data(d);
unsigned status = irqd_get_trigger_type(d);
status &= IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING;
@@ -447,7 +447,7 @@ davinci_gpio_irq_map(struct irq_domain *d, unsigned int irq,
"davinci_gpio");
irq_set_irq_type(irq, IRQ_TYPE_NONE);
irq_set_chip_data(irq, (__force void *)g);
- irq_set_handler_data(irq, (void *)__gpio_mask(hw));
+ irq_set_handler_data(irq, (void *)(uintptr_t)__gpio_mask(hw));
return 0;
}
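The cast changes above avoid truncating a pointer-sized cookie on 64-bit builds. A minimal sketch of the pattern (not driver code): uintptr_t is wide enough for a void * by definition, so the round trip below is well defined where a cast through u32 would warn or truncate:

	unsigned int mask = 1u << 4;				/* small integer payload */
	void *cookie = (void *)(uintptr_t)mask;			/* stash in handler data */
	unsigned int back = (unsigned int)(uintptr_t)cookie;	/* recover it intact */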
@@ -632,6 +632,7 @@ done:
static const struct of_device_id davinci_gpio_ids[] = {
{ .compatible = "ti,keystone-gpio", keystone_gpio_get_irq_chip},
+ { .compatible = "ti,am654-gpio", keystone_gpio_get_irq_chip},
{ .compatible = "ti,dm6441-gpio", davinci_gpio_get_irq_chip},
{ /* sentinel */ },
};
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index 77092268ee95..7b9ac4a12c20 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -568,7 +568,6 @@ static int sprd_eic_probe(struct platform_device *pdev)
const struct sprd_eic_variant_data *pdata;
struct gpio_irq_chip *irq;
struct sprd_eic *sprd_eic;
- struct resource *res;
int ret, i;
pdata = of_device_get_match_data(&pdev->dev);
@@ -597,13 +596,9 @@ static int sprd_eic_probe(struct platform_device *pdev)
* have one bank EIC, thus base[1] and base[2] can be
* optional.
*/
- res = platform_get_resource(pdev, IORESOURCE_MEM, i);
- if (!res)
- continue;
-
- sprd_eic->base[i] = devm_ioremap_resource(&pdev->dev, res);
+ sprd_eic->base[i] = devm_platform_ioremap_resource(pdev, i);
if (IS_ERR(sprd_eic->base[i]))
- return PTR_ERR(sprd_eic->base[i]);
+ continue;
}
sprd_eic->chip.label = sprd_eic_label_name[sprd_eic->type];
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index 84a7375cee0a..b6af705a4e5f 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -270,10 +270,8 @@ static int em_gio_probe(struct platform_device *pdev)
int ret;
p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p) {
- ret = -ENOMEM;
- goto err0;
- }
+ if (!p)
+ return -ENOMEM;
p->pdev = pdev;
platform_set_drvdata(pdev, p);
@@ -286,30 +284,22 @@ static int em_gio_probe(struct platform_device *pdev)
if (!io[0] || !io[1] || !irq[0] || !irq[1]) {
dev_err(&pdev->dev, "missing IRQ or IOMEM\n");
- ret = -EINVAL;
- goto err0;
+ return -EINVAL;
}
p->base0 = devm_ioremap_nocache(&pdev->dev, io[0]->start,
resource_size(io[0]));
- if (!p->base0) {
- dev_err(&pdev->dev, "failed to remap low I/O memory\n");
- ret = -ENXIO;
- goto err0;
- }
+ if (!p->base0)
+ return -ENOMEM;
p->base1 = devm_ioremap_nocache(&pdev->dev, io[1]->start,
resource_size(io[1]));
- if (!p->base1) {
- dev_err(&pdev->dev, "failed to remap high I/O memory\n");
- ret = -ENXIO;
- goto err0;
- }
+ if (!p->base1)
+ return -ENOMEM;
if (of_property_read_u32(pdev->dev.of_node, "ngpios", &ngpios)) {
dev_err(&pdev->dev, "Missing ngpios OF property\n");
- ret = -EINVAL;
- goto err0;
+ return -EINVAL;
}
gpio_chip = &p->gpio_chip;
@@ -339,9 +329,8 @@ static int em_gio_probe(struct platform_device *pdev)
p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, ngpios, 0,
&em_gio_irq_domain_ops, p);
if (!p->irq_domain) {
- ret = -ENXIO;
dev_err(&pdev->dev, "cannot initialize irq domain\n");
- goto err0;
+ return -ENXIO;
}
if (devm_request_irq(&pdev->dev, irq[0]->start,
@@ -358,7 +347,7 @@ static int em_gio_probe(struct platform_device *pdev)
goto err1;
}
- ret = gpiochip_add_data(gpio_chip, p);
+ ret = devm_gpiochip_add_data(&pdev->dev, gpio_chip, p);
if (ret) {
dev_err(&pdev->dev, "failed to add GPIO controller\n");
goto err1;
@@ -368,7 +357,6 @@ static int em_gio_probe(struct platform_device *pdev)
err1:
irq_domain_remove(p->irq_domain);
-err0:
return ret;
}
@@ -376,8 +364,6 @@ static int em_gio_remove(struct platform_device *pdev)
{
struct em_gio_priv *p = platform_get_drvdata(pdev);
- gpiochip_remove(&p->gpio_chip);
-
irq_domain_remove(p->irq_domain);
return 0;
}
diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c
index 71728d6e0bca..a90870a60c15 100644
--- a/drivers/gpio/gpio-ep93xx.c
+++ b/drivers/gpio/gpio-ep93xx.c
@@ -393,16 +393,13 @@ static int ep93xx_gpio_add_bank(struct gpio_chip *gc, struct device *dev,
static int ep93xx_gpio_probe(struct platform_device *pdev)
{
struct ep93xx_gpio *epg;
- struct resource *res;
int i;
- struct device *dev = &pdev->dev;
- epg = devm_kzalloc(dev, sizeof(*epg), GFP_KERNEL);
+ epg = devm_kzalloc(&pdev->dev, sizeof(*epg), GFP_KERNEL);
if (!epg)
return -ENOMEM;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- epg->base = devm_ioremap_resource(dev, res);
+ epg->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(epg->base))
return PTR_ERR(epg->base);
diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c
index 8ff8ce2970d9..250e71f3e688 100644
--- a/drivers/gpio/gpio-ftgpio010.c
+++ b/drivers/gpio/gpio-ftgpio010.c
@@ -226,6 +226,7 @@ static int ftgpio_gpio_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct ftgpio_gpio *g;
+ struct gpio_irq_chip *girq;
int irq;
int ret;
@@ -277,6 +278,24 @@ static int ftgpio_gpio_probe(struct platform_device *pdev)
if (!IS_ERR(g->clk))
g->gc.set_config = ftgpio_gpio_set_config;
+ g->irq.name = "FTGPIO010";
+ g->irq.irq_ack = ftgpio_gpio_ack_irq;
+ g->irq.irq_mask = ftgpio_gpio_mask_irq;
+ g->irq.irq_unmask = ftgpio_gpio_unmask_irq;
+ g->irq.irq_set_type = ftgpio_gpio_set_irq_type;
+
+ girq = &g->gc.irq;
+ girq->chip = &g->irq;
+ girq->parent_handler = ftgpio_gpio_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+ girq->parents[0] = irq;
+
ret = devm_gpiochip_add_data(dev, &g->gc, g);
if (ret)
goto dis_clk;
@@ -289,22 +308,6 @@ static int ftgpio_gpio_probe(struct platform_device *pdev)
/* Clear any use of debounce */
writel(0x0, g->base + GPIO_DEBOUNCE_EN);
- g->irq.name = "FTGPIO010";
- g->irq.irq_ack = ftgpio_gpio_ack_irq;
- g->irq.irq_mask = ftgpio_gpio_mask_irq;
- g->irq.irq_unmask = ftgpio_gpio_unmask_irq;
- g->irq.irq_set_type = ftgpio_gpio_set_irq_type;
-
- ret = gpiochip_irqchip_add(&g->gc, &g->irq,
- 0, handle_bad_irq,
- IRQ_TYPE_NONE);
- if (ret) {
- dev_info(dev, "could not add irqchip\n");
- goto dis_clk;
- }
- gpiochip_set_chained_irqchip(&g->gc, &g->irq,
- irq, ftgpio_gpio_irq_handler);
-
platform_set_drvdata(pdev, g);
dev_info(dev, "FTGPIO010 @%p registered\n", g->base);
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
index 7df48e76baea..0937b605e134 100644
--- a/drivers/gpio/gpio-grgpio.c
+++ b/drivers/gpio/gpio-grgpio.c
@@ -329,7 +329,6 @@ static int grgpio_probe(struct platform_device *ofdev)
void __iomem *regs;
struct gpio_chip *gc;
struct grgpio_priv *priv;
- struct resource *res;
int err;
u32 prop;
s32 *irqmap;
@@ -340,8 +339,7 @@ static int grgpio_probe(struct platform_device *ofdev)
if (!priv)
return -ENOMEM;
- res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
- regs = devm_ioremap_resource(&ofdev->dev, res);
+ regs = devm_platform_ioremap_resource(ofdev, 0);
if (IS_ERR(regs))
return PTR_ERR(regs);
diff --git a/drivers/gpio/gpio-ixp4xx.c b/drivers/gpio/gpio-ixp4xx.c
index 4b1cf7ea858d..670c2a85a35b 100644
--- a/drivers/gpio/gpio-ixp4xx.c
+++ b/drivers/gpio/gpio-ixp4xx.c
@@ -205,20 +205,20 @@ static int ixp4xx_gpio_irq_domain_translate(struct irq_domain *domain,
unsigned long *hwirq,
unsigned int *type)
{
+ int ret;
/* We support standard DT translation */
if (is_of_node(fwspec->fwnode) && fwspec->param_count == 2) {
- *hwirq = fwspec->param[0];
- *type = fwspec->param[1];
- return 0;
+ return irq_domain_translate_twocell(domain, fwspec,
+ hwirq, type);
}
/* This goes away when we transition to DT */
if (is_fwnode_irqchip(fwspec->fwnode)) {
- if (fwspec->param_count != 2)
- return -EINVAL;
- *hwirq = fwspec->param[0];
- *type = fwspec->param[1];
+ ret = irq_domain_translate_twocell(domain, fwspec,
+ hwirq, type);
+ if (ret)
+ return ret;
WARN_ON(*type == IRQ_TYPE_NONE);
return 0;
}
diff --git a/drivers/gpio/gpio-janz-ttl.c b/drivers/gpio/gpio-janz-ttl.c
index 6b5b5a8b9173..cdf50e4ea165 100644
--- a/drivers/gpio/gpio-janz-ttl.c
+++ b/drivers/gpio/gpio-janz-ttl.c
@@ -140,18 +140,17 @@ static void ttl_setup_device(struct ttl_module *mod)
static int ttl_probe(struct platform_device *pdev)
{
struct janz_platform_data *pdata;
- struct device *dev = &pdev->dev;
struct ttl_module *mod;
struct gpio_chip *gpio;
int ret;
pdata = dev_get_platdata(&pdev->dev);
if (!pdata) {
- dev_err(dev, "no platform data\n");
+ dev_err(&pdev->dev, "no platform data\n");
return -ENXIO;
}
- mod = devm_kzalloc(dev, sizeof(*mod), GFP_KERNEL);
+ mod = devm_kzalloc(&pdev->dev, sizeof(*mod), GFP_KERNEL);
if (!mod)
return -ENOMEM;
@@ -177,9 +176,9 @@ static int ttl_probe(struct platform_device *pdev)
gpio->base = -1;
gpio->ngpio = 20;
- ret = devm_gpiochip_add_data(dev, gpio, NULL);
+ ret = devm_gpiochip_add_data(&pdev->dev, gpio, NULL);
if (ret) {
- dev_err(dev, "unable to add GPIO chip\n");
+ dev_err(&pdev->dev, "unable to add GPIO chip\n");
return ret;
}
diff --git a/drivers/gpio/gpio-madera.c b/drivers/gpio/gpio-madera.c
index c9dad0543672..4dbc837d1215 100644
--- a/drivers/gpio/gpio-madera.c
+++ b/drivers/gpio/gpio-madera.c
@@ -1,12 +1,8 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-only
/*
* GPIO support for Cirrus Logic Madera codecs
*
* Copyright (C) 2015-2018 Cirrus Logic
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2.
*/
#include <linux/gpio/driver.h>
diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index 5e4102e7b1f9..5fb0bcf31142 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -649,12 +649,12 @@ static int max732x_probe(struct i2c_client *client,
case 0x60:
chip->client_group_a = client;
if (nr_port > 8) {
- c = i2c_new_dummy(client->adapter, addr_b);
- if (!c) {
+ c = devm_i2c_new_dummy_device(&client->dev,
+ client->adapter, addr_b);
+ if (IS_ERR(c)) {
dev_err(&client->dev,
"Failed to allocate I2C device\n");
- ret = -ENODEV;
- goto out_failed;
+ return PTR_ERR(c);
}
chip->client_group_b = chip->client_dummy = c;
}
@@ -662,12 +662,12 @@ static int max732x_probe(struct i2c_client *client,
case 0x50:
chip->client_group_b = client;
if (nr_port > 8) {
- c = i2c_new_dummy(client->adapter, addr_a);
- if (!c) {
+ c = devm_i2c_new_dummy_device(&client->dev,
+ client->adapter, addr_a);
+ if (IS_ERR(c)) {
dev_err(&client->dev,
"Failed to allocate I2C device\n");
- ret = -ENODEV;
- goto out_failed;
+ return PTR_ERR(c);
}
chip->client_group_a = chip->client_dummy = c;
}
@@ -675,37 +675,33 @@ static int max732x_probe(struct i2c_client *client,
default:
dev_err(&client->dev, "invalid I2C address specified %02x\n",
client->addr);
- ret = -EINVAL;
- goto out_failed;
+ return -EINVAL;
}
if (nr_port > 8 && !chip->client_dummy) {
dev_err(&client->dev,
"Failed to allocate second group I2C device\n");
- ret = -ENODEV;
- goto out_failed;
+ return -ENODEV;
}
mutex_init(&chip->lock);
ret = max732x_readb(chip, is_group_a(chip, 0), &chip->reg_out[0]);
if (ret)
- goto out_failed;
+ return ret;
if (nr_port > 8) {
ret = max732x_readb(chip, is_group_a(chip, 8), &chip->reg_out[1]);
if (ret)
- goto out_failed;
+ return ret;
}
- ret = gpiochip_add_data(&chip->gpio_chip, chip);
+ ret = devm_gpiochip_add_data(&client->dev, &chip->gpio_chip, chip);
if (ret)
- goto out_failed;
+ return ret;
ret = max732x_irq_setup(chip, id);
- if (ret) {
- gpiochip_remove(&chip->gpio_chip);
- goto out_failed;
- }
+ if (ret)
+ return ret;
if (pdata && pdata->setup) {
ret = pdata->setup(client, chip->gpio_chip.base,
@@ -716,10 +712,6 @@ static int max732x_probe(struct i2c_client *client,
i2c_set_clientdata(client, chip);
return 0;
-
-out_failed:
- i2c_unregister_device(chip->client_dummy);
- return ret;
}
static int max732x_remove(struct i2c_client *client)
@@ -739,11 +731,6 @@ static int max732x_remove(struct i2c_client *client)
}
}
- gpiochip_remove(&chip->gpio_chip);
-
- /* unregister any dummy i2c_client */
- i2c_unregister_device(chip->client_dummy);
-
return 0;
}
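
`devm_i2c_new_dummy_device()` is the device-managed variant of `i2c_new_dummy()`: it reports failure via `ERR_PTR()` rather than NULL, and the dummy client is unregistered automatically, which is what lets both the `out_failed` unwind label and the explicit unregister calls in `max732x_remove()` disappear. A minimal sketch of the pattern (the helper name and secondary address are illustrative):

```c
#include <linux/err.h>
#include <linux/i2c.h>

static int foo_attach_second_address(struct i2c_client *client, u16 addr_b)
{
	struct i2c_client *dummy;

	/* Lifetime is tied to client->dev; no manual unregister needed */
	dummy = devm_i2c_new_dummy_device(&client->dev, client->adapter,
					  addr_b);
	if (IS_ERR(dummy))
		return PTR_ERR(dummy);	/* note: IS_ERR(), not a NULL check */

	i2c_set_clientdata(client, dummy);
	return 0;
}
```
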
diff --git a/drivers/gpio/gpio-mb86s7x.c b/drivers/gpio/gpio-mb86s7x.c
index 9bfff171f9fe..8f466993cd24 100644
--- a/drivers/gpio/gpio-mb86s7x.c
+++ b/drivers/gpio/gpio-mb86s7x.c
@@ -6,6 +6,7 @@
* Copyright (C) 2015 Linaro Ltd.
*/
+#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/clk.h>
@@ -19,6 +20,8 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include "gpiolib.h"
+
/*
* Only the first 8 bits of a register correspond to each pin,
* so there are 4 registers for 32 pins.
@@ -135,6 +138,20 @@ static void mb86s70_gpio_set(struct gpio_chip *gc, unsigned gpio, int value)
spin_unlock_irqrestore(&gchip->lock, flags);
}
+static int mb86s70_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
+{
+ int irq, index;
+
+ for (index = 0;; index++) {
+ irq = platform_get_irq(to_platform_device(gc->parent), index);
+ if (irq <= 0)
+ break;
+ if (irq_get_irq_data(irq)->hwirq == offset)
+ return irq;
+ }
+ return -EINVAL;
+}
+
static int mb86s70_gpio_probe(struct platform_device *pdev)
{
struct mb86s70_gpio_chip *gchip;
@@ -150,13 +167,15 @@ static int mb86s70_gpio_probe(struct platform_device *pdev)
if (IS_ERR(gchip->base))
return PTR_ERR(gchip->base);
- gchip->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(gchip->clk))
- return PTR_ERR(gchip->clk);
+ if (!has_acpi_companion(&pdev->dev)) {
+ gchip->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(gchip->clk))
+ return PTR_ERR(gchip->clk);
- ret = clk_prepare_enable(gchip->clk);
- if (ret)
- return ret;
+ ret = clk_prepare_enable(gchip->clk);
+ if (ret)
+ return ret;
+ }
spin_lock_init(&gchip->lock);
@@ -172,19 +191,28 @@ static int mb86s70_gpio_probe(struct platform_device *pdev)
gchip->gc.parent = &pdev->dev;
gchip->gc.base = -1;
+ if (has_acpi_companion(&pdev->dev))
+ gchip->gc.to_irq = mb86s70_gpio_to_irq;
+
ret = gpiochip_add_data(&gchip->gc, gchip);
if (ret) {
dev_err(&pdev->dev, "couldn't register gpio driver\n");
clk_disable_unprepare(gchip->clk);
+ return ret;
}
- return ret;
+ if (has_acpi_companion(&pdev->dev))
+ acpi_gpiochip_request_interrupts(&gchip->gc);
+
+ return 0;
}
static int mb86s70_gpio_remove(struct platform_device *pdev)
{
struct mb86s70_gpio_chip *gchip = platform_get_drvdata(pdev);
+ if (has_acpi_companion(&pdev->dev))
+ acpi_gpiochip_free_interrupts(&gchip->gc);
gpiochip_remove(&gchip->gc);
clk_disable_unprepare(gchip->clk);
@@ -197,10 +225,19 @@ static const struct of_device_id mb86s70_gpio_dt_ids[] = {
};
MODULE_DEVICE_TABLE(of, mb86s70_gpio_dt_ids);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id mb86s70_gpio_acpi_ids[] = {
+ { "SCX0007" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, mb86s70_gpio_acpi_ids);
+#endif
+
static struct platform_driver mb86s70_gpio_driver = {
.driver = {
.name = "mb86s70-gpio",
.of_match_table = mb86s70_gpio_dt_ids,
+ .acpi_match_table = ACPI_PTR(mb86s70_gpio_acpi_ids),
},
.probe = mb86s70_gpio_probe,
.remove = mb86s70_gpio_remove,
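
The mb86s7x conversion hinges on `has_acpi_companion()`: on ACPI systems the clocks are firmware-managed, so the clk handling only runs on DT. A condensed sketch of the match-table plumbing, assuming the same "SCX0007" ID the hunk above registers (other names are illustrative):

```c
#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk;

	if (!has_acpi_companion(&pdev->dev)) {
		/* Clocks are only described (and needed) on DT systems */
		clk = devm_clk_get(&pdev->dev, NULL);
		if (IS_ERR(clk))
			return PTR_ERR(clk);
		return clk_prepare_enable(clk);
	}
	return 0;
}

#ifdef CONFIG_ACPI
static const struct acpi_device_id foo_acpi_ids[] = {
	{ "SCX0007" },	/* ID taken from the hunk above */
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(acpi, foo_acpi_ids);
#endif

static struct platform_driver foo_driver = {
	.driver = {
		.name = "foo-gpio",
		.acpi_match_table = ACPI_PTR(foo_acpi_ids),
	},
	.probe = foo_probe,
};
```
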
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index b6a4efce7c92..f1a9c0544e3f 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -315,7 +315,6 @@ static void gpio_mockup_debugfs_setup(struct device *dev,
struct gpio_mockup_chip *chip)
{
struct gpio_mockup_dbgfs_private *priv;
- struct dentry *evfile;
struct gpio_chip *gc;
const char *devname;
char *name;
@@ -325,32 +324,25 @@ static void gpio_mockup_debugfs_setup(struct device *dev,
devname = dev_name(&gc->gpiodev->dev);
chip->dbg_dir = debugfs_create_dir(devname, gpio_mockup_dbg_dir);
- if (IS_ERR_OR_NULL(chip->dbg_dir))
- goto err;
for (i = 0; i < gc->ngpio; i++) {
name = devm_kasprintf(dev, GFP_KERNEL, "%d", i);
if (!name)
- goto err;
+ return;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
- goto err;
+ return;
priv->chip = chip;
priv->offset = i;
priv->desc = &gc->gpiodev->descs[i];
- evfile = debugfs_create_file(name, 0200, chip->dbg_dir, priv,
- &gpio_mockup_debugfs_ops);
- if (IS_ERR_OR_NULL(evfile))
- goto err;
+ debugfs_create_file(name, 0200, chip->dbg_dir, priv,
+ &gpio_mockup_debugfs_ops);
}
return;
-
-err:
- dev_err(dev, "error creating debugfs files\n");
}
static int gpio_mockup_name_lines(struct device *dev,
@@ -447,8 +439,7 @@ static int gpio_mockup_probe(struct platform_device *pdev)
if (rv)
return rv;
- if (!IS_ERR_OR_NULL(gpio_mockup_dbg_dir))
- gpio_mockup_debugfs_setup(dev, chip);
+ gpio_mockup_debugfs_setup(dev, chip);
return 0;
}
@@ -501,8 +492,6 @@ static int __init gpio_mockup_init(void)
}
gpio_mockup_dbg_dir = debugfs_create_dir("gpio-mockup", NULL);
- if (IS_ERR_OR_NULL(gpio_mockup_dbg_dir))
- gpio_mockup_err("error creating debugfs directory\n");
err = platform_driver_register(&gpio_mockup_driver);
if (err) {
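
Both mockup hunks apply the kernel's debugfs convention: callers should not check `debugfs_create_*()` return values, because the API absorbs errors (including a previously failed parent directory) and a driver must keep working whether or not debugfs is available. The whole setup then reduces to unconditional calls, as in this sketch with illustrative names:

```c
#include <linux/debugfs.h>

static struct dentry *foo_dbg_dir;

/* No IS_ERR_OR_NULL() checks and no unwind path on debugfs failure */
static void foo_debugfs_setup(void *priv, const struct file_operations *fops)
{
	foo_dbg_dir = debugfs_create_dir("foo", NULL);

	/* Copes with an ERR_PTR parent; simply does nothing useful then */
	debugfs_create_file("events", 0200, foo_dbg_dir, priv, fops);
}
```
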
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 059094ac44cb..869d47f89599 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -38,6 +38,7 @@
#include <linux/err.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
@@ -618,18 +619,14 @@ static int mvebu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
ret = -EBUSY;
} else {
desc = gpiochip_request_own_desc(&mvchip->chip,
- pwm->hwpwm, "mvebu-pwm", 0);
+ pwm->hwpwm, "mvebu-pwm",
+ GPIO_ACTIVE_HIGH,
+ GPIOD_OUT_LOW);
if (IS_ERR(desc)) {
ret = PTR_ERR(desc);
goto out;
}
- ret = gpiod_direction_output(desc, 0);
- if (ret) {
- gpiochip_free_own_desc(desc);
- goto out;
- }
-
mvpwm->gpiod = desc;
}
out:
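
`gpiochip_request_own_desc()` grows two flag arguments in this series: machine lookup flags (polarity and similar line semantics) and descriptor flags (initial direction and value). That is why the follow-up `gpiod_direction_output()` above becomes redundant. A sketch of the new call shape, with hypothetical names:

```c
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>

static struct gpio_desc *foo_claim_pwm_line(struct gpio_chip *gc,
					    unsigned int hwnum)
{
	/* Polarity via lookup flags, initial direction via descriptor flags */
	return gpiochip_request_own_desc(gc, hwnum, "foo-pwm",
					 GPIO_ACTIVE_HIGH, GPIOD_OUT_LOW);
}
```
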
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 9276ef616430..d0f27084a942 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -44,8 +44,9 @@ struct gpio_regs {
};
struct gpio_bank {
- struct list_head node;
void __iomem *base;
+ const struct omap_gpio_reg_offs *regs;
+
int irq;
u32 non_wakeup_gpios;
u32 enabled_non_wakeup_gpios;
@@ -72,11 +73,7 @@ struct gpio_bank {
int context_loss_count;
void (*set_dataout)(struct gpio_bank *bank, unsigned gpio, int enable);
- void (*set_dataout_multiple)(struct gpio_bank *bank,
- unsigned long *mask, unsigned long *bits);
int (*get_context_loss_count)(struct device *dev);
-
- struct omap_gpio_reg_offs *regs;
};
#define GPIO_MOD_CTRL_BIT BIT(0)
@@ -92,20 +89,25 @@ static inline struct gpio_bank *omap_irq_data_get_bank(struct irq_data *d)
return gpiochip_get_data(chip);
}
-static void omap_set_gpio_direction(struct gpio_bank *bank, int gpio,
- int is_input)
+static inline u32 omap_gpio_rmw(void __iomem *reg, u32 mask, bool set)
{
- void __iomem *reg = bank->base;
- u32 l;
+ u32 val = readl_relaxed(reg);
- reg += bank->regs->direction;
- l = readl_relaxed(reg);
- if (is_input)
- l |= BIT(gpio);
+ if (set)
+ val |= mask;
else
- l &= ~(BIT(gpio));
- writel_relaxed(l, reg);
- bank->context.oe = l;
+ val &= ~mask;
+
+ writel_relaxed(val, reg);
+
+ return val;
+}
+
+static void omap_set_gpio_direction(struct gpio_bank *bank, int gpio,
+ int is_input)
+{
+ bank->context.oe = omap_gpio_rmw(bank->base + bank->regs->direction,
+ BIT(gpio), is_input);
}
@@ -131,88 +133,8 @@ static void omap_set_gpio_dataout_reg(struct gpio_bank *bank, unsigned offset,
static void omap_set_gpio_dataout_mask(struct gpio_bank *bank, unsigned offset,
int enable)
{
- void __iomem *reg = bank->base + bank->regs->dataout;
- u32 gpio_bit = BIT(offset);
- u32 l;
-
- l = readl_relaxed(reg);
- if (enable)
- l |= gpio_bit;
- else
- l &= ~gpio_bit;
- writel_relaxed(l, reg);
- bank->context.dataout = l;
-}
-
-static int omap_get_gpio_datain(struct gpio_bank *bank, int offset)
-{
- void __iomem *reg = bank->base + bank->regs->datain;
-
- return (readl_relaxed(reg) & (BIT(offset))) != 0;
-}
-
-static int omap_get_gpio_dataout(struct gpio_bank *bank, int offset)
-{
- void __iomem *reg = bank->base + bank->regs->dataout;
-
- return (readl_relaxed(reg) & (BIT(offset))) != 0;
-}
-
-/* set multiple data out values using dedicate set/clear register */
-static void omap_set_gpio_dataout_reg_multiple(struct gpio_bank *bank,
- unsigned long *mask,
- unsigned long *bits)
-{
- void __iomem *reg = bank->base;
- u32 l;
-
- l = *bits & *mask;
- writel_relaxed(l, reg + bank->regs->set_dataout);
- bank->context.dataout |= l;
-
- l = ~*bits & *mask;
- writel_relaxed(l, reg + bank->regs->clr_dataout);
- bank->context.dataout &= ~l;
-}
-
-/* set multiple data out values using mask register */
-static void omap_set_gpio_dataout_mask_multiple(struct gpio_bank *bank,
- unsigned long *mask,
- unsigned long *bits)
-{
- void __iomem *reg = bank->base + bank->regs->dataout;
- u32 l = (readl_relaxed(reg) & ~*mask) | (*bits & *mask);
-
- writel_relaxed(l, reg);
- bank->context.dataout = l;
-}
-
-static unsigned long omap_get_gpio_datain_multiple(struct gpio_bank *bank,
- unsigned long *mask)
-{
- void __iomem *reg = bank->base + bank->regs->datain;
-
- return readl_relaxed(reg) & *mask;
-}
-
-static unsigned long omap_get_gpio_dataout_multiple(struct gpio_bank *bank,
- unsigned long *mask)
-{
- void __iomem *reg = bank->base + bank->regs->dataout;
-
- return readl_relaxed(reg) & *mask;
-}
-
-static inline void omap_gpio_rmw(void __iomem *base, u32 reg, u32 mask, bool set)
-{
- int l = readl_relaxed(base + reg);
-
- if (set)
- l |= mask;
- else
- l &= ~mask;
-
- writel_relaxed(l, base + reg);
+ bank->context.dataout = omap_gpio_rmw(bank->base + bank->regs->dataout,
+ BIT(offset), enable);
}
static inline void omap_gpio_dbck_enable(struct gpio_bank *bank)
@@ -256,7 +178,6 @@ static inline void omap_gpio_dbck_disable(struct gpio_bank *bank)
static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
unsigned debounce)
{
- void __iomem *reg;
u32 val;
u32 l;
bool enable = !!debounce;
@@ -273,19 +194,11 @@ static int omap2_set_gpio_debounce(struct gpio_bank *bank, unsigned offset,
l = BIT(offset);
clk_enable(bank->dbck);
- reg = bank->base + bank->regs->debounce;
- writel_relaxed(debounce, reg);
+ writel_relaxed(debounce, bank->base + bank->regs->debounce);
- reg = bank->base + bank->regs->debounce_en;
- val = readl_relaxed(reg);
-
- if (enable)
- val |= l;
- else
- val &= ~l;
+ val = omap_gpio_rmw(bank->base + bank->regs->debounce_en, l, enable);
bank->dbck_enable_mask = val;
- writel_relaxed(val, reg);
clk_disable(bank->dbck);
/*
* Enable debounce clock per module.
@@ -360,9 +273,9 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
void __iomem *base = bank->base;
u32 gpio_bit = BIT(gpio);
- omap_gpio_rmw(base, bank->regs->leveldetect0, gpio_bit,
+ omap_gpio_rmw(base + bank->regs->leveldetect0, gpio_bit,
trigger & IRQ_TYPE_LEVEL_LOW);
- omap_gpio_rmw(base, bank->regs->leveldetect1, gpio_bit,
+ omap_gpio_rmw(base + bank->regs->leveldetect1, gpio_bit,
trigger & IRQ_TYPE_LEVEL_HIGH);
/*
@@ -370,9 +283,9 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
* to be woken from idle state. Set the appropriate edge detection
* in addition to the level detection.
*/
- omap_gpio_rmw(base, bank->regs->risingdetect, gpio_bit,
+ omap_gpio_rmw(base + bank->regs->risingdetect, gpio_bit,
trigger & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH));
- omap_gpio_rmw(base, bank->regs->fallingdetect, gpio_bit,
+ omap_gpio_rmw(base + bank->regs->fallingdetect, gpio_bit,
trigger & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_LEVEL_LOW));
bank->context.leveldetect0 =
@@ -384,11 +297,8 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
bank->context.fallingdetect =
readl_relaxed(bank->base + bank->regs->fallingdetect);
- if (likely(!(bank->non_wakeup_gpios & gpio_bit))) {
- omap_gpio_rmw(base, bank->regs->wkup_en, gpio_bit, trigger != 0);
- bank->context.wake_en =
- readl_relaxed(bank->base + bank->regs->wkup_en);
- }
+ bank->level_mask = bank->context.leveldetect0 |
+ bank->context.leveldetect1;
/* This part needs to be executed always for OMAP{34xx, 44xx} */
if (!bank->regs->irqctrl && !omap_gpio_is_off_wakeup_capable(bank, gpio)) {
@@ -403,44 +313,25 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
else
bank->enabled_non_wakeup_gpios &= ~gpio_bit;
}
-
- bank->level_mask =
- readl_relaxed(bank->base + bank->regs->leveldetect0) |
- readl_relaxed(bank->base + bank->regs->leveldetect1);
}
-#ifdef CONFIG_ARCH_OMAP1
/*
* This only applies to chips that can't do both rising and falling edge
* detection at once. For all other chips, this function is a noop.
*/
static void omap_toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio)
{
- void __iomem *reg = bank->base;
- u32 l = 0;
-
- if (!bank->regs->irqctrl)
- return;
+ if (IS_ENABLED(CONFIG_ARCH_OMAP1) && bank->regs->irqctrl) {
+ void __iomem *reg = bank->base + bank->regs->irqctrl;
- reg += bank->regs->irqctrl;
-
- l = readl_relaxed(reg);
- if ((l >> gpio) & 1)
- l &= ~(BIT(gpio));
- else
- l |= BIT(gpio);
-
- writel_relaxed(l, reg);
+ writel_relaxed(readl_relaxed(reg) ^ BIT(gpio), reg);
+ }
}
-#else
-static void omap_toggle_gpio_edge_triggering(struct gpio_bank *bank, int gpio) {}
-#endif
static int omap_set_gpio_triggering(struct gpio_bank *bank, int gpio,
unsigned trigger)
{
void __iomem *reg = bank->base;
- void __iomem *base = bank->base;
u32 l = 0;
if (bank->regs->leveldetect0 && bank->regs->wkup_en) {
@@ -472,11 +363,6 @@ static int omap_set_gpio_triggering(struct gpio_bank *bank, int gpio,
l |= 2 << (gpio << 1);
if (trigger & IRQ_TYPE_EDGE_FALLING)
l |= BIT(gpio << 1);
-
- /* Enable wake-up during idle for dynamic tick */
- omap_gpio_rmw(base, bank->regs->wkup_en, BIT(gpio), trigger);
- bank->context.wake_en =
- readl_relaxed(bank->base + bank->regs->wkup_en);
writel_relaxed(l, reg);
}
return 0;
@@ -505,17 +391,6 @@ static void omap_enable_gpio_module(struct gpio_bank *bank, unsigned offset)
static void omap_disable_gpio_module(struct gpio_bank *bank, unsigned offset)
{
- void __iomem *base = bank->base;
-
- if (bank->regs->wkup_en &&
- !LINE_USED(bank->mod_usage, offset) &&
- !LINE_USED(bank->irq_usage, offset)) {
- /* Disable wake-up during idle for dynamic tick */
- omap_gpio_rmw(base, bank->regs->wkup_en, BIT(offset), 0);
- bank->context.wake_en =
- readl_relaxed(bank->base + bank->regs->wkup_en);
- }
-
if (bank->regs->ctrl && !BANK_USED(bank)) {
void __iomem *reg = bank->base + bank->regs->ctrl;
u32 ctrl;
@@ -626,57 +501,39 @@ static u32 omap_get_gpio_irqbank_mask(struct gpio_bank *bank)
return l;
}
-static void omap_enable_gpio_irqbank(struct gpio_bank *bank, int gpio_mask)
+static inline void omap_set_gpio_irqenable(struct gpio_bank *bank,
+ unsigned offset, int enable)
{
void __iomem *reg = bank->base;
- u32 l;
+ u32 gpio_mask = BIT(offset);
- if (bank->regs->set_irqenable) {
- reg += bank->regs->set_irqenable;
- l = gpio_mask;
- bank->context.irqenable1 |= gpio_mask;
+ if (bank->regs->set_irqenable && bank->regs->clr_irqenable) {
+ if (enable) {
+ reg += bank->regs->set_irqenable;
+ bank->context.irqenable1 |= gpio_mask;
+ } else {
+ reg += bank->regs->clr_irqenable;
+ bank->context.irqenable1 &= ~gpio_mask;
+ }
+ writel_relaxed(gpio_mask, reg);
} else {
- reg += bank->regs->irqenable;
- l = readl_relaxed(reg);
- if (bank->regs->irqenable_inv)
- l &= ~gpio_mask;
- else
- l |= gpio_mask;
- bank->context.irqenable1 = l;
+ bank->context.irqenable1 =
+ omap_gpio_rmw(reg + bank->regs->irqenable, gpio_mask,
+ enable ^ bank->regs->irqenable_inv);
}
- writel_relaxed(l, reg);
-}
-
-static void omap_disable_gpio_irqbank(struct gpio_bank *bank, int gpio_mask)
-{
- void __iomem *reg = bank->base;
- u32 l;
-
- if (bank->regs->clr_irqenable) {
- reg += bank->regs->clr_irqenable;
- l = gpio_mask;
- bank->context.irqenable1 &= ~gpio_mask;
- } else {
- reg += bank->regs->irqenable;
- l = readl_relaxed(reg);
- if (bank->regs->irqenable_inv)
- l |= gpio_mask;
- else
- l &= ~gpio_mask;
- bank->context.irqenable1 = l;
+ /*
+ * Program GPIO wakeup along with IRQ enable to satisfy the OMAP4430
+ * TRM note requiring correlation between the IRQ enable registers and
+ * the wakeup registers. In any case, we want wakeup from idle
+ * enabled for the GPIOs which support this feature.
+ */
+ if (bank->regs->wkup_en &&
+ (bank->regs->edgectrl1 || !(bank->non_wakeup_gpios & gpio_mask))) {
+ bank->context.wake_en =
+ omap_gpio_rmw(bank->base + bank->regs->wkup_en,
+ gpio_mask, enable);
}
-
- writel_relaxed(l, reg);
-}
-
-static inline void omap_set_gpio_irqenable(struct gpio_bank *bank,
- unsigned offset, int enable)
-{
- if (enable)
- omap_enable_gpio_irqbank(bank, BIT(offset));
- else
- omap_disable_gpio_irqbank(bank, BIT(offset));
}
/* Use disable_irq_wake() and enable_irq_wake() functions from drivers */
@@ -687,38 +544,6 @@ static int omap_gpio_wake_enable(struct irq_data *d, unsigned int enable)
return irq_set_irq_wake(bank->irq, enable);
}
-static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
-{
- struct gpio_bank *bank = gpiochip_get_data(chip);
- unsigned long flags;
-
- pm_runtime_get_sync(chip->parent);
-
- raw_spin_lock_irqsave(&bank->lock, flags);
- omap_enable_gpio_module(bank, offset);
- bank->mod_usage |= BIT(offset);
- raw_spin_unlock_irqrestore(&bank->lock, flags);
-
- return 0;
-}
-
-static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
-{
- struct gpio_bank *bank = gpiochip_get_data(chip);
- unsigned long flags;
-
- raw_spin_lock_irqsave(&bank->lock, flags);
- bank->mod_usage &= ~(BIT(offset));
- if (!LINE_USED(bank->irq_usage, offset)) {
- omap_set_gpio_direction(bank, offset, 1);
- omap_clear_gpio_debounce(bank, offset);
- }
- omap_disable_gpio_module(bank, offset);
- raw_spin_unlock_irqrestore(&bank->lock, flags);
-
- pm_runtime_put(chip->parent);
-}
-
/*
* We need to unmask the GPIO bank interrupt as soon as possible to
* avoid missing GPIO interrupts for other lines in the bank.
@@ -731,7 +556,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
{
void __iomem *isr_reg = NULL;
- u32 enabled, isr, level_mask;
+ u32 enabled, isr, edge;
unsigned int bit;
struct gpio_bank *bank = gpiobank;
unsigned long wa_lock_flags;
@@ -751,16 +576,14 @@ static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
enabled = omap_get_gpio_irqbank_mask(bank);
isr = readl_relaxed(isr_reg) & enabled;
- if (bank->level_mask)
- level_mask = bank->level_mask & enabled;
- else
- level_mask = 0;
-
- /* clear edge sensitive interrupts before handler(s) are
- called so that we don't miss any interrupt occurred while
- executing them */
- if (isr & ~level_mask)
- omap_clear_gpio_irqbank(bank, isr & ~level_mask);
+ /*
+ * Clear edge sensitive interrupts before calling handler(s)
+ * so subsequent edge transitions are not missed while the
+ * handlers are running.
+ */
+ edge = isr & ~bank->level_mask;
+ if (edge)
+ omap_clear_gpio_irqbank(bank, edge);
raw_spin_unlock_irqrestore(&bank->lock, lock_flags);
@@ -807,8 +630,6 @@ static unsigned int omap_gpio_irq_startup(struct irq_data *d)
if (!LINE_USED(bank->mod_usage, offset))
omap_set_gpio_direction(bank, offset, 1);
- else if (!omap_gpio_is_input(bank, offset))
- goto err;
omap_enable_gpio_module(bank, offset);
bank->irq_usage |= BIT(offset);
@@ -816,9 +637,6 @@ static unsigned int omap_gpio_irq_startup(struct irq_data *d)
omap_gpio_unmask_irq(d);
return 0;
-err:
- raw_spin_unlock_irqrestore(&bank->lock, flags);
- return -EINVAL;
}
static void omap_gpio_irq_shutdown(struct irq_data *d)
@@ -829,9 +647,9 @@ static void omap_gpio_irq_shutdown(struct irq_data *d)
raw_spin_lock_irqsave(&bank->lock, flags);
bank->irq_usage &= ~(BIT(offset));
- omap_set_gpio_irqenable(bank, offset, 0);
- omap_clear_gpio_irqstatus(bank, offset);
omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
+ omap_clear_gpio_irqstatus(bank, offset);
+ omap_set_gpio_irqenable(bank, offset, 0);
if (!LINE_USED(bank->mod_usage, offset))
omap_clear_gpio_debounce(bank, offset);
omap_disable_gpio_module(bank, offset);
@@ -852,14 +670,6 @@ static void gpio_irq_bus_sync_unlock(struct irq_data *data)
pm_runtime_put(bank->chip.parent);
}
-static void omap_gpio_ack_irq(struct irq_data *d)
-{
- struct gpio_bank *bank = omap_irq_data_get_bank(d);
- unsigned offset = d->hwirq;
-
- omap_clear_gpio_irqstatus(bank, offset);
-}
-
static void omap_gpio_mask_irq(struct irq_data *d)
{
struct gpio_bank *bank = omap_irq_data_get_bank(d);
@@ -867,8 +677,8 @@ static void omap_gpio_mask_irq(struct irq_data *d)
unsigned long flags;
raw_spin_lock_irqsave(&bank->lock, flags);
- omap_set_gpio_irqenable(bank, offset, 0);
omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
+ omap_set_gpio_irqenable(bank, offset, 0);
raw_spin_unlock_irqrestore(&bank->lock, flags);
}
@@ -880,9 +690,6 @@ static void omap_gpio_unmask_irq(struct irq_data *d)
unsigned long flags;
raw_spin_lock_irqsave(&bank->lock, flags);
- if (trigger)
- omap_set_gpio_triggering(bank, offset, trigger);
-
omap_set_gpio_irqenable(bank, offset, 1);
/*
@@ -890,9 +697,13 @@ static void omap_gpio_unmask_irq(struct irq_data *d)
* is cleared, thus after the handler has run. OMAP4 needs this done
* after enabling the interrupt to clear the wakeup status.
*/
- if (bank->level_mask & BIT(offset))
+ if (bank->regs->leveldetect0 && bank->regs->wkup_en &&
+ trigger & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW))
omap_clear_gpio_irqstatus(bank, offset);
+ if (trigger)
+ omap_set_gpio_triggering(bank, offset, trigger);
+
raw_spin_unlock_irqrestore(&bank->lock, flags);
}
@@ -958,19 +769,44 @@ static inline void omap_mpuio_init(struct gpio_bank *bank)
/*---------------------------------------------------------------------*/
-static int omap_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+static int omap_gpio_request(struct gpio_chip *chip, unsigned offset)
{
- struct gpio_bank *bank;
+ struct gpio_bank *bank = gpiochip_get_data(chip);
+ unsigned long flags;
+
+ pm_runtime_get_sync(chip->parent);
+
+ raw_spin_lock_irqsave(&bank->lock, flags);
+ omap_enable_gpio_module(bank, offset);
+ bank->mod_usage |= BIT(offset);
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
+
+ return 0;
+}
+
+static void omap_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+ struct gpio_bank *bank = gpiochip_get_data(chip);
unsigned long flags;
- void __iomem *reg;
- int dir;
- bank = gpiochip_get_data(chip);
- reg = bank->base + bank->regs->direction;
raw_spin_lock_irqsave(&bank->lock, flags);
- dir = !!(readl_relaxed(reg) & BIT(offset));
+ bank->mod_usage &= ~(BIT(offset));
+ if (!LINE_USED(bank->irq_usage, offset)) {
+ omap_set_gpio_direction(bank, offset, 1);
+ omap_clear_gpio_debounce(bank, offset);
+ }
+ omap_disable_gpio_module(bank, offset);
raw_spin_unlock_irqrestore(&bank->lock, flags);
- return dir;
+
+ pm_runtime_put(chip->parent);
+}
+
+static int omap_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+{
+ struct gpio_bank *bank = gpiochip_get_data(chip);
+
+ return !!(readl_relaxed(bank->base + bank->regs->direction) &
+ BIT(offset));
}
static int omap_gpio_input(struct gpio_chip *chip, unsigned offset)
@@ -987,14 +823,15 @@ static int omap_gpio_input(struct gpio_chip *chip, unsigned offset)
static int omap_gpio_get(struct gpio_chip *chip, unsigned offset)
{
- struct gpio_bank *bank;
-
- bank = gpiochip_get_data(chip);
+ struct gpio_bank *bank = gpiochip_get_data(chip);
+ void __iomem *reg;
if (omap_gpio_is_input(bank, offset))
- return omap_get_gpio_datain(bank, offset);
+ reg = bank->base + bank->regs->datain;
else
- return omap_get_gpio_dataout(bank, offset);
+ reg = bank->base + bank->regs->dataout;
+
+ return (readl_relaxed(reg) & BIT(offset)) != 0;
}
static int omap_gpio_output(struct gpio_chip *chip, unsigned offset, int value)
@@ -1014,18 +851,20 @@ static int omap_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct gpio_bank *bank = gpiochip_get_data(chip);
- void __iomem *reg = bank->base + bank->regs->direction;
- unsigned long in = readl_relaxed(reg), l;
+ void __iomem *base = bank->base;
+ u32 direction, m, val = 0;
- *bits = 0;
+ direction = readl_relaxed(base + bank->regs->direction);
- l = in & *mask;
- if (l)
- *bits |= omap_get_gpio_datain_multiple(bank, &l);
+ m = direction & *mask;
+ if (m)
+ val |= readl_relaxed(base + bank->regs->datain) & m;
- l = ~in & *mask;
- if (l)
- *bits |= omap_get_gpio_dataout_multiple(bank, &l);
+ m = ~direction & *mask;
+ if (m)
+ val |= readl_relaxed(base + bank->regs->dataout) & m;
+
+ *bits = val;
return 0;
}
@@ -1078,10 +917,14 @@ static void omap_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask,
unsigned long *bits)
{
struct gpio_bank *bank = gpiochip_get_data(chip);
+ void __iomem *reg = bank->base + bank->regs->dataout;
unsigned long flags;
+ u32 l;
raw_spin_lock_irqsave(&bank->lock, flags);
- bank->set_dataout_multiple(bank, mask, bits);
+ l = (readl_relaxed(reg) & ~*mask) | (*bits & *mask);
+ writel_relaxed(l, reg);
+ bank->context.dataout = l;
raw_spin_unlock_irqrestore(&bank->lock, flags);
}
@@ -1115,9 +958,9 @@ static void omap_gpio_mod_init(struct gpio_bank *bank)
return;
}
- omap_gpio_rmw(base, bank->regs->irqenable, l,
+ omap_gpio_rmw(base + bank->regs->irqenable, l,
bank->regs->irqenable_inv);
- omap_gpio_rmw(base, bank->regs->irqstatus, l,
+ omap_gpio_rmw(base + bank->regs->irqstatus, l,
!bank->regs->irqenable_inv);
if (bank->regs->debounce_en)
writel_relaxed(0, base + bank->regs->debounce_en);
@@ -1180,11 +1023,8 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
#endif
/* MPUIO is a bit different, reading IRQ status clears it */
- if (bank->is_mpuio) {
- irqc->irq_ack = dummy_irq_chip.irq_ack;
- if (!bank->regs->wkup_en)
- irqc->irq_set_wake = NULL;
- }
+ if (bank->is_mpuio && !bank->regs->wkup_en)
+ irqc->irq_set_wake = NULL;
irq = &bank->chip.irq;
irq->chip = irqc;
@@ -1215,7 +1055,7 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
static void omap_gpio_init_context(struct gpio_bank *p)
{
- struct omap_gpio_reg_offs *regs = p->regs;
+ const struct omap_gpio_reg_offs *regs = p->regs;
void __iomem *base = p->base;
p->context.ctrl = readl_relaxed(base + regs->ctrl);
@@ -1227,60 +1067,56 @@ static void omap_gpio_init_context(struct gpio_bank *p)
p->context.fallingdetect = readl_relaxed(base + regs->fallingdetect);
p->context.irqenable1 = readl_relaxed(base + regs->irqenable);
p->context.irqenable2 = readl_relaxed(base + regs->irqenable2);
-
- if (regs->set_dataout && p->regs->clr_dataout)
- p->context.dataout = readl_relaxed(base + regs->set_dataout);
- else
- p->context.dataout = readl_relaxed(base + regs->dataout);
+ p->context.dataout = readl_relaxed(base + regs->dataout);
p->context_valid = true;
}
static void omap_gpio_restore_context(struct gpio_bank *bank)
{
- writel_relaxed(bank->context.wake_en,
- bank->base + bank->regs->wkup_en);
- writel_relaxed(bank->context.ctrl, bank->base + bank->regs->ctrl);
- writel_relaxed(bank->context.leveldetect0,
- bank->base + bank->regs->leveldetect0);
- writel_relaxed(bank->context.leveldetect1,
- bank->base + bank->regs->leveldetect1);
- writel_relaxed(bank->context.risingdetect,
- bank->base + bank->regs->risingdetect);
- writel_relaxed(bank->context.fallingdetect,
- bank->base + bank->regs->fallingdetect);
- if (bank->regs->set_dataout && bank->regs->clr_dataout)
- writel_relaxed(bank->context.dataout,
- bank->base + bank->regs->set_dataout);
- else
- writel_relaxed(bank->context.dataout,
- bank->base + bank->regs->dataout);
- writel_relaxed(bank->context.oe, bank->base + bank->regs->direction);
+ const struct omap_gpio_reg_offs *regs = bank->regs;
+ void __iomem *base = bank->base;
+
+ writel_relaxed(bank->context.wake_en, base + regs->wkup_en);
+ writel_relaxed(bank->context.ctrl, base + regs->ctrl);
+ writel_relaxed(bank->context.leveldetect0, base + regs->leveldetect0);
+ writel_relaxed(bank->context.leveldetect1, base + regs->leveldetect1);
+ writel_relaxed(bank->context.risingdetect, base + regs->risingdetect);
+ writel_relaxed(bank->context.fallingdetect, base + regs->fallingdetect);
+ writel_relaxed(bank->context.dataout, base + regs->dataout);
+ writel_relaxed(bank->context.oe, base + regs->direction);
if (bank->dbck_enable_mask) {
- writel_relaxed(bank->context.debounce, bank->base +
- bank->regs->debounce);
+ writel_relaxed(bank->context.debounce, base + regs->debounce);
writel_relaxed(bank->context.debounce_en,
- bank->base + bank->regs->debounce_en);
+ base + regs->debounce_en);
}
- writel_relaxed(bank->context.irqenable1,
- bank->base + bank->regs->irqenable);
- writel_relaxed(bank->context.irqenable2,
- bank->base + bank->regs->irqenable2);
+ writel_relaxed(bank->context.irqenable1, base + regs->irqenable);
+ writel_relaxed(bank->context.irqenable2, base + regs->irqenable2);
}
static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context)
{
struct device *dev = bank->chip.parent;
void __iomem *base = bank->base;
- u32 nowake;
+ u32 mask, nowake;
bank->saved_datain = readl_relaxed(base + bank->regs->datain);
if (!bank->enabled_non_wakeup_gpios)
goto update_gpio_context_count;
+ /* Check for pending EDGE_FALLING, ignore EDGE_BOTH */
+ mask = bank->enabled_non_wakeup_gpios & bank->context.fallingdetect;
+ mask &= ~bank->context.risingdetect;
+ bank->saved_datain |= mask;
+
+ /* Check for pending EDGE_RISING, ignore EDGE_BOTH */
+ mask = bank->enabled_non_wakeup_gpios & bank->context.risingdetect;
+ mask &= ~bank->context.fallingdetect;
+ bank->saved_datain &= ~mask;
+
if (!may_lose_context)
goto update_gpio_context_count;
@@ -1291,8 +1127,8 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context)
*/
if (!bank->loses_context && bank->enabled_non_wakeup_gpios) {
nowake = bank->enabled_non_wakeup_gpios;
- omap_gpio_rmw(base, bank->regs->fallingdetect, nowake, ~nowake);
- omap_gpio_rmw(base, bank->regs->risingdetect, nowake, ~nowake);
+ omap_gpio_rmw(base + bank->regs->fallingdetect, nowake, ~nowake);
+ omap_gpio_rmw(base + bank->regs->risingdetect, nowake, ~nowake);
}
update_gpio_context_count:
@@ -1421,7 +1257,7 @@ static int gpio_omap_cpu_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-static struct omap_gpio_reg_offs omap2_gpio_regs = {
+static const struct omap_gpio_reg_offs omap2_gpio_regs = {
.revision = OMAP24XX_GPIO_REVISION,
.direction = OMAP24XX_GPIO_OE,
.datain = OMAP24XX_GPIO_DATAIN,
@@ -1444,7 +1280,7 @@ static struct omap_gpio_reg_offs omap2_gpio_regs = {
.fallingdetect = OMAP24XX_GPIO_FALLINGDETECT,
};
-static struct omap_gpio_reg_offs omap4_gpio_regs = {
+static const struct omap_gpio_reg_offs omap4_gpio_regs = {
.revision = OMAP4_GPIO_REVISION,
.direction = OMAP4_GPIO_OE,
.datain = OMAP4_GPIO_DATAIN,
@@ -1453,6 +1289,8 @@ static struct omap_gpio_reg_offs omap4_gpio_regs = {
.clr_dataout = OMAP4_GPIO_CLEARDATAOUT,
.irqstatus = OMAP4_GPIO_IRQSTATUS0,
.irqstatus2 = OMAP4_GPIO_IRQSTATUS1,
+ .irqstatus_raw0 = OMAP4_GPIO_IRQSTATUSRAW0,
+ .irqstatus_raw1 = OMAP4_GPIO_IRQSTATUSRAW1,
.irqenable = OMAP4_GPIO_IRQSTATUSSET0,
.irqenable2 = OMAP4_GPIO_IRQSTATUSSET1,
.set_irqenable = OMAP4_GPIO_IRQSTATUSSET0,
@@ -1528,7 +1366,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
irqc->irq_startup = omap_gpio_irq_startup,
irqc->irq_shutdown = omap_gpio_irq_shutdown,
- irqc->irq_ack = omap_gpio_ack_irq,
+ irqc->irq_ack = dummy_irq_chip.irq_ack,
irqc->irq_mask = omap_gpio_mask_irq,
irqc->irq_unmask = omap_gpio_unmask_irq,
irqc->irq_set_type = omap_gpio_irq_type,
@@ -1572,14 +1410,10 @@ static int omap_gpio_probe(struct platform_device *pdev)
pdata->get_context_loss_count;
}
- if (bank->regs->set_dataout && bank->regs->clr_dataout) {
+ if (bank->regs->set_dataout && bank->regs->clr_dataout)
bank->set_dataout = omap_set_gpio_dataout_reg;
- bank->set_dataout_multiple = omap_set_gpio_dataout_reg_multiple;
- } else {
+ else
bank->set_dataout = omap_set_gpio_dataout_mask;
- bank->set_dataout_multiple =
- omap_set_gpio_dataout_mask_multiple;
- }
raw_spin_lock_init(&bank->lock);
raw_spin_lock_init(&bank->wa_lock);
@@ -1635,7 +1469,6 @@ static int omap_gpio_remove(struct platform_device *pdev)
struct gpio_bank *bank = platform_get_drvdata(pdev);
cpu_pm_unregister_notifier(&bank->nb);
- list_del(&bank->node);
gpiochip_remove(&bank->chip);
pm_runtime_disable(&pdev->dev);
if (bank->dbck_flag)
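
Much of the omap diff collapses onto the reworked `omap_gpio_rmw()` shape: a single read-modify-write helper that now takes a fully formed register address and hands back the value it wrote, so the shadow "context" copy can be updated in the same statement. The shape, reproduced standalone with an illustrative name:

```c
#include <linux/io.h>

static inline u32 foo_rmw(void __iomem *reg, u32 mask, bool set)
{
	u32 val = readl_relaxed(reg);

	if (set)
		val |= mask;
	else
		val &= ~mask;

	writel_relaxed(val, reg);

	return val;	/* returned so callers can shadow the register */
}

/*
 * Typical caller, mirroring omap_set_gpio_direction() above:
 *
 *	bank->context.oe = foo_rmw(base + regs->direction,
 *				   BIT(gpio), is_input);
 */
```
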
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index cfe827cefad8..378b206d2dc9 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -1178,6 +1178,7 @@ static const struct of_device_id pca953x_dt_ids[] = {
{ .compatible = "ti,tca6408", .data = OF_953X( 8, PCA_INT), },
{ .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), },
{ .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), },
+ { .compatible = "ti,tca9539", .data = OF_953X(16, PCA_INT), },
{ .compatible = "onnn,cat9554", .data = OF_953X( 8, PCA_INT), },
{ .compatible = "onnn,pca9654", .data = OF_953X( 8, PCA_INT), },
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 9aad32206e84..722ce5cf861e 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -283,6 +283,7 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
{
struct device *dev = &adev->dev;
struct pl061 *pl061;
+ struct gpio_irq_chip *girq;
int ret, irq;
pl061 = devm_kzalloc(dev, sizeof(*pl061), GFP_KERNEL);
@@ -310,10 +311,6 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
pl061->gc.parent = dev;
pl061->gc.owner = THIS_MODULE;
- ret = gpiochip_add_data(&pl061->gc, pl061);
- if (ret)
- return ret;
-
/*
* irq_chip support
*/
@@ -332,19 +329,24 @@ static int pl061_probe(struct amba_device *adev, const struct amba_id *id)
}
pl061->parent_irq = irq;
- ret = gpiochip_irqchip_add(&pl061->gc, &pl061->irq_chip,
- 0, handle_bad_irq,
- IRQ_TYPE_NONE);
- if (ret) {
- dev_info(&adev->dev, "could not add irqchip\n");
+ girq = &pl061->gc.irq;
+ girq->chip = &pl061->irq_chip;
+ girq->parent_handler = pl061_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->parents[0] = irq;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+
+ ret = devm_gpiochip_add_data(dev, &pl061->gc, pl061);
+ if (ret)
return ret;
- }
- gpiochip_set_chained_irqchip(&pl061->gc, &pl061->irq_chip,
- irq, pl061_irq_handler);
amba_set_drvdata(adev, pl061);
- dev_info(&adev->dev, "PL061 GPIO chip @%pa registered\n",
- &adev->res.start);
+ dev_info(dev, "PL061 GPIO chip registered\n");
return 0;
}
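
The pl061 and siox hunks both move to the `struct gpio_irq_chip` template: the irqchip is described in `gc->irq` before registration and gpiolib wires it up inside `(devm_)gpiochip_add_data()`, replacing the older `gpiochip_irqchip_add()` plus `gpiochip_set_chained_irqchip()` pair. A sketch of the chained case with illustrative names:

```c
#include <linux/device.h>
#include <linux/gpio/driver.h>
#include <linux/irq.h>

static int foo_register_chip(struct device *dev, struct gpio_chip *gc,
			     struct irq_chip *ic, unsigned int parent_irq,
			     irq_flow_handler_t parent_handler)
{
	struct gpio_irq_chip *girq = &gc->irq;

	girq->chip = ic;
	girq->parent_handler = parent_handler;
	girq->num_parents = 1;
	girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
				     GFP_KERNEL);
	if (!girq->parents)
		return -ENOMEM;
	girq->parents[0] = parent_irq;
	girq->default_type = IRQ_TYPE_NONE;
	girq->handler = handle_bad_irq;	/* real handler set at irq_set_type */

	/* gpiolib adds the irqchip as part of chip registration */
	return devm_gpiochip_add_data(dev, gc, NULL);
}
```
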
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index 70e95fc4779f..187984d26f47 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -489,7 +489,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
irq_chip->irq_unmask = gpio_rcar_irq_enable;
irq_chip->irq_set_type = gpio_rcar_irq_set_type;
irq_chip->irq_set_wake = gpio_rcar_irq_set_wake;
- irq_chip->flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND;
+ irq_chip->flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND;
ret = gpiochip_add_data(gpio_chip, p);
if (ret) {
diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c
index 571b2a81c6de..006a7e6a75f2 100644
--- a/drivers/gpio/gpio-siox.c
+++ b/drivers/gpio/gpio-siox.c
@@ -211,20 +211,22 @@ static int gpio_siox_get_direction(struct gpio_chip *chip, unsigned int offset)
static int gpio_siox_probe(struct siox_device *sdevice)
{
struct gpio_siox_ddata *ddata;
+ struct gpio_irq_chip *girq;
+ struct device *dev = &sdevice->dev;
int ret;
- ddata = devm_kzalloc(&sdevice->dev, sizeof(*ddata), GFP_KERNEL);
+ ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
if (!ddata)
return -ENOMEM;
- dev_set_drvdata(&sdevice->dev, ddata);
+ dev_set_drvdata(dev, ddata);
mutex_init(&ddata->lock);
spin_lock_init(&ddata->irqlock);
ddata->gchip.base = -1;
ddata->gchip.can_sleep = 1;
- ddata->gchip.parent = &sdevice->dev;
+ ddata->gchip.parent = dev;
ddata->gchip.owner = THIS_MODULE;
ddata->gchip.get = gpio_siox_get;
ddata->gchip.set = gpio_siox_set;
@@ -239,54 +241,27 @@ static int gpio_siox_probe(struct siox_device *sdevice)
ddata->ichip.irq_unmask = gpio_siox_irq_unmask;
ddata->ichip.irq_set_type = gpio_siox_irq_set_type;
- ret = gpiochip_add(&ddata->gchip);
- if (ret) {
- dev_err(&sdevice->dev,
- "Failed to register gpio chip (%d)\n", ret);
- goto err_gpiochip;
- }
+ girq = &ddata->gchip.irq;
+ girq->chip = &ddata->ichip;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_level_irq;
- ret = gpiochip_irqchip_add(&ddata->gchip, &ddata->ichip,
- 0, handle_level_irq, IRQ_TYPE_EDGE_RISING);
- if (ret) {
- dev_err(&sdevice->dev,
- "Failed to register irq chip (%d)\n", ret);
-err_gpiochip:
- gpiochip_remove(&ddata->gchip);
- }
+ ret = devm_gpiochip_add_data(dev, &ddata->gchip, NULL);
+ if (ret)
+ dev_err(dev, "Failed to register gpio chip (%d)\n", ret);
return ret;
}
-static int gpio_siox_remove(struct siox_device *sdevice)
-{
- struct gpio_siox_ddata *ddata = dev_get_drvdata(&sdevice->dev);
-
- gpiochip_remove(&ddata->gchip);
- return 0;
-}
-
static struct siox_driver gpio_siox_driver = {
.probe = gpio_siox_probe,
- .remove = gpio_siox_remove,
.set_data = gpio_siox_set_data,
.get_data = gpio_siox_get_data,
.driver = {
.name = "gpio-siox",
},
};
-
-static int __init gpio_siox_init(void)
-{
- return siox_driver_register(&gpio_siox_driver);
-}
-module_init(gpio_siox_init);
-
-static void __exit gpio_siox_exit(void)
-{
- siox_driver_unregister(&gpio_siox_driver);
-}
-module_exit(gpio_siox_exit);
+module_siox_driver(gpio_siox_driver);
MODULE_AUTHOR("Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>");
MODULE_DESCRIPTION("SIOX gpio driver");
diff --git a/drivers/gpio/gpio-stp-xway.c b/drivers/gpio/gpio-stp-xway.c
index 24c478392394..9e23a5ae8108 100644
--- a/drivers/gpio/gpio-stp-xway.c
+++ b/drivers/gpio/gpio-stp-xway.c
@@ -15,8 +15,6 @@
#include <linux/clk.h>
#include <linux/err.h>
-#include <lantiq_soc.h>
-
/*
* The Serial To Parallel (STP) is found on MIPS-based Lantiq SoCs. It is a
* peripheral controller used to drive external shift register cascades. At most
@@ -71,8 +69,7 @@
#define xway_stp_r32(m, reg) __raw_readl(m + reg)
#define xway_stp_w32(m, val, reg) __raw_writel(val, m + reg)
#define xway_stp_w32_mask(m, clear, set, reg) \
- ltq_w32((ltq_r32(m + reg) & ~(clear)) | (set), \
- m + reg)
+ xway_stp_w32(m, (xway_stp_r32(m, reg) & ~(clear)) | (set), reg)
struct xway_stp {
struct gpio_chip gc;
@@ -156,9 +153,9 @@ static int xway_stp_request(struct gpio_chip *gc, unsigned gpio)
/**
* xway_stp_hw_init() - Configure the STP unit and enable the clock gate
- * @virt: pointer to the remapped register range
+ * @chip: Pointer to the xway_stp chip structure
*/
-static int xway_stp_hw_init(struct xway_stp *chip)
+static void xway_stp_hw_init(struct xway_stp *chip)
{
/* sane defaults */
xway_stp_w32(chip->virt, 0, XWAY_STP_AR);
@@ -201,8 +198,6 @@ static int xway_stp_hw_init(struct xway_stp *chip)
if (chip->reserved)
xway_stp_w32_mask(chip->virt, XWAY_STP_UPD_MASK,
XWAY_STP_UPD_FPI, XWAY_STP_CON1);
-
- return 0;
}
static int xway_stp_probe(struct platform_device *pdev)
@@ -258,21 +253,27 @@ static int xway_stp_probe(struct platform_device *pdev)
if (!of_find_property(pdev->dev.of_node, "lantiq,rising", NULL))
chip->edge = XWAY_STP_FALLING;
- clk = clk_get(&pdev->dev, NULL);
+ clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(clk)) {
dev_err(&pdev->dev, "Failed to get clock\n");
return PTR_ERR(clk);
}
- clk_enable(clk);
- ret = xway_stp_hw_init(chip);
- if (!ret)
- ret = devm_gpiochip_add_data(&pdev->dev, &chip->gc, chip);
+ ret = clk_prepare_enable(clk);
+ if (ret)
+ return ret;
- if (!ret)
- dev_info(&pdev->dev, "Init done\n");
+ xway_stp_hw_init(chip);
- return ret;
+ ret = devm_gpiochip_add_data(&pdev->dev, &chip->gc, chip);
+ if (ret) {
+ clk_disable_unprepare(clk);
+ return ret;
+ }
+
+ dev_info(&pdev->dev, "Init done\n");
+
+ return 0;
}
static const struct of_device_id xway_stp_match[] = {
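
Two fixes are folded into the stp-xway probe above: the clock becomes managed via `devm_clk_get()`, and the bare `clk_enable()` (only legal on an already prepared clock) becomes `clk_prepare_enable()`, which prepares and enables in one call. A minimal sketch of the corrected pattern:

```c
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int foo_clk_on(struct device *dev)
{
	struct clk *clk = devm_clk_get(dev, NULL);

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* Prepare + enable together; plain clk_enable() alone is not enough */
	return clk_prepare_enable(clk);
}
```
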
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index f57bfc07ae22..0f59161a4701 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -541,8 +541,8 @@ DEFINE_SHOW_ATTRIBUTE(tegra_dbg_gpio);
static void tegra_gpio_debuginit(struct tegra_gpio_info *tgi)
{
- (void) debugfs_create_file("tegra_gpio", 0444,
- NULL, tgi, &tegra_dbg_gpio_fops);
+ debugfs_create_file("tegra_gpio", 0444, NULL, tgi,
+ &tegra_dbg_gpio_fops);
}
#else
diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c
index 30aef41e3b7e..7ba668db171b 100644
--- a/drivers/gpio/gpio-vf610.c
+++ b/drivers/gpio/gpio-vf610.c
@@ -265,7 +265,8 @@ static int vf610_gpio_probe(struct platform_device *pdev)
return port->irq;
port->clk_port = devm_clk_get(dev, "port");
- if (!IS_ERR(port->clk_port)) {
+ ret = PTR_ERR_OR_ZERO(port->clk_port);
+ if (!ret) {
ret = clk_prepare_enable(port->clk_port);
if (ret)
return ret;
@@ -273,16 +274,17 @@ static int vf610_gpio_probe(struct platform_device *pdev)
port->clk_port);
if (ret)
return ret;
- } else if (port->clk_port == ERR_PTR(-EPROBE_DEFER)) {
+ } else if (ret == -EPROBE_DEFER) {
/*
* Percolate deferrals; for anything else,
* just live without the clocking.
*/
- return PTR_ERR(port->clk_port);
+ return ret;
}
port->clk_gpio = devm_clk_get(dev, "gpio");
- if (!IS_ERR(port->clk_gpio)) {
+ ret = PTR_ERR_OR_ZERO(port->clk_gpio);
+ if (!ret) {
ret = clk_prepare_enable(port->clk_gpio);
if (ret)
return ret;
@@ -290,8 +292,8 @@ static int vf610_gpio_probe(struct platform_device *pdev)
port->clk_gpio);
if (ret)
return ret;
- } else if (port->clk_gpio == ERR_PTR(-EPROBE_DEFER)) {
- return PTR_ERR(port->clk_gpio);
+ } else if (ret == -EPROBE_DEFER) {
+ return ret;
}
gc = &port->gc;
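
The vf610 change is a readability cleanup around optional clocks: `PTR_ERR_OR_ZERO()` extracts the error code once, so the branches compare plain integers instead of `ERR_PTR(-EPROBE_DEFER)`. Only probe deferral is fatal; any other clk error means the driver runs unclocked. A sketch of the pattern with illustrative names:

```c
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int foo_enable_optional_clk(struct device *dev, const char *id)
{
	struct clk *clk = devm_clk_get(dev, id);
	int ret = PTR_ERR_OR_ZERO(clk);	/* 0 on success, else the error */

	if (!ret)
		return clk_prepare_enable(clk);
	if (ret == -EPROBE_DEFER)
		return ret;	/* percolate deferrals */

	return 0;	/* clock is optional: live without it */
}
```
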
diff --git a/drivers/gpio/gpio-vr41xx.c b/drivers/gpio/gpio-vr41xx.c
index b13a49c89cc1..98cd715ccc33 100644
--- a/drivers/gpio/gpio-vr41xx.c
+++ b/drivers/gpio/gpio-vr41xx.c
@@ -467,10 +467,9 @@ static struct gpio_chip vr41xx_gpio_chip = {
static int giu_probe(struct platform_device *pdev)
{
- struct resource *res;
unsigned int trigger, i, pin;
struct irq_chip *chip;
- int irq, ret;
+ int irq;
switch (pdev->id) {
case GPIO_50PINS_PULLUPDOWN:
@@ -489,21 +488,14 @@ static int giu_probe(struct platform_device *pdev)
return -ENODEV;
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -EBUSY;
-
- giu_base = ioremap(res->start, resource_size(res));
- if (!giu_base)
- return -ENOMEM;
+ giu_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(giu_base))
+ return PTR_ERR(giu_base);
vr41xx_gpio_chip.parent = &pdev->dev;
- ret = gpiochip_add_data(&vr41xx_gpio_chip, NULL);
- if (!ret) {
- iounmap(giu_base);
+ if (gpiochip_add_data(&vr41xx_gpio_chip, NULL))
return -ENODEV;
- }
giu_write(GIUINTENL, 0);
giu_write(GIUINTENH, 0);
@@ -534,7 +526,6 @@ static int giu_probe(struct platform_device *pdev)
static int giu_remove(struct platform_device *pdev)
{
if (giu_base) {
- iounmap(giu_base);
giu_base = NULL;
}
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index 32944eb886c1..a9748b5198e6 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -11,7 +11,6 @@
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
-#include <linux/of_gpio.h>
#include <linux/io.h>
#include <linux/gpio/driver.h>
#include <linux/slab.h>
@@ -33,14 +32,16 @@
/**
* struct xgpio_instance - Stores information about GPIO device
- * @mmchip: OF GPIO chip for memory mapped banks
+ * @gc: GPIO chip
+ * @regs: register block
* @gpio_width: GPIO width for every channel
* @gpio_state: GPIO state shadow register
* @gpio_dir: GPIO direction shadow register
* @gpio_lock: Lock used for synchronization
*/
struct xgpio_instance {
- struct of_mm_gpio_chip mmchip;
+ struct gpio_chip gc;
+ void __iomem *regs;
unsigned int gpio_width[2];
u32 gpio_state[2];
u32 gpio_dir[2];
@@ -84,11 +85,10 @@ static inline int xgpio_offset(struct xgpio_instance *chip, int gpio)
*/
static int xgpio_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct xgpio_instance *chip = gpiochip_get_data(gc);
u32 val;
- val = xgpio_readreg(mm_gc->regs + XGPIO_DATA_OFFSET +
+ val = xgpio_readreg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, gpio));
return !!(val & BIT(xgpio_offset(chip, gpio)));
@@ -106,7 +106,6 @@ static int xgpio_get(struct gpio_chip *gc, unsigned int gpio)
static void xgpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
unsigned long flags;
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct xgpio_instance *chip = gpiochip_get_data(gc);
int index = xgpio_index(chip, gpio);
int offset = xgpio_offset(chip, gpio);
@@ -119,7 +118,7 @@ static void xgpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
else
chip->gpio_state[index] &= ~BIT(offset);
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, gpio), chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
@@ -138,7 +137,6 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
unsigned long *bits)
{
unsigned long flags;
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct xgpio_instance *chip = gpiochip_get_data(gc);
int index = xgpio_index(chip, 0);
int offset, i;
@@ -150,7 +148,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
if (*mask == 0)
break;
if (index != xgpio_index(chip, i)) {
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, i),
chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
@@ -166,7 +164,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
}
}
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, i), chip->gpio_state[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
@@ -184,7 +182,6 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask,
static int xgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
unsigned long flags;
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct xgpio_instance *chip = gpiochip_get_data(gc);
int index = xgpio_index(chip, gpio);
int offset = xgpio_offset(chip, gpio);
@@ -193,7 +190,7 @@ static int xgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
/* Set the GPIO bit in shadow register and set direction as input */
chip->gpio_dir[index] |= BIT(offset);
- xgpio_writereg(mm_gc->regs + XGPIO_TRI_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_TRI_OFFSET +
xgpio_regoffset(chip, gpio), chip->gpio_dir[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
@@ -216,7 +213,6 @@ static int xgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
static int xgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
unsigned long flags;
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
struct xgpio_instance *chip = gpiochip_get_data(gc);
int index = xgpio_index(chip, gpio);
int offset = xgpio_offset(chip, gpio);
@@ -228,12 +224,12 @@ static int xgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
chip->gpio_state[index] |= BIT(offset);
else
chip->gpio_state[index] &= ~BIT(offset);
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET +
xgpio_regoffset(chip, gpio), chip->gpio_state[index]);
/* Clear the GPIO bit in shadow register and set direction as output */
chip->gpio_dir[index] &= ~BIT(offset);
- xgpio_writereg(mm_gc->regs + XGPIO_TRI_OFFSET +
+ xgpio_writereg(chip->regs + XGPIO_TRI_OFFSET +
xgpio_regoffset(chip, gpio), chip->gpio_dir[index]);
spin_unlock_irqrestore(&chip->gpio_lock[index], flags);
@@ -243,43 +239,23 @@ static int xgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
/**
* xgpio_save_regs - Set initial values of GPIO pins
- * @mm_gc: Pointer to memory mapped GPIO chip structure
+ * @chip: Pointer to GPIO instance
*/
-static void xgpio_save_regs(struct of_mm_gpio_chip *mm_gc)
+static void xgpio_save_regs(struct xgpio_instance *chip)
{
- struct xgpio_instance *chip =
- container_of(mm_gc, struct xgpio_instance, mmchip);
-
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET, chip->gpio_state[0]);
- xgpio_writereg(mm_gc->regs + XGPIO_TRI_OFFSET, chip->gpio_dir[0]);
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET, chip->gpio_state[0]);
+ xgpio_writereg(chip->regs + XGPIO_TRI_OFFSET, chip->gpio_dir[0]);
if (!chip->gpio_width[1])
return;
- xgpio_writereg(mm_gc->regs + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_OFFSET,
+ xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET + XGPIO_CHANNEL_OFFSET,
chip->gpio_state[1]);
- xgpio_writereg(mm_gc->regs + XGPIO_TRI_OFFSET + XGPIO_CHANNEL_OFFSET,
+ xgpio_writereg(chip->regs + XGPIO_TRI_OFFSET + XGPIO_CHANNEL_OFFSET,
chip->gpio_dir[1]);
}
/**
- * xgpio_remove - Remove method for the GPIO device.
- * @pdev: pointer to the platform device
- *
- * This function remove gpiochips and frees all the allocated resources.
- *
- * Return: 0 always
- */
-static int xgpio_remove(struct platform_device *pdev)
-{
- struct xgpio_instance *chip = platform_get_drvdata(pdev);
-
- of_mm_gpiochip_remove(&chip->mmchip);
-
- return 0;
-}
-
-/**
* xgpio_of_probe - Probe method for the GPIO device.
* @pdev: pointer to the platform device
*
@@ -340,21 +316,28 @@ static int xgpio_probe(struct platform_device *pdev)
spin_lock_init(&chip->gpio_lock[1]);
}
- chip->mmchip.gc.ngpio = chip->gpio_width[0] + chip->gpio_width[1];
- chip->mmchip.gc.parent = &pdev->dev;
- chip->mmchip.gc.direction_input = xgpio_dir_in;
- chip->mmchip.gc.direction_output = xgpio_dir_out;
- chip->mmchip.gc.get = xgpio_get;
- chip->mmchip.gc.set = xgpio_set;
- chip->mmchip.gc.set_multiple = xgpio_set_multiple;
+ chip->gc.base = -1;
+ chip->gc.ngpio = chip->gpio_width[0] + chip->gpio_width[1];
+ chip->gc.parent = &pdev->dev;
+ chip->gc.direction_input = xgpio_dir_in;
+ chip->gc.direction_output = xgpio_dir_out;
+ chip->gc.get = xgpio_get;
+ chip->gc.set = xgpio_set;
+ chip->gc.set_multiple = xgpio_set_multiple;
+
+ chip->gc.label = dev_name(&pdev->dev);
+
+ chip->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(chip->regs)) {
+ dev_err(&pdev->dev, "failed to ioremap memory resource\n");
+ return PTR_ERR(chip->regs);
+ }
- chip->mmchip.save_regs = xgpio_save_regs;
+ xgpio_save_regs(chip);
- /* Call the OF gpio helper to setup and register the GPIO device */
- status = of_mm_gpiochip_add_data(np, &chip->mmchip, chip);
+ status = devm_gpiochip_add_data(&pdev->dev, &chip->gc, chip);
if (status) {
- pr_err("%pOF: error in probe function with status %d\n",
- np, status);
+ dev_err(&pdev->dev, "failed to add GPIO chip\n");
return status;
}
@@ -370,7 +353,6 @@ MODULE_DEVICE_TABLE(of, xgpio_of_match);
static struct platform_driver xgpio_plat_driver = {
.probe = xgpio_probe,
- .remove = xgpio_remove,
.driver = {
.name = "gpio-xilinx",
.of_match_table = xgpio_of_match,
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index c9fc9e232aaf..39f2f9035c11 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -217,14 +217,13 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
if (!handler)
return AE_OK;
- desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event", 0);
+ desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event",
+ GPIO_ACTIVE_HIGH, GPIOD_IN);
if (IS_ERR(desc)) {
dev_err(chip->parent, "Failed to request GPIO\n");
return AE_ERROR;
}
- gpiod_direction_input(desc);
-
ret = gpiochip_lock_as_irq(chip, pin);
if (ret) {
dev_err(chip->parent, "Failed to lock GPIO as interrupt\n");
@@ -951,6 +950,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
const char *label = "ACPI:OpRegion";
desc = gpiochip_request_own_desc(chip, pin, label,
+ GPIO_ACTIVE_HIGH,
flags);
if (IS_ERR(desc)) {
status = AE_ERROR;
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index aec7bd86ae7e..f974075ff00e 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -118,8 +118,15 @@ static void of_gpio_flags_quirks(struct device_node *np,
* Legacy handling of SPI active high chip select. If we have a
* property named "cs-gpios" we need to inspect the child node
* to determine if the flags should have inverted semantics.
+ *
+ * This does not apply to SPI devices named "spi-gpio", because these
+ * have traditionally obtained their own GPIOs by parsing the device
+ * tree directly and have never respected the "spi-cs-high" property
+ * on the SPI bus children.
*/
- if (IS_ENABLED(CONFIG_SPI_MASTER) && !strcmp(propname, "cs-gpios") &&
+ if (IS_ENABLED(CONFIG_SPI_MASTER) &&
+ !strcmp(propname, "cs-gpios") &&
+ !of_device_is_compatible(np, "spi-gpio") &&
of_property_read_bool(np, "cs-gpios")) {
struct device_node *child;
u32 cs;
@@ -158,6 +165,12 @@ static void of_gpio_flags_quirks(struct device_node *np,
}
}
}
+
+ /* Legacy handling of stmmac's active-low PHY reset line */
+ if (IS_ENABLED(CONFIG_STMMAC_ETH) &&
+ !strcmp(propname, "snps,reset-gpio") &&
+ of_property_read_bool(np, "snps,reset-active-low"))
+ *flags |= OF_GPIO_ACTIVE_LOW;
}
/**
@@ -255,6 +268,37 @@ static struct gpio_desc *of_find_spi_gpio(struct device *dev, const char *con_id
}
/*
+ * The old Freescale bindings simply use "gpios" as the name for the chip
+ * select lines rather than "cs-gpios" like all other SPI hardware. Account
+ * for this with a special quirk.
+ */
+static struct gpio_desc *of_find_spi_cs_gpio(struct device *dev,
+ const char *con_id,
+ unsigned int idx,
+ unsigned long *flags)
+{
+ struct device_node *np = dev->of_node;
+
+ if (!IS_ENABLED(CONFIG_SPI_MASTER))
+ return ERR_PTR(-ENOENT);
+
+ /* Allow this specifically for Freescale devices */
+ if (!of_device_is_compatible(np, "fsl,spi") &&
+ !of_device_is_compatible(np, "aeroflexgaisler,spictrl"))
+ return ERR_PTR(-ENOENT);
+ /* Allow only if asking for "cs-gpios" */
+ if (!con_id || strcmp(con_id, "cs"))
+ return ERR_PTR(-ENOENT);
+
+ /*
+ * While all other SPI controllers use "cs-gpios", these Freescale
+ * controllers use just "gpios", so translate to that when "cs-gpios"
+ * is requested.
+ */
+ return of_find_gpio(dev, NULL, idx, flags);
+}
+
+/*
* Some regulator bindings happened before we managed to establish that GPIO
* properties should be named "foo-gpios" so we have this special kludge for
* them.
@@ -325,6 +369,12 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
/* Special handling for SPI GPIOs if used */
if (IS_ERR(desc))
desc = of_find_spi_gpio(dev, con_id, &of_flags);
+ if (IS_ERR(desc)) {
+ /* This quirk looks up flags and all */
+ desc = of_find_spi_cs_gpio(dev, con_id, idx, flags);
+ if (!IS_ERR(desc))
+ return desc;
+ }
/* Special handling for regulator GPIOs if used */
if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
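
A hedged consumer-side sketch of what the new quirk enables: an SPI host compatible with "fsl,spi" or "aeroflexgaisler,spictrl" can request its chip selects under the canonical "cs" connection ID even though the legacy DT property is plain "gpios"; dev is assumed to be the controller's struct device:

	struct gpio_desc *cs;

	/* of_find_spi_cs_gpio() maps the "cs" request onto "gpios" */
	cs = devm_gpiod_get_index(dev, "cs", 0, GPIOD_OUT_LOW);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
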
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e013d417a936..3ee99d070608 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1644,39 +1644,47 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_irq_valid);
/**
* gpiochip_set_cascaded_irqchip() - connects a cascaded irqchip to a gpiochip
- * @gpiochip: the gpiochip to set the irqchip chain to
+ * @gc: the gpiochip to set the irqchip chain to
* @parent_irq: the irq number corresponding to the parent IRQ for this
* chained irqchip
* @parent_handler: the parent interrupt handler for the accumulated IRQ
* coming out of the gpiochip. If the interrupt is nested rather than
* cascaded, pass NULL in this handler argument
*/
-static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip,
+static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gc,
unsigned int parent_irq,
irq_flow_handler_t parent_handler)
{
- if (!gpiochip->irq.domain) {
- chip_err(gpiochip, "called %s before setting up irqchip\n",
+ struct gpio_irq_chip *girq = &gc->irq;
+ struct device *dev = &gc->gpiodev->dev;
+
+ if (!girq->domain) {
+ chip_err(gc, "called %s before setting up irqchip\n",
__func__);
return;
}
if (parent_handler) {
- if (gpiochip->can_sleep) {
- chip_err(gpiochip,
+ if (gc->can_sleep) {
+ chip_err(gc,
"you cannot have chained interrupts on a chip that may sleep\n");
return;
}
+ girq->parents = devm_kcalloc(dev, 1,
+ sizeof(*girq->parents),
+ GFP_KERNEL);
+ if (!girq->parents) {
+ chip_err(gc, "out of memory allocating parent IRQ\n");
+ return;
+ }
+ girq->parents[0] = parent_irq;
+ girq->num_parents = 1;
/*
* The parent irqchip is already using the chip_data for this
* irqchip, so our callbacks simply use the handler_data.
*/
irq_set_chained_handler_and_data(parent_irq, parent_handler,
- gpiochip);
-
- gpiochip->irq.parent_irq = parent_irq;
- gpiochip->irq.parents = &gpiochip->irq.parent_irq;
- gpiochip->irq.num_parents = 1;
+ gc);
}
}
@@ -2503,7 +2511,11 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested);
* @chip: GPIO chip
* @hwnum: hardware number of the GPIO for which to request the descriptor
* @label: label for the GPIO
- * @flags: flags for this GPIO or 0 if default
+ * @lflags: lookup flags for this GPIO or 0 if default; these can be used
+ * to specify line inversion semantics with machine flags such as
+ * GPIO_OUT_LOW
+ * @dflags: descriptor request flags for this GPIO or 0 if default; these
+ * can be used to specify consumer semantics such as open drain
*
* Function allows GPIO chip drivers to request and use their own GPIO
* descriptors via gpiolib API. Difference to gpiod_request() is that this
@@ -2517,9 +2529,9 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested);
*/
struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
const char *label,
- enum gpiod_flags flags)
+ enum gpio_lookup_flags lflags,
+ enum gpiod_flags dflags)
{
- unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT;
struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum);
int err;
@@ -2532,7 +2544,7 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
if (err < 0)
return ERR_PTR(err);
- err = gpiod_configure_flags(desc, label, lflags, flags);
+ err = gpiod_configure_flags(desc, label, lflags, dflags);
if (err) {
chip_err(chip, "setup of own GPIO %s failed\n", label);
gpiod_free_commit(desc);
@@ -3019,13 +3031,13 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
* Return the GPIO's raw value, i.e. the value of the physical line disregarding
* its ACTIVE_LOW status, or negative errno on failure.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
int gpiod_get_raw_value(const struct gpio_desc *desc)
{
VALIDATE_DESC(desc);
- /* Should be using gpio_get_value_cansleep() */
+ /* Should be using gpiod_get_raw_value_cansleep() */
WARN_ON(desc->gdev->chip->can_sleep);
return gpiod_get_raw_value_commit(desc);
}
@@ -3038,7 +3050,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value);
* Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into
* account, or negative errno on failure.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
int gpiod_get_value(const struct gpio_desc *desc)
@@ -3046,7 +3058,7 @@ int gpiod_get_value(const struct gpio_desc *desc)
int value;
VALIDATE_DESC(desc);
- /* Should be using gpio_get_value_cansleep() */
+ /* Should be using gpiod_get_value_cansleep() */
WARN_ON(desc->gdev->chip->can_sleep);
value = gpiod_get_raw_value_commit(desc);
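
A short sketch of the distinction these comments draw, assuming desc was requested earlier: the plain accessors stay safe in atomic context, while chips that may sleep require the _cansleep variants:

	int val;

	if (gpiod_cansleep(desc))
		val = gpiod_get_value_cansleep(desc);	/* process context only */
	else
		val = gpiod_get_value(desc);		/* safe in atomic context */
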
@@ -3071,7 +3083,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_value);
* without regard for their ACTIVE_LOW status. Return 0 in case of success,
* else an error code.
*
- * This function should be called from contexts where we cannot sleep,
+ * This function can be called from contexts where we cannot sleep,
* and it will complain if the GPIO chip functions potentially sleep.
*/
int gpiod_get_raw_array_value(unsigned int array_size,
@@ -3097,7 +3109,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
* Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
* into account. Return 0 in case of success, else an error code.
*
- * This function should be called from contexts where we cannot sleep,
+ * This function can be called from contexts where we cannot sleep,
* and it will complain if the GPIO chip functions potentially sleep.
*/
int gpiod_get_array_value(unsigned int array_size,
@@ -3311,13 +3323,13 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
* Set the raw value of the GPIO, i.e. the value of its physical line without
* regard for its ACTIVE_LOW status.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
void gpiod_set_raw_value(struct gpio_desc *desc, int value)
{
VALIDATE_DESC_VOID(desc);
- /* Should be using gpiod_set_value_cansleep() */
+ /* Should be using gpiod_set_raw_value_cansleep() */
WARN_ON(desc->gdev->chip->can_sleep);
gpiod_set_raw_value_commit(desc, value);
}
@@ -3352,12 +3364,13 @@ static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value)
* Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW,
* OPEN_DRAIN and OPEN_SOURCE flags into account.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
void gpiod_set_value(struct gpio_desc *desc, int value)
{
VALIDATE_DESC_VOID(desc);
+ /* Should be using gpiod_set_value_cansleep() */
WARN_ON(desc->gdev->chip->can_sleep);
gpiod_set_value_nocheck(desc, value);
}
@@ -3373,7 +3386,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
* Set the raw values of the GPIOs, i.e. the values of the physical lines
* without regard for their ACTIVE_LOW status.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
int gpiod_set_raw_array_value(unsigned int array_size,
@@ -3398,7 +3411,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
* Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
* into account.
*
- * This function should be called from contexts where we cannot sleep, and will
+ * This function can be called from contexts where we cannot sleep, and will
* complain if the GPIO chip functions potentially sleep.
*/
int gpiod_set_array_value(unsigned int array_size,
@@ -4244,8 +4257,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_index);
*
* Returns:
* On successful request the GPIO pin is configured in accordance with
- * provided @dflags. If the node does not have the requested GPIO
- * property, NULL is returned.
+ * provided @dflags.
*
* In case of error an ERR_PTR() is returned.
*/
@@ -4267,9 +4279,6 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
index, &flags);
if (!desc || IS_ERR(desc)) {
- /* If it is not there, just return NULL */
- if (PTR_ERR(desc) == -ENOENT)
- return NULL;
return desc;
}
@@ -4420,15 +4429,8 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
chip = gpiod_to_chip(desc);
hwnum = gpio_chip_hwgpio(desc);
- /*
- * FIXME: not very elegant that we call gpiod_configure_flags()
- * twice here (once inside gpiochip_request_own_desc() and
- * again here), but the gpiochip_request_own_desc() is external
- * and cannot really pass the lflags so this is the lesser evil
- * at the moment. Pass zero as dflags on this first call so we
- * don't screw anything up.
- */
- local_desc = gpiochip_request_own_desc(chip, hwnum, name, 0);
+ local_desc = gpiochip_request_own_desc(chip, hwnum, name,
+ lflags, dflags);
if (IS_ERR(local_desc)) {
status = PTR_ERR(local_desc);
pr_err("requesting hog GPIO %s (chip %s, offset %d) failed, %d\n",
@@ -4436,14 +4438,6 @@ int gpiod_hog(struct gpio_desc *desc, const char *name,
return status;
}
- status = gpiod_configure_flags(desc, name, lflags, dflags);
- if (status < 0) {
- pr_err("setup of hog GPIO %s (chip %s, offset %d) failed, %d\n",
- name, chip->label, hwnum, status);
- gpiochip_free_own_desc(desc);
- return status;
- }
-
/* Mark GPIO as hogged so it can be identified and removed later */
set_bit(FLAG_IS_HOGGED, &desc->flags);
@@ -4805,8 +4799,8 @@ static const struct file_operations gpiolib_operations = {
static int __init gpiolib_debugfs_init(void)
{
/* /sys/kernel/debug/gpio */
- (void) debugfs_create_file("gpio", S_IFREG | S_IRUGO,
- NULL, NULL, &gpiolib_operations);
+ debugfs_create_file("gpio", S_IFREG | S_IRUGO, NULL, NULL,
+ &gpiolib_operations);
return 0;
}
subsys_initcall(gpiolib_debugfs_init);
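
Dropping the (void) cast matches the debugfs convention that creation failures are neither checked nor unwound; a one-line sketch with a hypothetical example_fops:

	debugfs_create_file("example", S_IFREG | S_IRUGO, NULL, NULL,
			    &example_fops);
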
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 7a65dad43932..7c52c2442173 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -210,7 +210,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
struct gpio_array *array_info,
unsigned long *value_bitmap);
-extern struct spinlock gpio_lock;
+extern spinlock_t gpio_lock;
extern struct list_head gpio_devices;
struct gpio_desc {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b610e3b30d95..2f18c64d531f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1959,25 +1959,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_init_compute_vmid(adev);
-
- mutex_lock(&adev->grbm_idx_mutex);
- /*
- * making sure that the following register writes will be broadcasted
- * to all the shaders
- */
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
- WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
- (adev->gfx.config.sc_prim_fifo_size_frontend <<
- PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_prim_fifo_size_backend <<
- PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_hiz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_earlyz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
- mutex_unlock(&adev->grbm_idx_mutex);
-
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..dd6b4b0b5f30 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -837,7 +837,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
/* No access to rdtsc. Using raw monotonic time */
args->cpu_clock_counter = ktime_get_raw_ns();
- args->system_clock_counter = ktime_get_boot_ns();
+ args->system_clock_counter = ktime_get_boottime_ns();
/* Since the counter is in nano-seconds we use 1GHz frequency */
args->system_clock_freq = 1000000000;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index f1d326caf69e..a7e8340baf90 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -326,7 +326,7 @@ int hwmgr_resume(struct pp_hwmgr *hwmgr)
if (ret)
return ret;
- ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL);
+ ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index ae64ff7153d6..1cd5a8b5cdc1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -916,8 +916,10 @@ static int init_thermal_controller(
PHM_PlatformCaps_ThermalController
);
- if (0 == powerplay_table->usFanTableOffset)
+ if (0 == powerplay_table->usFanTableOffset) {
+ hwmgr->thermal_controller.use_hw_fan_control = 1;
return 0;
+ }
fan_table = (const PPTable_Generic_SubTable_Header *)
(((unsigned long)powerplay_table) +
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index c92999aac07c..eccb26fddbd0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -694,6 +694,7 @@ struct pp_thermal_controller_info {
uint8_t ucType;
uint8_t ucI2cLine;
uint8_t ucI2cAddress;
+ uint8_t use_hw_fan_control;
struct pp_fan_info fanInfo;
struct pp_advance_fan_control_parameters advanceFanControlParameters;
};
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2d4cfe14f72e..29e641c6a5db 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -2092,6 +2092,10 @@ static int polaris10_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
return 0;
}
+ /* use hardware fan control */
+ if (hwmgr->thermal_controller.use_hw_fan_control)
+ return 0;
+
tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.
usPWMMin * duty100;
do_div(tmp64, 10000);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 72d01e873160..5418a1a87b2c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -760,7 +760,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
if (IS_ERR(gpu->cmdbuf_suballoc)) {
dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
ret = PTR_ERR(gpu->cmdbuf_suballoc);
- goto fail;
+ goto destroy_iommu;
}
/* Create buffer: */
@@ -768,7 +768,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
PAGE_SIZE);
if (ret) {
dev_err(gpu->dev, "could not create command buffer\n");
- goto destroy_iommu;
+ goto destroy_suballoc;
}
if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
@@ -800,6 +800,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
free_buffer:
etnaviv_cmdbuf_free(&gpu->buffer);
gpu->buffer.suballoc = NULL;
+destroy_suballoc:
+ etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+ gpu->cmdbuf_suballoc = NULL;
destroy_iommu:
etnaviv_iommu_destroy(gpu->mmu);
gpu->mmu = NULL;
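
The fix restores strict LIFO unwinding of the init sequence; a generic hedged sketch of the pattern, with hypothetical init/destroy helpers:

	a = init_a();
	if (IS_ERR(a))
		return PTR_ERR(a);

	b = init_b();
	if (IS_ERR(b)) {
		ret = PTR_ERR(b);
		goto err_a;		/* undo only what already succeeded */
	}

	ret = init_c();
	if (ret)
		goto err_b;

	return 0;

err_b:
	destroy_b(b);
err_a:
	destroy_a(a);
	return ret;
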
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 029fd8ec1857..f0d45ccc1aac 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1888,12 +1888,12 @@ static int ring_request_alloc(struct i915_request *request)
*/
request->reserved_space += LEGACY_REQUEST_SIZE;
- ret = switch_context(request);
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
- /* Unconditionally invalidate GPU caches and TLBs. */
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ ret = switch_context(request);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 9cc1d678674f..c436a28d50e4 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -91,14 +91,14 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
ipu_dc_disable(ipu);
ipu_prg_disable(ipu);
+ drm_crtc_vblank_off(crtc);
+
spin_lock_irq(&crtc->dev->event_lock);
- if (crtc->state->event) {
+ if (crtc->state->event && !crtc->state->active) {
drm_crtc_send_vblank_event(crtc, crtc->state->event);
crtc->state->event = NULL;
}
spin_unlock_irq(&crtc->dev->event_lock);
-
- drm_crtc_vblank_off(crtc);
}
static void imx_drm_crtc_reset(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d11e2281dde6..7e43b25785f7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -63,7 +63,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
return 0;
err_free:
- drm_gem_object_put_unlocked(&shmem->base);
+ drm_gem_handle_delete(file, args->handle);
return ret;
}
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index e62fe24b1a2e..5bb0f0a084e9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -619,11 +619,11 @@ static void virtio_gpu_cmd_get_edid_cb(struct virtio_gpu_device *vgdev,
output = vgdev->outputs + scanout;
new_edid = drm_do_get_edid(&output->conn, virtio_get_edid_block, resp);
+ drm_connector_update_edid_property(&output->conn, new_edid);
spin_lock(&vgdev->display_info_lock);
old_edid = output->edid;
output->edid = new_edid;
- drm_connector_update_edid_property(&output->conn, output->edid);
spin_unlock(&vgdev->display_info_lock);
kfree(old_edid);
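
Moving drm_connector_update_edid_property() ahead of the lock matters because it can sleep, which is not allowed while a spinlock is held; a hedged sketch of the general shape, with hypothetical helpers:

	new = build_state();		/* may sleep */
	publish_property(new);		/* may sleep, so keep it outside the lock */

	spin_lock(&lock);		/* protect only the pointer swap */
	old = cur;
	cur = new;
	spin_unlock(&lock);

	kfree(old);
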
diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c
index 8bbe3d0cbe5d..2310c96ccf4a 100644
--- a/drivers/hid/hid-cp2112.c
+++ b/drivers/hid/hid-cp2112.c
@@ -16,7 +16,8 @@
* https://www.silabs.com/documents/public/application-notes/an495-cp2112-interface-specification.pdf
*/
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/machine.h>
#include <linux/gpio/driver.h>
#include <linux/hid.h>
#include <linux/hidraw.h>
@@ -1195,7 +1196,9 @@ static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev,
return -EINVAL;
dev->desc[pin] = gpiochip_request_own_desc(&dev->gc, pin,
- "HID/I2C:Event", 0);
+ "HID/I2C:Event",
+ GPIO_ACTIVE_HIGH,
+ GPIOD_IN);
if (IS_ERR(dev->desc[pin])) {
dev_err(dev->gc.parent, "Failed to request GPIO\n");
return PTR_ERR(dev->desc[pin]);
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 1c1a2514d6f3..9a59957922d4 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -6,10 +6,14 @@ config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST
select PARAVIRT
+ select X86_HV_CALLBACK_VECTOR
help
Select this option to run Linux as a Hyper-V client operating
system.
+config HYPERV_TIMER
+ def_bool HYPERV
+
config HYPERV_TSCPAGE
def_bool HYPERV && X86_64
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1ea482183e8..6188fb7dda42 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -16,6 +16,7 @@
#include <linux/version.h>
#include <linux/random.h>
#include <linux/clockchips.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -23,21 +24,6 @@
struct hv_context hv_context;
/*
- * If false, we're using the old mechanism for stimer0 interrupts
- * where it sends a VMbus message when it expires. The old
- * mechanism is used when running on older versions of Hyper-V
- * that don't support Direct Mode. While Hyper-V provides
- * four stimer's per CPU, Linux uses only stimer0.
- */
-static bool direct_mode_enabled;
-static int stimer0_irq;
-static int stimer0_vector;
-
-#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
-#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
-#define HV_MIN_DELTA_TICKS 1
-
-/*
* hv_init - Main initialization routine.
*
* This routine must be called before any other routines in here are called
@@ -47,9 +33,6 @@ int hv_init(void)
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
if (!hv_context.cpu_context)
return -ENOMEM;
-
- direct_mode_enabled = ms_hyperv.misc_features &
- HV_STIMER_DIRECT_MODE_AVAILABLE;
return 0;
}
@@ -88,89 +71,6 @@ int hv_post_message(union hv_connection_id connection_id,
return status & 0xFFFF;
}
-/*
- * ISR for when stimer0 is operating in Direct Mode. Direct Mode
- * does not use VMbus or any VMbus messages, so process here and not
- * in the VMbus driver code.
- */
-
-static void hv_stimer0_isr(void)
-{
- struct hv_per_cpu_context *hv_cpu;
-
- hv_cpu = this_cpu_ptr(hv_context.cpu_context);
- hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
- add_interrupt_randomness(stimer0_vector, 0);
-}
-
-static int hv_ce_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- u64 current_tick;
-
- WARN_ON(!clockevent_state_oneshot(evt));
-
- current_tick = hyperv_cs->read(NULL);
- current_tick += delta;
- hv_init_timer(0, current_tick);
- return 0;
-}
-
-static int hv_ce_shutdown(struct clock_event_device *evt)
-{
- hv_init_timer(0, 0);
- hv_init_timer_config(0, 0);
- if (direct_mode_enabled)
- hv_disable_stimer0_percpu_irq(stimer0_irq);
-
- return 0;
-}
-
-static int hv_ce_set_oneshot(struct clock_event_device *evt)
-{
- union hv_stimer_config timer_cfg;
-
- timer_cfg.as_uint64 = 0;
- timer_cfg.enable = 1;
- timer_cfg.auto_enable = 1;
- if (direct_mode_enabled) {
- /*
- * When it expires, the timer will directly interrupt
- * on the specified hardware vector/IRQ.
- */
- timer_cfg.direct_mode = 1;
- timer_cfg.apic_vector = stimer0_vector;
- hv_enable_stimer0_percpu_irq(stimer0_irq);
- } else {
- /*
- * When it expires, the timer will generate a VMbus message,
- * to be handled by the normal VMbus interrupt handler.
- */
- timer_cfg.direct_mode = 0;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
- }
- hv_init_timer_config(0, timer_cfg.as_uint64);
- return 0;
-}
-
-static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
-{
- dev->name = "Hyper-V clockevent";
- dev->features = CLOCK_EVT_FEAT_ONESHOT;
- dev->cpumask = cpumask_of(cpu);
- dev->rating = 1000;
- /*
- * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
- * result in clockevents_config_and_register() taking additional
- * references to the hv_vmbus module making it impossible to unload.
- */
-
- dev->set_state_shutdown = hv_ce_shutdown;
- dev->set_state_oneshot = hv_ce_set_oneshot;
- dev->set_next_event = hv_ce_set_next_event;
-}
-
-
int hv_synic_alloc(void)
{
int cpu;
@@ -199,14 +99,6 @@ int hv_synic_alloc(void)
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
- hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
- GFP_KERNEL);
- if (hv_cpu->clk_evt == NULL) {
- pr_err("Unable to allocate clock event device\n");
- goto err;
- }
- hv_init_clockevent_device(hv_cpu->clk_evt, cpu);
-
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -229,11 +121,6 @@ int hv_synic_alloc(void)
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
- if (direct_mode_enabled &&
- hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
- hv_stimer0_isr))
- goto err;
-
return 0;
err:
/*
@@ -252,7 +139,6 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- kfree(hv_cpu->clk_evt);
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
@@ -311,36 +197,9 @@ int hv_synic_init(unsigned int cpu)
hv_set_synic_state(sctrl.as_uint64);
- /*
- * Register the per-cpu clockevent source.
- */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)
- clockevents_config_and_register(hv_cpu->clk_evt,
- HV_TIMER_FREQUENCY,
- HV_MIN_DELTA_TICKS,
- HV_MAX_MAX_DELTA_TICKS);
- return 0;
-}
-
-/*
- * hv_synic_clockevents_cleanup - Cleanup clockevent devices
- */
-void hv_synic_clockevents_cleanup(void)
-{
- int cpu;
+ hv_stimer_init(cpu);
- if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
- return;
-
- if (direct_mode_enabled)
- hv_remove_stimer0_irq(stimer0_irq);
-
- for_each_present_cpu(cpu) {
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
-
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- }
+ return 0;
}
/*
@@ -388,14 +247,7 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;
- /* Turn off clockevent device */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
-
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- hv_ce_shutdown(hv_cpu->clk_evt);
- }
+ hv_stimer_cleanup(cpu);
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 7d3d31f099ea..e32681ee7b9f 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -17,6 +17,7 @@
#include <linux/hyperv.h>
#include <linux/clockchips.h>
#include <linux/ptp_clock_kernel.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index b8e1ff05f110..362e70e9d145 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -138,7 +138,6 @@ struct hv_per_cpu_context {
* per-cpu list of the channels based on their CPU affinity.
*/
struct list_head chan_list;
- struct clock_event_device *clk_evt;
};
struct hv_context {
@@ -176,8 +175,6 @@ extern int hv_synic_init(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
-extern void hv_synic_clockevents_cleanup(void);
-
/* Interface */
void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 92b1874b3eb3..72d5a7cde7ea 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -30,6 +30,7 @@
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
struct vmbus_dynid {
@@ -955,17 +956,6 @@ static void vmbus_onmessage_work(struct work_struct *work)
kfree(ctx);
}
-static void hv_process_timer_expiration(struct hv_message *msg,
- struct hv_per_cpu_context *hv_cpu)
-{
- struct clock_event_device *dev = hv_cpu->clk_evt;
-
- if (dev->event_handler)
- dev->event_handler(dev);
-
- vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
-}
-
void vmbus_on_msg_dpc(unsigned long data)
{
struct hv_per_cpu_context *hv_cpu = (void *)data;
@@ -1159,9 +1149,10 @@ static void vmbus_isr(void)
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
- if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
- hv_process_timer_expiration(msg, hv_cpu);
- else
+ if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
+ hv_stimer0_isr();
+ vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
+ } else
tasklet_schedule(&hv_cpu->msg_dpc);
}
@@ -1263,14 +1254,19 @@ static int vmbus_bus_init(void)
ret = hv_synic_alloc();
if (ret)
goto err_alloc;
+
+ ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
+ if (ret < 0)
+ goto err_alloc;
+
/*
- * Initialize the per-cpu interrupt state and
- * connect to the host.
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
*/
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_alloc;
+ goto err_cpuhp;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
@@ -1318,6 +1314,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
+err_cpuhp:
+ hv_stimer_free();
err_alloc:
hv_synic_free();
hv_remove_vmbus_irq();
@@ -2064,7 +2062,7 @@ static struct acpi_driver vmbus_acpi_driver = {
static void hv_kexec_handler(void)
{
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
/* Make sure conn_state is set as hv_synic_cleanup checks for it */
@@ -2075,6 +2073,8 @@ static void hv_kexec_handler(void)
static void hv_crash_handler(struct pt_regs *regs)
{
+ int cpu;
+
vmbus_initiate_unload(true);
/*
* In crash handler we can't schedule synic cleanup for all CPUs,
@@ -2082,7 +2082,9 @@ static void hv_crash_handler(struct pt_regs *regs)
* for kdump.
*/
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_cleanup(smp_processor_id());
+ cpu = smp_processor_id();
+ hv_stimer_cleanup(cpu);
+ hv_synic_cleanup(cpu);
hyperv_cleanup();
};
@@ -2131,7 +2133,7 @@ static void __exit vmbus_exit(void)
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_disconnect();
hv_remove_vmbus_irq();
for_each_online_cpu(cpu) {
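
A condensed sketch of the bring-up ordering this patch establishes, using the hv_stimer_*() entry points referenced above; the error labels mirror the hunks:

	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;

	ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);	/* stimer state before cpuhp */
	if (ret < 0)
		goto err_alloc;

	/*
	 * hv_synic_init()/hv_synic_cleanup() now set up and tear down the
	 * per-cpu stimer via hv_stimer_init(cpu)/hv_stimer_cleanup(cpu).
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
				hv_synic_init, hv_synic_cleanup);
	if (ret < 0)
		goto err_cpuhp;
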
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 4d0d6c86c12f..fe6618e49dc4 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -96,10 +96,10 @@ struct platform_data {
struct device_attribute name_attr;
};
-/* Keep track of how many package pointers we allocated in init() */
-static int max_packages __read_mostly;
-/* Array of package pointers. Serialized by cpu hotplug lock */
-static struct platform_device **pkg_devices;
+/* Keep track of how many zone pointers we allocated in init() */
+static int max_zones __read_mostly;
+/* Array of zone pointers. Serialized by cpu hotplug lock */
+static struct platform_device **zone_devices;
static ssize_t show_label(struct device *dev,
struct device_attribute *devattr, char *buf)
@@ -422,10 +422,10 @@ static int chk_ucode_version(unsigned int cpu)
static struct platform_device *coretemp_get_pdev(unsigned int cpu)
{
- int pkgid = topology_logical_package_id(cpu);
+ int id = topology_logical_die_id(cpu);
- if (pkgid >= 0 && pkgid < max_packages)
- return pkg_devices[pkgid];
+ if (id >= 0 && id < max_zones)
+ return zone_devices[id];
return NULL;
}
@@ -531,7 +531,7 @@ static int coretemp_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct platform_data *pdata;
- /* Initialize the per-package data structures */
+ /* Initialize the per-zone data structures */
pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
@@ -566,13 +566,13 @@ static struct platform_driver coretemp_driver = {
static struct platform_device *coretemp_device_add(unsigned int cpu)
{
- int err, pkgid = topology_logical_package_id(cpu);
+ int err, zoneid = topology_logical_die_id(cpu);
struct platform_device *pdev;
- if (pkgid < 0)
+ if (zoneid < 0)
return ERR_PTR(-ENOMEM);
- pdev = platform_device_alloc(DRVNAME, pkgid);
+ pdev = platform_device_alloc(DRVNAME, zoneid);
if (!pdev)
return ERR_PTR(-ENOMEM);
@@ -582,7 +582,7 @@ static struct platform_device *coretemp_device_add(unsigned int cpu)
return ERR_PTR(err);
}
- pkg_devices[pkgid] = pdev;
+ zone_devices[zoneid] = pdev;
return pdev;
}
@@ -690,7 +690,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
* the rest.
*/
if (cpumask_empty(&pd->cpumask)) {
- pkg_devices[topology_logical_package_id(cpu)] = NULL;
+ zone_devices[topology_logical_die_id(cpu)] = NULL;
platform_device_unregister(pdev);
return 0;
}
@@ -728,10 +728,10 @@ static int __init coretemp_init(void)
if (!x86_match_cpu(coretemp_ids))
return -ENODEV;
- max_packages = topology_max_packages();
- pkg_devices = kcalloc(max_packages, sizeof(struct platform_device *),
+ max_zones = topology_max_packages() * topology_max_die_per_package();
+ zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
GFP_KERNEL);
- if (!pkg_devices)
+ if (!zone_devices)
return -ENOMEM;
err = platform_driver_register(&coretemp_driver);
@@ -747,7 +747,7 @@ static int __init coretemp_init(void)
outdrv:
platform_driver_unregister(&coretemp_driver);
- kfree(pkg_devices);
+ kfree(zone_devices);
return err;
}
module_init(coretemp_init)
@@ -756,7 +756,7 @@ static void __exit coretemp_exit(void)
{
cpuhp_remove_state(coretemp_hp_online);
platform_driver_unregister(&coretemp_driver);
- kfree(pkg_devices);
+ kfree(zone_devices);
}
module_exit(coretemp_exit)
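
A hedged sketch of the die-granular indexing coretemp switches to, for any online cpu:

	int max_zones = topology_max_packages() * topology_max_die_per_package();
	int zone = topology_logical_die_id(cpu);

	if (zone >= 0 && zone < max_zones)
		pdev = zone_devices[zone];	/* one platform device per die */
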
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index d84095591e45..f86065e16772 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -111,8 +111,7 @@ static int i2c_acpi_do_lookup(struct acpi_device *adev,
struct list_head resource_list;
int ret;
- if (acpi_bus_get_status(adev) || !adev->status.present ||
- acpi_device_enumerated(adev))
+ if (acpi_bus_get_status(adev) || !adev->status.present)
return -EINVAL;
if (acpi_match_device_ids(adev, i2c_acpi_ignored_device_ids) == 0)
@@ -147,6 +146,9 @@ static int i2c_acpi_get_info(struct acpi_device *adev,
lookup.info = info;
lookup.index = -1;
+ if (acpi_device_enumerated(adev))
+ return -EINVAL;
+
ret = i2c_acpi_do_lookup(adev, &lookup);
if (ret)
return ret;
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 5f4bd52121fe..d6f8b038a896 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -91,6 +91,12 @@ void i3c_bus_normaluse_unlock(struct i3c_bus *bus)
up_read(&bus->lock);
}
+static struct i3c_master_controller *
+i3c_bus_to_i3c_master(struct i3c_bus *i3cbus)
+{
+ return container_of(i3cbus, struct i3c_master_controller, bus);
+}
+
static struct i3c_master_controller *dev_to_i3cmaster(struct device *dev)
{
return container_of(dev, struct i3c_master_controller, dev);
@@ -464,6 +470,7 @@ static int i3c_bus_init(struct i3c_bus *i3cbus)
static const char * const i3c_bus_mode_strings[] = {
[I3C_BUS_MODE_PURE] = "pure",
[I3C_BUS_MODE_MIXED_FAST] = "mixed-fast",
+ [I3C_BUS_MODE_MIXED_LIMITED] = "mixed-limited",
[I3C_BUS_MODE_MIXED_SLOW] = "mixed-slow",
};
@@ -565,20 +572,39 @@ static const struct device_type i3c_masterdev_type = {
.groups = i3c_masterdev_groups,
};
-int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode)
+int i3c_bus_set_mode(struct i3c_bus *i3cbus, enum i3c_bus_mode mode,
+ unsigned long max_i2c_scl_rate)
{
- i3cbus->mode = mode;
+ struct i3c_master_controller *master = i3c_bus_to_i3c_master(i3cbus);
- if (!i3cbus->scl_rate.i3c)
- i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE;
+ i3cbus->mode = mode;
- if (!i3cbus->scl_rate.i2c) {
- if (i3cbus->mode == I3C_BUS_MODE_MIXED_SLOW)
- i3cbus->scl_rate.i2c = I3C_BUS_I2C_FM_SCL_RATE;
- else
- i3cbus->scl_rate.i2c = I3C_BUS_I2C_FM_PLUS_SCL_RATE;
+ switch (i3cbus->mode) {
+ case I3C_BUS_MODE_PURE:
+ if (!i3cbus->scl_rate.i3c)
+ i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE;
+ break;
+ case I3C_BUS_MODE_MIXED_FAST:
+ case I3C_BUS_MODE_MIXED_LIMITED:
+ if (!i3cbus->scl_rate.i3c)
+ i3cbus->scl_rate.i3c = I3C_BUS_TYP_I3C_SCL_RATE;
+ if (!i3cbus->scl_rate.i2c)
+ i3cbus->scl_rate.i2c = max_i2c_scl_rate;
+ break;
+ case I3C_BUS_MODE_MIXED_SLOW:
+ if (!i3cbus->scl_rate.i2c)
+ i3cbus->scl_rate.i2c = max_i2c_scl_rate;
+ if (!i3cbus->scl_rate.i3c ||
+ i3cbus->scl_rate.i3c > i3cbus->scl_rate.i2c)
+ i3cbus->scl_rate.i3c = i3cbus->scl_rate.i2c;
+ break;
+ default:
+ return -EINVAL;
}
+ dev_dbg(&master->dev, "i2c-scl = %ld Hz i3c-scl = %ld Hz\n",
+ i3cbus->scl_rate.i2c, i3cbus->scl_rate.i3c);
+
/*
* I3C/I2C frequency may have been overridden, check that user-provided
* values are not exceeding max possible frequency.
@@ -924,9 +950,8 @@ int i3c_master_defslvs_locked(struct i3c_master_controller *master)
ndevs++;
defslvs = i3c_ccc_cmd_dest_init(&dest, I3C_BROADCAST_ADDR,
- sizeof(*defslvs) +
- ((ndevs - 1) *
- sizeof(struct i3c_ccc_dev_desc)));
+ struct_size(defslvs, slaves,
+ ndevs - 1));
if (!defslvs)
return -ENOMEM;
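
struct_size() sizes a structure with a trailing array while checking the multiplication for overflow; a hedged sketch with a hypothetical descriptor type (the in-tree i3c_ccc_defslvs declares one inline slave, hence the ndevs - 1 above):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct demo_defslvs {
		u8 count;
		struct i3c_ccc_dev_desc slaves[];	/* flexible array */
	};

	struct demo_defslvs *d;

	d = kzalloc(struct_size(d, slaves, ndevs), GFP_KERNEL);
	if (!d)
		return -ENOMEM;
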
@@ -1963,12 +1988,19 @@ of_i3c_master_add_i2c_boardinfo(struct i3c_master_controller *master,
if (ret)
return ret;
+ /*
+ * The I3C Specification does not clearly say whether I2C devices with
+ * 10-bit addresses are supported. These devices can't be passed properly
+ * through the DEFSLVS command.
+ */
+ if (boardinfo->base.flags & I2C_CLIENT_TEN) {
+ dev_err(&master->dev, "I2C device with 10-bit address not supported.");
+ return -ENOTSUPP;
+ }
+
/* LVR is encoded in reg[2]. */
boardinfo->lvr = reg[2];
- if (boardinfo->lvr & I3C_LVR_I2C_FM_MODE)
- master->bus.scl_rate.i2c = I3C_BUS_I2C_FM_SCL_RATE;
-
list_add_tail(&boardinfo->node, &master->boardinfo.i2c);
of_node_get(node);
@@ -2111,16 +2143,14 @@ static int i3c_master_i2c_adapter_xfer(struct i2c_adapter *adap,
return ret ? ret : nxfers;
}
-static u32 i3c_master_i2c_functionalities(struct i2c_adapter *adap)
+static u32 i3c_master_i2c_funcs(struct i2c_adapter *adapter)
{
- struct i3c_master_controller *master = i2c_adapter_to_i3c_master(adap);
-
- return master->ops->i2c_funcs(master);
+ return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C;
}
static const struct i2c_algorithm i3c_master_i2c_algo = {
.master_xfer = i3c_master_i2c_adapter_xfer,
- .functionality = i3c_master_i2c_functionalities,
+ .functionality = i3c_master_i2c_funcs,
};
static int i3c_master_i2c_adapter_init(struct i3c_master_controller *master)
@@ -2379,8 +2409,7 @@ EXPORT_SYMBOL_GPL(i3c_generic_ibi_recycle_slot);
static int i3c_master_check_ops(const struct i3c_master_controller_ops *ops)
{
if (!ops || !ops->bus_init || !ops->priv_xfers ||
- !ops->send_ccc_cmd || !ops->do_daa || !ops->i2c_xfers ||
- !ops->i2c_funcs)
+ !ops->send_ccc_cmd || !ops->do_daa || !ops->i2c_xfers)
return -EINVAL;
if (ops->request_ibi &&
@@ -2417,6 +2446,7 @@ int i3c_master_register(struct i3c_master_controller *master,
const struct i3c_master_controller_ops *ops,
bool secondary)
{
+ unsigned long i2c_scl_rate = I3C_BUS_I2C_FM_PLUS_SCL_RATE;
struct i3c_bus *i3cbus = i3c_master_get_bus(master);
enum i3c_bus_mode mode = I3C_BUS_MODE_PURE;
struct i2c_dev_boardinfo *i2cbi;
@@ -2458,6 +2488,9 @@ int i3c_master_register(struct i3c_master_controller *master,
mode = I3C_BUS_MODE_MIXED_FAST;
break;
case I3C_LVR_I2C_INDEX(1):
+ if (mode < I3C_BUS_MODE_MIXED_LIMITED)
+ mode = I3C_BUS_MODE_MIXED_LIMITED;
+ break;
case I3C_LVR_I2C_INDEX(2):
if (mode < I3C_BUS_MODE_MIXED_SLOW)
mode = I3C_BUS_MODE_MIXED_SLOW;
@@ -2466,9 +2499,12 @@ int i3c_master_register(struct i3c_master_controller *master,
ret = -EINVAL;
goto err_put_dev;
}
+
+ if (i2cbi->lvr & I3C_LVR_I2C_FM_MODE)
+ i2c_scl_rate = I3C_BUS_I2C_FM_SCL_RATE;
}
- ret = i3c_bus_set_mode(i3cbus, mode);
+ ret = i3c_bus_set_mode(i3cbus, mode, i2c_scl_rate);
if (ret)
goto err_put_dev;
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index 1d83c97431c7..09912d75c6d5 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -599,6 +599,7 @@ static int dw_i3c_master_bus_init(struct i3c_master_controller *m)
switch (bus->mode) {
case I3C_BUS_MODE_MIXED_FAST:
+ case I3C_BUS_MODE_MIXED_LIMITED:
ret = dw_i2c_clk_cfg(master);
if (ret)
return ret;
@@ -1060,11 +1061,6 @@ static void dw_i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev)
kfree(data);
}
-static u32 dw_i3c_master_i2c_funcs(struct i3c_master_controller *m)
-{
- return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-}
-
static irqreturn_t dw_i3c_master_irq_handler(int irq, void *dev_id)
{
struct dw_i3c_master *master = dev_id;
@@ -1099,7 +1095,6 @@ static const struct i3c_master_controller_ops dw_mipi_i3c_ops = {
.attach_i2c_dev = dw_i3c_master_attach_i2c_dev,
.detach_i2c_dev = dw_i3c_master_detach_i2c_dev,
.i2c_xfers = dw_i3c_master_i2c_xfers,
- .i2c_funcs = dw_i3c_master_i2c_funcs,
};
static int dw_i3c_probe(struct platform_device *pdev)
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index 8889a4fdb454..237f24adddc6 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -864,11 +864,6 @@ static int cdns_i3c_master_i2c_xfers(struct i2c_dev_desc *dev,
return ret;
}
-static u32 cdns_i3c_master_i2c_funcs(struct i3c_master_controller *m)
-{
- return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR;
-}
-
struct cdns_i3c_i2c_dev_data {
u16 id;
s16 ibi;
@@ -1010,9 +1005,7 @@ static int cdns_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev)
master->free_rr_slots &= ~BIT(slot);
i2c_dev_set_master_data(dev, data);
- writel(prepare_rr0_dev_address(dev->boardinfo->base.addr) |
- (dev->boardinfo->base.flags & I2C_CLIENT_TEN ?
- DEV_ID_RR0_LVR_EXT_ADDR : 0),
+ writel(prepare_rr0_dev_address(dev->boardinfo->base.addr),
master->regs + DEV_ID_RR0(data->id));
writel(dev->boardinfo->lvr, master->regs + DEV_ID_RR2(data->id));
writel(readl(master->regs + DEVS_CTRL) |
@@ -1518,7 +1511,6 @@ static const struct i3c_master_controller_ops cdns_i3c_master_ops = {
.send_ccc_cmd = cdns_i3c_master_send_ccc_cmd,
.priv_xfers = cdns_i3c_master_priv_xfers,
.i2c_xfers = cdns_i3c_master_i2c_xfers,
- .i2c_funcs = cdns_i3c_master_i2c_funcs,
.enable_ibi = cdns_i3c_master_enable_ibi,
.disable_ibi = cdns_i3c_master_disable_ibi,
.request_ibi = cdns_i3c_master_request_ibi,
diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c
index c8159205c77d..4e22b3c3e488 100644
--- a/drivers/iio/humidity/dht11.c
+++ b/drivers/iio/humidity/dht11.c
@@ -149,7 +149,7 @@ static int dht11_decode(struct dht11 *dht11, int offset)
return -EIO;
}
- dht11->timestamp = ktime_get_boot_ns();
+ dht11->timestamp = ktime_get_boottime_ns();
if (hum_int < 4) { /* DHT22: 100000 = (3*256+232)*100 */
dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
((temp_int & 0x80) ? -100 : 100);
@@ -177,7 +177,7 @@ static irqreturn_t dht11_handle_irq(int irq, void *data)
/* TODO: Consider making the handler safe for IRQ sharing */
if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
- dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
+ dht11->edges[dht11->num_edges].ts = ktime_get_boottime_ns();
dht11->edges[dht11->num_edges++].value =
gpio_get_value(dht11->gpio);
@@ -196,7 +196,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
int ret, timeres, offset;
mutex_lock(&dht11->lock);
- if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
+ if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boottime_ns()) {
timeres = ktime_get_resolution_ns();
dev_dbg(dht11->dev, "current timeresolution: %dns\n", timeres);
if (timeres > DHT11_MIN_TIMERES) {
@@ -322,7 +322,7 @@ static int dht11_probe(struct platform_device *pdev)
return -EINVAL;
}
- dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
+ dht11->timestamp = ktime_get_boottime_ns() - DHT11_DATA_VALID_TIME - 1;
dht11->num_edges = -1;
platform_set_drvdata(pdev, iio);
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 245b5844028d..401d7ff99853 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -228,9 +228,9 @@ s64 iio_get_time_ns(const struct iio_dev *indio_dev)
ktime_get_coarse_ts64(&tp);
return timespec64_to_ns(&tp);
case CLOCK_BOOTTIME:
- return ktime_get_boot_ns();
+ return ktime_get_boottime_ns();
case CLOCK_TAI:
- return ktime_get_tai_ns();
+ return ktime_get_clocktai_ns();
default:
BUG();
}
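
For reference, the renamed time accessors used throughout this series, one per clock base:

	s64 boot = ktime_get_boottime_ns();	/* was ktime_get_boot_ns() */
	s64 tai = ktime_get_clocktai_ns();	/* was ktime_get_tai_ns() */
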
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 29f7b15c81d9..3352a107b4a3 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -457,7 +457,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
int rc;
int i;
- lockdep_assert_held_exclusive(&devices_rwsem);
+ lockdep_assert_held_write(&devices_rwsem);
ida_init(&inuse);
xa_for_each (&devices, index, device) {
char buf[IB_DEVICE_NAME_MAX];
@@ -2520,7 +2520,7 @@ static int __init ib_core_init(void)
goto err_mad;
}
- ret = register_lsm_notifier(&ibdev_lsm_nb);
+ ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
if (ret) {
pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
goto err_sa;
@@ -2539,7 +2539,7 @@ static int __init ib_core_init(void)
return 0;
err_compat:
- unregister_lsm_notifier(&ibdev_lsm_nb);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
ib_sa_cleanup();
err_mad:
@@ -2565,7 +2565,7 @@ static void __exit ib_core_cleanup(void)
nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
unregister_pernet_device(&rdma_dev_net_ops);
- unregister_lsm_notifier(&ibdev_lsm_nb);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
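
A small sketch of the renamed write-side lockdep assertion, assuming a locally declared rw_semaphore:

	static DECLARE_RWSEM(example_rwsem);

	down_write(&example_rwsem);
	/* was lockdep_assert_held_exclusive() */
	lockdep_assert_held_write(&example_rwsem);
	/* ... mutate shared state ... */
	up_write(&example_rwsem);
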
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4fe662c3bbc1..c142b23bb401 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1038,7 +1038,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = &current->cpus_allowed;
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
@@ -1046,7 +1046,7 @@ int hfi1_get_proc_affinity(int node)
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -1057,7 +1057,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 28b66bd70b74..2395fd4233a7 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -869,14 +869,13 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;
/*
* To ensure that always the same sdma engine(s) will be
* selected make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;
cpu_id = smp_processor_id();
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 2a0b59a4b6eb..cca414ecfcd5 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status,
if (status) {
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
- rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
+ rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@@ -416,7 +416,7 @@ next_entry:
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
- rec->time_to_run = ktime_get_boot_ns() +
+ rec->time_to_run = ktime_get_boottime_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -709,7 +709,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
}
}
if (resched_delay_sec) {
- u64 curr_time = ktime_get_boot_ns();
+ u64 curr_time = ktime_get_boottime_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 78fa634de98a..27b6e664e59d 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1142,7 +1142,7 @@ static __poll_t qib_poll(struct file *fp, struct poll_table_struct *pt)
static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
{
struct qib_filedata *fd = fp->private_data;
- const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const unsigned int weight = current->nr_cpus_allowed;
const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
int local_cpu;
@@ -1623,9 +1623,8 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
ret = find_free_ctxt(i_minor - 1, fp, uinfo);
else {
int unit;
- const unsigned int cpu = cpumask_first(&current->cpus_allowed);
- const unsigned int weight =
- cpumask_weight(&current->cpus_allowed);
+ const unsigned int cpu = cpumask_first(current->cpus_ptr);
+ const unsigned int weight = current->nr_cpus_allowed;
if (weight == 1 && !test_bit(cpu, qib_cpulist))
if (!find_hca(cpu, &unit) && unit >= 0)
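
The open-coded cpumask arithmetic becomes direct field reads; a hedged sketch of the pinned-task check used in these hunks:

	if (current->nr_cpus_allowed == 1) {
		/* pinned: cpus_ptr replaces the old &current->cpus_allowed */
		unsigned int cpu = cpumask_first(current->cpus_ptr);

		pr_debug("task pinned to CPU %u\n", cpu);
	}
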
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 659c5e0fb835..80e10f4e213a 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -15,10 +15,10 @@ config ARM_GIC_PM
bool
depends on PM
select ARM_GIC
- select PM_CLK
config ARM_GIC_MAX_NR
int
+ depends on ARM_GIC
default 2 if ARCH_REALVIEW
default 1
@@ -87,6 +87,14 @@ config ALPINE_MSI
select PCI_MSI
select GENERIC_IRQ_CHIP
+config AL_FIC
+ bool "Amazon's Annapurna Labs Fabric Interrupt Controller"
+ depends on OF || COMPILE_TEST
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+ help
+ Support Amazon's Annapurna Labs Fabric Interrupt Controller.
+
config ATMEL_AIC_IRQ
bool
select GENERIC_IRQ_CHIP
@@ -217,13 +225,26 @@ config RDA_INTC
select IRQ_DOMAIN
config RENESAS_INTC_IRQPIN
- bool
+ bool "Renesas INTC External IRQ Pin Support" if COMPILE_TEST
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas Interrupt Controller for external
+ interrupt pins, as found on SH/R-Mobile and R-Car Gen1 SoCs.
config RENESAS_IRQC
- bool
+ bool "Renesas R-Mobile APE6 and R-Car IRQC support" if COMPILE_TEST
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas Interrupt Controller for external
+ devices, as found on R-Mobile APE6, R-Car Gen2, and R-Car Gen3 SoCs.
+
+config RENESAS_RZA1_IRQC
+ bool "Renesas RZ/A1 IRQC support" if COMPILE_TEST
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ Enable support for the Renesas RZ/A1 Interrupt Controller, to use up
+ to 8 external interrupts with configurable sense select.
config ST_IRQCHIP
bool
@@ -299,8 +320,11 @@ config RENESAS_H8300H_INTC
select IRQ_DOMAIN
config RENESAS_H8S_INTC
- bool
+ bool "Renesas H8S Interrupt Controller Support" if COMPILE_TEST
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas H8/300 Interrupt Controller, as found
+ on Renesas H8S SoCs.
config IMX_GPCV2
bool
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 606a003a0000..8d0fcec6ab23 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IRQCHIP) += irqchip.o
+obj-$(CONFIG_AL_FIC) += irq-al-fic.o
obj-$(CONFIG_ALPINE_MSI) += irq-alpine-msi.o
obj-$(CONFIG_ATH79) += irq-ath79-cpu.o
obj-$(CONFIG_ATH79) += irq-ath79-misc.o
@@ -49,6 +50,7 @@ obj-$(CONFIG_JCORE_AIC) += irq-jcore-aic.o
obj-$(CONFIG_RDA_INTC) += irq-rda-intc.o
obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o
+obj-$(CONFIG_RENESAS_RZA1_IRQC) += irq-renesas-rza1.o
obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
new file mode 100644
index 000000000000..1a57cee3efab
--- /dev/null
+++ b/drivers/irqchip/irq-al-fic.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+/* FIC Registers */
+#define AL_FIC_CAUSE 0x00
+#define AL_FIC_MASK 0x10
+#define AL_FIC_CONTROL 0x28
+
+#define CONTROL_TRIGGER_RISING BIT(3)
+#define CONTROL_MASK_MSI_X BIT(5)
+
+#define NR_FIC_IRQS 32
+
+MODULE_AUTHOR("Talel Shenhar");
+MODULE_DESCRIPTION("Amazon's Annapurna Labs Interrupt Controller Driver");
+MODULE_LICENSE("GPL v2");
+
+enum al_fic_state {
+ AL_FIC_UNCONFIGURED = 0,
+ AL_FIC_CONFIGURED_LEVEL,
+ AL_FIC_CONFIGURED_RISING_EDGE,
+};
+
+struct al_fic {
+ void __iomem *base;
+ struct irq_domain *domain;
+ const char *name;
+ unsigned int parent_irq;
+ enum al_fic_state state;
+};
+
+static void al_fic_set_trigger(struct al_fic *fic,
+ struct irq_chip_generic *gc,
+ enum al_fic_state new_state)
+{
+ irq_flow_handler_t handler;
+ u32 control = readl_relaxed(fic->base + AL_FIC_CONTROL);
+
+ if (new_state == AL_FIC_CONFIGURED_LEVEL) {
+ handler = handle_level_irq;
+ control &= ~CONTROL_TRIGGER_RISING;
+ } else {
+ handler = handle_edge_irq;
+ control |= CONTROL_TRIGGER_RISING;
+ }
+ gc->chip_types->handler = handler;
+ fic->state = new_state;
+ writel_relaxed(control, fic->base + AL_FIC_CONTROL);
+}
+
+static int al_fic_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ struct al_fic *fic = gc->private;
+ enum al_fic_state new_state;
+ int ret = 0;
+
+ irq_gc_lock(gc);
+
+ if (((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH) &&
+ ((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)) {
+ pr_debug("fic doesn't support flow type %d\n", flow_type);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ new_state = (flow_type & IRQ_TYPE_LEVEL_HIGH) ?
+ AL_FIC_CONFIGURED_LEVEL : AL_FIC_CONFIGURED_RISING_EDGE;
+
+ /*
+ * A given FIC instance can be either all level or all edge triggered.
+ * This is generally fixed depending on what pieces of HW it's wired up
+ * to.
+ *
+ * We configure it based on the sensitivity of the first source
+ * being setup, and reject any subsequent attempt at configuring it in a
+ * different way.
+ */
+ if (fic->state == AL_FIC_UNCONFIGURED) {
+ al_fic_set_trigger(fic, gc, new_state);
+ } else if (fic->state != new_state) {
+ pr_debug("fic %s state already configured to %d\n",
+ fic->name, fic->state);
+ ret = -EINVAL;
+ goto err;
+ }
+
+err:
+ irq_gc_unlock(gc);
+
+ return ret;
+}
+
+static void al_fic_irq_handler(struct irq_desc *desc)
+{
+ struct al_fic *fic = irq_desc_get_handler_data(desc);
+ struct irq_domain *domain = fic->domain;
+ struct irq_chip *irqchip = irq_desc_get_chip(desc);
+ struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
+ unsigned long pending;
+ unsigned int irq;
+ u32 hwirq;
+
+ chained_irq_enter(irqchip, desc);
+
+ pending = readl_relaxed(fic->base + AL_FIC_CAUSE);
+ pending &= ~gc->mask_cache;
+
+ for_each_set_bit(hwirq, &pending, NR_FIC_IRQS) {
+ irq = irq_find_mapping(domain, hwirq);
+ generic_handle_irq(irq);
+ }
+
+ chained_irq_exit(irqchip, desc);
+}
+
+static int al_fic_register(struct device_node *node,
+ struct al_fic *fic)
+{
+ struct irq_chip_generic *gc;
+ int ret;
+
+ fic->domain = irq_domain_add_linear(node,
+ NR_FIC_IRQS,
+ &irq_generic_chip_ops,
+ fic);
+ if (!fic->domain) {
+ pr_err("fail to add irq domain\n");
+ return -ENOMEM;
+ }
+
+ ret = irq_alloc_domain_generic_chips(fic->domain,
+ NR_FIC_IRQS,
+ 1, fic->name,
+ handle_level_irq,
+ 0, 0, IRQ_GC_INIT_MASK_CACHE);
+ if (ret) {
+ pr_err("fail to allocate generic chip (%d)\n", ret);
+ goto err_domain_remove;
+ }
+
+ gc = irq_get_domain_generic_chip(fic->domain, 0);
+ gc->reg_base = fic->base;
+ gc->chip_types->regs.mask = AL_FIC_MASK;
+ gc->chip_types->regs.ack = AL_FIC_CAUSE;
+ gc->chip_types->chip.irq_mask = irq_gc_mask_set_bit;
+ gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
+ gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
+ gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
+ gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
+ gc->private = fic;
+
+ irq_set_chained_handler_and_data(fic->parent_irq,
+ al_fic_irq_handler,
+ fic);
+ return 0;
+
+err_domain_remove:
+ irq_domain_remove(fic->domain);
+
+ return ret;
+}
+
+/*
+ * al_fic_wire_init() - initialize and configure fic in wire mode
+ * @of_node: optional pointer to interrupt controller's device tree node.
+ * @base: mmio to fic register
+ * @name: name of the fic
+ * @parent_irq: interrupt of parent
+ *
+ * This API configures the fic hardware to work in wire mode.
+ * In wire mode, the fic hardware generates a wire ("wired") interrupt.
+ * The interrupt can be generated on a rising edge or on a level; which
+ * of the two depends on the hardware wired to this fic.
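+ *
+ * Return: pointer to the new fic on success, or an ERR_PTR() on failure.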
+ */
+static struct al_fic *al_fic_wire_init(struct device_node *node,
+ void __iomem *base,
+ const char *name,
+ unsigned int parent_irq)
+{
+ struct al_fic *fic;
+ int ret;
+ u32 control = CONTROL_MASK_MSI_X;
+
+ fic = kzalloc(sizeof(*fic), GFP_KERNEL);
+ if (!fic)
+ return ERR_PTR(-ENOMEM);
+
+ fic->base = base;
+ fic->parent_irq = parent_irq;
+ fic->name = name;
+
+ /* mask out all interrupts */
+ writel_relaxed(0xFFFFFFFF, fic->base + AL_FIC_MASK);
+
+ /* clear any pending interrupt */
+ writel_relaxed(0, fic->base + AL_FIC_CAUSE);
+
+ writel_relaxed(control, fic->base + AL_FIC_CONTROL);
+
+ ret = al_fic_register(node, fic);
+ if (ret) {
+ pr_err("fail to register irqchip\n");
+ goto err_free;
+ }
+
+ pr_debug("%s initialized successfully in Legacy mode (parent-irq=%u)\n",
+ fic->name, parent_irq);
+
+ return fic;
+
+err_free:
+ kfree(fic);
+ return ERR_PTR(ret);
+}
+
+static int __init al_fic_init_dt(struct device_node *node,
+ struct device_node *parent)
+{
+ int ret;
+ void __iomem *base;
+ unsigned int parent_irq;
+ struct al_fic *fic;
+
+ if (!parent) {
+ pr_err("%s: unsupported - device require a parent\n",
+ node->name);
+ return -EINVAL;
+ }
+
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("%s: fail to map memory\n", node->name);
+ return -ENOMEM;
+ }
+
+ parent_irq = irq_of_parse_and_map(node, 0);
+ if (!parent_irq) {
+ pr_err("%s: fail to map irq\n", node->name);
+ ret = -EINVAL;
+ goto err_unmap;
+ }
+
+ fic = al_fic_wire_init(node,
+ base,
+ node->name,
+ parent_irq);
+ if (IS_ERR(fic)) {
+ pr_err("%s: fail to initialize irqchip (%lu)\n",
+ node->name,
+ PTR_ERR(fic));
+ ret = PTR_ERR(fic);
+ goto err_irq_dispose;
+ }
+
+ return 0;
+
+err_irq_dispose:
+ irq_dispose_mapping(parent_irq);
+err_unmap:
+ iounmap(base);
+
+ return ret;
+}
+
+IRQCHIP_DECLARE(al_fic, "amazon,al-fic", al_fic_init_dt);
diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c
index a4c1aacba1ff..a1534edef7fa 100644
--- a/drivers/irqchip/irq-csky-mpintc.c
+++ b/drivers/irqchip/irq-csky-mpintc.c
@@ -32,8 +32,8 @@ static void __iomem *INTCL_base;
#define INTCG_CIDSTR 0x1000
#define INTCL_PICTLR 0x0
+#define INTCL_CFGR 0x14
#define INTCL_SIGR 0x60
-#define INTCL_HPPIR 0x68
#define INTCL_RDYIR 0x6c
#define INTCL_SENR 0xa0
#define INTCL_CENR 0xa4
@@ -41,21 +41,49 @@ static void __iomem *INTCL_base;
static DEFINE_PER_CPU(void __iomem *, intcl_reg);
+static unsigned long *__trigger;
+
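+/*
+ * Every interrupt owns a two-bit trigger field, so sixteen interrupts fit
+ * in each 32-bit configuration register. The macros below compute the
+ * register byte offset and bit position for a given hwirq; private
+ * interrupts live in the per-cpu INTCL_CFGR registers and common ones in
+ * the global INTCG_CICFGR registers.
+ */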
+#define IRQ_OFFSET(irq) (((irq) < COMM_IRQ_BASE) ? (irq) : ((irq) - COMM_IRQ_BASE))
+
+#define TRIG_BYTE_OFFSET(i) ((((i) * 2) / 32) * 4)
+#define TRIG_BIT_OFFSET(i) (((i) * 2) % 32)
+
+#define TRIG_VAL(trigger, irq) ((trigger) << TRIG_BIT_OFFSET(IRQ_OFFSET(irq)))
+#define TRIG_VAL_MSK(irq) (~(3 << TRIG_BIT_OFFSET(IRQ_OFFSET(irq))))
+
+#define TRIG_BASE(irq) \
+ (TRIG_BYTE_OFFSET(IRQ_OFFSET(irq)) + (((irq) < COMM_IRQ_BASE) ? \
+ (this_cpu_read(intcl_reg) + INTCL_CFGR) : (INTCG_base + INTCG_CICFGR)))
+
+static DEFINE_SPINLOCK(setup_lock);
+static void setup_trigger(unsigned long irq, unsigned long trigger)
+{
+ unsigned int tmp;
+
+ spin_lock(&setup_lock);
+
+ /* setup trigger */
+ tmp = readl_relaxed(TRIG_BASE(irq)) & TRIG_VAL_MSK(irq);
+
+ writel_relaxed(tmp | TRIG_VAL(trigger, irq), TRIG_BASE(irq));
+
+ spin_unlock(&setup_lock);
+}
+
static void csky_mpintc_handler(struct pt_regs *regs)
{
void __iomem *reg_base = this_cpu_read(intcl_reg);
- do {
- handle_domain_irq(root_domain,
- readl_relaxed(reg_base + INTCL_RDYIR),
- regs);
- } while (readl_relaxed(reg_base + INTCL_HPPIR) & BIT(31));
+ handle_domain_irq(root_domain,
+ readl_relaxed(reg_base + INTCL_RDYIR), regs);
}
static void csky_mpintc_enable(struct irq_data *d)
{
void __iomem *reg_base = this_cpu_read(intcl_reg);
+ setup_trigger(d->hwirq, __trigger[d->hwirq]);
+
writel_relaxed(d->hwirq, reg_base + INTCL_SENR);
}
@@ -73,6 +101,28 @@ static void csky_mpintc_eoi(struct irq_data *d)
writel_relaxed(d->hwirq, reg_base + INTCL_CACR);
}
+static int csky_mpintc_set_type(struct irq_data *d, unsigned int type)
+{
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_LEVEL_HIGH:
+ __trigger[d->hwirq] = 0;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ __trigger[d->hwirq] = 1;
+ break;
+ case IRQ_TYPE_EDGE_RISING:
+ __trigger[d->hwirq] = 2;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ __trigger[d->hwirq] = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_SMP
static int csky_irq_set_affinity(struct irq_data *d,
const struct cpumask *mask_val,
@@ -116,6 +166,7 @@ static struct irq_chip csky_irq_chip = {
.irq_eoi = csky_mpintc_eoi,
.irq_enable = csky_mpintc_enable,
.irq_disable = csky_mpintc_disable,
+ .irq_set_type = csky_mpintc_set_type,
#ifdef CONFIG_SMP
.irq_set_affinity = csky_irq_set_affinity,
#endif
@@ -136,9 +187,26 @@ static int csky_irqdomain_map(struct irq_domain *d, unsigned int irq,
return 0;
}
+static int csky_irq_domain_xlate_cells(struct irq_domain *d,
+ struct device_node *ctrlr, const u32 *intspec,
+ unsigned int intsize, unsigned long *out_hwirq,
+ unsigned int *out_type)
+{
+ if (WARN_ON(intsize < 1))
+ return -EINVAL;
+
+ *out_hwirq = intspec[0];
+ if (intsize > 1)
+ *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+ else
+ *out_type = IRQ_TYPE_LEVEL_HIGH;
+
+ return 0;
+}
+
static const struct irq_domain_ops csky_irqdomain_ops = {
.map = csky_irqdomain_map,
- .xlate = irq_domain_xlate_onecell,
+ .xlate = csky_irq_domain_xlate_cells,
};
#ifdef CONFIG_SMP
@@ -172,6 +240,10 @@ csky_mpintc_init(struct device_node *node, struct device_node *parent)
if (ret < 0)
nr_irq = INTC_IRQS;
+ __trigger = kcalloc(nr_irq, sizeof(unsigned long), GFP_KERNEL);
+ if (__trigger == NULL)
+ return -ENXIO;
+
if (INTCG_base == NULL) {
INTCG_base = ioremap(mfcr("cr<31, 14>"),
INTCL_SIZE*nr_cpu_ids + INTCG_SIZE);
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 875ac80f690b..7338f90b2f9e 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -53,6 +53,7 @@
/* List of flags for specific v2m implementation */
#define GICV2M_NEEDS_SPI_OFFSET 0x00000001
+#define GICV2M_GRAVITON_ADDRESS_ONLY 0x00000002
static LIST_HEAD(v2m_nodes);
static DEFINE_SPINLOCK(v2m_lock);
@@ -95,15 +96,26 @@ static struct msi_domain_info gicv2m_msi_domain_info = {
.chip = &gicv2m_msi_irq_chip,
};
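+/*
+ * Graviton's frame is address-only: the SPI number is encoded in the
+ * doorbell address as (hwirq - 32) << 3 above the frame base and the MSI
+ * data is ignored, whereas standard v2m uses the fixed SETSPI doorbell
+ * and carries the SPI number in the data.
+ */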
+static phys_addr_t gicv2m_get_msi_addr(struct v2m_data *v2m, int hwirq)
+{
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
+ return v2m->res.start | ((hwirq - 32) << 3);
+ else
+ return v2m->res.start + V2M_MSI_SETSPI_NS;
+}
+
static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct v2m_data *v2m = irq_data_get_irq_chip_data(data);
- phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS;
+ phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq);
msg->address_hi = upper_32_bits(addr);
msg->address_lo = lower_32_bits(addr);
- msg->data = data->hwirq;
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
+ msg->data = 0;
+ else
+ msg->data = data->hwirq;
if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET)
msg->data -= v2m->spi_offset;
@@ -185,7 +197,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
hwirq = v2m->spi_start + offset;
err = iommu_dma_prepare_msi(info->desc,
- v2m->res.start + V2M_MSI_SETSPI_NS);
+ gicv2m_get_msi_addr(v2m, hwirq));
if (err)
return err;
@@ -304,7 +316,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
u32 spi_start, u32 nr_spis,
- struct resource *res)
+ struct resource *res, u32 flags)
{
int ret;
struct v2m_data *v2m;
@@ -317,6 +329,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
INIT_LIST_HEAD(&v2m->entry);
v2m->fwnode = fwnode;
+ v2m->flags = flags;
memcpy(&v2m->res, res, sizeof(struct resource));
@@ -331,7 +344,14 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
v2m->spi_start = spi_start;
v2m->nr_spis = nr_spis;
} else {
- u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
+ u32 typer;
+
+ /* Graviton should always have explicit spi_start/nr_spis */
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) {
+ ret = -EINVAL;
+ goto err_iounmap;
+ }
+ typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer);
v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer);
@@ -352,18 +372,21 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
*
 * Broadcom NS2 GICv2m implementation has an erratum where the MSI data
* is 'spi_number - 32'
+ *
+ * Reading that register fails on the Graviton implementation
*/
- switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) {
- case XGENE_GICV2M_MSI_IIDR:
- v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
- v2m->spi_offset = v2m->spi_start;
- break;
- case BCM_NS2_GICV2M_MSI_IIDR:
- v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
- v2m->spi_offset = 32;
- break;
+ if (!(v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)) {
+ switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) {
+ case XGENE_GICV2M_MSI_IIDR:
+ v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
+ v2m->spi_offset = v2m->spi_start;
+ break;
+ case BCM_NS2_GICV2M_MSI_IIDR:
+ v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
+ v2m->spi_offset = 32;
+ break;
+ }
}
-
v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long),
GFP_KERNEL);
if (!v2m->bm) {
@@ -416,7 +439,8 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n",
spi_start, nr_spis);
- ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res);
+ ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
+ &res, 0);
if (ret) {
of_node_put(child);
break;
@@ -448,6 +472,25 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
return data->fwnode;
}
+static bool acpi_check_amazon_graviton_quirks(void)
+{
+ static struct acpi_table_madt *madt;
+ acpi_status status;
+ bool rc = false;
+
+#define ACPI_AMZN_OEM_ID "AMAZON"
+
+ status = acpi_get_table(ACPI_SIG_MADT, 0,
+ (struct acpi_table_header **)&madt);
+
+ if (ACPI_FAILURE(status) || !madt)
+ return rc;
+ rc = !memcmp(madt->header.oem_id, ACPI_AMZN_OEM_ID, ACPI_OEM_ID_SIZE);
+ acpi_put_table((struct acpi_table_header *)madt);
+
+ return rc;
+}
+
static int __init
acpi_parse_madt_msi(union acpi_subtable_headers *header,
const unsigned long end)
@@ -457,6 +500,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
u32 spi_start = 0, nr_spis = 0;
struct acpi_madt_generic_msi_frame *m;
struct fwnode_handle *fwnode;
+ u32 flags = 0;
m = (struct acpi_madt_generic_msi_frame *)header;
if (BAD_MADT_ENTRY(m, end))
@@ -466,6 +510,13 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
res.end = m->base_address + SZ_4K - 1;
res.flags = IORESOURCE_MEM;
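+	/*
+	 * Graviton encodes the SPI number in the doorbell address, so the
+	 * frame needs a larger window and multi-MSI (which relies on
+	 * consecutive data values) cannot be supported.
+	 */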
+ if (acpi_check_amazon_graviton_quirks()) {
+ pr_info("applying Amazon Graviton quirk\n");
+ res.end = res.start + SZ_8K - 1;
+ flags |= GICV2M_GRAVITON_ADDRESS_ONLY;
+ gicv2m_msi_domain_info.flags &= ~MSI_FLAG_MULTI_PCI_MSI;
+ }
+
if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) {
spi_start = m->spi_base;
nr_spis = m->spi_count;
@@ -480,7 +531,7 @@ acpi_parse_madt_msi(union acpi_subtable_headers *header,
return -EINVAL;
}
- ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res);
+ ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res, flags);
if (ret)
irq_domain_free_fwnode(fwnode);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6377cb864f4c..9bca4896fa6f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -461,8 +461,12 @@ static void gic_deactivate_unhandled(u32 irqnr)
static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
{
+ bool irqs_enabled = interrupts_enabled(regs);
int err;
+ if (irqs_enabled)
+ nmi_enter();
+
if (static_branch_likely(&supports_deactivate_key))
gic_write_eoir(irqnr);
/*
@@ -474,6 +478,9 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
err = handle_domain_nmi(gic_data.domain, irqnr, regs);
if (err)
gic_deactivate_unhandled(irqnr);
+
+ if (irqs_enabled)
+ nmi_exit();
}
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
@@ -1332,6 +1339,9 @@ static int __init gic_init_bases(void __iomem *dist_base,
if (gic_dist_supports_lpis()) {
its_init(handle, &gic_data.rdists, gic_data.domain);
its_cpu_init();
+ } else {
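+		/* no ITS: fall back to GICv2m frames for MSI, if available */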
+ if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
+ gicv2m_init(handle, gic_data.domain);
}
if (gic_prio_masking_enabled()) {
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index a89c693d5b90..3dd28382d5f5 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -344,8 +344,7 @@ static int mbigen_device_probe(struct platform_device *pdev)
err = -EINVAL;
if (err) {
- dev_err(&pdev->dev, "Failed to create mbi-gen@%p irqdomain",
- mgn_chip->base);
+ dev_err(&pdev->dev, "Failed to create mbi-gen irqdomain\n");
return err;
}
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 8eb92eb98f54..dcdc23b9dce6 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -60,6 +60,7 @@ static const struct of_device_id meson_irq_gpio_matches[] = {
{ .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
{ .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params },
{ .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params },
+ { .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
{ }
};
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index 04c05a18600c..f82bc60a6793 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -508,7 +508,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
irq_chip = &p->irq_chip;
- irq_chip->name = name;
+ irq_chip->name = "intc-irqpin";
+ irq_chip->parent_device = dev;
irq_chip->irq_mask = disable_fn;
irq_chip->irq_unmask = enable_fn;
irq_chip->irq_set_type = intc_irqpin_irq_set_type;
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index a449a7c839b3..11abc09ef76c 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -7,7 +7,6 @@
#include <linux/init.h>
#include <linux/platform_device.h>
-#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/io.h>
@@ -48,7 +47,7 @@ struct irqc_priv {
void __iomem *cpu_int_base;
struct irqc_irq irq[IRQC_IRQ_MAX];
unsigned int number_of_irqs;
- struct platform_device *pdev;
+ struct device *dev;
struct irq_chip_generic *gc;
struct irq_domain *irq_domain;
atomic_t wakeup_path;
@@ -61,8 +60,7 @@ static struct irqc_priv *irq_data_to_priv(struct irq_data *data)
static void irqc_dbg(struct irqc_irq *i, char *str)
{
- dev_dbg(&i->p->pdev->dev, "%s (%d:%d)\n",
- str, i->requested_irq, i->hw_irq);
+ dev_dbg(i->p->dev, "%s (%d:%d)\n", str, i->requested_irq, i->hw_irq);
}
static unsigned char irqc_sense[IRQ_TYPE_SENSE_MASK + 1] = {
@@ -125,33 +123,22 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id)
static int irqc_probe(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
+ const char *name = dev_name(dev);
struct irqc_priv *p;
- struct resource *io;
struct resource *irq;
- const char *name = dev_name(&pdev->dev);
int ret;
int k;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p) {
- dev_err(&pdev->dev, "failed to allocate driver data\n");
- ret = -ENOMEM;
- goto err0;
- }
+ p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
- p->pdev = pdev;
+ p->dev = dev;
platform_set_drvdata(pdev, p);
- pm_runtime_enable(&pdev->dev);
- pm_runtime_get_sync(&pdev->dev);
-
- /* get hold of manadatory IOMEM */
- io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!io) {
- dev_err(&pdev->dev, "not enough IOMEM resources\n");
- ret = -EINVAL;
- goto err1;
- }
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
/* allow any number of IRQs between 1 and IRQC_IRQ_MAX */
for (k = 0; k < IRQC_IRQ_MAX; k++) {
@@ -166,42 +153,41 @@ static int irqc_probe(struct platform_device *pdev)
p->number_of_irqs = k;
if (p->number_of_irqs < 1) {
- dev_err(&pdev->dev, "not enough IRQ resources\n");
+ dev_err(dev, "not enough IRQ resources\n");
ret = -EINVAL;
- goto err1;
+ goto err_runtime_pm_disable;
}
/* ioremap IOMEM and setup read/write callbacks */
- p->iomem = ioremap_nocache(io->start, resource_size(io));
- if (!p->iomem) {
- dev_err(&pdev->dev, "failed to remap IOMEM\n");
- ret = -ENXIO;
- goto err2;
+ p->iomem = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(p->iomem)) {
+ ret = PTR_ERR(p->iomem);
+ goto err_runtime_pm_disable;
}
p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */
- p->irq_domain = irq_domain_add_linear(pdev->dev.of_node,
- p->number_of_irqs,
+ p->irq_domain = irq_domain_add_linear(dev->of_node, p->number_of_irqs,
&irq_generic_chip_ops, p);
if (!p->irq_domain) {
ret = -ENXIO;
- dev_err(&pdev->dev, "cannot initialize irq domain\n");
- goto err2;
+ dev_err(dev, "cannot initialize irq domain\n");
+ goto err_runtime_pm_disable;
}
ret = irq_alloc_domain_generic_chips(p->irq_domain, p->number_of_irqs,
- 1, name, handle_level_irq,
+ 1, "irqc", handle_level_irq,
0, 0, IRQ_GC_INIT_NESTED_LOCK);
if (ret) {
- dev_err(&pdev->dev, "cannot allocate generic chip\n");
- goto err3;
+ dev_err(dev, "cannot allocate generic chip\n");
+ goto err_remove_domain;
}
p->gc = irq_get_domain_generic_chip(p->irq_domain, 0);
p->gc->reg_base = p->cpu_int_base;
p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
+ p->gc->chip_types[0].chip.parent_device = dev;
p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type;
@@ -210,46 +196,33 @@ static int irqc_probe(struct platform_device *pdev)
/* request interrupts one by one */
for (k = 0; k < p->number_of_irqs; k++) {
- if (request_irq(p->irq[k].requested_irq, irqc_irq_handler,
- 0, name, &p->irq[k])) {
- dev_err(&pdev->dev, "failed to request IRQ\n");
+ if (devm_request_irq(dev, p->irq[k].requested_irq,
+ irqc_irq_handler, 0, name, &p->irq[k])) {
+ dev_err(dev, "failed to request IRQ\n");
ret = -ENOENT;
- goto err4;
+ goto err_remove_domain;
}
}
- dev_info(&pdev->dev, "driving %d irqs\n", p->number_of_irqs);
+ dev_info(dev, "driving %d irqs\n", p->number_of_irqs);
return 0;
-err4:
- while (--k >= 0)
- free_irq(p->irq[k].requested_irq, &p->irq[k]);
-err3:
+err_remove_domain:
irq_domain_remove(p->irq_domain);
-err2:
- iounmap(p->iomem);
-err1:
- pm_runtime_put(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
- kfree(p);
-err0:
+err_runtime_pm_disable:
+ pm_runtime_put(dev);
+ pm_runtime_disable(dev);
return ret;
}
static int irqc_remove(struct platform_device *pdev)
{
struct irqc_priv *p = platform_get_drvdata(pdev);
- int k;
-
- for (k = 0; k < p->number_of_irqs; k++)
- free_irq(p->irq[k].requested_irq, &p->irq[k]);
irq_domain_remove(p->irq_domain);
- iounmap(p->iomem);
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- kfree(p);
return 0;
}
diff --git a/drivers/irqchip/irq-renesas-rza1.c b/drivers/irqchip/irq-renesas-rza1.c
new file mode 100644
index 000000000000..b1f19b210190
--- /dev/null
+++ b/drivers/irqchip/irq-renesas-rza1.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/A1 IRQC Driver
+ *
+ * Copyright (C) 2019 Glider bvba
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#define IRQC_NUM_IRQ 8
+
+#define ICR0 0 /* Interrupt Control Register 0 */
+
+#define ICR0_NMIL BIT(15) /* NMI Input Level (0=low, 1=high) */
+#define ICR0_NMIE BIT(8) /* Edge Select (0=falling, 1=rising) */
+#define ICR0_NMIF BIT(1) /* NMI Interrupt Request */
+
+#define ICR1 2 /* Interrupt Control Register 1 */
+
+#define ICR1_IRQS(n, sense) ((sense) << ((n) * 2)) /* IRQ Sense Select */
+#define ICR1_IRQS_LEVEL_LOW 0
+#define ICR1_IRQS_EDGE_FALLING 1
+#define ICR1_IRQS_EDGE_RISING 2
+#define ICR1_IRQS_EDGE_BOTH 3
+#define ICR1_IRQS_MASK(n) ICR1_IRQS((n), 3)
+
+#define IRQRR 4 /* IRQ Interrupt Request Register */
+
+
+struct rza1_irqc_priv {
+ struct device *dev;
+ void __iomem *base;
+ struct irq_chip chip;
+ struct irq_domain *irq_domain;
+ struct of_phandle_args map[IRQC_NUM_IRQ];
+};
+
+static struct rza1_irqc_priv *irq_data_to_priv(struct irq_data *data)
+{
+ return data->domain->host_data;
+}
+
+static void rza1_irqc_eoi(struct irq_data *d)
+{
+ struct rza1_irqc_priv *priv = irq_data_to_priv(d);
+ u16 bit = BIT(irqd_to_hwirq(d));
+ u16 tmp;
+
+ tmp = readw_relaxed(priv->base + IRQRR);
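+	/* IRQRR bits are cleared by writing 0; writing 1 leaves them set */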
+ if (tmp & bit)
+ writew_relaxed(GENMASK(IRQC_NUM_IRQ - 1, 0) & ~bit,
+ priv->base + IRQRR);
+
+ irq_chip_eoi_parent(d);
+}
+
+static int rza1_irqc_set_type(struct irq_data *d, unsigned int type)
+{
+ struct rza1_irqc_priv *priv = irq_data_to_priv(d);
+ unsigned int hw_irq = irqd_to_hwirq(d);
+ u16 sense, tmp;
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_LEVEL_LOW:
+ sense = ICR1_IRQS_LEVEL_LOW;
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ sense = ICR1_IRQS_EDGE_FALLING;
+ break;
+
+ case IRQ_TYPE_EDGE_RISING:
+ sense = ICR1_IRQS_EDGE_RISING;
+ break;
+
+ case IRQ_TYPE_EDGE_BOTH:
+ sense = ICR1_IRQS_EDGE_BOTH;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ tmp = readw_relaxed(priv->base + ICR1);
+ tmp &= ~ICR1_IRQS_MASK(hw_irq);
+ tmp |= ICR1_IRQS(hw_irq, sense);
+ writew_relaxed(tmp, priv->base + ICR1);
+ return 0;
+}
+
+static int rza1_irqc_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct rza1_irqc_priv *priv = domain->host_data;
+ struct irq_fwspec *fwspec = arg;
+ unsigned int hwirq = fwspec->param[0];
+ struct irq_fwspec spec;
+ unsigned int i;
+ int ret;
+
+ ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, &priv->chip,
+ priv);
+ if (ret)
+ return ret;
+
+ spec.fwnode = &priv->dev->of_node->fwnode;
+ spec.param_count = priv->map[hwirq].args_count;
+ for (i = 0; i < spec.param_count; i++)
+ spec.param[i] = priv->map[hwirq].args[i];
+
+ return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &spec);
+}
+
+static int rza1_irqc_translate(struct irq_domain *domain,
+ struct irq_fwspec *fwspec, unsigned long *hwirq,
+ unsigned int *type)
+{
+ if (fwspec->param_count != 2 || fwspec->param[0] >= IRQC_NUM_IRQ)
+ return -EINVAL;
+
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[1];
+ return 0;
+}
+
+static const struct irq_domain_ops rza1_irqc_domain_ops = {
+ .alloc = rza1_irqc_alloc,
+ .translate = rza1_irqc_translate,
+};
+
+static int rza1_irqc_parse_map(struct rza1_irqc_priv *priv,
+ struct device_node *gic_node)
+{
+ unsigned int imaplen, i, j, ret;
+ struct device *dev = priv->dev;
+ struct device_node *ipar;
+ const __be32 *imap;
+ u32 intsize;
+
+ imap = of_get_property(dev->of_node, "interrupt-map", &imaplen);
+ if (!imap)
+ return -EINVAL;
+
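+	/*
+	 * Each interrupt-map entry is a two-cell child specifier (number,
+	 * sense) plus the parent phandle, followed by the parent (GIC)
+	 * specifier, which is recorded for use by the .alloc() callback.
+	 */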
+ for (i = 0; i < IRQC_NUM_IRQ; i++) {
+ if (imaplen < 3)
+ return -EINVAL;
+
+ /* Check interrupt number, ignore sense */
+ if (be32_to_cpup(imap) != i)
+ return -EINVAL;
+
+ ipar = of_find_node_by_phandle(be32_to_cpup(imap + 2));
+ if (ipar != gic_node) {
+ of_node_put(ipar);
+ return -EINVAL;
+ }
+
+ imap += 3;
+ imaplen -= 3;
+
+ ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize);
+ of_node_put(ipar);
+ if (ret)
+ return ret;
+
+ if (imaplen < intsize)
+ return -EINVAL;
+
+ priv->map[i].args_count = intsize;
+ for (j = 0; j < intsize; j++)
+ priv->map[i].args[j] = be32_to_cpup(imap++);
+
+ imaplen -= intsize;
+ }
+
+ return 0;
+}
+
+static int rza1_irqc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct irq_domain *parent = NULL;
+ struct device_node *gic_node;
+ struct rza1_irqc_priv *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, priv);
+ priv->dev = dev;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ gic_node = of_irq_find_parent(np);
+ if (gic_node) {
+ parent = irq_find_host(gic_node);
+ of_node_put(gic_node);
+ }
+
+ if (!parent) {
+ dev_err(dev, "cannot find parent domain\n");
+ return -ENODEV;
+ }
+
+ ret = rza1_irqc_parse_map(priv, gic_node);
+ if (ret) {
+ dev_err(dev, "cannot parse %s: %d\n", "interrupt-map", ret);
+ return ret;
+ }
+
+ priv->chip.name = "rza1-irqc",
+ priv->chip.irq_mask = irq_chip_mask_parent,
+ priv->chip.irq_unmask = irq_chip_unmask_parent,
+ priv->chip.irq_eoi = rza1_irqc_eoi,
+ priv->chip.irq_retrigger = irq_chip_retrigger_hierarchy,
+ priv->chip.irq_set_type = rza1_irqc_set_type,
+ priv->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
+
+ priv->irq_domain = irq_domain_add_hierarchy(parent, 0, IRQC_NUM_IRQ,
+ np, &rza1_irqc_domain_ops,
+ priv);
+ if (!priv->irq_domain) {
+ dev_err(dev, "cannot initialize irq domain\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int rza1_irqc_remove(struct platform_device *pdev)
+{
+ struct rza1_irqc_priv *priv = platform_get_drvdata(pdev);
+
+ irq_domain_remove(priv->irq_domain);
+ return 0;
+}
+
+static const struct of_device_id rza1_irqc_dt_ids[] = {
+ { .compatible = "renesas,rza1-irqc" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rza1_irqc_dt_ids);
+
+static struct platform_driver rza1_irqc_device_driver = {
+ .probe = rza1_irqc_probe,
+ .remove = rza1_irqc_remove,
+ .driver = {
+ .name = "renesas_rza1_irqc",
+ .of_match_table = rza1_irqc_dt_ids,
+ }
+};
+
+static int __init rza1_irqc_init(void)
+{
+ return platform_driver_register(&rza1_irqc_device_driver);
+}
+postcore_initcall(rza1_irqc_init);
+
+static void __exit rza1_irqc_exit(void)
+{
+ platform_driver_unregister(&rza1_irqc_device_driver);
+}
+module_exit(rza1_irqc_exit);
+
+MODULE_AUTHOR("Geert Uytterhoeven <geert+renesas@glider.be>");
+MODULE_DESCRIPTION("Renesas RZ/A1 IRQC Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/irqchip/irq-sni-exiu.c b/drivers/irqchip/irq-sni-exiu.c
index 4e983bc6cf93..1d027623c776 100644
--- a/drivers/irqchip/irq-sni-exiu.c
+++ b/drivers/irqchip/irq-sni-exiu.c
@@ -2,7 +2,7 @@
/*
* Driver for Socionext External Interrupt Unit (EXIU)
*
- * Copyright (c) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (c) 2017-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* Based on irq-tegra.c:
* Copyright (C) 2011 Google, Inc.
@@ -17,6 +17,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
@@ -131,9 +132,13 @@ static int exiu_domain_translate(struct irq_domain *domain,
*hwirq = fwspec->param[1] - info->spi_base;
*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
- return 0;
+ } else {
+ if (fwspec->param_count != 2)
+ return -EINVAL;
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
}
- return -EINVAL;
+ return 0;
}
static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq,
@@ -144,16 +149,21 @@ static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq,
struct exiu_irq_data *info = dom->host_data;
irq_hw_number_t hwirq;
- if (fwspec->param_count != 3)
- return -EINVAL; /* Not GIC compliant */
- if (fwspec->param[0] != GIC_SPI)
- return -EINVAL; /* No PPI should point to this domain */
+ parent_fwspec = *fwspec;
+ if (is_of_node(dom->parent->fwnode)) {
+ if (fwspec->param_count != 3)
+ return -EINVAL; /* Not GIC compliant */
+ if (fwspec->param[0] != GIC_SPI)
+ return -EINVAL; /* No PPI should point to this domain */
+ hwirq = fwspec->param[1] - info->spi_base;
+ } else {
+ hwirq = fwspec->param[0];
+ parent_fwspec.param[0] = hwirq + info->spi_base + 32;
+ }
WARN_ON(nr_irqs != 1);
- hwirq = fwspec->param[1] - info->spi_base;
irq_domain_set_hwirq_and_chip(dom, virq, hwirq, &exiu_irq_chip, info);
- parent_fwspec = *fwspec;
parent_fwspec.fwnode = dom->parent->fwnode;
return irq_domain_alloc_irqs_parent(dom, virq, nr_irqs, &parent_fwspec);
}
@@ -164,35 +174,23 @@ static const struct irq_domain_ops exiu_domain_ops = {
.free = irq_domain_free_irqs_common,
};
-static int __init exiu_init(struct device_node *node,
- struct device_node *parent)
+static struct exiu_irq_data *exiu_init(const struct fwnode_handle *fwnode,
+ struct resource *res)
{
- struct irq_domain *parent_domain, *domain;
struct exiu_irq_data *data;
int err;
- if (!parent) {
- pr_err("%pOF: no parent, giving up\n", node);
- return -ENODEV;
- }
-
- parent_domain = irq_find_host(parent);
- if (!parent_domain) {
- pr_err("%pOF: unable to obtain parent domain\n", node);
- return -ENXIO;
- }
-
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- if (of_property_read_u32(node, "socionext,spi-base", &data->spi_base)) {
- pr_err("%pOF: failed to parse 'spi-base' property\n", node);
+ if (fwnode_property_read_u32_array(fwnode, "socionext,spi-base",
+ &data->spi_base, 1)) {
err = -ENODEV;
goto out_free;
}
- data->base = of_iomap(node, 0);
+ data->base = ioremap(res->start, resource_size(res));
if (!data->base) {
err = -ENODEV;
goto out_free;
@@ -202,11 +200,44 @@ static int __init exiu_init(struct device_node *node,
writel_relaxed(0xFFFFFFFF, data->base + EIREQCLR);
writel_relaxed(0xFFFFFFFF, data->base + EIMASK);
+ return data;
+
+out_free:
+ kfree(data);
+ return ERR_PTR(err);
+}
+
+static int __init exiu_dt_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct irq_domain *parent_domain, *domain;
+ struct exiu_irq_data *data;
+ struct resource res;
+
+ if (!parent) {
+ pr_err("%pOF: no parent, giving up\n", node);
+ return -ENODEV;
+ }
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("%pOF: unable to obtain parent domain\n", node);
+ return -ENXIO;
+ }
+
+ if (of_address_to_resource(node, 0, &res)) {
+ pr_err("%pOF: failed to parse memory resource\n", node);
+ return -ENXIO;
+ }
+
+ data = exiu_init(of_node_to_fwnode(node), &res);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_IRQS, node,
&exiu_domain_ops, data);
if (!domain) {
pr_err("%pOF: failed to allocate domain\n", node);
- err = -ENOMEM;
goto out_unmap;
}
@@ -217,8 +248,57 @@ static int __init exiu_init(struct device_node *node,
out_unmap:
iounmap(data->base);
-out_free:
kfree(data);
- return err;
+ return -ENOMEM;
}
-IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_init);
+IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_dt_init);
+
+#ifdef CONFIG_ACPI
+static int exiu_acpi_probe(struct platform_device *pdev)
+{
+ struct irq_domain *domain;
+ struct exiu_irq_data *data;
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "failed to parse memory resource\n");
+ return -ENXIO;
+ }
+
+ data = exiu_init(dev_fwnode(&pdev->dev), res);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ domain = acpi_irq_create_hierarchy(0, NUM_IRQS, dev_fwnode(&pdev->dev),
+ &exiu_domain_ops, data);
+ if (!domain) {
+ dev_err(&pdev->dev, "failed to create IRQ domain\n");
+ goto out_unmap;
+ }
+
+ dev_info(&pdev->dev, "%d interrupts forwarded\n", NUM_IRQS);
+
+ return 0;
+
+out_unmap:
+ iounmap(data->base);
+ kfree(data);
+ return -ENOMEM;
+}
+
+static const struct acpi_device_id exiu_acpi_ids[] = {
+ { "SCX0008" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, exiu_acpi_ids);
+
+static struct platform_driver exiu_driver = {
+ .driver = {
+ .name = "exiu",
+ .acpi_match_table = exiu_acpi_ids,
+ },
+ .probe = exiu_acpi_probe,
+};
+builtin_platform_driver(exiu_driver);
+#endif
diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c
index 067337ab3f20..d88e993aa66d 100644
--- a/drivers/irqchip/qcom-irq-combiner.c
+++ b/drivers/irqchip/qcom-irq-combiner.c
@@ -229,7 +229,6 @@ static int get_registers(struct platform_device *pdev, struct combiner *comb)
static int __init combiner_probe(struct platform_device *pdev)
{
struct combiner *combiner;
- size_t alloc_sz;
int nregs;
int err;
@@ -239,8 +238,8 @@ static int __init combiner_probe(struct platform_device *pdev)
return -EINVAL;
}
- alloc_sz = sizeof(*combiner) + sizeof(struct combiner_reg) * nregs;
- combiner = devm_kzalloc(&pdev->dev, alloc_sz, GFP_KERNEL);
+ combiner = devm_kzalloc(&pdev->dev, struct_size(combiner, regs, nregs),
+ GFP_KERNEL);
if (!combiner)
return -ENOMEM;
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 760f73a49c9f..b0fdeef10bd9 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -784,6 +784,41 @@ config LEDS_NIC78BX
To compile this driver as a module, choose M here: the module
will be called leds-nic78bx.
+config LEDS_SPI_BYTE
+ tristate "LED support for SPI LED controller with a single byte"
+ depends on LEDS_CLASS
+ depends on SPI
+ depends on OF
+ help
+	  This option enables support for LED controllers which use a single byte
+ for controlling the brightness. Currently the following controller is
+ supported: Ubiquiti airCube ISP microcontroller based LED controller.
+
+config LEDS_TI_LMU_COMMON
+ tristate "LED driver for TI LMU"
+ depends on LEDS_CLASS
+ depends on REGMAP
+ help
+ Say Y to enable the LED driver for TI LMU devices.
+ This supports common features between the TI LM3532, LM3631, LM3632,
+ LM3633, LM3695 and LM3697.
+
+config LEDS_LM3697
+ tristate "LED driver for LM3697"
+ depends on LEDS_TI_LMU_COMMON
+ depends on I2C && OF
+ help
+ Say Y to enable the LM3697 LED driver for TI LMU devices.
+ This supports the LED device LM3697.
+
+config LEDS_LM36274
+ tristate "LED driver for LM36274"
+ depends on LEDS_TI_LMU_COMMON
+ depends on MFD_TI_LMU
+ help
+ Say Y to enable the LM36274 LED driver for TI LMU devices.
+ This supports the LED device LM36274.
+
comment "LED Triggers"
source "drivers/leds/trigger/Kconfig"
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 1e9702ebffee..41fb073a39c1 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -77,10 +77,14 @@ obj-$(CONFIG_LEDS_PM8058) += leds-pm8058.o
obj-$(CONFIG_LEDS_MLXCPLD) += leds-mlxcpld.o
obj-$(CONFIG_LEDS_MLXREG) += leds-mlxreg.o
obj-$(CONFIG_LEDS_NIC78BX) += leds-nic78bx.o
+obj-$(CONFIG_LEDS_SPI_BYTE) += leds-spi-byte.o
obj-$(CONFIG_LEDS_MT6323) += leds-mt6323.o
obj-$(CONFIG_LEDS_LM3692X) += leds-lm3692x.o
obj-$(CONFIG_LEDS_SC27XX_BLTC) += leds-sc27xx-bltc.o
obj-$(CONFIG_LEDS_LM3601X) += leds-lm3601x.o
+obj-$(CONFIG_LEDS_TI_LMU_COMMON) += leds-ti-lmu-common.o
+obj-$(CONFIG_LEDS_LM3697) += leds-lm3697.o
+obj-$(CONFIG_LEDS_LM36274) += leds-lm36274.o
# LED SPI Drivers
obj-$(CONFIG_LEDS_CR0014114) += leds-cr0014114.o
diff --git a/drivers/leds/leds-lm36274.c b/drivers/leds/leds-lm36274.c
new file mode 100644
index 000000000000..ed9dc857ec8f
--- /dev/null
+++ b/drivers/leds/leds-lm36274.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+// TI LM36274 LED chip family driver
+// Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/leds.h>
+#include <linux/leds-ti-lmu-common.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <linux/mfd/ti-lmu.h>
+#include <linux/mfd/ti-lmu-register.h>
+
+#include <uapi/linux/uleds.h>
+
+#define LM36274_MAX_STRINGS 4
+#define LM36274_BL_EN BIT(4)
+
+/**
+ * struct lm36274
+ * @pdev: platform device
+ * @led_dev: led class device
+ * @lmu_data: Register and setting values for common code
+ * @regmap: Devices register map
+ * @dev: Pointer to the devices device struct
+ * @led_sources: The LED strings supported in this array
+ * @num_leds: Number of LED strings supported in this array
+ */
+struct lm36274 {
+ struct platform_device *pdev;
+ struct led_classdev led_dev;
+ struct ti_lmu_bank lmu_data;
+ struct regmap *regmap;
+ struct device *dev;
+
+ u32 led_sources[LM36274_MAX_STRINGS];
+ int num_leds;
+};
+
+static int lm36274_brightness_set(struct led_classdev *led_cdev,
+ enum led_brightness brt_val)
+{
+ struct lm36274 *led = container_of(led_cdev, struct lm36274, led_dev);
+
+ return ti_lmu_common_set_brightness(&led->lmu_data, brt_val);
+}
+
+static int lm36274_init(struct lm36274 *lm36274_data)
+{
+ int enable_val = 0;
+ int i;
+
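+	/* each led-sources entry enables one of the LED string outputs */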
+ for (i = 0; i < lm36274_data->num_leds; i++)
+ enable_val |= (1 << lm36274_data->led_sources[i]);
+
+ if (!enable_val) {
+ dev_err(lm36274_data->dev, "No LEDs were enabled\n");
+ return -EINVAL;
+ }
+
+ enable_val |= LM36274_BL_EN;
+
+ return regmap_write(lm36274_data->regmap, LM36274_REG_BL_EN,
+ enable_val);
+}
+
+static int lm36274_parse_dt(struct lm36274 *lm36274_data)
+{
+ struct fwnode_handle *child = NULL;
+ char label[LED_MAX_NAME_SIZE];
+ struct device *dev = &lm36274_data->pdev->dev;
+ const char *name;
+ int child_cnt;
+ int ret = -EINVAL;
+
+ /* There should only be 1 node */
+ child_cnt = device_get_child_node_count(dev);
+ if (child_cnt != 1)
+ return -EINVAL;
+
+ device_for_each_child_node(dev, child) {
+ ret = fwnode_property_read_string(child, "label", &name);
+ if (ret)
+ snprintf(label, sizeof(label),
+ "%s::", lm36274_data->pdev->name);
+ else
+ snprintf(label, sizeof(label),
+ "%s:%s", lm36274_data->pdev->name, name);
+
+ lm36274_data->num_leds = fwnode_property_read_u32_array(child,
+ "led-sources",
+ NULL, 0);
+ if (lm36274_data->num_leds <= 0)
+ return -ENODEV;
+
+ ret = fwnode_property_read_u32_array(child, "led-sources",
+ lm36274_data->led_sources,
+ lm36274_data->num_leds);
+ if (ret) {
+ dev_err(dev, "led-sources property missing\n");
+ return ret;
+ }
+
+ fwnode_property_read_string(child, "linux,default-trigger",
+ &lm36274_data->led_dev.default_trigger);
+
+ }
+
+ lm36274_data->lmu_data.regmap = lm36274_data->regmap;
+ lm36274_data->lmu_data.max_brightness = MAX_BRIGHTNESS_11BIT;
+ lm36274_data->lmu_data.msb_brightness_reg = LM36274_REG_BRT_MSB;
+ lm36274_data->lmu_data.lsb_brightness_reg = LM36274_REG_BRT_LSB;
+
+ lm36274_data->led_dev.name = label;
+ lm36274_data->led_dev.max_brightness = MAX_BRIGHTNESS_11BIT;
+ lm36274_data->led_dev.brightness_set_blocking = lm36274_brightness_set;
+
+ return 0;
+}
+
+static int lm36274_probe(struct platform_device *pdev)
+{
+ struct ti_lmu *lmu = dev_get_drvdata(pdev->dev.parent);
+ struct lm36274 *lm36274_data;
+ int ret;
+
+ lm36274_data = devm_kzalloc(&pdev->dev, sizeof(*lm36274_data),
+ GFP_KERNEL);
+ if (!lm36274_data)
+ return -ENOMEM;
+
+ lm36274_data->pdev = pdev;
+ lm36274_data->dev = lmu->dev;
+ lm36274_data->regmap = lmu->regmap;
+ dev_set_drvdata(&pdev->dev, lm36274_data);
+
+ ret = lm36274_parse_dt(lm36274_data);
+ if (ret) {
+ dev_err(lm36274_data->dev, "Failed to parse DT node\n");
+ return ret;
+ }
+
+ ret = lm36274_init(lm36274_data);
+ if (ret) {
+ dev_err(lm36274_data->dev, "Failed to init the device\n");
+ return ret;
+ }
+
+ return devm_led_classdev_register(lm36274_data->dev,
+ &lm36274_data->led_dev);
+}
+
+static const struct of_device_id of_lm36274_leds_match[] = {
+ { .compatible = "ti,lm36274-backlight", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, of_lm36274_leds_match);
+
+static struct platform_driver lm36274_driver = {
+ .probe = lm36274_probe,
+ .driver = {
+ .name = "lm36274-leds",
+ },
+};
+module_platform_driver(lm36274_driver)
+
+MODULE_DESCRIPTION("Texas Instruments LM36274 LED driver");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/leds/leds-lm3697.c b/drivers/leds/leds-lm3697.c
new file mode 100644
index 000000000000..54e0e35df824
--- /dev/null
+++ b/drivers/leds/leds-lm3697.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+// TI LM3697 LED chip family driver
+// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/
+
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/leds-ti-lmu-common.h>
+
+#define LM3697_REV 0x0
+#define LM3697_RESET 0x1
+#define LM3697_OUTPUT_CONFIG 0x10
+#define LM3697_CTRL_A_RAMP 0x11
+#define LM3697_CTRL_B_RAMP 0x12
+#define LM3697_CTRL_A_B_RT_RAMP 0x13
+#define LM3697_CTRL_A_B_RAMP_CFG 0x14
+#define LM3697_CTRL_A_B_BRT_CFG 0x16
+#define LM3697_CTRL_A_FS_CURR_CFG 0x17
+#define LM3697_CTRL_B_FS_CURR_CFG 0x18
+#define LM3697_PWM_CFG 0x1c
+#define LM3697_CTRL_A_BRT_LSB 0x20
+#define LM3697_CTRL_A_BRT_MSB 0x21
+#define LM3697_CTRL_B_BRT_LSB 0x22
+#define LM3697_CTRL_B_BRT_MSB 0x23
+#define LM3697_CTRL_ENABLE 0x24
+
+#define LM3697_SW_RESET BIT(0)
+
+#define LM3697_CTRL_A_EN BIT(0)
+#define LM3697_CTRL_B_EN BIT(1)
+#define LM3697_CTRL_A_B_EN (LM3697_CTRL_A_EN | LM3697_CTRL_B_EN)
+
+#define LM3697_MAX_LED_STRINGS 3
+
+#define LM3697_CONTROL_A 0
+#define LM3697_CONTROL_B 1
+#define LM3697_MAX_CONTROL_BANKS 2
+
+/**
+ * struct lm3697_led -
+ * @hvled_strings: Array of LED strings associated with a control bank
+ * @label: LED label
+ * @led_dev: LED class device
+ * @priv: Pointer to the device struct
+ * @lmu_data: Register and setting values for common code
+ * @control_bank: Control bank the LED is associated with. 0 is control
+ *		  bank A, 1 is control bank B
+ * @enabled: Enabled state of the control bank (LED_OFF when disabled)
+ * @num_leds: Number of LED strings assigned to the control bank
+ */
+struct lm3697_led {
+ u32 hvled_strings[LM3697_MAX_LED_STRINGS];
+ char label[LED_MAX_NAME_SIZE];
+ struct led_classdev led_dev;
+ struct lm3697 *priv;
+ struct ti_lmu_bank lmu_data;
+ int control_bank;
+ int enabled;
+ int num_leds;
+};
+
+/**
+ * struct lm3697 -
+ * @enable_gpio: Hardware enable gpio
+ * @regulator: LED supply regulator pointer
+ * @client: Pointer to the I2C client
+ * @regmap: Devices register map
+ * @dev: Pointer to the devices device struct
+ * @lock: Lock for reading/writing the device
+ * @bank_cfg: OUTPUT_CONFIG value routing each HVLED string to a bank
+ * @leds: Array of LED strings
+ */
+struct lm3697 {
+ struct gpio_desc *enable_gpio;
+ struct regulator *regulator;
+ struct i2c_client *client;
+ struct regmap *regmap;
+ struct device *dev;
+ struct mutex lock;
+
+ int bank_cfg;
+
+ struct lm3697_led leds[];
+};
+
+static const struct reg_default lm3697_reg_defs[] = {
+ {LM3697_OUTPUT_CONFIG, 0x6},
+ {LM3697_CTRL_A_RAMP, 0x0},
+ {LM3697_CTRL_B_RAMP, 0x0},
+ {LM3697_CTRL_A_B_RT_RAMP, 0x0},
+ {LM3697_CTRL_A_B_RAMP_CFG, 0x0},
+ {LM3697_CTRL_A_B_BRT_CFG, 0x0},
+ {LM3697_CTRL_A_FS_CURR_CFG, 0x13},
+ {LM3697_CTRL_B_FS_CURR_CFG, 0x13},
+ {LM3697_PWM_CFG, 0xc},
+ {LM3697_CTRL_A_BRT_LSB, 0x0},
+ {LM3697_CTRL_A_BRT_MSB, 0x0},
+ {LM3697_CTRL_B_BRT_LSB, 0x0},
+ {LM3697_CTRL_B_BRT_MSB, 0x0},
+ {LM3697_CTRL_ENABLE, 0x0},
+};
+
+static const struct regmap_config lm3697_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+
+ .max_register = LM3697_CTRL_ENABLE,
+ .reg_defaults = lm3697_reg_defs,
+ .num_reg_defaults = ARRAY_SIZE(lm3697_reg_defs),
+ .cache_type = REGCACHE_FLAT,
+};
+
+static int lm3697_brightness_set(struct led_classdev *led_cdev,
+ enum led_brightness brt_val)
+{
+ struct lm3697_led *led = container_of(led_cdev, struct lm3697_led,
+ led_dev);
+ int ctrl_en_val = (1 << led->control_bank);
+ int ret;
+
+ mutex_lock(&led->priv->lock);
+
+ if (brt_val == LED_OFF) {
+ ret = regmap_update_bits(led->priv->regmap, LM3697_CTRL_ENABLE,
+ ctrl_en_val, ~ctrl_en_val);
+ if (ret) {
+ dev_err(&led->priv->client->dev, "Cannot write ctrl register\n");
+ goto brightness_out;
+ }
+
+ led->enabled = LED_OFF;
+ } else {
+ ret = ti_lmu_common_set_brightness(&led->lmu_data, brt_val);
+ if (ret) {
+ dev_err(&led->priv->client->dev,
+ "Cannot write brightness\n");
+ goto brightness_out;
+ }
+
+ if (!led->enabled) {
+ ret = regmap_update_bits(led->priv->regmap,
+ LM3697_CTRL_ENABLE,
+ ctrl_en_val, ctrl_en_val);
+ if (ret) {
+ dev_err(&led->priv->client->dev,
+ "Cannot enable the device\n");
+ goto brightness_out;
+ }
+
+ led->enabled = brt_val;
+ }
+ }
+
+brightness_out:
+ mutex_unlock(&led->priv->lock);
+ return ret;
+}
+
+static int lm3697_init(struct lm3697 *priv)
+{
+ struct lm3697_led *led;
+ int i, ret;
+
+ if (priv->enable_gpio) {
+ gpiod_direction_output(priv->enable_gpio, 1);
+ } else {
+ ret = regmap_write(priv->regmap, LM3697_RESET, LM3697_SW_RESET);
+ if (ret) {
+ dev_err(&priv->client->dev, "Cannot reset the device\n");
+ goto out;
+ }
+ }
+
+ ret = regmap_write(priv->regmap, LM3697_CTRL_ENABLE, 0x0);
+ if (ret) {
+ dev_err(&priv->client->dev, "Cannot write ctrl enable\n");
+ goto out;
+ }
+
+ ret = regmap_write(priv->regmap, LM3697_OUTPUT_CONFIG, priv->bank_cfg);
+ if (ret)
+ dev_err(&priv->client->dev, "Cannot write OUTPUT config\n");
+
+ for (i = 0; i < LM3697_MAX_CONTROL_BANKS; i++) {
+ led = &priv->leds[i];
+ ret = ti_lmu_common_set_ramp(&led->lmu_data);
+ if (ret)
+ dev_err(&priv->client->dev, "Setting the ramp rate failed\n");
+ }
+out:
+ return ret;
+}
+
+static int lm3697_probe_dt(struct lm3697 *priv)
+{
+ struct fwnode_handle *child = NULL;
+ struct lm3697_led *led;
+ const char *name;
+ int control_bank;
+ size_t i = 0;
+ int ret = -EINVAL;
+ int j;
+
+ priv->enable_gpio = devm_gpiod_get_optional(&priv->client->dev,
+ "enable", GPIOD_OUT_LOW);
+ if (IS_ERR(priv->enable_gpio)) {
+ ret = PTR_ERR(priv->enable_gpio);
+ dev_err(&priv->client->dev, "Failed to get enable gpio: %d\n",
+ ret);
+ return ret;
+ }
+
+ priv->regulator = devm_regulator_get(&priv->client->dev, "vled");
+ if (IS_ERR(priv->regulator))
+ priv->regulator = NULL;
+
+ device_for_each_child_node(priv->dev, child) {
+ ret = fwnode_property_read_u32(child, "reg", &control_bank);
+ if (ret) {
+ dev_err(&priv->client->dev, "reg property missing\n");
+ fwnode_handle_put(child);
+ goto child_out;
+ }
+
+ if (control_bank > LM3697_CONTROL_B) {
+ dev_err(&priv->client->dev, "reg property is invalid\n");
+ ret = -EINVAL;
+ fwnode_handle_put(child);
+ goto child_out;
+ }
+
+ led = &priv->leds[i];
+
+ ret = ti_lmu_common_get_brt_res(&priv->client->dev,
+ child, &led->lmu_data);
+ if (ret)
+ dev_warn(&priv->client->dev, "brightness resolution property missing\n");
+
+ led->control_bank = control_bank;
+ led->lmu_data.regmap = priv->regmap;
+ led->lmu_data.runtime_ramp_reg = LM3697_CTRL_A_RAMP +
+ control_bank;
+ led->lmu_data.msb_brightness_reg = LM3697_CTRL_A_BRT_MSB +
+ led->control_bank * 2;
+ led->lmu_data.lsb_brightness_reg = LM3697_CTRL_A_BRT_LSB +
+ led->control_bank * 2;
+
+ led->num_leds = fwnode_property_read_u32_array(child,
+ "led-sources",
+ NULL, 0);
+
+ if (led->num_leds > LM3697_MAX_LED_STRINGS) {
+ dev_err(&priv->client->dev, "To many LED strings defined\n");
+ continue;
+ }
+
+ ret = fwnode_property_read_u32_array(child, "led-sources",
+ led->hvled_strings,
+ led->num_leds);
+ if (ret) {
+ dev_err(&priv->client->dev, "led-sources property missing\n");
+ fwnode_handle_put(child);
+ goto child_out;
+ }
+
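+		/*
+		 * Each bit of LM3697_OUTPUT_CONFIG routes one HVLED string to
+		 * control bank A (0) or control bank B (1).
+		 */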
+ for (j = 0; j < led->num_leds; j++)
+ priv->bank_cfg |=
+ (led->control_bank << led->hvled_strings[j]);
+
+ ret = ti_lmu_common_get_ramp_params(&priv->client->dev,
+ child, &led->lmu_data);
+ if (ret)
+ dev_warn(&priv->client->dev, "runtime-ramp properties missing\n");
+
+ fwnode_property_read_string(child, "linux,default-trigger",
+ &led->led_dev.default_trigger);
+
+ ret = fwnode_property_read_string(child, "label", &name);
+ if (ret)
+ snprintf(led->label, sizeof(led->label),
+ "%s::", priv->client->name);
+ else
+ snprintf(led->label, sizeof(led->label),
+ "%s:%s", priv->client->name, name);
+
+ led->priv = priv;
+ led->led_dev.name = led->label;
+ led->led_dev.max_brightness = led->lmu_data.max_brightness;
+ led->led_dev.brightness_set_blocking = lm3697_brightness_set;
+
+ ret = devm_led_classdev_register(priv->dev, &led->led_dev);
+ if (ret) {
+ dev_err(&priv->client->dev, "led register err: %d\n",
+ ret);
+ fwnode_handle_put(child);
+ goto child_out;
+ }
+
+ i++;
+ }
+
+child_out:
+ return ret;
+}
+
+static int lm3697_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct lm3697 *led;
+ int count;
+ int ret;
+
+ count = device_get_child_node_count(&client->dev);
+ if (!count) {
+ dev_err(&client->dev, "LEDs are not defined in device tree!");
+ return -ENODEV;
+ }
+
+ led = devm_kzalloc(&client->dev, struct_size(led, leds, count),
+ GFP_KERNEL);
+ if (!led)
+ return -ENOMEM;
+
+ mutex_init(&led->lock);
+ i2c_set_clientdata(client, led);
+
+ led->client = client;
+ led->dev = &client->dev;
+ led->regmap = devm_regmap_init_i2c(client, &lm3697_regmap_config);
+ if (IS_ERR(led->regmap)) {
+ ret = PTR_ERR(led->regmap);
+ dev_err(&client->dev, "Failed to allocate register map: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = lm3697_probe_dt(led);
+ if (ret)
+ return ret;
+
+ return lm3697_init(led);
+}
+
+static int lm3697_remove(struct i2c_client *client)
+{
+ struct lm3697 *led = i2c_get_clientdata(client);
+ int ret;
+
+ ret = regmap_update_bits(led->regmap, LM3697_CTRL_ENABLE,
+ LM3697_CTRL_A_B_EN, 0);
+ if (ret) {
+ dev_err(&led->client->dev, "Failed to disable the device\n");
+ return ret;
+ }
+
+ if (led->enable_gpio)
+ gpiod_direction_output(led->enable_gpio, 0);
+
+ if (led->regulator) {
+ ret = regulator_disable(led->regulator);
+ if (ret)
+ dev_err(&led->client->dev,
+ "Failed to disable regulator\n");
+ }
+
+ mutex_destroy(&led->lock);
+
+ return 0;
+}
+
+static const struct i2c_device_id lm3697_id[] = {
+ { "lm3697", 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(i2c, lm3697_id);
+
+static const struct of_device_id of_lm3697_leds_match[] = {
+ { .compatible = "ti,lm3697", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, of_lm3697_leds_match);
+
+static struct i2c_driver lm3697_driver = {
+ .driver = {
+ .name = "lm3697",
+ .of_match_table = of_lm3697_leds_match,
+ },
+ .probe = lm3697_probe,
+ .remove = lm3697_remove,
+ .id_table = lm3697_id,
+};
+module_i2c_driver(lm3697_driver);
+
+MODULE_DESCRIPTION("Texas Instruments LM3697 LED driver");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/leds/leds-max77650.c b/drivers/leds/leds-max77650.c
index 6b74ce9cac12..8a8e5c65b157 100644
--- a/drivers/leds/leds-max77650.c
+++ b/drivers/leds/leds-max77650.c
@@ -64,7 +64,6 @@ static int max77650_led_probe(struct platform_device *pdev)
{
struct device_node *of_node, *child;
struct max77650_led *leds, *led;
- struct device *parent;
struct device *dev;
struct regmap *map;
const char *label;
@@ -72,7 +71,6 @@ static int max77650_led_probe(struct platform_device *pdev)
u32 reg;
dev = &pdev->dev;
- parent = dev->parent;
of_node = dev->of_node;
if (!of_node)
diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index c2bc8f569760..4037c504589c 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -429,7 +429,7 @@ static int pca955x_probe(struct i2c_client *client,
int ngpios = 0;
chip = &pca955x_chipdefs[id->driver_data];
- adapter = to_i2c_adapter(client->dev.parent);
+ adapter = client->adapter;
pdata = dev_get_platdata(&client->dev);
if (!pdata) {
pdata = pca955x_get_pdata(client, chip);
diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index 9328193189ba..48d068f80f11 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c
@@ -72,7 +72,7 @@ static inline size_t sizeof_pwm_leds_priv(int num_leds)
}
static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
- struct led_pwm *led, struct device_node *child)
+ struct led_pwm *led, struct fwnode_handle *fwnode)
{
struct led_pwm_data *led_data = &priv->leds[priv->num_leds];
struct pwm_args pargs;
@@ -85,8 +85,8 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
led_data->cdev.max_brightness = led->max_brightness;
led_data->cdev.flags = LED_CORE_SUSPENDRESUME;
- if (child)
- led_data->pwm = devm_of_pwm_get(dev, child, NULL);
+ if (fwnode)
+ led_data->pwm = devm_fwnode_pwm_get(dev, fwnode, NULL);
else
led_data->pwm = devm_pwm_get(dev, led->name);
if (IS_ERR(led_data->pwm)) {
@@ -111,7 +111,8 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
if (!led_data->period && (led->pwm_period_ns > 0))
led_data->period = led->pwm_period_ns;
- ret = devm_of_led_classdev_register(dev, child, &led_data->cdev);
+ ret = devm_of_led_classdev_register(dev, to_of_node(fwnode),
+ &led_data->cdev);
if (ret == 0) {
priv->num_leds++;
led_pwm_set(&led_data->cdev, led_data->cdev.brightness);
@@ -123,27 +124,35 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
return ret;
}
-static int led_pwm_create_of(struct device *dev, struct led_pwm_priv *priv)
+static int led_pwm_create_fwnode(struct device *dev, struct led_pwm_priv *priv)
{
- struct device_node *child;
+ struct fwnode_handle *fwnode;
struct led_pwm led;
int ret = 0;
memset(&led, 0, sizeof(led));
- for_each_child_of_node(dev->of_node, child) {
- led.name = of_get_property(child, "label", NULL) ? :
- child->name;
+ device_for_each_child_node(dev, fwnode) {
+ ret = fwnode_property_read_string(fwnode, "label", &led.name);
+ if (ret && is_of_node(fwnode))
+ led.name = to_of_node(fwnode)->name;
- led.default_trigger = of_get_property(child,
- "linux,default-trigger", NULL);
- led.active_low = of_property_read_bool(child, "active-low");
- of_property_read_u32(child, "max-brightness",
- &led.max_brightness);
+ if (!led.name) {
+ fwnode_handle_put(fwnode);
+ return -EINVAL;
+ }
+
+ fwnode_property_read_string(fwnode, "linux,default-trigger",
+ &led.default_trigger);
+
+ led.active_low = fwnode_property_read_bool(fwnode,
+ "active-low");
+ fwnode_property_read_u32(fwnode, "max-brightness",
+ &led.max_brightness);
- ret = led_pwm_add(dev, priv, &led, child);
+ ret = led_pwm_add(dev, priv, &led, fwnode);
if (ret) {
- of_node_put(child);
+ fwnode_handle_put(fwnode);
break;
}
}
@@ -161,7 +170,7 @@ static int led_pwm_probe(struct platform_device *pdev)
if (pdata)
count = pdata->num_leds;
else
- count = of_get_child_count(pdev->dev.of_node);
+ count = device_get_child_node_count(&pdev->dev);
if (!count)
return -EINVAL;
@@ -179,7 +188,7 @@ static int led_pwm_probe(struct platform_device *pdev)
break;
}
} else {
- ret = led_pwm_create_of(&pdev->dev, priv);
+ ret = led_pwm_create_fwnode(&pdev->dev, priv);
}
if (ret)
diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c
new file mode 100644
index 000000000000..b231b563b7bb
--- /dev/null
+++ b/drivers/leds/leds-spi-byte.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Christian Mauderer <oss@c-mauderer.de>
+
+/*
+ * The driver supports controllers with a very simple SPI protocol:
+ * - one LED is controlled by a single byte on MOSI
+ * - the value of the byte gives the brightness between two values (lowest to
+ * highest)
+ * - no return value is necessary (no MISO signal)
+ *
+ * The value for minimum and maximum brightness depends on the device
+ * (compatible string).
+ *
+ * Supported devices:
+ * - "ubnt,acb-spi-led": Microcontroller (SONiX 8F26E611LA) based device used
+ * for example in Ubiquiti airCube ISP. Reverse engineered protocol for this
+ * controller:
+ * * Higher two bits set a mode. Lower six bits are a parameter.
+ * * Mode: 00 -> set brightness between 0x00 (min) and 0x3F (max)
+ * * Mode: 01 -> pulsing pattern (min -> max -> min) with an interval. From
+ * some tests, the period is about (50ms + 102ms * parameter). There is a
+ * slightly different pattern starting from 0x10 (longer gap between the
+ * pulses) but the time still follows that calculation.
+ * * Mode: 10 -> same as 01 but with only a ramp from min to max. Again a
+ * slight jump in the pattern at 0x10.
+ * * Mode: 11 -> blinking (off -> 25% -> off -> 25% -> ...) with a period of
+ * (105ms * parameter)
+ * NOTE: This driver currently only supports mode 00.
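+ *
+ * For example, in mode 00 (the only mode this driver implements):
+ * writing 0x00 turns the LED off, 0x1F gives roughly half brightness
+ * and 0x3F full brightness; the two mode bits stay cleared.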
+ */
+
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+#include <linux/mutex.h>
+#include <uapi/linux/uleds.h>
+
+struct spi_byte_chipdef {
+	/* SPI byte that will be sent to switch the LED off */
+	u8 off_value;
+	/* SPI byte that will be sent to switch the LED to maximum brightness */
+ u8 max_value;
+};
+
+struct spi_byte_led {
+ struct led_classdev ldev;
+ struct spi_device *spi;
+ char name[LED_MAX_NAME_SIZE];
+ struct mutex mutex;
+ const struct spi_byte_chipdef *cdef;
+};
+
+static const struct spi_byte_chipdef ubnt_acb_spi_led_cdef = {
+ .off_value = 0x0,
+ .max_value = 0x3F,
+};
+
+static const struct of_device_id spi_byte_dt_ids[] = {
+ { .compatible = "ubnt,acb-spi-led", .data = &ubnt_acb_spi_led_cdef },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, spi_byte_dt_ids);
+
+static int spi_byte_brightness_set_blocking(struct led_classdev *dev,
+ enum led_brightness brightness)
+{
+ struct spi_byte_led *led = container_of(dev, struct spi_byte_led, ldev);
+ u8 value;
+ int ret;
+
+ value = (u8) brightness + led->cdef->off_value;
+
+ mutex_lock(&led->mutex);
+ ret = spi_write(led->spi, &value, sizeof(value));
+ mutex_unlock(&led->mutex);
+
+ return ret;
+}
+
+static int spi_byte_probe(struct spi_device *spi)
+{
+ const struct of_device_id *of_dev_id;
+ struct device_node *child;
+ struct device *dev = &spi->dev;
+ struct spi_byte_led *led;
+ const char *name = "leds-spi-byte::";
+ const char *state;
+ int ret;
+
+ of_dev_id = of_match_device(spi_byte_dt_ids, dev);
+ if (!of_dev_id)
+ return -EINVAL;
+
+ if (of_get_child_count(dev->of_node) != 1) {
+ dev_err(dev, "Device must have exactly one LED sub-node.");
+ return -EINVAL;
+ }
+ child = of_get_next_child(dev->of_node, NULL);
+
+ led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
+ if (!led)
+ return -ENOMEM;
+
+ of_property_read_string(child, "label", &name);
+ strlcpy(led->name, name, sizeof(led->name));
+ led->spi = spi;
+ mutex_init(&led->mutex);
+ led->cdef = of_dev_id->data;
+ led->ldev.name = led->name;
+ led->ldev.brightness = LED_OFF;
+ led->ldev.max_brightness = led->cdef->max_value - led->cdef->off_value;
+ led->ldev.brightness_set_blocking = spi_byte_brightness_set_blocking;
+
+ state = of_get_property(child, "default-state", NULL);
+ if (state) {
+ if (!strcmp(state, "on")) {
+ led->ldev.brightness = led->ldev.max_brightness;
+ } else if (strcmp(state, "off")) {
+ /* all other cases except "off" */
+ dev_err(dev, "default-state can only be 'on' or 'off'");
+ return -EINVAL;
+ }
+ }
+ spi_byte_brightness_set_blocking(&led->ldev,
+ led->ldev.brightness);
+
+ ret = devm_led_classdev_register(&spi->dev, &led->ldev);
+ if (ret) {
+ mutex_destroy(&led->mutex);
+ return ret;
+ }
+ spi_set_drvdata(spi, led);
+
+ return 0;
+}
+
+static int spi_byte_remove(struct spi_device *spi)
+{
+ struct spi_byte_led *led = spi_get_drvdata(spi);
+
+ mutex_destroy(&led->mutex);
+
+ return 0;
+}
+
+static struct spi_driver spi_byte_driver = {
+ .probe = spi_byte_probe,
+ .remove = spi_byte_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = spi_byte_dt_ids,
+ },
+};
+
+module_spi_driver(spi_byte_driver);
+
+MODULE_AUTHOR("Christian Mauderer <oss@c-mauderer.de>");
+MODULE_DESCRIPTION("single byte SPI LED driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:leds-spi-byte");
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
index c59035e157d1..58be20cae183 100644
--- a/drivers/leds/leds-tca6507.c
+++ b/drivers/leds/leds-tca6507.c
@@ -758,7 +758,7 @@ static int tca6507_probe(struct i2c_client *client,
int err;
int i = 0;
- adapter = to_i2c_adapter(client->dev.parent);
+ adapter = client->adapter;
pdata = dev_get_platdata(&client->dev);
if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
diff --git a/drivers/leds/leds-ti-lmu-common.c b/drivers/leds/leds-ti-lmu-common.c
new file mode 100644
index 000000000000..adc7293004f1
--- /dev/null
+++ b/drivers/leds/leds-ti-lmu-common.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2015 Texas Instruments
+// Copyright 2018 Sebastian Reichel
+// Copyright 2018 Pavel Machek <pavel@ucw.cz>
+// TI LMU LED common framework, based on previous work from
+// Milo Kim <milo.kim@ti.com>
+
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/of_device.h>
+
+#include <linux/leds-ti-lmu-common.h>
+
+static const int ramp_table[16] = {2048, 262000, 524000, 1049000, 2090000,
+ 4194000, 8389000, 16780000, 33550000, 41940000,
+ 50330000, 58720000, 67110000, 83880000,
+ 100660000, 117440000};
+
+static int ti_lmu_common_update_brightness(struct ti_lmu_bank *lmu_bank,
+ int brightness)
+{
+ struct regmap *regmap = lmu_bank->regmap;
+ u8 reg, val;
+ int ret;
+
+ /*
+ * Brightness register update
+ *
+ * 11 bit dimming: update LSB bits and write MSB byte.
+ * MSB brightness should be shifted.
+ * 8 bit dimming: write MSB byte.
+ */
+ if (lmu_bank->max_brightness == MAX_BRIGHTNESS_11BIT) {
+ reg = lmu_bank->lsb_brightness_reg;
+ ret = regmap_update_bits(regmap, reg,
+ LMU_11BIT_LSB_MASK,
+ brightness);
+ if (ret)
+ return ret;
+
+ val = brightness >> LMU_11BIT_MSB_SHIFT;
+ } else {
+ val = brightness;
+ }
+
+ reg = lmu_bank->msb_brightness_reg;
+
+ return regmap_write(regmap, reg, val);
+}
+
+int ti_lmu_common_set_brightness(struct ti_lmu_bank *lmu_bank, int brightness)
+{
+ return ti_lmu_common_update_brightness(lmu_bank, brightness);
+}
+EXPORT_SYMBOL(ti_lmu_common_set_brightness);
+
+static int ti_lmu_common_convert_ramp_to_index(unsigned int usec)
+{
+ int size = ARRAY_SIZE(ramp_table);
+ int i;
+
+ if (usec <= ramp_table[0])
+ return 0;
+
+ if (usec > ramp_table[size - 1])
+ return size - 1;
+
+ for (i = 1; i < size; i++) {
+ if (usec == ramp_table[i])
+ return i;
+
+ /* Find an approximate index by looking up the table */
+ if (usec > ramp_table[i - 1] && usec < ramp_table[i]) {
+ if (usec - ramp_table[i - 1] < ramp_table[i] - usec)
+ return i - 1;
+ else
+ return i;
+ }
+ }
+
+ return -EINVAL;
+}
+
+int ti_lmu_common_set_ramp(struct ti_lmu_bank *lmu_bank)
+{
+ struct regmap *regmap = lmu_bank->regmap;
+ u8 ramp;
+ int ramp_up, ramp_down;
+
+ if (lmu_bank->ramp_up_usec == 0 && lmu_bank->ramp_down_usec == 0) {
+ ramp_up = 0;
+ ramp_down = 0;
+ } else {
+ ramp_up = ti_lmu_common_convert_ramp_to_index(lmu_bank->ramp_up_usec);
+ ramp_down = ti_lmu_common_convert_ramp_to_index(lmu_bank->ramp_down_usec);
+ }
+
+ if (ramp_up < 0 || ramp_down < 0)
+ return -EINVAL;
+
+ ramp = (ramp_up << 4) | ramp_down;
+
+ return regmap_write(regmap, lmu_bank->runtime_ramp_reg, ramp);
+
+}
+EXPORT_SYMBOL(ti_lmu_common_set_ramp);
+
+int ti_lmu_common_get_ramp_params(struct device *dev,
+ struct fwnode_handle *child,
+ struct ti_lmu_bank *lmu_data)
+{
+ int ret;
+
+ ret = fwnode_property_read_u32(child, "ramp-up-us",
+ &lmu_data->ramp_up_usec);
+ if (ret)
+ dev_warn(dev, "ramp-up-us property missing\n");
+
+ ret = fwnode_property_read_u32(child, "ramp-down-us",
+ &lmu_data->ramp_down_usec);
+ if (ret)
+ dev_warn(dev, "ramp-down-us property missing\n");
+
+ return 0;
+}
+EXPORT_SYMBOL(ti_lmu_common_get_ramp_params);
+
+int ti_lmu_common_get_brt_res(struct device *dev, struct fwnode_handle *child,
+ struct ti_lmu_bank *lmu_data)
+{
+ int ret;
+
+ ret = device_property_read_u32(dev, "ti,brightness-resolution",
+ &lmu_data->max_brightness);
+ if (ret)
+ ret = fwnode_property_read_u32(child,
+ "ti,brightness-resolution",
+ &lmu_data->max_brightness);
+ if (lmu_data->max_brightness <= 0) {
+ lmu_data->max_brightness = MAX_BRIGHTNESS_8BIT;
+ return ret;
+ }
+
+ if (lmu_data->max_brightness > MAX_BRIGHTNESS_11BIT)
+ lmu_data->max_brightness = MAX_BRIGHTNESS_11BIT;
+
+ return 0;
+}
+EXPORT_SYMBOL(ti_lmu_common_get_brt_res);
+
+MODULE_DESCRIPTION("TI LMU common LED framework");
+MODULE_AUTHOR("Sebastian Reichel");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ti-lmu-led-common");
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 7fa9d174a40c..ce9429ca6dde 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -15,7 +15,7 @@ config LEDS_TRIGGER_TIMER
This allows LEDs to be controlled by a programmable timer
via sysfs. Some LED hardware can be programmed to start
blinking the LED without any further software interaction.
- For more details read Documentation/leds/leds-class.txt.
+ For more details read Documentation/leds/leds-class.rst.
If unsure, say Y.
diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c
index 4c8b0c3cf284..6a72b7e13719 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -70,7 +70,7 @@ static void led_activity_function(struct timer_list *t)
* down to 16us, ensuring we won't overflow 32-bit computations below
* even up to 3k CPUs, while keeping divides cheap on smaller systems.
*/
- curr_boot = ktime_get_boot_ns() * cpus;
+ curr_boot = ktime_get_boottime_ns() * cpus;
diff_boot = (curr_boot - activity_data->last_boot) >> 16;
diff_used = (curr_used - activity_data->last_used) >> 16;
activity_data->last_boot = curr_boot;
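This hunk is one of many call sites touched by the tree-wide rename of ktime_get_boot_ns() to ktime_get_boottime_ns(); both return nanoseconds on CLOCK_BOOTTIME, which keeps advancing across suspend. A hedged sketch of the delta pattern such triggers rely on:

#include <linux/timekeeping.h>
#include <linux/types.h>

/* elapsed time including time spent suspended, in nanoseconds */
static u64 boottime_elapsed_ns(u64 *last_ns)
{
	u64 now = ktime_get_boottime_ns();
	u64 delta = now - *last_ns;

	*last_ns = now;
	return delta;
}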
diff --git a/drivers/leds/trigger/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c
index a80bb82aacc2..80635183fac8 100644
--- a/drivers/leds/trigger/ledtrig-transient.c
+++ b/drivers/leds/trigger/ledtrig-transient.c
@@ -3,7 +3,7 @@
// LED Kernel Transient Trigger
//
// Transient trigger allows one shot timer activation. Please refer to
-// Documentation/leds/ledtrig-transient.txt for details
+// Documentation/leds/ledtrig-transient.rst for details
// Copyright (C) 2012 Shuah Khan <shuahkhan@gmail.com>
//
// Based on Richard Purdie's ledtrig-timer.c and Atsushi Nemoto's
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1b16d34bb785..0fd3ca9bfe54 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2035,7 +2035,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
return -ENOMEM;
key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user,
- key_desc + 1, NULL);
+ key_desc + 1, NULL, NULL);
if (IS_ERR(key)) {
kzfree(new_key_string);
return PTR_ERR(key);
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 139782fefd02..eff26c1b1394 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -19,6 +19,7 @@
#include <linux/io.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/consumer.h> /* GPIO descriptor enum */
+#include <linux/gpio/machine.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/platform_device.h>
@@ -2169,7 +2170,8 @@ static int gpmc_probe_generic_child(struct platform_device *pdev,
waitpin_desc = gpiochip_request_own_desc(&gpmc->gpio_chip,
wait_pin, "WAITPIN",
- 0);
+ GPIO_ACTIVE_HIGH,
+ GPIOD_IN);
if (IS_ERR(waitpin_desc)) {
dev_err(&pdev->dev, "invalid wait-pin: %d\n", wait_pin);
ret = PTR_ERR(waitpin_desc);
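gpiochip_request_own_desc() now takes separate lookup flags and request flags, so a caller states both the line's polarity and its initial configuration, as the GPMC hunk does with GPIO_ACTIVE_HIGH and GPIOD_IN. A minimal sketch for a driver requesting a pin on its own chip (names other than the API call are hypothetical):

#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>	/* GPIO_ACTIVE_HIGH */
#include <linux/gpio/consumer.h>	/* GPIOD_IN */

/* request hardware line 'hwnum' on our own chip as an active-high input */
static struct gpio_desc *example_request(struct gpio_chip *gc,
					 unsigned int hwnum)
{
	return gpiochip_request_own_desc(gc, hwnum, "example-pin",
					 GPIO_ACTIVE_HIGH, GPIOD_IN);
}

The returned descriptor is checked with IS_ERR(), exactly as the wait-pin code above does.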
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a17d275bf1d4..6855ff443e04 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1336,9 +1336,8 @@ config MFD_TI_LMU
select REGMAP_I2C
help
Say yes here to enable support for TI LMU chips.
-
- TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, LM3695 and LM3697.
- It consists of backlight, LED and regulator driver.
+ TI LMU MFD supports LM3532, LM3631, LM3632, LM3633, LM3695 and
+ LM36274. It consists of backlight, LED and regulator drivers.
It provides consistent device controls for lighting functions.
config MFD_OMAP_USB_HOST
diff --git a/drivers/mfd/ti-lmu.c b/drivers/mfd/ti-lmu.c
index 96b21b5af570..fd6e8c417baa 100644
--- a/drivers/mfd/ti-lmu.c
+++ b/drivers/mfd/ti-lmu.c
@@ -108,17 +108,14 @@ static const struct mfd_cell lm3695_devices[] = {
},
};
-static const struct mfd_cell lm3697_devices[] = {
+static const struct mfd_cell lm36274_devices[] = {
+ LM363X_REGULATOR(LM36274_BOOST),
+ LM363X_REGULATOR(LM36274_LDO_POS),
+ LM363X_REGULATOR(LM36274_LDO_NEG),
{
- .name = "ti-lmu-backlight",
- .id = LM3697,
- .of_compatible = "ti,lm3697-backlight",
- },
- /* Monitoring driver for open/short circuit detection */
- {
- .name = "ti-lmu-fault-monitor",
- .id = LM3697,
- .of_compatible = "ti,lm3697-fault-monitor",
+ .name = "lm36274-leds",
+ .id = LM36274,
+ .of_compatible = "ti,lm36274-backlight",
},
};
@@ -134,7 +131,7 @@ TI_LMU_DATA(lm3631, LM3631_MAX_REG);
TI_LMU_DATA(lm3632, LM3632_MAX_REG);
TI_LMU_DATA(lm3633, LM3633_MAX_REG);
TI_LMU_DATA(lm3695, LM3695_MAX_REG);
-TI_LMU_DATA(lm3697, LM3697_MAX_REG);
+TI_LMU_DATA(lm36274, LM36274_MAX_REG);
static int ti_lmu_probe(struct i2c_client *cl, const struct i2c_device_id *id)
{
@@ -203,7 +200,7 @@ static const struct of_device_id ti_lmu_of_match[] = {
{ .compatible = "ti,lm3632", .data = &lm3632_data },
{ .compatible = "ti,lm3633", .data = &lm3633_data },
{ .compatible = "ti,lm3695", .data = &lm3695_data },
- { .compatible = "ti,lm3697", .data = &lm3697_data },
+ { .compatible = "ti,lm36274", .data = &lm36274_data },
{ }
};
MODULE_DEVICE_TABLE(of, ti_lmu_of_match);
@@ -213,7 +210,7 @@ static const struct i2c_device_id ti_lmu_ids[] = {
{ "lm3632", LM3632 },
{ "lm3633", LM3633 },
{ "lm3695", LM3695 },
- { "lm3697", LM3697 },
+ { "lm36274", LM36274 },
{ }
};
MODULE_DEVICE_TABLE(i2c, ti_lmu_ids);
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 17f839dee976..d9fcfd3b5af0 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -236,7 +236,7 @@ void lkdtm_CORRUPT_USER_DS(void)
set_fs(KERNEL_DS);
/* Make sure we do not keep running with a KERNEL_DS! */
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
}
/* Test that VMAP_STACK is actually allocating with a leading guard page */
diff --git a/drivers/mtd/nand/raw/ingenic/Kconfig b/drivers/mtd/nand/raw/ingenic/Kconfig
index 19a96ce515c1..66b7cffdb0c2 100644
--- a/drivers/mtd/nand/raw/ingenic/Kconfig
+++ b/drivers/mtd/nand/raw/ingenic/Kconfig
@@ -16,7 +16,7 @@ config MTD_NAND_JZ4780
if MTD_NAND_JZ4780
config MTD_NAND_INGENIC_ECC
- tristate
+ bool
config MTD_NAND_JZ4740_ECC
tristate "Hardware BCH support for JZ4740 SoC"
diff --git a/drivers/mtd/nand/raw/ingenic/Makefile b/drivers/mtd/nand/raw/ingenic/Makefile
index 1ac4f455baea..b63d36889263 100644
--- a/drivers/mtd/nand/raw/ingenic/Makefile
+++ b/drivers/mtd/nand/raw/ingenic/Makefile
@@ -2,7 +2,9 @@
obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o
obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o
-obj-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o
+ingenic_nand-y += ingenic_nand_drv.o
+ingenic_nand-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o
+
obj-$(CONFIG_MTD_NAND_JZ4740_ECC) += jz4740_ecc.o
obj-$(CONFIG_MTD_NAND_JZ4725B_BCH) += jz4725b_bch.o
obj-$(CONFIG_MTD_NAND_JZ4780_BCH) += jz4780_bch.o
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
index d3e085c5685a..c954189606f6 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.c
@@ -30,7 +30,6 @@ int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
{
return ecc->ops->calculate(ecc, params, buf, ecc_code);
}
-EXPORT_SYMBOL(ingenic_ecc_calculate);
/**
* ingenic_ecc_correct() - detect and correct bit errors
@@ -51,7 +50,6 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc,
{
return ecc->ops->correct(ecc, params, buf, ecc_code);
}
-EXPORT_SYMBOL(ingenic_ecc_correct);
/**
* ingenic_ecc_get() - get the ECC controller device
@@ -111,7 +109,6 @@ struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *of_node)
}
return ecc;
}
-EXPORT_SYMBOL(of_ingenic_ecc_get);
/**
* ingenic_ecc_release() - release the ECC controller device
@@ -122,7 +119,6 @@ void ingenic_ecc_release(struct ingenic_ecc *ecc)
clk_disable_unprepare(ecc->clk);
put_device(ecc->dev);
}
-EXPORT_SYMBOL(ingenic_ecc_release);
int ingenic_ecc_probe(struct platform_device *pdev)
{
@@ -159,8 +155,3 @@ int ingenic_ecc_probe(struct platform_device *pdev)
return 0;
}
EXPORT_SYMBOL(ingenic_ecc_probe);
-
-MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
-MODULE_AUTHOR("Harvey Hunt <harveyhuntnexus@gmail.com>");
-MODULE_DESCRIPTION("Ingenic ECC common driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
index d7b7c0f13909..d7b7c0f13909 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_nand.c
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index b021a5720b42..89773293c64d 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -51,6 +51,7 @@
#define NFC_REG_USER_DATA(x) (0x0050 + ((x) * 4))
#define NFC_REG_SPARE_AREA 0x00A0
#define NFC_REG_PAT_ID 0x00A4
+#define NFC_REG_MDMA_CNT 0x00C4
#define NFC_RAM0_BASE 0x0400
#define NFC_RAM1_BASE 0x0800
@@ -69,6 +70,7 @@
#define NFC_PAGE_SHIFT(x) (((x) < 10 ? 0 : (x) - 10) << 8)
#define NFC_SAM BIT(12)
#define NFC_RAM_METHOD BIT(14)
+#define NFC_DMA_TYPE_NORMAL BIT(15)
#define NFC_DEBUG_CTL BIT(31)
/* define bit use in NFC_ST */
@@ -205,14 +207,13 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand)
* NAND Controller capabilities structure: stores NAND controller capabilities
* for distinction between compatible strings.
*
- * @sram_through_ahb: On A23, we choose to access the internal RAM through AHB
- * instead of MBUS (less configuration). A10, A10s, A13 and
- * A20 use the MBUS but no extra configuration is needed.
+ * @extra_mbus_conf: Unlike the A10, A10s and A13, accessing internal RAM
+ * through MBUS on A23/A33 needs extra configuration.
* @reg_io_data: I/O data register
* @dma_maxburst: DMA maxburst
*/
struct sunxi_nfc_caps {
- bool sram_through_ahb;
+ bool extra_mbus_conf;
unsigned int reg_io_data;
unsigned int dma_maxburst;
};
@@ -368,28 +369,12 @@ static int sunxi_nfc_dma_op_prepare(struct sunxi_nfc *nfc, const void *buf,
goto err_unmap_buf;
}
- /*
- * On A23, we suppose the "internal RAM" (p.12 of the NFC user manual)
- * refers to the NAND controller's internal SRAM. This memory is mapped
- * and so is accessible from the AHB. It seems that it can also be
- * accessed by the MBUS. MBUS accesses are mandatory when using the
- * internal DMA instead of the external DMA engine.
- *
- * During DMA I/O operation, either we access this memory from the AHB
- * by clearing the NFC_RAM_METHOD bit, or we set the bit and use the
- * MBUS. In this case, we should also configure the MBUS DMA length
- * NFC_REG_MDMA_CNT(0xC4) to be chunksize * nchunks. NAND I/O over MBUS
- * are also limited to 32kiB pages.
- */
- if (nfc->caps->sram_through_ahb)
- writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD,
- nfc->regs + NFC_REG_CTL);
- else
- writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
- nfc->regs + NFC_REG_CTL);
-
+ writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
+ nfc->regs + NFC_REG_CTL);
writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM);
writel(chunksize, nfc->regs + NFC_REG_CNT);
+ if (nfc->caps->extra_mbus_conf)
+ writel(chunksize * nchunks, nfc->regs + NFC_REG_MDMA_CNT);
dmat = dmaengine_submit(dmad);
@@ -2151,6 +2136,11 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
dmac_cfg.src_maxburst = nfc->caps->dma_maxburst;
dmac_cfg.dst_maxburst = nfc->caps->dma_maxburst;
dmaengine_slave_config(nfc->dmac, &dmac_cfg);
+
+ if (nfc->caps->extra_mbus_conf)
+ writel(readl(nfc->regs + NFC_REG_CTL) |
+ NFC_DMA_TYPE_NORMAL, nfc->regs + NFC_REG_CTL);
+
} else {
dev_warn(dev, "failed to request rxtx DMA channel\n");
}
@@ -2200,7 +2190,7 @@ static const struct sunxi_nfc_caps sunxi_nfc_a10_caps = {
};
static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = {
- .sram_through_ahb = true,
+ .extra_mbus_conf = true,
.reg_io_data = NFC_REG_A23_IO_DATA,
.dma_maxburst = 8,
};
diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c
index e5586390026a..e6c646007cda 100644
--- a/drivers/mtd/nand/spi/gigadevice.c
+++ b/drivers/mtd/nand/spi/gigadevice.c
@@ -180,7 +180,7 @@ static const struct spinand_info gigadevice_spinand_table[] = {
SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
gd5fxgq4xa_ecc_get_status)),
SPINAND_INFO("GD5F4GQ4xA", 0xF4,
- NAND_MEMORG(1, 2048, 64, 64, 4096, 40, 1, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 4096, 80, 1, 1, 1),
NAND_ECCREQ(8, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
index 6502727049a8..21def3f8fb36 100644
--- a/drivers/mtd/nand/spi/macronix.c
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -100,7 +100,7 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
static const struct spinand_info macronix_spinand_table[] = {
SPINAND_INFO("MX35LF1GE4AB", 0x12,
- NAND_MEMORG(1, 2048, 64, 64, 1024, 40, 1, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
NAND_ECCREQ(4, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
@@ -109,7 +109,7 @@ static const struct spinand_info macronix_spinand_table[] = {
SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
mx35lf1ge4ab_ecc_get_status)),
SPINAND_INFO("MX35LF2GE4AB", 0x22,
- NAND_MEMORG(1, 2048, 64, 64, 2048, 20, 2, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 2, 1, 1),
NAND_ECCREQ(4, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index bf395df3bb37..1a2e2f7629f3 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -87,8 +87,7 @@ config PPP_MPPE
depends on PPP
select CRYPTO
select CRYPTO_SHA1
- select CRYPTO_ARC4
- select CRYPTO_ECB
+ select CRYPTO_LIB_ARC4
---help---
Support for the MPPE Encryption protocol, as employed by the
Microsoft Point-to-Point Tunneling Protocol.
diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c
index 66c8e65f6872..bd3c80b0bc77 100644
--- a/drivers/net/ppp/ppp_mppe.c
+++ b/drivers/net/ppp/ppp_mppe.c
@@ -42,9 +42,10 @@
* deprecated in 2.6
*/
+#include <crypto/arc4.h>
#include <crypto/hash.h>
-#include <crypto/skcipher.h>
#include <linux/err.h>
+#include <linux/fips.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -66,13 +67,6 @@ MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
MODULE_SOFTDEP("pre: arc4");
MODULE_VERSION("1.0.2");
-static unsigned int
-setup_sg(struct scatterlist *sg, const void *address, unsigned int length)
-{
- sg_set_buf(sg, address, length);
- return length;
-}
-
#define SHA1_PAD_SIZE 40
/*
@@ -96,7 +90,7 @@ static inline void sha_pad_init(struct sha_pad *shapad)
* State for an MPPE (de)compressor.
*/
struct ppp_mppe_state {
- struct crypto_sync_skcipher *arc4;
+ struct arc4_ctx arc4;
struct shash_desc *sha1;
unsigned char *sha1_digest;
unsigned char master_key[MPPE_MAX_KEY_LEN];
@@ -155,24 +149,11 @@ static void get_new_key_from_sha(struct ppp_mppe_state * state)
*/
static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
{
- struct scatterlist sg_in[1], sg_out[1];
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
-
- skcipher_request_set_sync_tfm(req, state->arc4);
- skcipher_request_set_callback(req, 0, NULL, NULL);
-
get_new_key_from_sha(state);
if (!initial_key) {
- crypto_sync_skcipher_setkey(state->arc4, state->sha1_digest,
- state->keylen);
- sg_init_table(sg_in, 1);
- sg_init_table(sg_out, 1);
- setup_sg(sg_in, state->sha1_digest, state->keylen);
- setup_sg(sg_out, state->session_key, state->keylen);
- skcipher_request_set_crypt(req, sg_in, sg_out, state->keylen,
- NULL);
- if (crypto_skcipher_encrypt(req))
- printk(KERN_WARNING "mppe_rekey: cipher_encrypt failed\n");
+ arc4_setkey(&state->arc4, state->sha1_digest, state->keylen);
+ arc4_crypt(&state->arc4, state->session_key, state->sha1_digest,
+ state->keylen);
} else {
memcpy(state->session_key, state->sha1_digest, state->keylen);
}
@@ -182,9 +163,7 @@ static void mppe_rekey(struct ppp_mppe_state * state, int initial_key)
state->session_key[1] = 0x26;
state->session_key[2] = 0x9e;
}
- crypto_sync_skcipher_setkey(state->arc4, state->session_key,
- state->keylen);
- skcipher_request_zero(req);
+ arc4_setkey(&state->arc4, state->session_key, state->keylen);
}
/*
@@ -197,7 +176,8 @@ static void *mppe_alloc(unsigned char *options, int optlen)
unsigned int digestsize;
if (optlen != CILEN_MPPE + sizeof(state->master_key) ||
- options[0] != CI_MPPE || options[1] != CILEN_MPPE)
+ options[0] != CI_MPPE || options[1] != CILEN_MPPE ||
+ fips_enabled)
goto out;
state = kzalloc(sizeof(*state), GFP_KERNEL);
@@ -205,12 +185,6 @@ static void *mppe_alloc(unsigned char *options, int optlen)
goto out;
- state->arc4 = crypto_alloc_sync_skcipher("ecb(arc4)", 0, 0);
- if (IS_ERR(state->arc4)) {
- state->arc4 = NULL;
- goto out_free;
- }
-
shash = crypto_alloc_shash("sha1", 0, 0);
if (IS_ERR(shash))
goto out_free;
@@ -251,7 +225,6 @@ out_free:
crypto_free_shash(state->sha1->tfm);
kzfree(state->sha1);
}
- crypto_free_sync_skcipher(state->arc4);
kfree(state);
out:
return NULL;
@@ -267,8 +240,7 @@ static void mppe_free(void *arg)
kfree(state->sha1_digest);
crypto_free_shash(state->sha1->tfm);
kzfree(state->sha1);
- crypto_free_sync_skcipher(state->arc4);
- kfree(state);
+ kzfree(state);
}
}
@@ -367,10 +339,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
int isize, int osize)
{
struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
int proto;
- int err;
- struct scatterlist sg_in[1], sg_out[1];
/*
* Check that the protocol is in the range we handle.
@@ -421,21 +390,7 @@ mppe_compress(void *arg, unsigned char *ibuf, unsigned char *obuf,
ibuf += 2; /* skip to proto field */
isize -= 2;
- /* Encrypt packet */
- sg_init_table(sg_in, 1);
- sg_init_table(sg_out, 1);
- setup_sg(sg_in, ibuf, isize);
- setup_sg(sg_out, obuf, osize);
-
- skcipher_request_set_sync_tfm(req, state->arc4);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in, sg_out, isize, NULL);
- err = crypto_skcipher_encrypt(req);
- skcipher_request_zero(req);
- if (err) {
- printk(KERN_DEBUG "crypto_cypher_encrypt failed\n");
- return -1;
- }
+ arc4_crypt(&state->arc4, obuf, ibuf, isize);
state->stats.unc_bytes += isize;
state->stats.unc_packets++;
@@ -481,10 +436,8 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
int osize)
{
struct ppp_mppe_state *state = (struct ppp_mppe_state *) arg;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, state->arc4);
unsigned ccount;
int flushed = MPPE_BITS(ibuf) & MPPE_BIT_FLUSHED;
- struct scatterlist sg_in[1], sg_out[1];
if (isize <= PPP_HDRLEN + MPPE_OVHD) {
if (state->debug)
@@ -611,19 +564,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
* Decrypt the first byte in order to check if it is
* a compressed or uncompressed protocol field.
*/
- sg_init_table(sg_in, 1);
- sg_init_table(sg_out, 1);
- setup_sg(sg_in, ibuf, 1);
- setup_sg(sg_out, obuf, 1);
-
- skcipher_request_set_sync_tfm(req, state->arc4);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg_in, sg_out, 1, NULL);
- if (crypto_skcipher_decrypt(req)) {
- printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
- osize = DECOMP_ERROR;
- goto out_zap_req;
- }
+ arc4_crypt(&state->arc4, obuf, ibuf, 1);
/*
* Do PFC decompression.
@@ -638,14 +579,7 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
}
/* And finally, decrypt the rest of the packet. */
- setup_sg(sg_in, ibuf + 1, isize - 1);
- setup_sg(sg_out, obuf + 1, osize - 1);
- skcipher_request_set_crypt(req, sg_in, sg_out, isize - 1, NULL);
- if (crypto_skcipher_decrypt(req)) {
- printk(KERN_DEBUG "crypto_cypher_decrypt failed\n");
- osize = DECOMP_ERROR;
- goto out_zap_req;
- }
+ arc4_crypt(&state->arc4, obuf + 1, ibuf + 1, isize - 1);
state->stats.unc_bytes += osize;
state->stats.unc_packets++;
@@ -655,8 +589,6 @@ mppe_decompress(void *arg, unsigned char *ibuf, int isize, unsigned char *obuf,
/* good packet credit */
state->sanity_errors >>= 1;
-out_zap_req:
- skcipher_request_zero(req);
return osize;
sanity_error:
@@ -729,8 +661,7 @@ static struct compressor ppp_mppe = {
static int __init ppp_mppe_init(void)
{
int answer;
- if (!(crypto_has_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) &&
- crypto_has_ahash("sha1", 0, CRYPTO_ALG_ASYNC)))
+ if (fips_enabled || !crypto_has_ahash("sha1", 0, CRYPTO_ALG_ASYNC))
return -ENODEV;
sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL);
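The rewrite drops the "ecb(arc4)" skcipher, and with it the scatterlists, on-stack requests and possible allocation failures, in favour of the small library cipher in <crypto/arc4.h>. A sketch of the bare setkey/crypt pattern that mppe_rekey() and the (de)compress paths now use (the wrapper function is illustrative):

#include <crypto/arc4.h>
#include <linux/string.h>

static void example_arc4(const u8 *key, unsigned int keylen,
			 const u8 *in, u8 *out, unsigned int len)
{
	struct arc4_ctx ctx;

	arc4_setkey(&ctx, key, keylen);	/* initialise the RC4 key schedule */
	arc4_crypt(&ctx, out, in, len);	/* stream cipher: same op both ways */
	memzero_explicit(&ctx, sizeof(ctx));	/* wipe the key state */
}

Because RC4 is a stream cipher, encryption and decryption are the same operation, which is why both mppe_compress() and mppe_decompress() now call arc4_crypt().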
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index fec38a47696e..9f4b117db9d7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -93,7 +93,7 @@ void iwl_mvm_ftm_restart(struct iwl_mvm *mvm)
struct cfg80211_pmsr_result result = {
.status = NL80211_PMSR_STATUS_FAILURE,
.final = 1,
- .host_time = ktime_get_boot_ns(),
+ .host_time = ktime_get_boottime_ns(),
.type = NL80211_PMSR_TYPE_FTM,
};
int i;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index fbd3014e8b82..160b0db27103 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -555,7 +555,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi,
if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)))
- rx_status->boottime_ns = ktime_get_boot_ns();
+ rx_status->boottime_ns = ktime_get_boottime_ns();
/* Take a reference briefly to kick off a d0i3 entry delay so
* we can handle bursts of RX packets without toggling the
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 1824566d08fc..64f950501287 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -1684,7 +1684,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)))
- rx_status->boottime_ns = ktime_get_boot_ns();
+ rx_status->boottime_ns = ktime_get_boottime_ns();
}
if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index cc56ab88fb43..72cd5b3f2d8d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1445,7 +1445,7 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
}
*gp2 = iwl_mvm_get_systime(mvm);
- *boottime = ktime_get_boot_ns();
+ *boottime = ktime_get_boottime_ns();
if (!ps_disabled) {
mvm->ps_disabled = ps_disabled;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1c699a9fa866..a7bf6519d7aa 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1271,7 +1271,7 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw,
*/
if (ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)) {
- rx_status.boottime_ns = ktime_get_boot_ns();
+ rx_status.boottime_ns = ktime_get_boottime_ns();
now = data->abs_bcn_ts;
} else {
now = mac80211_hwsim_get_tsf_raw();
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index c9a485ecee7b..b74dc8bc9755 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -483,7 +483,7 @@ static int wlcore_fw_status(struct wl1271 *wl, struct wl_fw_status *status)
}
/* update the host-chipset time offset */
- wl->time_offset = (ktime_get_boot_ns() >> 10) -
+ wl->time_offset = (ktime_get_boottime_ns() >> 10) -
(s64)(status->fw_localtime);
wl->fw_fast_lnk_map = status->link_fast_bitmap;
diff --git a/drivers/net/wireless/ti/wlcore/rx.c b/drivers/net/wireless/ti/wlcore/rx.c
index d96bb602fae6..307fab21050b 100644
--- a/drivers/net/wireless/ti/wlcore/rx.c
+++ b/drivers/net/wireless/ti/wlcore/rx.c
@@ -93,7 +93,7 @@ static void wl1271_rx_status(struct wl1271 *wl,
}
if (beacon || probe_rsp)
- status->boottime_ns = ktime_get_boot_ns();
+ status->boottime_ns = ktime_get_boottime_ns();
if (beacon)
wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel,
diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index 057c6be330e7..90e56d4c3df3 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -273,7 +273,7 @@ static void wl1271_tx_fill_hdr(struct wl1271 *wl, struct wl12xx_vif *wlvif,
}
/* configure packet life time */
- hosttime = (ktime_get_boot_ns() >> 10);
+ hosttime = (ktime_get_boottime_ns() >> 10);
desc->start_time = cpu_to_le32(hosttime - wl->time_offset);
is_dummy = wl12xx_is_dummy_packet(wl, skb);
diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
index 606999f102eb..be92e1220284 100644
--- a/drivers/net/wireless/virt_wifi.c
+++ b/drivers/net/wireless/virt_wifi.c
@@ -172,7 +172,7 @@ static void virt_wifi_scan_result(struct work_struct *work)
informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
CFG80211_BSS_FTYPE_PRESP,
fake_router_bssid,
- ktime_get_boot_ns(),
+ ktime_get_boottime_ns(),
WLAN_CAPABILITY_ESS, 0,
(void *)&ssid, sizeof(ssid),
DBM_TO_MBM(-50), GFP_KERNEL);
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index a570f2263a42..99a5708b37e3 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -55,7 +55,7 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm)
struct device *dev = &nvdimm->dev;
sprintf(desc, "%s%s", NVDIMM_PREFIX, nvdimm->dimm_id);
- key = request_key(&key_type_encrypted, desc, "");
+ key = request_key(&key_type_encrypted, desc, "", NULL);
if (IS_ERR(key)) {
if (PTR_ERR(key) == -ENOKEY)
dev_dbg(dev, "request_key() found no key\n");
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e4221a107dca..09ae8a970880 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -71,6 +71,14 @@ config ARM_DSU_PMU
system, control logic. The PMU allows counting various events related
to DSU.
+config FSL_IMX8_DDR_PMU
+ tristate "Freescale i.MX8 DDR perf monitor"
+ depends on ARCH_MXC
+ help
+ Provides support for the DDR performance monitor in i.MX8, which
+ can give information about memory throughput and other related
+ events.
+
config HISI_PMU
bool "HiSilicon SoC PMU"
depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 30489941f3d6..2ebb4de17815 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
+obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c
index d2c2978409d2..acce8781c456 100644
--- a/drivers/perf/arm_pmu_acpi.c
+++ b/drivers/perf/arm_pmu_acpi.c
@@ -71,6 +71,76 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
acpi_unregister_gsi(gsi);
}
+#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
+static struct resource spe_resources[] = {
+ {
+ /* irq */
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct platform_device spe_dev = {
+ .name = ARMV8_SPE_PDEV_NAME,
+ .id = -1,
+ .resource = spe_resources,
+ .num_resources = ARRAY_SIZE(spe_resources)
+};
+
+/*
+ * For lack of a better place, hook the normal PMU MADT walk
+ * and create a SPE device if we detect a recent MADT with
+ * a homogeneous PPI mapping.
+ */
+static void arm_spe_acpi_register_device(void)
+{
+ int cpu, hetid, irq, ret;
+ bool first = true;
+ u16 gsi = 0;
+
+ /*
+ * Sanity check all the GICC tables for the same interrupt number.
+ * For now, we only support homogeneous ACPI/SPE machines.
+ */
+ for_each_possible_cpu(cpu) {
+ struct acpi_madt_generic_interrupt *gicc;
+
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->header.length < ACPI_MADT_GICC_SPE)
+ return;
+
+ if (first) {
+ gsi = gicc->spe_interrupt;
+ if (!gsi)
+ return;
+ hetid = find_acpi_cpu_topology_hetero_id(cpu);
+ first = false;
+ } else if ((gsi != gicc->spe_interrupt) ||
+ (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
+ pr_warn("ACPI: SPE must be homogeneous\n");
+ return;
+ }
+ }
+
+ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
+ ACPI_ACTIVE_HIGH);
+ if (irq < 0) {
+ pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
+ return;
+ }
+
+ spe_resources[0].start = irq;
+ ret = platform_device_register(&spe_dev);
+ if (ret < 0) {
+ pr_warn("ACPI: SPE: Unable to register device\n");
+ acpi_unregister_gsi(gsi);
+ }
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
+
static int arm_pmu_acpi_parse_irqs(void)
{
int irq, cpu, irq_cpu, err;
@@ -276,6 +346,8 @@ static int arm_pmu_acpi_init(void)
if (acpi_disabled)
return 0;
+ arm_spe_acpi_register_device();
+
ret = arm_pmu_acpi_parse_irqs();
if (ret)
return ret;
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 49b490925255..4e4984a55cd1 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -27,6 +27,7 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
@@ -1157,7 +1158,13 @@ static const struct of_device_id arm_spe_pmu_of_match[] = {
};
MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
-static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
+static const struct platform_device_id arm_spe_match[] = {
+ { ARMV8_SPE_PDEV_NAME, 0},
+ { }
+};
+MODULE_DEVICE_TABLE(platform, arm_spe_match);
+
+static int arm_spe_pmu_device_probe(struct platform_device *pdev)
{
int ret;
struct arm_spe_pmu *spe_pmu;
@@ -1217,11 +1224,12 @@ static int arm_spe_pmu_device_remove(struct platform_device *pdev)
}
static struct platform_driver arm_spe_pmu_driver = {
+ .id_table = arm_spe_match,
.driver = {
.name = DRVNAME,
.of_match_table = of_match_ptr(arm_spe_pmu_of_match),
},
- .probe = arm_spe_pmu_device_dt_probe,
+ .probe = arm_spe_pmu_device_probe,
.remove = arm_spe_pmu_device_remove,
};
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
new file mode 100644
index 000000000000..63fe21600072
--- /dev/null
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2017 NXP
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#define COUNTER_CNTL 0x0
+#define COUNTER_READ 0x20
+
+#define COUNTER_DPCR1 0x30
+
+#define CNTL_OVER 0x1
+#define CNTL_CLEAR 0x2
+#define CNTL_EN 0x4
+#define CNTL_EN_MASK 0xFFFFFFFB
+#define CNTL_CLEAR_MASK 0xFFFFFFFD
+#define CNTL_OVER_MASK 0xFFFFFFFE
+
+#define CNTL_CSV_SHIFT 24
+#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
+
+#define EVENT_CYCLES_ID 0
+#define EVENT_CYCLES_COUNTER 0
+#define NUM_COUNTERS 4
+
+#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME "imx8_ddr"
+#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+ { .compatible = "fsl,imx8-ddr-pmu",},
+ { .compatible = "fsl,imx8m-ddr-pmu",},
+ { /* sentinel */ }
+};
+
+struct ddr_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct hlist_node node;
+ struct device *dev;
+ struct perf_event *events[NUM_COUNTERS];
+ int active_events;
+ enum cpuhp_state cpuhp_state;
+ int irq;
+ int id;
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+ &ddr_perf_cpumask_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_cpumask_attr_group = {
+ .attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+ IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+ IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
+ IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
+ IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
+ IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
+ IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
+ IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
+ IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
+ IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
+ IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
+ IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
+ IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
+ IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
+ IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
+ IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
+ IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
+ IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
+ IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
+ IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
+ IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
+ IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
+ IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
+ IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+ NULL,
+};
+
+static struct attribute_group ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_format_attr_group = {
+ .name = "format",
+ .attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &ddr_perf_events_attr_group,
+ &ddr_perf_format_attr_group,
+ &ddr_perf_cpumask_attr_group,
+ NULL,
+};
+
+static int ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
+{
+ int i;
+
+ /*
+ * Always map the cycle event to counter 0: the cycles counter
+ * is dedicated to the cycle event and can't be used for other
+ * events.
+ */
+ if (event == EVENT_CYCLES_ID) {
+ if (pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ return EVENT_CYCLES_COUNTER;
+ else
+ return -ENOENT;
+ }
+
+ for (i = 1; i < NUM_COUNTERS; i++) {
+ if (pmu->events[i] == NULL)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
+{
+ pmu->events[counter] = NULL;
+}
+
+static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+ return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (event->cpu < 0) {
+ dev_warn(pmu->dev, "Can't provide per-task data!\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * We must NOT create groups containing mixed PMUs, although software
+ * events are acceptable (for example, to create a group that is
+ * periodically read when an hrtimer, aka cpu-clock, leader triggers).
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+
+ return 0;
+}
+
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_raw_count, new_raw_count;
+ int counter = hwc->idx;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = ddr_perf_read_counter(pmu, counter);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
+
+ local64_add(delta, &event->count);
+}
+
+static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
+ int counter, bool enable)
+{
+ u8 reg = counter * 4 + COUNTER_CNTL;
+ int val;
+
+ if (enable) {
+ /*
+ * The counter must be disabled first and then enabled again;
+ * otherwise, the cycle counter will not work if its previous
+ * state was enabled.
+ */
+ writel(0, pmu->base + reg);
+ val = CNTL_EN | CNTL_CLEAR;
+ val |= FIELD_PREP(CNTL_CSV_MASK, config);
+ writel(val, pmu->base + reg);
+ } else {
+ /* Disable counter */
+ writel(0, pmu->base + reg);
+ }
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ local64_set(&hwc->prev_count, 0);
+
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+
+ hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter;
+ int cfg = event->attr.config;
+
+ counter = ddr_perf_alloc_counter(pmu, cfg);
+ if (counter < 0) {
+ dev_dbg(pmu->dev, "There are not enough counters\n");
+ return -EOPNOTSUPP;
+ }
+
+ pmu->events[counter] = event;
+ pmu->active_events++;
+ hwc->idx = counter;
+
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ ddr_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
+ ddr_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+ ddr_perf_free_counter(pmu, counter);
+ pmu->active_events--;
+ hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ /* enable cycle counter if the cycle event is not in the active list */
+ if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ ddr_perf_counter_enable(ddr_pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ ddr_perf_counter_enable(ddr_pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ false);
+}
+
+static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+ struct device *dev)
+{
+ *pmu = (struct ddr_pmu) {
+ .pmu = (struct pmu) {
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = attr_groups,
+ .event_init = ddr_perf_event_init,
+ .add = ddr_perf_event_add,
+ .del = ddr_perf_event_del,
+ .start = ddr_perf_event_start,
+ .stop = ddr_perf_event_stop,
+ .read = ddr_perf_event_update,
+ .pmu_enable = ddr_perf_pmu_enable,
+ .pmu_disable = ddr_perf_pmu_disable,
+ },
+ .base = base,
+ .dev = dev,
+ };
+
+ pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+ return pmu->id;
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+ int i;
+ struct ddr_pmu *pmu = (struct ddr_pmu *) p;
+ struct perf_event *event, *cycle_event = NULL;
+
+ /* all counters stop when the cycle counter is disabled */
+ ddr_perf_counter_enable(pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ false);
+ /*
+ * When the cycle counter overflows, all counters are stopped,
+ * and an IRQ is raised. If any other counter overflows, it
+ * continues counting, and no IRQ is raised.
+ *
+ * Cycles occur at least 4 times as often as other events, so we
+ * can update all events on a cycle counter overflow and not
+ * lose events.
+ */
+ for (i = 0; i < NUM_COUNTERS; i++) {
+ if (!pmu->events[i])
+ continue;
+
+ event = pmu->events[i];
+
+ ddr_perf_event_update(event);
+
+ if (event->hw.idx == EVENT_CYCLES_COUNTER)
+ cycle_event = event;
+ }
+
+ ddr_perf_counter_enable(pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ true);
+ if (cycle_event)
+ ddr_perf_event_update(cycle_event);
+
+ return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+ int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+
+ WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)));
+
+ return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu;
+ struct device_node *np;
+ void __iomem *base;
+ char *name;
+ int num;
+ int ret;
+ int irq;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ np = pdev->dev.of_node;
+
+ pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ num = ddr_perf_init(pmu, base, &pdev->dev);
+
+ platform_set_drvdata(pdev, pmu);
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
+ num);
+ if (!name)
+ return -ENOMEM;
+
+ pmu->cpu = raw_smp_processor_id();
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ DDR_CPUHP_CB_NAME,
+ NULL,
+ ddr_perf_offline_cpu);
+
+ if (ret < 0) {
+ dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
+ goto ddr_perf_err;
+ }
+
+ pmu->cpuhp_state = ret;
+
+ /* Register the pmu instance for cpu hotplug */
+ cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+ /* Request irq */
+ irq = of_irq_get(np, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "Failed to get irq: %d", irq);
+ ret = irq;
+ goto ddr_perf_err;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq,
+ ddr_perf_irq_handler,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ DDR_CPUHP_CB_NAME,
+ pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Request irq failed: %d", ret);
+ goto ddr_perf_err;
+ }
+
+ pmu->irq = irq;
+ ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu));
+ if (ret) {
+ dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
+ goto ddr_perf_err;
+ }
+
+ ret = perf_pmu_register(&pmu->pmu, name, -1);
+ if (ret)
+ goto ddr_perf_err;
+
+ return 0;
+
+ddr_perf_err:
+ if (pmu->cpuhp_state)
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+ ida_simple_remove(&ddr_ida, pmu->id);
+ dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
+ return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+ irq_set_affinity_hint(pmu->irq, NULL);
+
+ perf_pmu_unregister(&pmu->pmu);
+
+ ida_simple_remove(&ddr_ida, pmu->id);
+ return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+ .driver = {
+ .name = "imx-ddr-pmu",
+ .of_match_table = imx_ddr_pmu_dt_ids,
+ },
+ .probe = ddr_perf_probe,
+ .remove = ddr_perf_remove,
+};
+
+module_platform_driver(imx_ddr_pmu_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index f888117b0efc..8692f6b79f93 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -166,12 +166,15 @@ struct rapl_domain {
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
+/* maximum rapl package domain name: package-%d-die-%d */
+#define PACKAGE_DOMAIN_NAME_LENGTH 30
+
-/* Each physical package contains multiple domains, these are the common
+/* Each rapl package contains multiple domains; these are the common
* data across RAPL domains within a package.
*/
struct rapl_package {
- unsigned int id; /* physical package/socket id */
+ unsigned int id; /* logical die id; equals the physical id on 1-die systems */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit;
@@ -186,6 +189,7 @@ struct rapl_package {
int lead_cpu; /* one active cpu per package for access */
/* Track active cpus */
struct cpumask cpumask;
+ char name[PACKAGE_DOMAIN_NAME_LENGTH];
};
struct rapl_defaults {
@@ -252,8 +256,9 @@ static struct powercap_control_type *control_type; /* PowerCap Controller */
static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
/* caller to ensure CPU hotplug lock is held */
-static struct rapl_package *find_package_by_id(int id)
+static struct rapl_package *rapl_find_package_domain(int cpu)
{
+ int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
list_for_each_entry(rp, &rapl_packages, plist) {
@@ -913,8 +918,8 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
- pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n",
- rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+ pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
+ rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
return 0;
}
@@ -938,8 +943,8 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
- pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n",
- rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
+ pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
+ rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
return 0;
}
@@ -1168,7 +1173,7 @@ static void rapl_update_domain_data(struct rapl_package *rp)
u64 val;
for (dmn = 0; dmn < rp->nr_domains; dmn++) {
- pr_debug("update package %d domain %s data\n", rp->id,
+ pr_debug("update %s domain %s data\n", rp->name,
rp->domains[dmn].name);
/* exclude non-raw primitives */
for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
@@ -1193,7 +1198,6 @@ static void rapl_unregister_powercap(void)
static int rapl_package_register_powercap(struct rapl_package *rp)
{
struct rapl_domain *rd;
- char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/
struct powercap_zone *power_zone = NULL;
int nr_pl, ret;
@@ -1204,20 +1208,16 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
if (rd->id == RAPL_DOMAIN_PACKAGE) {
nr_pl = find_nr_power_limit(rd);
- pr_debug("register socket %d package domain %s\n",
- rp->id, rd->name);
- memset(dev_name, 0, sizeof(dev_name));
- snprintf(dev_name, sizeof(dev_name), "%s-%d",
- rd->name, rp->id);
+ pr_debug("register package domain %s\n", rp->name);
power_zone = powercap_register_zone(&rd->power_zone,
control_type,
- dev_name, NULL,
+ rp->name, NULL,
&zone_ops[rd->id],
nr_pl,
&constraint_ops);
if (IS_ERR(power_zone)) {
- pr_debug("failed to register package, %d\n",
- rp->id);
+ pr_debug("failed to register power zone %s\n",
+ rp->name);
return PTR_ERR(power_zone);
}
/* track parent zone in per package/socket data */
@@ -1243,8 +1243,8 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
&constraint_ops);
if (IS_ERR(power_zone)) {
- pr_debug("failed to register power_zone, %d:%s:%s\n",
- rp->id, rd->name, dev_name);
+ pr_debug("failed to register power_zone, %s:%s\n",
+ rp->name, rd->name);
ret = PTR_ERR(power_zone);
goto err_cleanup;
}
@@ -1257,7 +1257,7 @@ err_cleanup:
* failed after the first domain setup.
*/
while (--rd >= rp->domains) {
- pr_debug("unregister package %d domain %s\n", rp->id, rd->name);
+ pr_debug("unregister %s domain %s\n", rp->name, rd->name);
powercap_unregister_zone(control_type, &rd->power_zone);
}
@@ -1288,7 +1288,7 @@ static int __init rapl_register_psys(void)
rd->rpl[0].name = pl1_name;
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
- rd->rp = find_package_by_id(0);
+ rd->rp = rapl_find_package_domain(0);
power_zone = powercap_register_zone(&rd->power_zone, control_type,
"psys", NULL,
@@ -1367,8 +1367,8 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
/* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
if (val64) {
- pr_info("RAPL package %d domain %s locked by BIOS\n",
- rd->rp->id, rd->name);
+ pr_info("RAPL %s domain %s locked by BIOS\n",
+ rd->rp->name, rd->name);
rd->state |= DOMAIN_STATE_BIOS_LOCKED;
}
}
@@ -1397,10 +1397,10 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
}
rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
if (!rp->nr_domains) {
- pr_debug("no valid rapl domains found in package %d\n", rp->id);
+ pr_debug("no valid rapl domains found in %s\n", rp->name);
return -ENODEV;
}
- pr_debug("found %d domains on package %d\n", rp->nr_domains, rp->id);
+ pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
GFP_KERNEL);
@@ -1433,8 +1433,8 @@ static void rapl_remove_package(struct rapl_package *rp)
rd_package = rd;
continue;
}
- pr_debug("remove package, undo power limit on %d: %s\n",
- rp->id, rd->name);
+ pr_debug("remove package, undo power limit on %s: %s\n",
+ rp->name, rd->name);
powercap_unregister_zone(control_type, &rd->power_zone);
}
/* do parent zone last */
@@ -1444,9 +1444,11 @@ static void rapl_remove_package(struct rapl_package *rp)
}
/* called from CPU hotplug notifier, hotplug lock held */
-static struct rapl_package *rapl_add_package(int cpu, int pkgid)
+static struct rapl_package *rapl_add_package(int cpu)
{
+ int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
int ret;
rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
@@ -1454,9 +1456,16 @@ static struct rapl_package *rapl_add_package(int cpu, int pkgid)
return ERR_PTR(-ENOMEM);
/* add the new package to the list */
- rp->id = pkgid;
+ rp->id = id;
rp->lead_cpu = cpu;
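+
+	/* e.g. "package-0-die-1" on a multi-die part, "package-0" otherwise */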
+ if (topology_max_die_per_package() > 1)
+ snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
+ "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id);
+ else
+ snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
+ c->phys_proc_id);
+
/* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) ||
rapl_defaults->check_unit(rp, cpu)) {
@@ -1485,12 +1494,11 @@ err_free_package:
*/
static int rapl_cpu_online(unsigned int cpu)
{
- int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
- rp = find_package_by_id(pkgid);
+ rp = rapl_find_package_domain(cpu);
if (!rp) {
- rp = rapl_add_package(cpu, pkgid);
+ rp = rapl_add_package(cpu);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -1500,11 +1508,10 @@ static int rapl_cpu_online(unsigned int cpu)
static int rapl_cpu_down_prep(unsigned int cpu)
{
- int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
int lead_cpu;
- rp = find_package_by_id(pkgid);
+ rp = rapl_find_package_domain(cpu);
if (!rp)
return 0;
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index dff5a93f7daa..a7e57516959e 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -401,6 +401,17 @@ config PWM_SAMSUNG
To compile this driver as a module, choose M here: the module
will be called pwm-samsung.
+config PWM_SIFIVE
+ tristate "SiFive PWM support"
+ depends on OF
+ depends on COMMON_CLK
+ depends on RISCV || COMPILE_TEST
+ help
+ Generic PWM framework driver for SiFive SoCs.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-sifive.
+
config PWM_SPEAR
tristate "STMicroelectronics SPEAr PWM support"
depends on PLAT_SPEAR
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c368599d36c0..76b555b51887 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_PWM_RCAR) += pwm-rcar.o
obj-$(CONFIG_PWM_RENESAS_TPU) += pwm-renesas-tpu.o
obj-$(CONFIG_PWM_ROCKCHIP) += pwm-rockchip.o
obj-$(CONFIG_PWM_SAMSUNG) += pwm-samsung.o
+obj-$(CONFIG_PWM_SIFIVE) += pwm-sifive.o
obj-$(CONFIG_PWM_SPEAR) += pwm-spear.o
obj-$(CONFIG_PWM_STI) += pwm-sti.o
obj-$(CONFIG_PWM_STM32) += pwm-stm32.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 275b5f399a1a..c3ab07ab31a9 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -6,6 +6,7 @@
* Copyright (C) 2011-2012 Avionic Design GmbH
*/
+#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/pwm.h>
#include <linux/radix-tree.h>
@@ -626,8 +627,35 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np)
return ERR_PTR(-EPROBE_DEFER);
}
+static struct device_link *pwm_device_link_add(struct device *dev,
+ struct pwm_device *pwm)
+{
+ struct device_link *dl;
+
+ if (!dev) {
+ /*
+ * No device for the PWM consumer has been provided. It may
+ * impact the PM sequence ordering: the PWM supplier may get
+ * suspended before the consumer.
+ */
+ dev_warn(pwm->chip->dev,
+ "No consumer device specified to create a link to\n");
+ return NULL;
+ }
+
+ dl = device_link_add(dev, pwm->chip->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+ if (!dl) {
+ dev_err(dev, "failed to create device link to %s\n",
+ dev_name(pwm->chip->dev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ return dl;
+}
+
/**
* of_pwm_get() - request a PWM via the PWM framework
+ * @dev: device for PWM consumer
* @np: device node to get the PWM from
* @con_id: consumer name
*
@@ -645,10 +673,12 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np)
* Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded
* error code on failure.
*/
-struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id)
+struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
+ const char *con_id)
{
struct pwm_device *pwm = NULL;
struct of_phandle_args args;
+ struct device_link *dl;
struct pwm_chip *pc;
int index = 0;
int err;
@@ -679,6 +709,14 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id)
if (IS_ERR(pwm))
goto put;
+ dl = pwm_device_link_add(dev, pwm);
+ if (IS_ERR(dl)) {
+ /* of_xlate ended up calling pwm_request_from_chip() */
+ pwm_free(pwm);
+ pwm = ERR_CAST(dl);
+ goto put;
+ }
+
/*
* If a consumer name was not given, try to look it up from the
* "pwm-names" property if it exists. Otherwise use the name of
@@ -700,6 +738,85 @@ put:
}
EXPORT_SYMBOL_GPL(of_pwm_get);
+#if IS_ENABLED(CONFIG_ACPI)
+static struct pwm_chip *device_to_pwmchip(struct device *dev)
+{
+ struct pwm_chip *chip;
+
+ mutex_lock(&pwm_lock);
+
+ list_for_each_entry(chip, &pwm_chips, list) {
+ struct acpi_device *adev = ACPI_COMPANION(chip->dev);
+
+ if ((chip->dev == dev) || (adev && &adev->dev == dev)) {
+ mutex_unlock(&pwm_lock);
+ return chip;
+ }
+ }
+
+ mutex_unlock(&pwm_lock);
+
+ return ERR_PTR(-EPROBE_DEFER);
+}
+#endif
+
+/**
+ * acpi_pwm_get() - request a PWM via parsing "pwms" property in ACPI
+ * @fwnode: firmware node to get the "pwm" property from
+ *
+ * Returns the PWM device parsed from the fwnode and index specified in the
+ * "pwms" property or a negative error-code on failure.
+ * Values parsed from the device tree are stored in the returned PWM device
+ * object.
+ *
+ * This is analogous to of_pwm_get() except con_id is not yet supported.
+ * ACPI entries must look like
+ * Package () {"pwms", Package ()
+ * { <PWM device reference>, <PWM index>, <PWM period> [, <PWM flags>]}}
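+ *
+ * A hypothetical example (device path and values are illustrative; the
+ * period is given in nanoseconds):
+ * Package () { "pwms", Package () { \_SB.PWM0, 0, 5000000, 0 } }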
+ *
+ * Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded
+ * error code on failure.
+ */
+static struct pwm_device *acpi_pwm_get(struct fwnode_handle *fwnode)
+{
+ struct pwm_device *pwm = ERR_PTR(-ENODEV);
+#if IS_ENABLED(CONFIG_ACPI)
+ struct fwnode_reference_args args;
+ struct acpi_device *acpi;
+ struct pwm_chip *chip;
+ int ret;
+
+ memset(&args, 0, sizeof(args));
+
+ ret = __acpi_node_get_property_reference(fwnode, "pwms", 0, 3, &args);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ acpi = to_acpi_device_node(args.fwnode);
+ if (!acpi)
+ return ERR_PTR(-EINVAL);
+
+ if (args.nargs < 2)
+ return ERR_PTR(-EPROTO);
+
+ chip = device_to_pwmchip(&acpi->dev);
+ if (IS_ERR(chip))
+ return ERR_CAST(chip);
+
+ pwm = pwm_request_from_chip(chip, args.args[0], NULL);
+ if (IS_ERR(pwm))
+ return pwm;
+
+ pwm->args.period = args.args[1];
+ pwm->args.polarity = PWM_POLARITY_NORMAL;
+
+ if (args.nargs > 2 && args.args[2] & PWM_POLARITY_INVERTED)
+ pwm->args.polarity = PWM_POLARITY_INVERSED;
+#endif
+
+ return pwm;
+}
+
/**
* pwm_add_table() - register PWM device consumers
* @table: array of consumers to register
@@ -754,6 +871,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
const char *dev_id = dev ? dev_name(dev) : NULL;
struct pwm_device *pwm;
struct pwm_chip *chip;
+ struct device_link *dl;
unsigned int best = 0;
struct pwm_lookup *p, *chosen = NULL;
unsigned int match;
@@ -761,7 +879,11 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
/* look up via DT first */
if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node)
- return of_pwm_get(dev->of_node, con_id);
+ return of_pwm_get(dev, dev->of_node, con_id);
+
+ /* then lookup via ACPI */
+ if (dev && is_acpi_node(dev->fwnode))
+ return acpi_pwm_get(dev->fwnode);
/*
* We look up the provider in the static table typically provided by
@@ -838,6 +960,12 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
if (IS_ERR(pwm))
return pwm;
+ dl = pwm_device_link_add(dev, pwm);
+ if (IS_ERR(dl)) {
+ pwm_free(pwm);
+ return ERR_CAST(dl);
+ }
+
pwm->args.period = chosen->period;
pwm->args.polarity = chosen->polarity;
@@ -930,7 +1058,7 @@ struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
if (!ptr)
return ERR_PTR(-ENOMEM);
- pwm = of_pwm_get(np, con_id);
+ pwm = of_pwm_get(dev, np, con_id);
if (!IS_ERR(pwm)) {
*ptr = pwm;
devres_add(dev, ptr);
@@ -942,6 +1070,44 @@ struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
}
EXPORT_SYMBOL_GPL(devm_of_pwm_get);
+/**
+ * devm_fwnode_pwm_get() - request a resource managed PWM from firmware node
+ * @dev: device for PWM consumer
+ * @fwnode: firmware node to get the PWM from
+ * @con_id: consumer name
+ *
+ * Returns the PWM device parsed from the firmware node. See of_pwm_get() and
+ * acpi_pwm_get() for a detailed description.
+ *
+ * Returns: A pointer to the requested PWM device or an ERR_PTR()-encoded
+ * error code on failure.
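+ *
+ * A minimal consumer sketch, assuming the consumer's own firmware node
+ * describes the PWM:
+ *
+ *	pwm = devm_fwnode_pwm_get(dev, dev_fwnode(dev), NULL);
+ *	if (IS_ERR(pwm))
+ *		return PTR_ERR(pwm);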
+ */
+struct pwm_device *devm_fwnode_pwm_get(struct device *dev,
+ struct fwnode_handle *fwnode,
+ const char *con_id)
+{
+ struct pwm_device **ptr, *pwm = ERR_PTR(-ENODEV);
+
+ ptr = devres_alloc(devm_pwm_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return ERR_PTR(-ENOMEM);
+
+ if (is_of_node(fwnode))
+ pwm = of_pwm_get(dev, to_of_node(fwnode), con_id);
+ else if (is_acpi_node(fwnode))
+ pwm = acpi_pwm_get(fwnode);
+
+ if (!IS_ERR(pwm)) {
+ *ptr = pwm;
+ devres_add(dev, ptr);
+ } else {
+ devres_free(ptr);
+ }
+
+ return pwm;
+}
+EXPORT_SYMBOL_GPL(devm_fwnode_pwm_get);
+
static int devm_pwm_match(struct device *dev, void *res, void *data)
{
struct pwm_device **p = res;
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index 7186db85b15f..d13a83f430ac 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -235,6 +235,7 @@ static const struct of_device_id atmel_hlcdc_dt_ids[] = {
.compatible = "atmel,sama5d4-hlcdc",
.data = &atmel_hlcdc_pwm_sama5d3_errata,
},
+ { .compatible = "microchip,sam9x60-hlcdc", },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, atmel_hlcdc_dt_ids);
diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c
index 5652f461d994..f6fe0b922e1e 100644
--- a/drivers/pwm/pwm-bcm2835.c
+++ b/drivers/pwm/pwm-bcm2835.c
@@ -70,7 +70,7 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
return -EINVAL;
}
- scaler = NSEC_PER_SEC / rate;
+ scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);
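+	/* e.g. rate = 9.2 MHz: 1e9 / 9.2e6 ~= 108.7, so scaler rounds to 109 */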
if (period_ns <= MIN_PERIOD) {
dev_err(pc->dev, "period %d not supported, minimum %d\n",
@@ -78,8 +78,10 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
return -EINVAL;
}
- writel(duty_ns / scaler, pc->base + DUTY(pwm->hwpwm));
- writel(period_ns / scaler, pc->base + PERIOD(pwm->hwpwm));
+ writel(DIV_ROUND_CLOSEST(duty_ns, scaler),
+ pc->base + DUTY(pwm->hwpwm));
+ writel(DIV_ROUND_CLOSEST(period_ns, scaler),
+ pc->base + PERIOD(pwm->hwpwm));
return 0;
}
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index a39b48839df7..9d31a217111d 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -34,17 +34,19 @@ struct fsl_ftm_soc {
bool has_enable_bits;
};
+struct fsl_pwm_periodcfg {
+ enum fsl_pwm_clk clk_select;
+ unsigned int clk_ps;
+ unsigned int mod_period;
+};
+
struct fsl_pwm_chip {
struct pwm_chip chip;
-
struct mutex lock;
-
- unsigned int cnt_select;
- unsigned int clk_ps;
-
struct regmap *regmap;
- int period_ns;
+ /* This value is valid iff a pwm is running */
+ struct fsl_pwm_periodcfg period;
struct clk *ipg_clk;
struct clk *clk[FSL_PWM_CLK_MAX];
@@ -57,6 +59,33 @@ static inline struct fsl_pwm_chip *to_fsl_chip(struct pwm_chip *chip)
return container_of(chip, struct fsl_pwm_chip, chip);
}
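+
+/*
+ * The FTM configuration registers are write protected (FTM_FMS[WPEN]).
+ * Clear the protection via FTM_MODE[WPDIS] before changing them and
+ * restore it afterwards.
+ */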
+static void ftm_clear_write_protection(struct fsl_pwm_chip *fpc)
+{
+ u32 val;
+
+ regmap_read(fpc->regmap, FTM_FMS, &val);
+ if (val & FTM_FMS_WPEN)
+ regmap_update_bits(fpc->regmap, FTM_MODE, FTM_MODE_WPDIS,
+ FTM_MODE_WPDIS);
+}
+
+static void ftm_set_write_protection(struct fsl_pwm_chip *fpc)
+{
+ regmap_update_bits(fpc->regmap, FTM_FMS, FTM_FMS_WPEN, FTM_FMS_WPEN);
+}
+
+static bool fsl_pwm_periodcfg_are_equal(const struct fsl_pwm_periodcfg *a,
+ const struct fsl_pwm_periodcfg *b)
+{
+ if (a->clk_select != b->clk_select)
+ return false;
+ if (a->clk_ps != b->clk_ps)
+ return false;
+ if (a->mod_period != b->mod_period)
+ return false;
+ return true;
+}
+
static int fsl_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
{
int ret;
@@ -87,89 +116,58 @@ static void fsl_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
clk_disable_unprepare(fpc->ipg_clk);
}
-static int fsl_pwm_calculate_default_ps(struct fsl_pwm_chip *fpc,
- enum fsl_pwm_clk index)
+static unsigned int fsl_pwm_ticks_to_ns(struct fsl_pwm_chip *fpc,
+ unsigned int ticks)
{
- unsigned long sys_rate, cnt_rate;
- unsigned long long ratio;
-
- sys_rate = clk_get_rate(fpc->clk[FSL_PWM_CLK_SYS]);
- if (!sys_rate)
- return -EINVAL;
-
- cnt_rate = clk_get_rate(fpc->clk[fpc->cnt_select]);
- if (!cnt_rate)
- return -EINVAL;
-
- switch (index) {
- case FSL_PWM_CLK_SYS:
- fpc->clk_ps = 1;
- break;
- case FSL_PWM_CLK_FIX:
- ratio = 2 * cnt_rate - 1;
- do_div(ratio, sys_rate);
- fpc->clk_ps = ratio;
- break;
- case FSL_PWM_CLK_EXT:
- ratio = 4 * cnt_rate - 1;
- do_div(ratio, sys_rate);
- fpc->clk_ps = ratio;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
+ unsigned long rate;
+ unsigned long long exval;
+
+ rate = clk_get_rate(fpc->clk[fpc->period.clk_select]);
+ exval = ticks;
+ exval *= 1000000000UL;
+ do_div(exval, rate >> fpc->period.clk_ps);
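+	/*
+	 * e.g. 33000 ticks at a 66 MHz rate with clk_ps = 1:
+	 * 33000 * 1e9 / (66 MHz >> 1) = 1000000 ns
+	 */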
+ return exval;
}
-static unsigned long fsl_pwm_calculate_cycles(struct fsl_pwm_chip *fpc,
- unsigned long period_ns)
+static bool fsl_pwm_calculate_period_clk(struct fsl_pwm_chip *fpc,
+ unsigned int period_ns,
+ enum fsl_pwm_clk index,
+ struct fsl_pwm_periodcfg *periodcfg
+ )
{
- unsigned long long c, c0;
+ unsigned long long c;
+ unsigned int ps;
- c = clk_get_rate(fpc->clk[fpc->cnt_select]);
+ c = clk_get_rate(fpc->clk[index]);
c = c * period_ns;
do_div(c, 1000000000UL);
- do {
- c0 = c;
- do_div(c0, (1 << fpc->clk_ps));
- if (c0 <= 0xFFFF)
- return (unsigned long)c0;
- } while (++fpc->clk_ps < 8);
-
- return 0;
-}
-
-static unsigned long fsl_pwm_calculate_period_cycles(struct fsl_pwm_chip *fpc,
- unsigned long period_ns,
- enum fsl_pwm_clk index)
-{
- int ret;
+ if (c == 0)
+ return false;
- ret = fsl_pwm_calculate_default_ps(fpc, index);
- if (ret) {
- dev_err(fpc->chip.dev,
- "failed to calculate default prescaler: %d\n",
- ret);
- return 0;
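+	/*
+	 * e.g. a 66 MHz counter clock and period_ns = 1000000 give
+	 * c = 66000 ticks; that exceeds the 16-bit counter, so ps = 1
+	 * halves c to 33000 and mod_period becomes 32999.
+	 */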
+ for (ps = 0; ps < 8 ; ++ps, c >>= 1) {
+ if (c <= 0x10000) {
+ periodcfg->clk_select = index;
+ periodcfg->clk_ps = ps;
+ periodcfg->mod_period = c - 1;
+ return true;
+ }
}
-
- return fsl_pwm_calculate_cycles(fpc, period_ns);
+ return false;
}
-static unsigned long fsl_pwm_calculate_period(struct fsl_pwm_chip *fpc,
- unsigned long period_ns)
+static bool fsl_pwm_calculate_period(struct fsl_pwm_chip *fpc,
+ unsigned int period_ns,
+ struct fsl_pwm_periodcfg *periodcfg)
{
enum fsl_pwm_clk m0, m1;
- unsigned long fix_rate, ext_rate, cycles;
+ unsigned long fix_rate, ext_rate;
+ bool ret;
- cycles = fsl_pwm_calculate_period_cycles(fpc, period_ns,
- FSL_PWM_CLK_SYS);
- if (cycles) {
- fpc->cnt_select = FSL_PWM_CLK_SYS;
- return cycles;
- }
+ ret = fsl_pwm_calculate_period_clk(fpc, period_ns, FSL_PWM_CLK_SYS,
+ periodcfg);
+ if (ret)
+ return true;
fix_rate = clk_get_rate(fpc->clk[FSL_PWM_CLK_FIX]);
ext_rate = clk_get_rate(fpc->clk[FSL_PWM_CLK_EXT]);
@@ -182,158 +180,185 @@ static unsigned long fsl_pwm_calculate_period(struct fsl_pwm_chip *fpc,
m1 = FSL_PWM_CLK_FIX;
}
- cycles = fsl_pwm_calculate_period_cycles(fpc, period_ns, m0);
- if (cycles) {
- fpc->cnt_select = m0;
- return cycles;
- }
-
- fpc->cnt_select = m1;
+ ret = fsl_pwm_calculate_period_clk(fpc, period_ns, m0, periodcfg);
+ if (ret)
+ return true;
- return fsl_pwm_calculate_period_cycles(fpc, period_ns, m1);
+ return fsl_pwm_calculate_period_clk(fpc, period_ns, m1, periodcfg);
}
-static unsigned long fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc,
- unsigned long period_ns,
- unsigned long duty_ns)
+static unsigned int fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc,
+ unsigned int duty_ns)
{
unsigned long long duty;
- u32 val;
- regmap_read(fpc->regmap, FTM_MOD, &val);
- duty = (unsigned long long)duty_ns * (val + 1);
+ unsigned int period = fpc->period.mod_period + 1;
+ unsigned int period_ns = fsl_pwm_ticks_to_ns(fpc, period);
+
+ duty = (unsigned long long)duty_ns * period;
do_div(duty, period_ns);
- return (unsigned long)duty;
+ return (unsigned int)duty;
}
-static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+static bool fsl_pwm_is_any_pwm_enabled(struct fsl_pwm_chip *fpc,
+ struct pwm_device *pwm)
{
- struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- u32 period, duty;
+ u32 val;
- mutex_lock(&fpc->lock);
+ regmap_read(fpc->regmap, FTM_OUTMASK, &val);
+ if (~val & 0xFF)
+ return true;
+ else
+ return false;
+}
+
+static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc,
+ struct pwm_device *pwm)
+{
+ u32 val;
+ regmap_read(fpc->regmap, FTM_OUTMASK, &val);
+ if (~(val | BIT(pwm->hwpwm)) & 0xFF)
+ return true;
+ else
+ return false;
+}
+
+static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
+ struct pwm_device *pwm,
+ struct pwm_state *newstate)
+{
+ unsigned int duty;
+ u32 reg_polarity;
+
+ struct fsl_pwm_periodcfg periodcfg;
+ bool do_write_period = false;
+
+ if (!fsl_pwm_calculate_period(fpc, newstate->period, &periodcfg)) {
+ dev_err(fpc->chip.dev, "failed to calculate new period\n");
+ return -EINVAL;
+ }
+
+ if (!fsl_pwm_is_any_pwm_enabled(fpc, pwm))
+ do_write_period = true;
/*
* The Freescale FTM controller supports only a single period for
- * all PWM channels, therefore incompatible changes need to be
- * refused.
+ * all PWM channels, so verify whether the newly computed period
+ * differs from the period currently in use. In that case, the period
+ * may only be changed if no other PWM is running.
*/
- if (fpc->period_ns && fpc->period_ns != period_ns) {
- dev_err(fpc->chip.dev,
- "conflicting period requested for PWM %u\n",
- pwm->hwpwm);
- mutex_unlock(&fpc->lock);
- return -EBUSY;
+ else if (!fsl_pwm_periodcfg_are_equal(&fpc->period, &periodcfg)) {
+ if (fsl_pwm_is_other_pwm_enabled(fpc, pwm)) {
+ dev_err(fpc->chip.dev,
+ "Cannot change period for PWM %u, disable other PWMs first\n",
+ pwm->hwpwm);
+ return -EBUSY;
+ }
+ if (fpc->period.clk_select != periodcfg.clk_select) {
+ int ret;
+ enum fsl_pwm_clk oldclk = fpc->period.clk_select;
+ enum fsl_pwm_clk newclk = periodcfg.clk_select;
+
+ ret = clk_prepare_enable(fpc->clk[newclk]);
+ if (ret)
+ return ret;
+ clk_disable_unprepare(fpc->clk[oldclk]);
+ }
+ do_write_period = true;
}
- if (!fpc->period_ns && duty_ns) {
- period = fsl_pwm_calculate_period(fpc, period_ns);
- if (!period) {
- dev_err(fpc->chip.dev, "failed to calculate period\n");
- mutex_unlock(&fpc->lock);
- return -EINVAL;
- }
+ ftm_clear_write_protection(fpc);
+ if (do_write_period) {
+ regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK,
+ FTM_SC_CLK(periodcfg.clk_select));
regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_PS_MASK,
- fpc->clk_ps);
- regmap_write(fpc->regmap, FTM_MOD, period - 1);
+ periodcfg.clk_ps);
+ regmap_write(fpc->regmap, FTM_MOD, periodcfg.mod_period);
- fpc->period_ns = period_ns;
+ fpc->period = periodcfg;
}
- mutex_unlock(&fpc->lock);
-
- duty = fsl_pwm_calculate_duty(fpc, period_ns, duty_ns);
+ duty = fsl_pwm_calculate_duty(fpc, newstate->duty_cycle);
regmap_write(fpc->regmap, FTM_CSC(pwm->hwpwm),
FTM_CSC_MSB | FTM_CSC_ELSB);
regmap_write(fpc->regmap, FTM_CV(pwm->hwpwm), duty);
- return 0;
-}
-
-static int fsl_pwm_set_polarity(struct pwm_chip *chip,
- struct pwm_device *pwm,
- enum pwm_polarity polarity)
-{
- struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- u32 val;
+ reg_polarity = 0;
+ if (newstate->polarity == PWM_POLARITY_INVERSED)
+ reg_polarity = BIT(pwm->hwpwm);
- regmap_read(fpc->regmap, FTM_POL, &val);
+ regmap_update_bits(fpc->regmap, FTM_POL, BIT(pwm->hwpwm), reg_polarity);
- if (polarity == PWM_POLARITY_INVERSED)
- val |= BIT(pwm->hwpwm);
- else
- val &= ~BIT(pwm->hwpwm);
+ newstate->period = fsl_pwm_ticks_to_ns(fpc,
+ fpc->period.mod_period + 1);
+ newstate->duty_cycle = fsl_pwm_ticks_to_ns(fpc, duty);
- regmap_write(fpc->regmap, FTM_POL, val);
+ ftm_set_write_protection(fpc);
return 0;
}
-static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
-{
- int ret;
-
- /* select counter clock source */
- regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK,
- FTM_SC_CLK(fpc->cnt_select));
-
- ret = clk_prepare_enable(fpc->clk[fpc->cnt_select]);
- if (ret)
- return ret;
-
- ret = clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]);
- if (ret) {
- clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
- return ret;
- }
-
- return 0;
-}
-
-static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *newstate)
{
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- int ret;
-
- mutex_lock(&fpc->lock);
- regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), 0);
+ struct pwm_state *oldstate = &pwm->state;
+ int ret = 0;
- ret = fsl_counter_clock_enable(fpc);
- mutex_unlock(&fpc->lock);
+ /*
+ * oldstate to newstate : action
+ *
+ * disabled to disabled : ignore
+ * enabled to disabled : disable
+ * enabled to enabled : update settings
+ * disabled to enabled : update settings + enable
+ */
- return ret;
-}
+ mutex_lock(&fpc->lock);
-static void fsl_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
- u32 val;
+ if (!newstate->enabled) {
+ if (oldstate->enabled) {
+ regmap_update_bits(fpc->regmap, FTM_OUTMASK,
+ BIT(pwm->hwpwm), BIT(pwm->hwpwm));
+ clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
+ clk_disable_unprepare(fpc->clk[fpc->period.clk_select]);
+ }
- mutex_lock(&fpc->lock);
- regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm),
- BIT(pwm->hwpwm));
+ goto end_mutex;
+ }
- clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
- clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
+ ret = fsl_pwm_apply_config(fpc, pwm, newstate);
+ if (ret)
+ goto end_mutex;
+
+	/* check if we need to enable */
+ if (!oldstate->enabled) {
+ ret = clk_prepare_enable(fpc->clk[fpc->period.clk_select]);
+ if (ret)
+ goto end_mutex;
+
+ ret = clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]);
+ if (ret) {
+ clk_disable_unprepare(fpc->clk[fpc->period.clk_select]);
+ goto end_mutex;
+ }
- regmap_read(fpc->regmap, FTM_OUTMASK, &val);
- if ((val & 0xFF) == 0xFF)
- fpc->period_ns = 0;
+ regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm),
+ 0);
+ }
+end_mutex:
mutex_unlock(&fpc->lock);
+ return ret;
}
static const struct pwm_ops fsl_pwm_ops = {
.request = fsl_pwm_request,
.free = fsl_pwm_free,
- .config = fsl_pwm_config,
- .set_polarity = fsl_pwm_set_polarity,
- .enable = fsl_pwm_enable,
- .disable = fsl_pwm_disable,
+ .apply = fsl_pwm_apply,
.owner = THIS_MODULE,
};
@@ -357,6 +382,8 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc)
static bool fsl_pwm_volatile_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
+ case FTM_FMS:
+ case FTM_MODE:
case FTM_CNT:
return true;
}
@@ -474,7 +501,7 @@ static int fsl_pwm_suspend(struct device *dev)
continue;
clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
- clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
+ clk_disable_unprepare(fpc->clk[fpc->period.clk_select]);
}
return 0;
@@ -496,7 +523,7 @@ static int fsl_pwm_resume(struct device *dev)
if (!pwm_is_enabled(pwm))
continue;
- clk_prepare_enable(fpc->clk[fpc->cnt_select]);
+ clk_prepare_enable(fpc->clk[fpc->period.clk_select]);
clk_prepare_enable(fpc->clk[FSL_PWM_CLK_CNTEN]);
}
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index 88a51a40e695..f901e8a0d33d 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -63,7 +63,15 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
{
uint32_t ctrl = jz4740_timer_get_ctrl(pwm->hwpwm);
- /* Disable PWM output.
+ /*
+ * Set duty > period. This trick allows the TCU channels in TCU2 mode to
+ * properly return to their init level.
+ */
+ jz4740_timer_set_duty(pwm->hwpwm, 0xffff);
+ jz4740_timer_set_period(pwm->hwpwm, 0x0);
+
+ /*
+ * Disable PWM output.
* In TCU2 mode (channel 1/2 on JZ4750+), this must be done before the
* counter is stopped, while in TCU1 mode the order does not matter.
*/
@@ -74,17 +82,16 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
jz4740_timer_disable(pwm->hwpwm);
}
-static int jz4740_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
{
struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip);
unsigned long long tmp;
unsigned long period, duty;
unsigned int prescaler = 0;
uint16_t ctrl;
- bool is_enabled;
- tmp = (unsigned long long)clk_get_rate(jz4740->clk) * period_ns;
+ tmp = (unsigned long long)clk_get_rate(jz4740->clk) * state->period;
do_div(tmp, 1000000000);
period = tmp;
@@ -96,16 +103,14 @@ static int jz4740_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
if (prescaler == 6)
return -EINVAL;
- tmp = (unsigned long long)period * duty_ns;
- do_div(tmp, period_ns);
+ tmp = (unsigned long long)period * state->duty_cycle;
+ do_div(tmp, state->period);
duty = period - tmp;
if (duty >= period)
duty = period - 1;
- is_enabled = jz4740_timer_is_enabled(pwm->hwpwm);
- if (is_enabled)
- jz4740_pwm_disable(chip, pwm);
+ jz4740_pwm_disable(chip, pwm);
jz4740_timer_set_count(pwm->hwpwm, 0);
jz4740_timer_set_duty(pwm->hwpwm, duty);
@@ -116,18 +121,7 @@ static int jz4740_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
jz4740_timer_set_ctrl(pwm->hwpwm, ctrl);
- if (is_enabled)
- jz4740_pwm_enable(chip, pwm);
-
- return 0;
-}
-
-static int jz4740_pwm_set_polarity(struct pwm_chip *chip,
- struct pwm_device *pwm, enum pwm_polarity polarity)
-{
- uint32_t ctrl = jz4740_timer_get_ctrl(pwm->pwm);
-
- switch (polarity) {
+ switch (state->polarity) {
case PWM_POLARITY_NORMAL:
ctrl &= ~JZ_TIMER_CTRL_PWM_ACTIVE_LOW;
break;
@@ -137,16 +131,17 @@ static int jz4740_pwm_set_polarity(struct pwm_chip *chip,
}
jz4740_timer_set_ctrl(pwm->hwpwm, ctrl);
+
+ if (state->enabled)
+ jz4740_pwm_enable(chip, pwm);
+
return 0;
}
static const struct pwm_ops jz4740_pwm_ops = {
.request = jz4740_pwm_request,
.free = jz4740_pwm_free,
- .config = jz4740_pwm_config,
- .set_polarity = jz4740_pwm_set_polarity,
- .enable = jz4740_pwm_enable,
- .disable = jz4740_pwm_disable,
+ .apply = jz4740_pwm_apply,
.owner = THIS_MODULE,
};
@@ -184,8 +179,6 @@ static int jz4740_pwm_remove(struct platform_device *pdev)
#ifdef CONFIG_OF
static const struct of_device_id jz4740_pwm_dt_ids[] = {
{ .compatible = "ingenic,jz4740-pwm", },
- { .compatible = "ingenic,jz4770-pwm", },
- { .compatible = "ingenic,jz4780-pwm", },
{},
};
MODULE_DEVICE_TABLE(of, jz4740_pwm_dt_ids);
diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index fb5a369b1a8d..3cbff5cbb789 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -1,65 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
+ * PWM controller driver for Amlogic Meson SoCs.
*
- * GPL LICENSE SUMMARY
+ * This PWM is only a set of gates, dividers and counters: the PWM output
+ * is achieved by deriving a clock from which the two half-periods (low and
+ * high) can be expressed as cycle counts. The counter then has to be set to
+ * switch after N cycles for the first half period.
+ * The hardware has no "polarity" setting. This driver reverses the period
+ * cycles (the low length is inverted with the high length) for
+ * PWM_POLARITY_INVERSED. This means that .get_state cannot read the polarity
+ * from the hardware.
+ * Setting the duty cycle will disable and re-enable the PWM output.
+ * Disabling the PWM stops the output immediately (without waiting for the
+ * current period to complete first).
*
- * Copyright (c) 2016 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- * Copyright (C) 2014 Amlogic, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- * The full GNU General Public License is included in this distribution
- * in the file called COPYING.
- *
- * BSD LICENSE
+ * The public S912 (GXM) datasheet contains some documentation for this PWM
+ * controller starting on page 543:
+ * https://dl.khadas.com/Hardware/VIM2/Datasheet/S912_Datasheet_V0.220170314publicversion-Wesion.pdf
+ * An updated version of this IP block is found in S922X (G12B) SoCs. The
+ * datasheet contains the description for this IP block revision starting at
+ * page 1084:
+ * https://dn.odroid.com/S922X/ODROID-N2/Datasheet/S922X_Public_Datasheet_V0.2.pdf
*
* Copyright (c) 2016 BayLibre, SAS.
* Author: Neil Armstrong <narmstrong@baylibre.com>
* Copyright (C) 2014 Amlogic, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/bitfield.h>
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/math64.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -70,7 +45,8 @@
#define REG_PWM_A 0x0
#define REG_PWM_B 0x4
-#define PWM_HIGH_SHIFT 16
+#define PWM_LOW_MASK GENMASK(15, 0)
+#define PWM_HIGH_MASK GENMASK(31, 16)
#define REG_MISC_AB 0x8
#define MISC_B_CLK_EN BIT(23)
@@ -80,13 +56,33 @@
#define MISC_A_CLK_DIV_SHIFT 8
#define MISC_B_CLK_SEL_SHIFT 6
#define MISC_A_CLK_SEL_SHIFT 4
-#define MISC_CLK_SEL_WIDTH 2
+#define MISC_CLK_SEL_MASK 0x3
#define MISC_B_EN BIT(1)
#define MISC_A_EN BIT(0)
-static const unsigned int mux_reg_shifts[] = {
- MISC_A_CLK_SEL_SHIFT,
- MISC_B_CLK_SEL_SHIFT
+#define MESON_NUM_PWMS 2
+
+static struct meson_pwm_channel_data {
+ u8 reg_offset;
+ u8 clk_sel_shift;
+ u8 clk_div_shift;
+ u32 clk_en_mask;
+ u32 pwm_en_mask;
+} meson_pwm_per_channel_data[MESON_NUM_PWMS] = {
+ {
+ .reg_offset = REG_PWM_A,
+ .clk_sel_shift = MISC_A_CLK_SEL_SHIFT,
+ .clk_div_shift = MISC_A_CLK_DIV_SHIFT,
+ .clk_en_mask = MISC_A_CLK_EN,
+ .pwm_en_mask = MISC_A_EN,
+ },
+ {
+ .reg_offset = REG_PWM_B,
+ .clk_sel_shift = MISC_B_CLK_SEL_SHIFT,
+ .clk_div_shift = MISC_B_CLK_DIV_SHIFT,
+ .clk_en_mask = MISC_B_CLK_EN,
+ .pwm_en_mask = MISC_B_EN,
+ }
};
struct meson_pwm_channel {
@@ -94,8 +90,6 @@ struct meson_pwm_channel {
unsigned int lo;
u8 pre_div;
- struct pwm_state state;
-
struct clk *clk_parent;
struct clk_mux mux;
struct clk *clk;
@@ -109,8 +103,8 @@ struct meson_pwm_data {
struct meson_pwm {
struct pwm_chip chip;
const struct meson_pwm_data *data;
+ struct meson_pwm_channel channels[MESON_NUM_PWMS];
void __iomem *base;
- u8 inverter_mask;
/*
* Protects register (write) access to the REG_MISC_AB register
* that is shared between the two PWMs.
@@ -125,12 +119,16 @@ static inline struct meson_pwm *to_meson_pwm(struct pwm_chip *chip)
static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
{
- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
+ struct meson_pwm *meson = to_meson_pwm(chip);
+ struct meson_pwm_channel *channel;
struct device *dev = chip->dev;
int err;
- if (!channel)
- return -ENODEV;
+ channel = pwm_get_chip_data(pwm);
+ if (channel)
+ return 0;
+
+ channel = &meson->channels[pwm->hwpwm];
if (channel->clk_parent) {
err = clk_set_parent(channel->clk, channel->clk_parent);
@@ -149,9 +147,7 @@ static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
return err;
}
- chip->ops->get_state(chip, pwm, &channel->state);
-
- return 0;
+ return pwm_set_chip_data(pwm, channel);
}
static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
@@ -162,20 +158,18 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
clk_disable_unprepare(channel->clk);
}
-static int meson_pwm_calc(struct meson_pwm *meson,
- struct meson_pwm_channel *channel, unsigned int id,
- unsigned int duty, unsigned int period)
+static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
+ struct pwm_state *state)
{
- unsigned int pre_div, cnt, duty_cnt;
+ struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
+ unsigned int duty, period, pre_div, cnt, duty_cnt;
unsigned long fin_freq = -1;
- u64 fin_ps;
- if (~(meson->inverter_mask >> id) & 0x1)
- duty = period - duty;
+ duty = state->duty_cycle;
+ period = state->period;
- if (period == channel->state.period &&
- duty == channel->state.duty_cycle)
- return 0;
+ if (state->polarity == PWM_POLARITY_INVERSED)
+ duty = period - duty;
fin_freq = clk_get_rate(channel->clk);
if (fin_freq == 0) {
@@ -184,24 +178,19 @@ static int meson_pwm_calc(struct meson_pwm *meson,
}
dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq);
- fin_ps = (u64)NSEC_PER_SEC * 1000;
- do_div(fin_ps, fin_freq);
-
- /* Calc pre_div with the period */
- for (pre_div = 0; pre_div <= MISC_CLK_DIV_MASK; pre_div++) {
- cnt = DIV_ROUND_CLOSEST_ULL((u64)period * 1000,
- fin_ps * (pre_div + 1));
- dev_dbg(meson->chip.dev, "fin_ps=%llu pre_div=%u cnt=%u\n",
- fin_ps, pre_div, cnt);
- if (cnt <= 0xffff)
- break;
- }
+ pre_div = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * 0xffffLL);
if (pre_div > MISC_CLK_DIV_MASK) {
dev_err(meson->chip.dev, "unable to get period pre_div\n");
return -EINVAL;
}
+ cnt = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * (pre_div + 1));
+ if (cnt > 0xffff) {
+ dev_err(meson->chip.dev, "unable to get period cnt\n");
+ return -EINVAL;
+ }
+
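+	/*
+	 * e.g. with a 24 MHz (xtal) input and a 5 ms period:
+	 * pre_div = 24e6 * 5e6 / (1e9 * 0xffff) = 1 and
+	 * cnt = 24e6 * 5e6 / (1e9 * (1 + 1)) = 60000, which fits in 16 bits.
+	 */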
dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period,
pre_div, cnt);
@@ -215,8 +204,8 @@ static int meson_pwm_calc(struct meson_pwm *meson,
channel->lo = cnt;
} else {
/* Then check if we can have the duty with the same pre_div */
- duty_cnt = DIV_ROUND_CLOSEST_ULL((u64)duty * 1000,
- fin_ps * (pre_div + 1));
+ duty_cnt = div64_u64(fin_freq * (u64)duty,
+ NSEC_PER_SEC * (pre_div + 1));
if (duty_cnt > 0xffff) {
dev_err(meson->chip.dev, "unable to get duty cycle\n");
return -EINVAL;
@@ -233,73 +222,43 @@ static int meson_pwm_calc(struct meson_pwm *meson,
return 0;
}
-static void meson_pwm_enable(struct meson_pwm *meson,
- struct meson_pwm_channel *channel,
- unsigned int id)
+static void meson_pwm_enable(struct meson_pwm *meson, struct pwm_device *pwm)
{
- u32 value, clk_shift, clk_enable, enable;
- unsigned int offset;
+ struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
+ struct meson_pwm_channel_data *channel_data;
unsigned long flags;
+ u32 value;
- switch (id) {
- case 0:
- clk_shift = MISC_A_CLK_DIV_SHIFT;
- clk_enable = MISC_A_CLK_EN;
- enable = MISC_A_EN;
- offset = REG_PWM_A;
- break;
-
- case 1:
- clk_shift = MISC_B_CLK_DIV_SHIFT;
- clk_enable = MISC_B_CLK_EN;
- enable = MISC_B_EN;
- offset = REG_PWM_B;
- break;
-
- default:
- return;
- }
+ channel_data = &meson_pwm_per_channel_data[pwm->hwpwm];
spin_lock_irqsave(&meson->lock, flags);
value = readl(meson->base + REG_MISC_AB);
- value &= ~(MISC_CLK_DIV_MASK << clk_shift);
- value |= channel->pre_div << clk_shift;
- value |= clk_enable;
+ value &= ~(MISC_CLK_DIV_MASK << channel_data->clk_div_shift);
+ value |= channel->pre_div << channel_data->clk_div_shift;
+ value |= channel_data->clk_en_mask;
writel(value, meson->base + REG_MISC_AB);
- value = (channel->hi << PWM_HIGH_SHIFT) | channel->lo;
- writel(value, meson->base + offset);
+ value = FIELD_PREP(PWM_HIGH_MASK, channel->hi) |
+ FIELD_PREP(PWM_LOW_MASK, channel->lo);
+ writel(value, meson->base + channel_data->reg_offset);
value = readl(meson->base + REG_MISC_AB);
- value |= enable;
+ value |= channel_data->pwm_en_mask;
writel(value, meson->base + REG_MISC_AB);
spin_unlock_irqrestore(&meson->lock, flags);
}
-static void meson_pwm_disable(struct meson_pwm *meson, unsigned int id)
+static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm)
{
- u32 value, enable;
unsigned long flags;
-
- switch (id) {
- case 0:
- enable = MISC_A_EN;
- break;
-
- case 1:
- enable = MISC_B_EN;
- break;
-
- default:
- return;
- }
+ u32 value;
spin_lock_irqsave(&meson->lock, flags);
value = readl(meson->base + REG_MISC_AB);
- value &= ~enable;
+ value &= ~meson_pwm_per_channel_data[pwm->hwpwm].pwm_en_mask;
writel(value, meson->base + REG_MISC_AB);
spin_unlock_irqrestore(&meson->lock, flags);
@@ -316,64 +275,97 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
return -EINVAL;
if (!state->enabled) {
- meson_pwm_disable(meson, pwm->hwpwm);
- channel->state.enabled = false;
-
- return 0;
- }
-
- if (state->period != channel->state.period ||
- state->duty_cycle != channel->state.duty_cycle ||
- state->polarity != channel->state.polarity) {
- if (state->polarity != channel->state.polarity) {
- if (state->polarity == PWM_POLARITY_NORMAL)
- meson->inverter_mask |= BIT(pwm->hwpwm);
- else
- meson->inverter_mask &= ~BIT(pwm->hwpwm);
+ if (state->polarity == PWM_POLARITY_INVERSED) {
+ /*
+ * This IP block revision doesn't have an "always high"
+ * setting which we can use for "inverted disabled".
+ * Instead we achieve this using a pre_div of 0
+ * (to get the shortest possible duration for one
+ * "count") and "period == duty_cycle". This
+ * results in a signal which is LOW for one
+ * "count", while being HIGH for the rest of the
+ * period (so the signal is HIGH for slightly
+ * less than 100% of the period, but this is the best
+ * we can achieve).
+ */
+ channel->pre_div = 0;
+ channel->hi = ~0;
+ channel->lo = 0;
+
+ meson_pwm_enable(meson, pwm);
+ } else {
+ meson_pwm_disable(meson, pwm);
}
-
- err = meson_pwm_calc(meson, channel, pwm->hwpwm,
- state->duty_cycle, state->period);
+ } else {
+ err = meson_pwm_calc(meson, pwm, state);
if (err < 0)
return err;
- channel->state.polarity = state->polarity;
- channel->state.period = state->period;
- channel->state.duty_cycle = state->duty_cycle;
- }
-
- if (state->enabled && !channel->state.enabled) {
- meson_pwm_enable(meson, channel, pwm->hwpwm);
- channel->state.enabled = true;
+ meson_pwm_enable(meson, pwm);
}
return 0;
}
+static unsigned int meson_pwm_cnt_to_ns(struct pwm_chip *chip,
+ struct pwm_device *pwm, u32 cnt)
+{
+ struct meson_pwm *meson = to_meson_pwm(chip);
+ struct meson_pwm_channel *channel;
+ unsigned long fin_freq;
+ u32 fin_ns;
+
+	/*
+	 * pwm_get_chip_data() may not be set up yet when .get_state() runs
+	 * at registration time, so look up the channel directly.
+	 */
+ channel = &meson->channels[pwm->hwpwm];
+
+ fin_freq = clk_get_rate(channel->clk);
+ if (fin_freq == 0)
+ return 0;
+
+ fin_ns = div_u64(NSEC_PER_SEC, fin_freq);
+
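+	/* e.g. 60000 counts at 24 MHz (fin_ns = 41) with pre_div = 1: ~4.92 ms */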
+ return cnt * fin_ns * (channel->pre_div + 1);
+}
+
static void meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
struct pwm_state *state)
{
struct meson_pwm *meson = to_meson_pwm(chip);
- u32 value, mask;
+ struct meson_pwm_channel_data *channel_data;
+ struct meson_pwm_channel *channel;
+ u32 value, tmp;
if (!state)
return;
- switch (pwm->hwpwm) {
- case 0:
- mask = MISC_A_EN;
- break;
+ channel = &meson->channels[pwm->hwpwm];
+ channel_data = &meson_pwm_per_channel_data[pwm->hwpwm];
- case 1:
- mask = MISC_B_EN;
- break;
+ value = readl(meson->base + REG_MISC_AB);
- default:
- return;
- }
+ tmp = channel_data->pwm_en_mask | channel_data->clk_en_mask;
+ state->enabled = (value & tmp) == tmp;
- value = readl(meson->base + REG_MISC_AB);
- state->enabled = (value & mask) != 0;
+ tmp = value >> channel_data->clk_div_shift;
+ channel->pre_div = FIELD_GET(MISC_CLK_DIV_MASK, tmp);
+
+ value = readl(meson->base + channel_data->reg_offset);
+
+ channel->lo = FIELD_GET(PWM_LOW_MASK, value);
+ channel->hi = FIELD_GET(PWM_HIGH_MASK, value);
+
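+	/*
+	 * e.g. lo = 0, hi = 60000 reads back as a constant-high output
+	 * (duty_cycle == period); lo = hi = 30000 reads back as 50% duty.
+	 */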
+ if (channel->lo == 0) {
+ state->period = meson_pwm_cnt_to_ns(chip, pwm, channel->hi);
+ state->duty_cycle = state->period;
+ } else if (channel->lo >= channel->hi) {
+ state->period = meson_pwm_cnt_to_ns(chip, pwm,
+ channel->lo + channel->hi);
+ state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm,
+ channel->hi);
+ } else {
+ state->period = 0;
+ state->duty_cycle = 0;
+ }
}
static const struct pwm_ops meson_pwm_ops = {
@@ -433,8 +425,17 @@ static const struct meson_pwm_data pwm_axg_ao_data = {
.num_parents = ARRAY_SIZE(pwm_axg_ao_parent_names),
};
+static const char * const pwm_g12a_ao_ab_parent_names[] = {
+ "xtal", "aoclk81", "fclk_div4", "fclk_div5"
+};
+
+static const struct meson_pwm_data pwm_g12a_ao_ab_data = {
+ .parent_names = pwm_g12a_ao_ab_parent_names,
+ .num_parents = ARRAY_SIZE(pwm_g12a_ao_ab_parent_names),
+};
+
static const char * const pwm_g12a_ao_cd_parent_names[] = {
- "aoclk81", "xtal",
+ "xtal", "aoclk81",
};
static const struct meson_pwm_data pwm_g12a_ao_cd_data = {
@@ -478,7 +479,7 @@ static const struct of_device_id meson_pwm_matches[] = {
},
{
.compatible = "amlogic,meson-g12a-ao-pwm-ab",
- .data = &pwm_axg_ao_data
+ .data = &pwm_g12a_ao_ab_data
},
{
.compatible = "amlogic,meson-g12a-ao-pwm-cd",
@@ -488,8 +489,7 @@ static const struct of_device_id meson_pwm_matches[] = {
};
MODULE_DEVICE_TABLE(of, meson_pwm_matches);
-static int meson_pwm_init_channels(struct meson_pwm *meson,
- struct meson_pwm_channel *channels)
+static int meson_pwm_init_channels(struct meson_pwm *meson)
{
struct device *dev = meson->chip.dev;
struct clk_init_data init;
@@ -498,7 +498,7 @@ static int meson_pwm_init_channels(struct meson_pwm *meson,
int err;
for (i = 0; i < meson->chip.npwm; i++) {
- struct meson_pwm_channel *channel = &channels[i];
+ struct meson_pwm_channel *channel = &meson->channels[i];
snprintf(name, sizeof(name), "%s#mux%u", dev_name(dev), i);
@@ -509,8 +509,9 @@ static int meson_pwm_init_channels(struct meson_pwm *meson,
init.num_parents = meson->data->num_parents;
channel->mux.reg = meson->base + REG_MISC_AB;
- channel->mux.shift = mux_reg_shifts[i];
- channel->mux.mask = BIT(MISC_CLK_SEL_WIDTH) - 1;
+ channel->mux.shift =
+ meson_pwm_per_channel_data[i].clk_sel_shift;
+ channel->mux.mask = MISC_CLK_SEL_MASK;
channel->mux.flags = 0;
channel->mux.lock = &meson->lock;
channel->mux.table = NULL;
@@ -525,31 +526,16 @@ static int meson_pwm_init_channels(struct meson_pwm *meson,
snprintf(name, sizeof(name), "clkin%u", i);
- channel->clk_parent = devm_clk_get(dev, name);
- if (IS_ERR(channel->clk_parent)) {
- err = PTR_ERR(channel->clk_parent);
- if (err == -EPROBE_DEFER)
- return err;
-
- channel->clk_parent = NULL;
- }
+ channel->clk_parent = devm_clk_get_optional(dev, name);
+ if (IS_ERR(channel->clk_parent))
+ return PTR_ERR(channel->clk_parent);
}
return 0;
}
-static void meson_pwm_add_channels(struct meson_pwm *meson,
- struct meson_pwm_channel *channels)
-{
- unsigned int i;
-
- for (i = 0; i < meson->chip.npwm; i++)
- pwm_set_chip_data(&meson->chip.pwms[i], &channels[i]);
-}
-
static int meson_pwm_probe(struct platform_device *pdev)
{
- struct meson_pwm_channel *channels;
struct meson_pwm *meson;
struct resource *regs;
int err;
@@ -567,19 +553,13 @@ static int meson_pwm_probe(struct platform_device *pdev)
meson->chip.dev = &pdev->dev;
meson->chip.ops = &meson_pwm_ops;
meson->chip.base = -1;
- meson->chip.npwm = 2;
+ meson->chip.npwm = MESON_NUM_PWMS;
meson->chip.of_xlate = of_pwm_xlate_with_flags;
meson->chip.of_pwm_n_cells = 3;
meson->data = of_device_get_match_data(&pdev->dev);
- meson->inverter_mask = BIT(meson->chip.npwm) - 1;
- channels = devm_kcalloc(&pdev->dev, meson->chip.npwm,
- sizeof(*channels), GFP_KERNEL);
- if (!channels)
- return -ENOMEM;
-
- err = meson_pwm_init_channels(meson, channels);
+ err = meson_pwm_init_channels(meson);
if (err < 0)
return err;
@@ -589,8 +569,6 @@ static int meson_pwm_probe(struct platform_device *pdev)
return err;
}
- meson_pwm_add_channels(meson, channels);
-
platform_set_drvdata(pdev, meson);
return 0;
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index cfe7dd1b448e..5b2b8ecc354c 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -254,50 +254,11 @@ static const struct of_device_id rcar_pwm_of_table[] = {
};
MODULE_DEVICE_TABLE(of, rcar_pwm_of_table);
-#ifdef CONFIG_PM_SLEEP
-static struct pwm_device *rcar_pwm_dev_to_pwm_dev(struct device *dev)
-{
- struct rcar_pwm_chip *rcar_pwm = dev_get_drvdata(dev);
- struct pwm_chip *chip = &rcar_pwm->chip;
-
- return &chip->pwms[0];
-}
-
-static int rcar_pwm_suspend(struct device *dev)
-{
- struct pwm_device *pwm = rcar_pwm_dev_to_pwm_dev(dev);
-
- if (!test_bit(PWMF_REQUESTED, &pwm->flags))
- return 0;
-
- pm_runtime_put(dev);
-
- return 0;
-}
-
-static int rcar_pwm_resume(struct device *dev)
-{
- struct pwm_device *pwm = rcar_pwm_dev_to_pwm_dev(dev);
- struct pwm_state state;
-
- if (!test_bit(PWMF_REQUESTED, &pwm->flags))
- return 0;
-
- pm_runtime_get_sync(dev);
-
- pwm_get_state(pwm, &state);
-
- return rcar_pwm_apply(pwm->chip, pwm, &state);
-}
-#endif /* CONFIG_PM_SLEEP */
-static SIMPLE_DEV_PM_OPS(rcar_pwm_pm_ops, rcar_pwm_suspend, rcar_pwm_resume);
-
static struct platform_driver rcar_pwm_driver = {
.probe = rcar_pwm_probe,
.remove = rcar_pwm_remove,
.driver = {
.name = "pwm-rcar",
- .pm = &rcar_pwm_pm_ops,
.of_match_table = of_match_ptr(rcar_pwm_of_table),
}
};
diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c
new file mode 100644
index 000000000000..a7c107f19e66
--- /dev/null
+++ b/drivers/pwm/pwm-sifive.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2018 SiFive
+ * For documentation on SiFive's PWM IP block, please refer to Chapter 14
+ * of the Reference Manual: https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+ *
+ * Limitations:
+ * - When changing both duty cycle and period, we cannot prevent in
+ * software that the output might produce a period with mixed
+ * settings (new period length and old duty cycle).
+ * - The hardware cannot generate a 100% duty cycle.
+ * - The hardware generates only inverted output.
+ */
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+#include <linux/bitfield.h>
+
+/* Register offsets */
+#define PWM_SIFIVE_PWMCFG 0x0
+#define PWM_SIFIVE_PWMCOUNT 0x8
+#define PWM_SIFIVE_PWMS 0x10
+#define PWM_SIFIVE_PWMCMP0 0x20
+
+/* PWMCFG fields */
+#define PWM_SIFIVE_PWMCFG_SCALE GENMASK(3, 0)
+#define PWM_SIFIVE_PWMCFG_STICKY BIT(8)
+#define PWM_SIFIVE_PWMCFG_ZERO_CMP BIT(9)
+#define PWM_SIFIVE_PWMCFG_DEGLITCH BIT(10)
+#define PWM_SIFIVE_PWMCFG_EN_ALWAYS BIT(12)
+#define PWM_SIFIVE_PWMCFG_EN_ONCE BIT(13)
+#define PWM_SIFIVE_PWMCFG_CENTER BIT(16)
+#define PWM_SIFIVE_PWMCFG_GANG BIT(24)
+#define PWM_SIFIVE_PWMCFG_IP BIT(28)
+
+/* PWM_SIFIVE_SIZE_PWMCMP is used to calculate offset for pwmcmpX registers */
+#define PWM_SIFIVE_SIZE_PWMCMP 4
+#define PWM_SIFIVE_CMPWIDTH 16
+#define PWM_SIFIVE_DEFAULT_PERIOD 10000000
+
+struct pwm_sifive_ddata {
+ struct pwm_chip chip;
+ struct mutex lock; /* lock to protect user_count */
+ struct notifier_block notifier;
+ struct clk *clk;
+ void __iomem *regs;
+ unsigned int real_period;
+ unsigned int approx_period;
+ int user_count;
+};
+
+static inline
+struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *c)
+{
+ return container_of(c, struct pwm_sifive_ddata, chip);
+}
+
+static int pwm_sifive_request(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+
+ mutex_lock(&ddata->lock);
+ ddata->user_count++;
+ mutex_unlock(&ddata->lock);
+
+ return 0;
+}
+
+static void pwm_sifive_free(struct pwm_chip *chip, struct pwm_device *pwm)
+{
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+
+ mutex_lock(&ddata->lock);
+ ddata->user_count--;
+ mutex_unlock(&ddata->lock);
+}
+
+static void pwm_sifive_update_clock(struct pwm_sifive_ddata *ddata,
+ unsigned long rate)
+{
+ unsigned long long num;
+ unsigned long scale_pow;
+ int scale;
+	u32 val;
+
+ /*
+ * The PWM unit is used with pwmzerocmp=0, so the only way to modify the
+ * period length is using pwmscale which provides the number of bits the
+	 * counter is shifted before being fed to the comparators. A period
+ * lasts (1 << (PWM_SIFIVE_CMPWIDTH + pwmscale)) clock ticks.
+ * (1 << (PWM_SIFIVE_CMPWIDTH + scale)) * 10^9/rate = period
+ */
+ scale_pow = div64_ul(ddata->approx_period * (u64)rate, NSEC_PER_SEC);
+ scale = clamp(ilog2(scale_pow) - PWM_SIFIVE_CMPWIDTH, 0, 0xf);
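+	/*
+	 * e.g. rate = 66 MHz and approx_period = 10 ms give scale_pow =
+	 * 660000, ilog2(660000) = 19, so scale = 3 and the real period
+	 * becomes 2^(16 + 3) / 66 MHz ~= 7.94 ms.
+	 */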
+
+ val = PWM_SIFIVE_PWMCFG_EN_ALWAYS |
+ FIELD_PREP(PWM_SIFIVE_PWMCFG_SCALE, scale);
+ writel(val, ddata->regs + PWM_SIFIVE_PWMCFG);
+
+ /* As scale <= 15 the shift operation cannot overflow. */
+ num = (unsigned long long)NSEC_PER_SEC << (PWM_SIFIVE_CMPWIDTH + scale);
+ ddata->real_period = div64_ul(num, rate);
+ dev_dbg(ddata->chip.dev,
+ "New real_period = %u ns\n", ddata->real_period);
+}
+
+static void pwm_sifive_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+ u32 duty, val;
+
+ duty = readl(ddata->regs + PWM_SIFIVE_PWMCMP0 +
+ pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP);
+
+ state->enabled = duty > 0;
+
+ val = readl(ddata->regs + PWM_SIFIVE_PWMCFG);
+ if (!(val & PWM_SIFIVE_PWMCFG_EN_ALWAYS))
+ state->enabled = false;
+
+ state->period = ddata->real_period;
+ state->duty_cycle =
+ (u64)duty * ddata->real_period >> PWM_SIFIVE_CMPWIDTH;
+ state->polarity = PWM_POLARITY_INVERSED;
+}
+
+static int pwm_sifive_enable(struct pwm_chip *chip, bool enable)
+{
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+ int ret;
+
+ if (enable) {
+ ret = clk_enable(ddata->clk);
+ if (ret) {
+ dev_err(ddata->chip.dev, "Enable clk failed\n");
+ return ret;
+ }
+ }
+
+ if (!enable)
+ clk_disable(ddata->clk);
+
+ return 0;
+}
+
+static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
+ struct pwm_state cur_state;
+ unsigned int duty_cycle;
+ unsigned long long num;
+ bool enabled;
+ int ret = 0;
+ u32 frac;
+
+ if (state->polarity != PWM_POLARITY_INVERSED)
+ return -EINVAL;
+
+ ret = clk_enable(ddata->clk);
+ if (ret) {
+ dev_err(ddata->chip.dev, "Enable clk failed\n");
+ return ret;
+ }
+
+ mutex_lock(&ddata->lock);
+ cur_state = pwm->state;
+ enabled = cur_state.enabled;
+
+ duty_cycle = state->duty_cycle;
+ if (!state->enabled)
+ duty_cycle = 0;
+
+ /*
+	 * The problem of the output producing a period with mixed settings,
+	 * as mentioned at the top, occurs here. To minimize the window for
+	 * this problem, calculate the register values first and then write
+	 * them consecutively.
+ */
+ num = (u64)duty_cycle * (1U << PWM_SIFIVE_CMPWIDTH);
+ frac = DIV_ROUND_CLOSEST_ULL(num, state->period);
+ /* The hardware cannot generate a 100% duty cycle */
+ frac = min(frac, (1U << PWM_SIFIVE_CMPWIDTH) - 1);
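+	/* e.g. duty_cycle = period / 2 yields frac ~= 0x8000 (50%) */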
+
+ if (state->period != ddata->approx_period) {
+ if (ddata->user_count != 1) {
+ ret = -EBUSY;
+ goto exit;
+ }
+ ddata->approx_period = state->period;
+ pwm_sifive_update_clock(ddata, clk_get_rate(ddata->clk));
+ }
+
+ writel(frac, ddata->regs + PWM_SIFIVE_PWMCMP0 +
+ pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP);
+
+ if (state->enabled != enabled)
+ pwm_sifive_enable(chip, state->enabled);
+
+exit:
+ clk_disable(ddata->clk);
+ mutex_unlock(&ddata->lock);
+ return ret;
+}
+
+static const struct pwm_ops pwm_sifive_ops = {
+ .request = pwm_sifive_request,
+ .free = pwm_sifive_free,
+ .get_state = pwm_sifive_get_state,
+ .apply = pwm_sifive_apply,
+ .owner = THIS_MODULE,
+};
+
+static int pwm_sifive_clock_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct clk_notifier_data *ndata = data;
+ struct pwm_sifive_ddata *ddata =
+ container_of(nb, struct pwm_sifive_ddata, notifier);
+
+ if (event == POST_RATE_CHANGE)
+ pwm_sifive_update_clock(ddata, ndata->new_rate);
+
+ return NOTIFY_OK;
+}
+
+static int pwm_sifive_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct pwm_sifive_ddata *ddata;
+ struct pwm_chip *chip;
+ struct resource *res;
+ int ret;
+
+ ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
+ if (!ddata)
+ return -ENOMEM;
+
+ mutex_init(&ddata->lock);
+ chip = &ddata->chip;
+ chip->dev = dev;
+ chip->ops = &pwm_sifive_ops;
+ chip->of_xlate = of_pwm_xlate_with_flags;
+ chip->of_pwm_n_cells = 3;
+ chip->base = -1;
+ chip->npwm = 4;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ddata->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(ddata->regs)) {
+ dev_err(dev, "Unable to map IO resources\n");
+ return PTR_ERR(ddata->regs);
+ }
+
+ ddata->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(ddata->clk)) {
+ if (PTR_ERR(ddata->clk) != -EPROBE_DEFER)
+ dev_err(dev, "Unable to find controller clock\n");
+ return PTR_ERR(ddata->clk);
+ }
+
+ ret = clk_prepare_enable(ddata->clk);
+ if (ret) {
+ dev_err(dev, "failed to enable clock for pwm: %d\n", ret);
+ return ret;
+ }
+
+ /* Watch for changes to underlying clock frequency */
+ ddata->notifier.notifier_call = pwm_sifive_clock_notifier;
+ ret = clk_notifier_register(ddata->clk, &ddata->notifier);
+ if (ret) {
+ dev_err(dev, "failed to register clock notifier: %d\n", ret);
+ goto disable_clk;
+ }
+
+ ret = pwmchip_add(chip);
+ if (ret < 0) {
+ dev_err(dev, "cannot register PWM: %d\n", ret);
+ goto unregister_clk;
+ }
+
+ platform_set_drvdata(pdev, ddata);
+ dev_dbg(dev, "SiFive PWM chip registered %d PWMs\n", chip->npwm);
+
+ return 0;
+
+unregister_clk:
+ clk_notifier_unregister(ddata->clk, &ddata->notifier);
+disable_clk:
+ clk_disable_unprepare(ddata->clk);
+
+ return ret;
+}
+
+static int pwm_sifive_remove(struct platform_device *dev)
+{
+ struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev);
+ bool is_enabled = false;
+ struct pwm_device *pwm;
+ int ret, ch;
+
+ for (ch = 0; ch < ddata->chip.npwm; ch++) {
+ pwm = &ddata->chip.pwms[ch];
+ if (pwm->state.enabled) {
+ is_enabled = true;
+ break;
+ }
+ }
+ if (is_enabled)
+ clk_disable(ddata->clk);
+
+ clk_disable_unprepare(ddata->clk);
+ ret = pwmchip_remove(&ddata->chip);
+ clk_notifier_unregister(ddata->clk, &ddata->notifier);
+
+ return ret;
+}
+
+static const struct of_device_id pwm_sifive_of_match[] = {
+ { .compatible = "sifive,pwm0" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, pwm_sifive_of_match);
+
+static struct platform_driver pwm_sifive_driver = {
+ .probe = pwm_sifive_probe,
+ .remove = pwm_sifive_remove,
+ .driver = {
+ .name = "pwm-sifive",
+ .of_match_table = pwm_sifive_of_match,
+ },
+};
+module_platform_driver(pwm_sifive_driver);
+
+MODULE_DESCRIPTION("SiFive PWM driver");
+MODULE_LICENSE("GPL v2");
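The comparator value written to PWMCMP above is just a fixed-point fraction of the period. A minimal user-space sketch of the same arithmetic, assuming only the 16-bit comparator width used by this driver (everything else here is illustrative, not the driver's code):

#include <stdint.h>
#include <stdio.h>

#define PWM_SIFIVE_CMPWIDTH 16 /* comparator register width */

/* Scale duty_cycle/period into a comparator value, clamped below 100%. */
static uint32_t sifive_pwm_frac(uint64_t duty_ns, uint64_t period_ns)
{
	uint64_t num = duty_ns * (1U << PWM_SIFIVE_CMPWIDTH);
	uint64_t frac = (num + period_ns / 2) / period_ns; /* round to nearest */

	/* The hardware cannot generate a 100% duty cycle. */
	if (frac > (1U << PWM_SIFIVE_CMPWIDTH) - 1)
		frac = (1U << PWM_SIFIVE_CMPWIDTH) - 1;

	return (uint32_t)frac;
}

int main(void)
{
	/* 33% duty on a 1 ms period -> roughly 0x5555 */
	printf("0x%x\n", (unsigned)sifive_pwm_frac(333333, 1000000));
	return 0;
}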
diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
index 0059b24cfdc3..2211a642066d 100644
--- a/drivers/pwm/pwm-stm32-lp.c
+++ b/drivers/pwm/pwm-stm32-lp.c
@@ -13,6 +13,7 @@
#include <linux/mfd/stm32-lptimer.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
@@ -223,6 +224,29 @@ static int stm32_pwm_lp_remove(struct platform_device *pdev)
return pwmchip_remove(&priv->chip);
}
+static int __maybe_unused stm32_pwm_lp_suspend(struct device *dev)
+{
+ struct stm32_pwm_lp *priv = dev_get_drvdata(dev);
+ struct pwm_state state;
+
+ pwm_get_state(&priv->chip.pwms[0], &state);
+ if (state.enabled) {
+ dev_err(dev, "The consumer didn't stop us (%s)\n",
+ priv->chip.pwms[0].label);
+ return -EBUSY;
+ }
+
+ return pinctrl_pm_select_sleep_state(dev);
+}
+
+static int __maybe_unused stm32_pwm_lp_resume(struct device *dev)
+{
+ return pinctrl_pm_select_default_state(dev);
+}
+
+static SIMPLE_DEV_PM_OPS(stm32_pwm_lp_pm_ops, stm32_pwm_lp_suspend,
+ stm32_pwm_lp_resume);
+
static const struct of_device_id stm32_pwm_lp_of_match[] = {
{ .compatible = "st,stm32-pwm-lp", },
{},
@@ -235,6 +259,7 @@ static struct platform_driver stm32_pwm_lp_driver = {
.driver = {
.name = "stm32-pwm-lp",
.of_match_table = of_match_ptr(stm32_pwm_lp_of_match),
+ .pm = &stm32_pwm_lp_pm_ops,
},
};
module_platform_driver(stm32_pwm_lp_driver);
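The suspend hook above refuses to sleep while a consumer still has the PWM running, and only then moves the pins to their sleep state. A freestanding sketch of that policy check, with pwm_state as a stand-in for the kernel structure:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct pwm_state { bool enabled; };

/* Refuse suspend while the (single) channel is still running. */
static int lp_suspend(const struct pwm_state *state)
{
	if (state->enabled) {
		fprintf(stderr, "The consumer didn't stop us\n");
		return -EBUSY;
	}
	return 0; /* then switch pins to their sleep state */
}

int main(void)
{
	struct pwm_state s = { .enabled = true };
	printf("%d\n", lp_suspend(&s)); /* -16 (EBUSY) */
	return 0;
}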
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index 4f842550fbd1..740e2dec8313 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -608,6 +608,8 @@ static int stm32_pwm_probe(struct platform_device *pdev)
priv->regmap = ddata->regmap;
priv->clk = ddata->clk;
priv->max_arr = ddata->max_arr;
+ priv->chip.of_xlate = of_pwm_xlate_with_flags;
+ priv->chip.of_pwm_n_cells = 3;
if (!priv->regmap || !priv->clk)
return -EINVAL;
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index bf6823fe0812..2389b8669846 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -18,6 +18,7 @@ struct pwm_export {
struct device child;
struct pwm_device *pwm;
struct mutex lock;
+ struct pwm_state suspend;
};
static struct pwm_export *child_to_pwm_export(struct device *child)
@@ -372,10 +373,111 @@ static struct attribute *pwm_chip_attrs[] = {
};
ATTRIBUTE_GROUPS(pwm_chip);
+/* takes export->lock on success */
+static struct pwm_export *pwm_class_get_state(struct device *parent,
+ struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct device *child;
+ struct pwm_export *export;
+
+ if (!test_bit(PWMF_EXPORTED, &pwm->flags))
+ return NULL;
+
+ child = device_find_child(parent, pwm, pwm_unexport_match);
+ if (!child)
+ return NULL;
+
+ export = child_to_pwm_export(child);
+ put_device(child); /* for device_find_child() */
+
+ mutex_lock(&export->lock);
+ pwm_get_state(pwm, state);
+
+ return export;
+}
+
+static int pwm_class_apply_state(struct pwm_export *export,
+ struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ int ret = pwm_apply_state(pwm, state);
+
+ /* release lock taken in pwm_class_get_state */
+ mutex_unlock(&export->lock);
+
+ return ret;
+}
+
+static int pwm_class_resume_npwm(struct device *parent, unsigned int npwm)
+{
+ struct pwm_chip *chip = dev_get_drvdata(parent);
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < npwm; i++) {
+ struct pwm_device *pwm = &chip->pwms[i];
+ struct pwm_state state;
+ struct pwm_export *export;
+
+ export = pwm_class_get_state(parent, pwm, &state);
+ if (!export)
+ continue;
+
+ state.enabled = export->suspend.enabled;
+ ret = pwm_class_apply_state(export, pwm, &state);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+static int __maybe_unused pwm_class_suspend(struct device *parent)
+{
+ struct pwm_chip *chip = dev_get_drvdata(parent);
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < chip->npwm; i++) {
+ struct pwm_device *pwm = &chip->pwms[i];
+ struct pwm_state state;
+ struct pwm_export *export;
+
+ export = pwm_class_get_state(parent, pwm, &state);
+ if (!export)
+ continue;
+
+ export->suspend = state;
+ state.enabled = false;
+ ret = pwm_class_apply_state(export, pwm, &state);
+ if (ret < 0) {
+ /*
+ * roll back the PWM devices that were disabled by
+ * this suspend function.
+ */
+ pwm_class_resume_npwm(parent, i);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int __maybe_unused pwm_class_resume(struct device *parent)
+{
+ struct pwm_chip *chip = dev_get_drvdata(parent);
+
+ return pwm_class_resume_npwm(parent, chip->npwm);
+}
+
+static SIMPLE_DEV_PM_OPS(pwm_class_pm_ops, pwm_class_suspend, pwm_class_resume);
+
static struct class pwm_class = {
.name = "pwm",
.owner = THIS_MODULE,
.dev_groups = pwm_chip_groups,
+ .pm = &pwm_class_pm_ops,
};
static int pwmchip_sysfs_match(struct device *parent, const void *data)
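The class suspend/resume pair added above amounts to: save each exported channel's enabled flag, force the channel off, and on resume (or on a failed suspend) re-apply the saved flag to the channels touched so far. A freestanding sketch of that bookkeeping, with stand-in types rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct pwm_state { bool enabled; };
struct pwm_export { struct pwm_state current_state, suspend; };

static int apply(struct pwm_export *e, bool enabled)
{
	e->current_state.enabled = enabled;
	return 0; /* a real apply can fail */
}

static void resume_npwm(struct pwm_export *pwm, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++)
		apply(&pwm[i], pwm[i].suspend.enabled);
}

static int class_suspend(struct pwm_export *pwm, unsigned int npwm)
{
	for (unsigned int i = 0; i < npwm; i++) {
		pwm[i].suspend = pwm[i].current_state; /* save */
		if (apply(&pwm[i], false) < 0) {
			resume_npwm(pwm, i); /* roll back what we disabled */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct pwm_export pwm[2] = { { { true } }, { { false } } };

	class_suspend(pwm, 2);
	printf("%d %d\n", pwm[0].current_state.enabled,
	       pwm[1].current_state.enabled);
	return 0;
}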
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 673f8a128397..5d545806d930 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019 Borislav Petkov, SUSE Labs.
+ */
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
@@ -37,9 +40,9 @@
 * thus emulate an LRU-like behavior when deleting elements to free up space
* in the page.
*
- * When an element reaches it's max count of count_threshold, we try to poison
- * it by assuming that errors triggered count_threshold times in a single page
- * are excessive and that page shouldn't be used anymore. count_threshold is
+ * When an element reaches its max count of action_threshold, we try to poison
+ * it by assuming that errors triggered action_threshold times in a single page
+ * are excessive and that page shouldn't be used anymore. action_threshold is
* initialized to COUNT_MASK which is the maximum.
*
* That error event entry causes cec_add_elem() to return !0 value and thus
@@ -122,7 +125,7 @@ static DEFINE_MUTEX(ce_mutex);
static u64 dfs_pfn;
/* Amount of errors after which we offline */
-static unsigned int count_threshold = COUNT_MASK;
+static u64 action_threshold = COUNT_MASK;
/* Each element "decays" each decay_interval which is 24hrs by default. */
#define CEC_DECAY_DEFAULT_INTERVAL 24 * 60 * 60 /* 24 hrs */
@@ -276,11 +279,39 @@ static u64 __maybe_unused del_lru_elem(void)
return pfn;
}
+static bool sanity_check(struct ce_array *ca)
+{
+ bool ret = false;
+ u64 prev = 0;
+ int i;
+
+ for (i = 0; i < ca->n; i++) {
+ u64 this = PFN(ca->array[i]);
+
+ if (WARN(prev > this, "prev: 0x%016llx <-> this: 0x%016llx\n", prev, this))
+ ret = true;
+
+ prev = this;
+ }
+
+ if (!ret)
+ return ret;
+
+ pr_info("Sanity check dump:\n{ n: %d\n", ca->n);
+ for (i = 0; i < ca->n; i++) {
+ u64 this = PFN(ca->array[i]);
+
+ pr_info(" %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
+ }
+ pr_info("}\n");
+
+ return ret;
+}
int cec_add_elem(u64 pfn)
{
struct ce_array *ca = &ce_arr;
- unsigned int to;
+ unsigned int to = 0;
int count, ret = 0;
/*
@@ -294,6 +325,7 @@ int cec_add_elem(u64 pfn)
ca->ces_entered++;
+ /* Array full, free the LRU slot. */
if (ca->n == MAX_ELEMS)
WARN_ON(!del_lru_elem_unlocked(ca));
@@ -306,24 +338,17 @@ int cec_add_elem(u64 pfn)
(void *)&ca->array[to],
(ca->n - to) * sizeof(u64));
- ca->array[to] = (pfn << PAGE_SHIFT) |
- (DECAY_MASK << COUNT_BITS) | 1;
-
+ ca->array[to] = pfn << PAGE_SHIFT;
ca->n++;
-
- ret = 0;
-
- goto decay;
}
- count = COUNT(ca->array[to]);
-
- if (count < count_threshold) {
- ca->array[to] |= (DECAY_MASK << COUNT_BITS);
- ca->array[to]++;
+ /* Add/refresh element generation and increment count */
+ ca->array[to] |= DECAY_MASK << COUNT_BITS;
+ ca->array[to]++;
- ret = 0;
- } else {
+ /* Check action threshold and soft-offline, if reached. */
+ count = COUNT(ca->array[to]);
+ if (count >= action_threshold) {
u64 pfn = ca->array[to] >> PAGE_SHIFT;
if (!pfn_valid(pfn)) {
@@ -338,20 +363,21 @@ int cec_add_elem(u64 pfn)
del_elem(ca, to);
/*
- * Return a >0 value to denote that we've reached the offlining
- * threshold.
+ * Return a >0 value to callers, to denote that we've reached
+ * the offlining threshold.
*/
ret = 1;
goto unlock;
}
-decay:
ca->decay_count++;
if (ca->decay_count >= CLEAN_ELEMS)
do_spring_cleaning(ca);
+ WARN_ON_ONCE(sanity_check(ca));
+
unlock:
mutex_unlock(&ce_mutex);
@@ -369,45 +395,48 @@ static int pfn_set(void *data, u64 val)
{
*(u64 *)data = val;
- return cec_add_elem(val);
+ cec_add_elem(val);
+
+ return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(pfn_ops, u64_get, pfn_set, "0x%llx\n");
static int decay_interval_set(void *data, u64 val)
{
- *(u64 *)data = val;
-
if (val < CEC_DECAY_MIN_INTERVAL)
return -EINVAL;
if (val > CEC_DECAY_MAX_INTERVAL)
return -EINVAL;
+ *(u64 *)data = val;
decay_interval = val;
cec_mod_work(decay_interval);
+
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
-static int count_threshold_set(void *data, u64 val)
+static int action_threshold_set(void *data, u64 val)
{
*(u64 *)data = val;
if (val > COUNT_MASK)
val = COUNT_MASK;
- count_threshold = val;
+ action_threshold = val;
return 0;
}
-DEFINE_DEBUGFS_ATTRIBUTE(count_threshold_ops, u64_get, count_threshold_set, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%lld\n");
+
+static const char * const bins[] = { "00", "01", "10", "11" };
static int array_dump(struct seq_file *m, void *v)
{
struct ce_array *ca = &ce_arr;
- u64 prev = 0;
int i;
mutex_lock(&ce_mutex);
@@ -416,11 +445,8 @@ static int array_dump(struct seq_file *m, void *v)
for (i = 0; i < ca->n; i++) {
u64 this = PFN(ca->array[i]);
- seq_printf(m, " %03d: [%016llx|%03llx]\n", i, this, FULL_COUNT(ca->array[i]));
-
- WARN_ON(prev > this);
-
- prev = this;
+ seq_printf(m, " %3d: [%016llx|%s|%03llx]\n",
+ i, this, bins[DECAY(ca->array[i])], COUNT(ca->array[i]));
}
seq_printf(m, "}\n");
@@ -433,7 +459,7 @@ static int array_dump(struct seq_file *m, void *v)
seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
seq_printf(m, "Decays: %lld\n", ca->decays_done);
- seq_printf(m, "Action threshold: %d\n", count_threshold);
+ seq_printf(m, "Action threshold: %lld\n", action_threshold);
mutex_unlock(&ce_mutex);
@@ -463,18 +489,6 @@ static int __init create_debugfs_nodes(void)
return -1;
}
- pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
- if (!pfn) {
- pr_warn("Error creating pfn debugfs node!\n");
- goto err;
- }
-
- array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
- if (!array) {
- pr_warn("Error creating array debugfs node!\n");
- goto err;
- }
-
decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
&decay_interval, &decay_interval_ops);
if (!decay) {
@@ -482,13 +496,27 @@ static int __init create_debugfs_nodes(void)
goto err;
}
- count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d,
- &count_threshold, &count_threshold_ops);
+ count = debugfs_create_file("action_threshold", S_IRUSR | S_IWUSR, d,
+ &action_threshold, &action_threshold_ops);
if (!count) {
- pr_warn("Error creating count_threshold debugfs node!\n");
+ pr_warn("Error creating action_threshold debugfs node!\n");
+ goto err;
+ }
+
+ if (!IS_ENABLED(CONFIG_RAS_CEC_DEBUG))
+ return 0;
+
+ pfn = debugfs_create_file("pfn", S_IRUSR | S_IWUSR, d, &dfs_pfn, &pfn_ops);
+ if (!pfn) {
+ pr_warn("Error creating pfn debugfs node!\n");
goto err;
}
+ array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
+ if (!array) {
+ pr_warn("Error creating array debugfs node!\n");
+ goto err;
+ }
return 0;
@@ -509,8 +537,10 @@ void __init cec_init(void)
return;
}
- if (create_debugfs_nodes())
+ if (create_debugfs_nodes()) {
+ free_page((unsigned long)ce_arr.array);
return;
+ }
INIT_DELAYED_WORK(&cec_work, cec_work_fn);
schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
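Each array element above packs the PFN together with a small decay generation and an error count in the low bits; the DECAY/COUNT/PFN macros and the "|= DECAY_MASK << COUNT_BITS; ++" add/refresh step all operate on that layout. A sketch of it, assuming a PAGE_SHIFT of 12 and a 2-bit decay field above the count (the exact widths come from macros not shown in this hunk, so treat them as assumptions):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define DECAY_BITS 2
#define COUNT_BITS (PAGE_SHIFT - DECAY_BITS) /* 10 */
#define COUNT_MASK ((1ULL << COUNT_BITS) - 1)
#define DECAY_MASK ((1ULL << DECAY_BITS) - 1)

#define PFN(e)   ((e) >> PAGE_SHIFT)
#define COUNT(e) ((e) & COUNT_MASK)
#define DECAY(e) (((e) >> COUNT_BITS) & DECAY_MASK)

/* Mirror of "Add/refresh element generation and increment count". */
static uint64_t add_or_refresh(uint64_t elem)
{
	elem |= DECAY_MASK << COUNT_BITS; /* refresh the decay generation */
	elem++;                           /* bump the error count */
	return elem;
}

int main(void)
{
	uint64_t e = (uint64_t)0x1234 << PAGE_SHIFT; /* fresh element, count 0 */

	e = add_or_refresh(e);
	printf("pfn=%llx decay=%llu count=%llu\n",
	       (unsigned long long)PFN(e),
	       (unsigned long long)DECAY(e),
	       (unsigned long long)COUNT(e));
	return 0;
}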
diff --git a/drivers/regulator/88pm800.c b/drivers/regulator/88pm800-regulator.c
index 69ae25886181..69ae25886181 100644
--- a/drivers/regulator/88pm800.c
+++ b/drivers/regulator/88pm800-regulator.c
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 8553bdf87c1d..7928960563e6 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -136,19 +136,20 @@ config REGULATOR_AB8500
signal AB8500 PMIC
config REGULATOR_ARIZONA_LDO1
- tristate "Wolfson Arizona class devices LDO1"
- depends on MFD_ARIZONA
+ tristate "Cirrus Madera and Wolfson Arizona class devices LDO1"
+ depends on MFD_ARIZONA || MFD_MADERA
depends on SND_SOC
help
- Support for the LDO1 regulators found on Wolfson Arizona class
- devices.
+ Support for the LDO1 regulators found on Cirrus Logic Madera codecs
+	  and Wolfson Microelectronics Arizona codecs.
config REGULATOR_ARIZONA_MICSUPP
- tristate "Wolfson Arizona class devices MICSUPP"
- depends on MFD_ARIZONA
+ tristate "Cirrus Madera and Wolfson Arizona class devices MICSUPP"
+ depends on MFD_ARIZONA || MFD_MADERA
depends on SND_SOC
help
- Support for the MICSUPP regulators found on Wolfson Arizona class
+	  Support for the MICSUPP regulators found on Cirrus Logic Madera
+	  and Wolfson Microelectronics Arizona codec
devices.
config REGULATOR_AS3711
@@ -258,7 +259,7 @@ config REGULATOR_DA9062
config REGULATOR_DA9063
tristate "Dialog Semiconductor DA9063 regulators"
- depends on MFD_DA9063
+ depends on MFD_DA9063 && OF
help
Say y here to support the BUCKs and LDOs regulators found on
DA9063 PMICs.
@@ -364,7 +365,7 @@ config REGULATOR_LM363X
tristate "TI LM363X voltage regulators"
depends on MFD_TI_LMU
help
- This driver supports LM3631 and LM3632 voltage regulators for
+ This driver supports LM3631, LM3632 and LM36274 voltage regulators for
the LCD bias.
One boost output voltage is configurable and always on.
Other LDOs are used for the display module.
@@ -829,6 +830,26 @@ config REGULATOR_SKY81452
This driver can also be built as a module. If so, the module
will be called sky81452-regulator.
+config REGULATOR_SLG51000
+ tristate "Dialog Semiconductor SLG51000 regulators"
+ depends on I2C
+ select REGMAP_I2C
+ help
+	  Say y here to add support for the Dialog Semiconductor SLG51000.
+	  The SLG51000 provides seven compact and customizable low dropout
+	  regulators.
+
+config REGULATOR_STM32_BOOSTER
+ tristate "STMicroelectronics STM32 BOOSTER"
+ depends on ARCH_STM32 || COMPILE_TEST
+ help
+	  This driver supports the internal booster (3V3) embedded in some
+	  STMicroelectronics STM32 chips. It can be used to supply ADC analog
+	  input switches when the vdda supply is below 2.7V.
+
+ This driver can also be built as a module. If so, the module
+ will be called stm32-booster.
+
config REGULATOR_STM32_VREFBUF
tristate "STMicroelectronics STM32 VREFBUF"
depends on ARCH_STM32 || COMPILE_TEST
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 93f53840e8f1..eef73b5a35a4 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
obj-$(CONFIG_REGULATOR_88PG86X) += 88pg86x.o
-obj-$(CONFIG_REGULATOR_88PM800) += 88pm800.o
+obj-$(CONFIG_REGULATOR_88PM800) += 88pm800-regulator.o
obj-$(CONFIG_REGULATOR_88PM8607) += 88pm8607.o
obj-$(CONFIG_REGULATOR_CPCAP) += cpcap-regulator.o
obj-$(CONFIG_REGULATOR_AAT2870) += aat2870-regulator.o
@@ -104,6 +104,8 @@ obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o
obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o
obj-$(CONFIG_REGULATOR_SC2731) += sc2731-regulator.o
obj-$(CONFIG_REGULATOR_SKY81452) += sky81452-regulator.o
+obj-$(CONFIG_REGULATOR_SLG51000) += slg51000-regulator.o
+obj-$(CONFIG_REGULATOR_STM32_BOOSTER) += stm32-booster.o
obj-$(CONFIG_REGULATOR_STM32_VREFBUF) += stm32-vrefbuf.o
obj-$(CONFIG_REGULATOR_STM32_PWR) += stm32-pwr.o
obj-$(CONFIG_REGULATOR_STPMIC1) += stpmic1_regulator.o
diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c
index e4bc7b1e5ccd..1a3d7b720f5e 100644
--- a/drivers/regulator/arizona-ldo1.c
+++ b/drivers/regulator/arizona-ldo1.c
@@ -25,6 +25,10 @@
#include <linux/mfd/arizona/pdata.h>
#include <linux/mfd/arizona/registers.h>
+#include <linux/mfd/madera/core.h>
+#include <linux/mfd/madera/pdata.h>
+#include <linux/mfd/madera/registers.h>
+
struct arizona_ldo1 {
struct regulator_dev *regulator;
struct regmap *regmap;
@@ -158,6 +162,31 @@ static const struct regulator_init_data arizona_ldo1_wm5110 = {
.num_consumer_supplies = 1,
};
+static const struct regulator_desc madera_ldo1 = {
+ .name = "LDO1",
+ .supply_name = "LDOVDD",
+ .type = REGULATOR_VOLTAGE,
+ .ops = &arizona_ldo1_ops,
+
+ .vsel_reg = MADERA_LDO1_CONTROL_1,
+ .vsel_mask = MADERA_LDO1_VSEL_MASK,
+ .min_uV = 900000,
+ .uV_step = 25000,
+ .n_voltages = 13,
+ .enable_time = 3000,
+
+ .owner = THIS_MODULE,
+};
+
+static const struct regulator_init_data madera_ldo1_default = {
+ .constraints = {
+ .min_uV = 1200000,
+ .max_uV = 1200000,
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
+ .num_consumer_supplies = 1,
+};
+
static int arizona_ldo1_of_get_pdata(struct arizona_ldo1_pdata *pdata,
struct regulator_config *config,
const struct regulator_desc *desc,
@@ -320,6 +349,32 @@ static int arizona_ldo1_remove(struct platform_device *pdev)
return 0;
}
+static int madera_ldo1_probe(struct platform_device *pdev)
+{
+ struct madera *madera = dev_get_drvdata(pdev->dev.parent);
+ struct arizona_ldo1 *ldo1;
+ bool external_dcvdd;
+ int ret;
+
+ ldo1 = devm_kzalloc(&pdev->dev, sizeof(*ldo1), GFP_KERNEL);
+ if (!ldo1)
+ return -ENOMEM;
+
+ ldo1->regmap = madera->regmap;
+
+ ldo1->init_data = madera_ldo1_default;
+
+ ret = arizona_ldo1_common_init(pdev, ldo1, &madera_ldo1,
+ &madera->pdata.ldo1,
+ &external_dcvdd);
+ if (ret)
+ return ret;
+
+ madera->internal_dcvdd = !external_dcvdd;
+
+ return 0;
+}
+
static struct platform_driver arizona_ldo1_driver = {
.probe = arizona_ldo1_probe,
.remove = arizona_ldo1_remove,
@@ -328,10 +383,36 @@ static struct platform_driver arizona_ldo1_driver = {
},
};
-module_platform_driver(arizona_ldo1_driver);
+static struct platform_driver madera_ldo1_driver = {
+ .probe = madera_ldo1_probe,
+ .remove = arizona_ldo1_remove,
+ .driver = {
+ .name = "madera-ldo1",
+ },
+};
+
+static struct platform_driver * const madera_ldo1_drivers[] = {
+ &arizona_ldo1_driver,
+ &madera_ldo1_driver,
+};
+
+static int __init arizona_ldo1_init(void)
+{
+ return platform_register_drivers(madera_ldo1_drivers,
+ ARRAY_SIZE(madera_ldo1_drivers));
+}
+module_init(arizona_ldo1_init);
+
+static void __exit madera_ldo1_exit(void)
+{
+ platform_unregister_drivers(madera_ldo1_drivers,
+ ARRAY_SIZE(madera_ldo1_drivers));
+}
+module_exit(madera_ldo1_exit);
/* Module information */
MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
MODULE_DESCRIPTION("Arizona LDO1 driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:arizona-ldo1");
+MODULE_ALIAS("platform:madera-ldo1");
diff --git a/drivers/regulator/arizona-micsupp.c b/drivers/regulator/arizona-micsupp.c
index be0d46da51a1..ae1a5de3e57d 100644
--- a/drivers/regulator/arizona-micsupp.c
+++ b/drivers/regulator/arizona-micsupp.c
@@ -16,7 +16,6 @@
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/of_regulator.h>
-#include <linux/gpio.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <sound/soc.h>
@@ -25,6 +24,10 @@
#include <linux/mfd/arizona/pdata.h>
#include <linux/mfd/arizona/registers.h>
+#include <linux/mfd/madera/core.h>
+#include <linux/mfd/madera/pdata.h>
+#include <linux/mfd/madera/registers.h>
+
#include <linux/regulator/arizona-micsupp.h>
struct arizona_micsupp {
@@ -200,6 +203,28 @@ static const struct regulator_init_data arizona_micsupp_ext_default = {
.num_consumer_supplies = 1,
};
+static const struct regulator_desc madera_micsupp = {
+ .name = "MICVDD",
+ .supply_name = "CPVDD1",
+ .type = REGULATOR_VOLTAGE,
+ .n_voltages = 40,
+ .ops = &arizona_micsupp_ops,
+
+ .vsel_reg = MADERA_LDO2_CONTROL_1,
+ .vsel_mask = MADERA_LDO2_VSEL_MASK,
+ .enable_reg = MADERA_MIC_CHARGE_PUMP_1,
+ .enable_mask = MADERA_CPMIC_ENA,
+ .bypass_reg = MADERA_MIC_CHARGE_PUMP_1,
+ .bypass_mask = MADERA_CPMIC_BYPASS,
+
+ .linear_ranges = arizona_micsupp_ext_ranges,
+ .n_linear_ranges = ARRAY_SIZE(arizona_micsupp_ext_ranges),
+
+ .enable_time = 3000,
+
+ .owner = THIS_MODULE,
+};
+
static int arizona_micsupp_of_get_pdata(struct arizona_micsupp_pdata *pdata,
struct regulator_config *config,
const struct regulator_desc *desc)
@@ -316,6 +341,24 @@ static int arizona_micsupp_probe(struct platform_device *pdev)
&arizona->pdata.micvdd);
}
+static int madera_micsupp_probe(struct platform_device *pdev)
+{
+ struct madera *madera = dev_get_drvdata(pdev->dev.parent);
+ struct arizona_micsupp *micsupp;
+
+ micsupp = devm_kzalloc(&pdev->dev, sizeof(*micsupp), GFP_KERNEL);
+ if (!micsupp)
+ return -ENOMEM;
+
+ micsupp->regmap = madera->regmap;
+ micsupp->dapm = &madera->dapm;
+ micsupp->dev = madera->dev;
+ micsupp->init_data = arizona_micsupp_ext_default;
+
+ return arizona_micsupp_common_init(pdev, micsupp, &madera_micsupp,
+ &madera->pdata.micvdd);
+}
+
static struct platform_driver arizona_micsupp_driver = {
.probe = arizona_micsupp_probe,
.driver = {
@@ -323,10 +366,35 @@ static struct platform_driver arizona_micsupp_driver = {
},
};
-module_platform_driver(arizona_micsupp_driver);
+static struct platform_driver madera_micsupp_driver = {
+ .probe = madera_micsupp_probe,
+ .driver = {
+ .name = "madera-micsupp",
+ },
+};
+
+static struct platform_driver * const arizona_micsupp_drivers[] = {
+ &arizona_micsupp_driver,
+ &madera_micsupp_driver,
+};
+
+static int __init arizona_micsupp_init(void)
+{
+ return platform_register_drivers(arizona_micsupp_drivers,
+ ARRAY_SIZE(arizona_micsupp_drivers));
+}
+module_init(arizona_micsupp_init);
+
+static void __exit arizona_micsupp_exit(void)
+{
+ platform_unregister_drivers(arizona_micsupp_drivers,
+ ARRAY_SIZE(arizona_micsupp_drivers));
+}
+module_exit(arizona_micsupp_exit);
/* Module information */
MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
MODULE_DESCRIPTION("Arizona microphone supply driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:arizona-micsupp");
+MODULE_ALIAS("platform:madera-micsupp");
diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c
index 30e3ed430a8a..0248a61f1006 100644
--- a/drivers/regulator/bd70528-regulator.c
+++ b/drivers/regulator/bd70528-regulator.c
@@ -4,7 +4,6 @@
#include <linux/delay.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/rohm-bd70528.h>
diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index fde4264da6ff..8c22cfb76173 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -4,7 +4,6 @@
#include <linux/delay.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/rohm-bd718x7.h>
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c894cf0d8a28..86ae1825cec1 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1,12 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * core.c -- Voltage/Current Regulator framework.
- *
- * Copyright 2007, 2008 Wolfson Microelectronics PLC.
- * Copyright 2008 SlimLogic Ltd.
- *
- * Author: Liam Girdwood <lrg@slimlogic.co.uk>
- */
+//
+// core.c -- Voltage/Current Regulator framework.
+//
+// Copyright 2007, 2008 Wolfson Microelectronics PLC.
+// Copyright 2008 SlimLogic Ltd.
+//
+// Author: Liam Girdwood <lrg@slimlogic.co.uk>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -23,6 +22,7 @@
#include <linux/regmap.h>
#include <linux/regulator/of_regulator.h>
#include <linux/regulator/consumer.h>
+#include <linux/regulator/coupler.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
#include <linux/module.h>
@@ -50,6 +50,7 @@ static DEFINE_MUTEX(regulator_list_mutex);
static LIST_HEAD(regulator_map_list);
static LIST_HEAD(regulator_ena_gpio_list);
static LIST_HEAD(regulator_supply_alias_list);
+static LIST_HEAD(regulator_coupler_list);
static bool has_full_constraints;
static struct dentry *debugfs_root;
@@ -93,7 +94,6 @@ struct regulator_supply_alias {
static int _regulator_is_enabled(struct regulator_dev *rdev);
static int _regulator_disable(struct regulator *regulator);
-static int _regulator_get_voltage(struct regulator_dev *rdev);
static int _regulator_get_current_limit(struct regulator_dev *rdev);
static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
static int _notifier_call_chain(struct regulator_dev *rdev,
@@ -102,15 +102,12 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
int min_uV, int max_uV);
static int regulator_balance_voltage(struct regulator_dev *rdev,
suspend_state_t state);
-static int regulator_set_voltage_rdev(struct regulator_dev *rdev,
- int min_uV, int max_uV,
- suspend_state_t state);
static struct regulator *create_regulator(struct regulator_dev *rdev,
struct device *dev,
const char *supply_name);
static void _regulator_put(struct regulator *regulator);
-static const char *rdev_get_name(struct regulator_dev *rdev)
+const char *rdev_get_name(struct regulator_dev *rdev)
{
if (rdev->constraints && rdev->constraints->name)
return rdev->constraints->name;
@@ -424,8 +421,8 @@ static struct device_node *of_get_regulator(struct device *dev, const char *supp
}
/* Platform voltage constraint check */
-static int regulator_check_voltage(struct regulator_dev *rdev,
- int *min_uV, int *max_uV)
+int regulator_check_voltage(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV)
{
BUG_ON(*min_uV > *max_uV);
@@ -457,9 +454,9 @@ static int regulator_check_states(suspend_state_t state)
/* Make sure we select a voltage that suits the needs of all
* regulator consumers
*/
-static int regulator_check_consumers(struct regulator_dev *rdev,
- int *min_uV, int *max_uV,
- suspend_state_t state)
+int regulator_check_consumers(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV,
+ suspend_state_t state)
{
struct regulator *regulator;
struct regulator_voltage *voltage;
@@ -570,7 +567,7 @@ static ssize_t regulator_uV_show(struct device *dev,
ssize_t ret;
regulator_lock(rdev);
- ret = sprintf(buf, "%d\n", _regulator_get_voltage(rdev));
+ ret = sprintf(buf, "%d\n", regulator_get_voltage_rdev(rdev));
regulator_unlock(rdev);
return ret;
@@ -941,7 +938,7 @@ static int drms_uA_update(struct regulator_dev *rdev)
rdev_err(rdev, "failed to set load %d\n", current_uA);
} else {
/* get output voltage */
- output_uV = _regulator_get_voltage(rdev);
+ output_uV = regulator_get_voltage_rdev(rdev);
if (output_uV <= 0) {
rdev_err(rdev, "invalid output voltage found\n");
return -EINVAL;
@@ -1054,7 +1051,7 @@ static void print_constraints(struct regulator_dev *rdev)
if (!constraints->min_uV ||
constraints->min_uV != constraints->max_uV) {
- ret = _regulator_get_voltage(rdev);
+ ret = regulator_get_voltage_rdev(rdev);
if (ret > 0)
count += scnprintf(buf + count, len - count,
"at %d mV ", ret / 1000);
@@ -1113,7 +1110,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
if (rdev->constraints->apply_uV &&
rdev->constraints->min_uV && rdev->constraints->max_uV) {
int target_min, target_max;
- int current_uV = _regulator_get_voltage(rdev);
+ int current_uV = regulator_get_voltage_rdev(rdev);
if (current_uV == -ENOTRECOVERABLE) {
/* This regulator can't be read and must be initialized */
@@ -1123,7 +1120,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
_regulator_do_set_voltage(rdev,
rdev->constraints->min_uV,
rdev->constraints->max_uV);
- current_uV = _regulator_get_voltage(rdev);
+ current_uV = regulator_get_voltage_rdev(rdev);
}
if (current_uV < 0) {
@@ -1645,9 +1642,9 @@ static int _regulator_get_enable_time(struct regulator_dev *rdev)
{
if (rdev->constraints && rdev->constraints->enable_time)
return rdev->constraints->enable_time;
- if (!rdev->desc->ops->enable_time)
- return rdev->desc->enable_time;
- return rdev->desc->ops->enable_time(rdev);
+ if (rdev->desc->ops->enable_time)
+ return rdev->desc->ops->enable_time(rdev);
+ return rdev->desc->enable_time;
}
static struct regulator_supply_alias *regulator_find_supply_alias(
@@ -3065,7 +3062,7 @@ static int _regulator_call_set_voltage(struct regulator_dev *rdev,
struct pre_voltage_change_data data;
int ret;
- data.old_uV = _regulator_get_voltage(rdev);
+ data.old_uV = regulator_get_voltage_rdev(rdev);
data.min_uV = min_uV;
data.max_uV = max_uV;
ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
@@ -3089,7 +3086,7 @@ static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev,
struct pre_voltage_change_data data;
int ret;
- data.old_uV = _regulator_get_voltage(rdev);
+ data.old_uV = regulator_get_voltage_rdev(rdev);
data.min_uV = uV;
data.max_uV = uV;
ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE,
@@ -3107,6 +3104,66 @@ static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev,
return ret;
}
+static int _regulator_set_voltage_sel_step(struct regulator_dev *rdev,
+ int uV, int new_selector)
+{
+ const struct regulator_ops *ops = rdev->desc->ops;
+ int diff, old_sel, curr_sel, ret;
+
+ /* Stepping is only needed if the regulator is enabled. */
+ if (!_regulator_is_enabled(rdev))
+ goto final_set;
+
+ if (!ops->get_voltage_sel)
+ return -EINVAL;
+
+ old_sel = ops->get_voltage_sel(rdev);
+ if (old_sel < 0)
+ return old_sel;
+
+ diff = new_selector - old_sel;
+ if (diff == 0)
+ return 0; /* No change needed. */
+
+ if (diff > 0) {
+ /* Stepping up. */
+ for (curr_sel = old_sel + rdev->desc->vsel_step;
+ curr_sel < new_selector;
+ curr_sel += rdev->desc->vsel_step) {
+ /*
+ * Call the callback directly instead of using
+ * _regulator_call_set_voltage_sel() as we don't
+ * want to notify anyone yet. Same in the branch
+ * below.
+ */
+ ret = ops->set_voltage_sel(rdev, curr_sel);
+ if (ret)
+ goto try_revert;
+ }
+ } else {
+ /* Stepping down. */
+ for (curr_sel = old_sel - rdev->desc->vsel_step;
+ curr_sel > new_selector;
+ curr_sel -= rdev->desc->vsel_step) {
+ ret = ops->set_voltage_sel(rdev, curr_sel);
+ if (ret)
+ goto try_revert;
+ }
+ }
+
+final_set:
+ /* The final selector will trigger the notifiers. */
+ return _regulator_call_set_voltage_sel(rdev, uV, new_selector);
+
+try_revert:
+ /*
+ * At least try to return to the previous voltage if setting a new
+ * one failed.
+ */
+ (void)ops->set_voltage_sel(rdev, old_sel);
+ return ret;
+}
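/*
 * Illustration only, not part of the patch: the stepping logic above
 * reduced to its core. set_sel() stands in for ops->set_voltage_sel();
 * as in _regulator_set_voltage_sel_step(), intermediate steps are set
 * silently and only the final selector goes through the notifier path.
 */
static int step_selector(struct regulator_dev *rdev, int old_sel,
			 int new_sel, int vsel_step,
			 int (*set_sel)(struct regulator_dev *, int))
{
	int dir = (new_sel > old_sel) ? vsel_step : -vsel_step;
	int cur, ret;

	if (new_sel == old_sel)
		return 0;

	for (cur = old_sel + dir;
	     (dir > 0) ? (cur < new_sel) : (cur > new_sel);
	     cur += dir) {
		ret = set_sel(rdev, cur);
		if (ret) {
			set_sel(rdev, old_sel); /* best-effort revert */
			return ret;
		}
	}

	return set_sel(rdev, new_sel);
}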
+
static int _regulator_set_voltage_time(struct regulator_dev *rdev,
int old_uV, int new_uV)
{
@@ -3142,7 +3199,7 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
unsigned int selector;
int old_selector = -1;
const struct regulator_ops *ops = rdev->desc->ops;
- int old_uV = _regulator_get_voltage(rdev);
+ int old_uV = regulator_get_voltage_rdev(rdev);
trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV);
@@ -3169,7 +3226,7 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
best_val = ops->list_voltage(rdev,
selector);
else
- best_val = _regulator_get_voltage(rdev);
+ best_val = regulator_get_voltage_rdev(rdev);
}
} else if (ops->set_voltage_sel) {
@@ -3180,6 +3237,9 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev,
selector = ret;
if (old_selector == selector)
ret = 0;
+ else if (rdev->desc->vsel_step)
+ ret = _regulator_set_voltage_sel_step(
+ rdev, best_val, selector);
else
ret = _regulator_call_set_voltage_sel(
rdev, best_val, selector);
@@ -3288,7 +3348,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator,
* changing the voltage.
*/
if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
- current_uV = _regulator_get_voltage(rdev);
+ current_uV = regulator_get_voltage_rdev(rdev);
if (min_uV <= current_uV && current_uV <= max_uV) {
voltage->min_uV = min_uV;
voltage->max_uV = max_uV;
@@ -3325,8 +3385,8 @@ out:
return ret;
}
-static int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV,
- int max_uV, suspend_state_t state)
+int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV,
+ int max_uV, suspend_state_t state)
{
int best_supply_uV = 0;
int supply_change_uV = 0;
@@ -3354,7 +3414,7 @@ static int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV,
best_supply_uV += rdev->desc->min_dropout_uV;
- current_supply_uV = _regulator_get_voltage(rdev->supply->rdev);
+ current_supply_uV = regulator_get_voltage_rdev(rdev->supply->rdev);
if (current_supply_uV < 0) {
ret = current_supply_uV;
goto out;
@@ -3405,7 +3465,7 @@ static int regulator_limit_voltage_step(struct regulator_dev *rdev,
return 1;
if (*current_uV < 0) {
- *current_uV = _regulator_get_voltage(rdev);
+ *current_uV = regulator_get_voltage_rdev(rdev);
if (*current_uV < 0)
return *current_uV;
@@ -3434,11 +3494,10 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev,
struct coupling_desc *c_desc = &rdev->coupling_desc;
struct regulator_dev **c_rdevs = c_desc->coupled_rdevs;
struct regulation_constraints *constraints = rdev->constraints;
- int max_spread = constraints->max_spread;
int desired_min_uV = 0, desired_max_uV = INT_MAX;
int max_current_uV = 0, min_current_uV = INT_MAX;
int highest_min_uV = 0, target_uV, possible_uV;
- int i, ret;
+ int i, ret, max_spread;
bool done;
*current_uV = -1;
@@ -3492,6 +3551,8 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev,
}
}
+ max_spread = constraints->max_spread[0];
+
/*
* Let target_uV be equal to the desired one if possible.
* If not, set it to minimum voltage, allowed by other coupled
@@ -3509,7 +3570,7 @@ static int regulator_get_optimal_voltage(struct regulator_dev *rdev,
if (!_regulator_is_enabled(c_rdevs[i]))
continue;
- tmp_act = _regulator_get_voltage(c_rdevs[i]);
+ tmp_act = regulator_get_voltage_rdev(c_rdevs[i]);
if (tmp_act < 0)
return tmp_act;
@@ -3551,7 +3612,7 @@ finish:
if (n_coupled > 1 && *current_uV == -1) {
if (_regulator_is_enabled(rdev)) {
- ret = _regulator_get_voltage(rdev);
+ ret = regulator_get_voltage_rdev(rdev);
if (ret < 0)
return ret;
@@ -3573,9 +3634,11 @@ static int regulator_balance_voltage(struct regulator_dev *rdev,
struct regulator_dev **c_rdevs;
struct regulator_dev *best_rdev;
struct coupling_desc *c_desc = &rdev->coupling_desc;
+ struct regulator_coupler *coupler = c_desc->coupler;
int i, ret, n_coupled, best_min_uV, best_max_uV, best_c_rdev;
- bool best_c_rdev_done, c_rdev_done[MAX_COUPLED];
unsigned int delta, best_delta;
+ unsigned long c_rdev_done = 0;
+ bool best_c_rdev_done;
c_rdevs = c_desc->coupled_rdevs;
n_coupled = c_desc->n_coupled;
@@ -3592,8 +3655,9 @@ static int regulator_balance_voltage(struct regulator_dev *rdev,
return -EPERM;
}
- for (i = 0; i < n_coupled; i++)
- c_rdev_done[i] = false;
+ /* Invoke custom balancer for customized couplers */
+ if (coupler && coupler->balance_voltage)
+ return coupler->balance_voltage(coupler, rdev, state);
/*
* Find the best possible voltage change on each loop. Leave the loop
@@ -3620,7 +3684,7 @@ static int regulator_balance_voltage(struct regulator_dev *rdev,
*/
int optimal_uV = 0, optimal_max_uV = 0, current_uV = 0;
- if (c_rdev_done[i])
+ if (test_bit(i, &c_rdev_done))
continue;
ret = regulator_get_optimal_voltage(c_rdevs[i],
@@ -3655,7 +3719,8 @@ static int regulator_balance_voltage(struct regulator_dev *rdev,
if (ret < 0)
goto out;
- c_rdev_done[best_c_rdev] = best_c_rdev_done;
+ if (best_c_rdev_done)
+ set_bit(best_c_rdev, &c_rdev_done);
} while (n_coupled > 1);
@@ -3911,7 +3976,7 @@ out:
}
EXPORT_SYMBOL_GPL(regulator_sync_voltage);
-static int _regulator_get_voltage(struct regulator_dev *rdev)
+int regulator_get_voltage_rdev(struct regulator_dev *rdev)
{
int sel, ret;
bool bypassed;
@@ -3928,7 +3993,7 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
return -EPROBE_DEFER;
}
- return _regulator_get_voltage(rdev->supply->rdev);
+ return regulator_get_voltage_rdev(rdev->supply->rdev);
}
}
@@ -3944,7 +4009,7 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
} else if (rdev->desc->fixed_uV && (rdev->desc->n_voltages == 1)) {
ret = rdev->desc->fixed_uV;
} else if (rdev->supply) {
- ret = _regulator_get_voltage(rdev->supply->rdev);
+ ret = regulator_get_voltage_rdev(rdev->supply->rdev);
} else {
return -EINVAL;
}
@@ -3969,7 +4034,7 @@ int regulator_get_voltage(struct regulator *regulator)
int ret;
regulator_lock_dependent(regulator->rdev, &ww_ctx);
- ret = _regulator_get_voltage(regulator->rdev);
+ ret = regulator_get_voltage_rdev(regulator->rdev);
regulator_unlock_dependent(regulator->rdev, &ww_ctx);
return ret;
@@ -4707,8 +4772,60 @@ static int regulator_register_resolve_supply(struct device *dev, void *data)
return 0;
}
+int regulator_coupler_register(struct regulator_coupler *coupler)
+{
+ mutex_lock(&regulator_list_mutex);
+ list_add_tail(&coupler->list, &regulator_coupler_list);
+ mutex_unlock(&regulator_list_mutex);
+
+ return 0;
+}
+
+static struct regulator_coupler *
+regulator_find_coupler(struct regulator_dev *rdev)
+{
+ struct regulator_coupler *coupler;
+ int err;
+
+	/*
+	 * Note that couplers are appended to the list and the generic
+	 * coupler is registered first, hence it is tried last and attaches
+	 * only if no other coupler claims the regulator.
+	 */
+ list_for_each_entry_reverse(coupler, &regulator_coupler_list, list) {
+ err = coupler->attach_regulator(coupler, rdev);
+ if (!err) {
+ if (!coupler->balance_voltage &&
+ rdev->coupling_desc.n_coupled > 2)
+ goto err_unsupported;
+
+ return coupler;
+ }
+
+ if (err < 0)
+ return ERR_PTR(err);
+
+ if (err == 1)
+ continue;
+
+ break;
+ }
+
+ return ERR_PTR(-EINVAL);
+
+err_unsupported:
+ if (coupler->detach_regulator)
+ coupler->detach_regulator(coupler, rdev);
+
+ rdev_err(rdev,
+ "Voltage balancing for multiple regulator couples is unimplemented\n");
+
+ return ERR_PTR(-EPERM);
+}
+
static void regulator_resolve_coupling(struct regulator_dev *rdev)
{
+ struct regulator_coupler *coupler = rdev->coupling_desc.coupler;
struct coupling_desc *c_desc = &rdev->coupling_desc;
int n_coupled = c_desc->n_coupled;
struct regulator_dev *c_rdev;
@@ -4724,6 +4841,12 @@ static void regulator_resolve_coupling(struct regulator_dev *rdev)
if (!c_rdev)
continue;
+ if (c_rdev->coupling_desc.coupler != coupler) {
+ rdev_err(rdev, "coupler mismatch with %s\n",
+ rdev_get_name(c_rdev));
+ return;
+ }
+
regulator_lock(c_rdev);
c_desc->coupled_rdevs[i] = c_rdev;
@@ -4737,10 +4860,12 @@ static void regulator_resolve_coupling(struct regulator_dev *rdev)
static void regulator_remove_coupling(struct regulator_dev *rdev)
{
+ struct regulator_coupler *coupler = rdev->coupling_desc.coupler;
struct coupling_desc *__c_desc, *c_desc = &rdev->coupling_desc;
struct regulator_dev *__c_rdev, *c_rdev;
unsigned int __n_coupled, n_coupled;
int i, k;
+ int err;
n_coupled = c_desc->n_coupled;
@@ -4770,21 +4895,33 @@ static void regulator_remove_coupling(struct regulator_dev *rdev)
c_desc->coupled_rdevs[i] = NULL;
c_desc->n_resolved--;
}
+
+ if (coupler && coupler->detach_regulator) {
+ err = coupler->detach_regulator(coupler, rdev);
+ if (err)
+ rdev_err(rdev, "failed to detach from coupler: %d\n",
+ err);
+ }
+
+ kfree(rdev->coupling_desc.coupled_rdevs);
+ rdev->coupling_desc.coupled_rdevs = NULL;
}
static int regulator_init_coupling(struct regulator_dev *rdev)
{
- int n_phandles;
+ int err, n_phandles;
+ size_t alloc_size;
if (!IS_ENABLED(CONFIG_OF))
n_phandles = 0;
else
n_phandles = of_get_n_coupled(rdev);
- if (n_phandles + 1 > MAX_COUPLED) {
- rdev_err(rdev, "too many regulators coupled\n");
- return -EPERM;
- }
+	alloc_size = sizeof(*rdev->coupling_desc.coupled_rdevs) * (n_phandles + 1);
+
+ rdev->coupling_desc.coupled_rdevs = kzalloc(alloc_size, GFP_KERNEL);
+ if (!rdev->coupling_desc.coupled_rdevs)
+ return -ENOMEM;
/*
* Every regulator should always have coupling descriptor filled with
@@ -4798,23 +4935,35 @@ static int regulator_init_coupling(struct regulator_dev *rdev)
if (n_phandles == 0)
return 0;
- /* regulator, which can't change its voltage, can't be coupled */
- if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
- rdev_err(rdev, "voltage operation not allowed\n");
+ if (!of_check_coupling_data(rdev))
return -EPERM;
- }
- if (rdev->constraints->max_spread <= 0) {
- rdev_err(rdev, "wrong max_spread value\n");
- return -EPERM;
+ rdev->coupling_desc.coupler = regulator_find_coupler(rdev);
+ if (IS_ERR(rdev->coupling_desc.coupler)) {
+ err = PTR_ERR(rdev->coupling_desc.coupler);
+ rdev_err(rdev, "failed to get coupler: %d\n", err);
+ return err;
}
- if (!of_check_coupling_data(rdev))
+ return 0;
+}
+
+static int generic_coupler_attach(struct regulator_coupler *coupler,
+ struct regulator_dev *rdev)
+{
+ if (rdev->coupling_desc.n_coupled > 2) {
+ rdev_err(rdev,
+ "Voltage balancing for multiple regulator couples is unimplemented\n");
return -EPERM;
+ }
return 0;
}
+static struct regulator_coupler generic_regulator_coupler = {
+ .attach_regulator = generic_coupler_attach,
+};
+
/**
* regulator_register - register regulator
* @regulator_desc: regulator to register
@@ -4976,7 +5125,9 @@ regulator_register(const struct regulator_desc *regulator_desc,
if (ret < 0)
goto wash;
+ mutex_lock(&regulator_list_mutex);
ret = regulator_init_coupling(rdev);
+ mutex_unlock(&regulator_list_mutex);
if (ret < 0)
goto wash;
@@ -5025,6 +5176,7 @@ regulator_register(const struct regulator_desc *regulator_desc,
unset_supplies:
mutex_lock(&regulator_list_mutex);
unset_regulator_supplies(rdev);
+ regulator_remove_coupling(rdev);
mutex_unlock(&regulator_list_mutex);
wash:
kfree(rdev->constraints);
@@ -5278,7 +5430,7 @@ static void regulator_summary_show_subtree(struct seq_file *s,
rdev->use_count, rdev->open_count, rdev->bypass_count,
regulator_opmode_to_str(opmode));
- seq_printf(s, "%5dmV ", _regulator_get_voltage(rdev) / 1000);
+ seq_printf(s, "%5dmV ", regulator_get_voltage_rdev(rdev) / 1000);
seq_printf(s, "%5dmA ",
_regulator_get_current_limit_unlocked(rdev) / 1000);
@@ -5480,6 +5632,8 @@ static int __init regulator_init(void)
#endif
regulator_dummy_init();
+ regulator_coupler_register(&generic_regulator_coupler);
+
return ret;
}
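With regulator_find_coupler() walking the list in reverse, a platform-specific coupler registered after the generic one gets first refusal on every regulator. A minimal sketch of registering one, using only the callbacks visible in this patch (the initcall level is a guess; a real coupler must register before the regulators it wants to claim are probed):

#include <linux/regulator/coupler.h>

static int my_attach(struct regulator_coupler *coupler,
		     struct regulator_dev *rdev)
{
	/* Return 0 to claim the regulator, 1 to let another coupler try,
	 * or a negative error code. */
	return 1;
}

static int my_balance(struct regulator_coupler *coupler,
		      struct regulator_dev *rdev, suspend_state_t state)
{
	/* Custom voltage balancing for the coupled regulators. */
	return 0;
}

static struct regulator_coupler my_coupler = {
	.attach_regulator = my_attach,
	.balance_voltage = my_balance,
};

static int __init my_coupler_init(void)
{
	return regulator_coupler_register(&my_coupler);
}
core_initcall(my_coupler_init);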
diff --git a/drivers/regulator/cpcap-regulator.c b/drivers/regulator/cpcap-regulator.c
index d3284361e594..f80781d58a28 100644
--- a/drivers/regulator/cpcap-regulator.c
+++ b/drivers/regulator/cpcap-regulator.c
@@ -90,7 +90,7 @@
#define CPCAP_REG_OFF_MODE_SEC BIT(15)
/**
- * SoC specific configuraion for CPCAP regulator. There are at least three
+ * SoC specific configuration for CPCAP regulator. There are at least three
* different SoCs each with their own parameters: omap3, omap4 and tegra2.
*
* The assign_reg and assign_mask seem to allow toggling between primary
diff --git a/drivers/regulator/da9062-regulator.c b/drivers/regulator/da9062-regulator.c
index a02e0488410f..2ffc64622451 100644
--- a/drivers/regulator/da9062-regulator.c
+++ b/drivers/regulator/da9062-regulator.c
@@ -493,12 +493,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO1_CONT,
.desc.enable_mask = DA9062AA_LDO1_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO1_A,
.desc.vsel_mask = DA9062AA_VLDO1_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO1_A,
__builtin_ffs((int)DA9062AA_LDO1_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -525,12 +526,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (600))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO2_CONT,
.desc.enable_mask = DA9062AA_LDO2_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO2_A,
.desc.vsel_mask = DA9062AA_VLDO2_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO2_A,
__builtin_ffs((int)DA9062AA_LDO2_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -557,12 +559,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO3_CONT,
.desc.enable_mask = DA9062AA_LDO3_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO3_A,
.desc.vsel_mask = DA9062AA_VLDO3_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO3_A,
__builtin_ffs((int)DA9062AA_LDO3_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -589,12 +592,13 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO4_CONT,
.desc.enable_mask = DA9062AA_LDO4_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO4_A,
.desc.vsel_mask = DA9062AA_VLDO4_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO4_A,
__builtin_ffs((int)DA9062AA_LDO4_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -769,12 +773,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO1_CONT,
.desc.enable_mask = DA9062AA_LDO1_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO1_A,
.desc.vsel_mask = DA9062AA_VLDO1_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO1_A,
__builtin_ffs((int)DA9062AA_LDO1_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -801,12 +806,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (600))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO2_CONT,
.desc.enable_mask = DA9062AA_LDO2_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO2_A,
.desc.vsel_mask = DA9062AA_VLDO2_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO2_A,
__builtin_ffs((int)DA9062AA_LDO2_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -833,12 +839,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO3_CONT,
.desc.enable_mask = DA9062AA_LDO3_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO3_A,
.desc.vsel_mask = DA9062AA_VLDO3_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO3_A,
__builtin_ffs((int)DA9062AA_LDO3_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -865,12 +872,13 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
.desc.ops = &da9062_ldo_ops,
.desc.min_uV = (900) * 1000,
.desc.uV_step = (50) * 1000,
- .desc.n_voltages = ((3600) - (900))/(50) + 1,
+ .desc.n_voltages = ((3600) - (900))/(50) + 1
+ + DA9062AA_VLDO_A_MIN_SEL,
.desc.enable_reg = DA9062AA_LDO4_CONT,
.desc.enable_mask = DA9062AA_LDO4_EN_MASK,
.desc.vsel_reg = DA9062AA_VLDO4_A,
.desc.vsel_mask = DA9062AA_VLDO4_A_MASK,
- .desc.linear_min_sel = 0,
+ .desc.linear_min_sel = DA9062AA_VLDO_A_MIN_SEL,
.sleep = REG_FIELD(DA9062AA_VLDO4_A,
__builtin_ffs((int)DA9062AA_LDO4_SL_A_MASK) - 1,
sizeof(unsigned int) * 8 -
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index 6f9ce1a6e44d..02f816318fba 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -19,7 +19,6 @@
#include <linux/regulator/machine.h>
#include <linux/regulator/of_regulator.h>
#include <linux/mfd/da9063/core.h>
-#include <linux/mfd/da9063/pdata.h>
#include <linux/mfd/da9063/registers.h>
@@ -28,6 +27,49 @@
REG_FIELD(_reg, __builtin_ffs((int)_mask) - 1, \
sizeof(unsigned int) * 8 - __builtin_clz((_mask)) - 1)
+/* DA9063 and DA9063L regulator IDs */
+enum {
+ /* BUCKs */
+ DA9063_ID_BCORE1,
+ DA9063_ID_BCORE2,
+ DA9063_ID_BPRO,
+ DA9063_ID_BMEM,
+ DA9063_ID_BIO,
+ DA9063_ID_BPERI,
+
+ /* BCORE1 and BCORE2 in merged mode */
+ DA9063_ID_BCORES_MERGED,
+ /* BMEM and BIO in merged mode */
+ DA9063_ID_BMEM_BIO_MERGED,
+ /* When two BUCKs are merged, they cannot be reused separately */
+
+ /* LDOs on both DA9063 and DA9063L */
+ DA9063_ID_LDO3,
+ DA9063_ID_LDO7,
+ DA9063_ID_LDO8,
+ DA9063_ID_LDO9,
+ DA9063_ID_LDO11,
+
+ /* DA9063-only LDOs */
+ DA9063_ID_LDO1,
+ DA9063_ID_LDO2,
+ DA9063_ID_LDO4,
+ DA9063_ID_LDO5,
+ DA9063_ID_LDO6,
+ DA9063_ID_LDO10,
+};
+
+/* Old regulator platform data */
+struct da9063_regulator_data {
+ int id;
+ struct regulator_init_data *initdata;
+};
+
+struct da9063_regulators_pdata {
+ unsigned n_regulators;
+ struct da9063_regulator_data *regulator_data;
+};
+
/* Regulator capabilities and registers description */
struct da9063_regulator_info {
struct regulator_desc desc;
@@ -592,7 +634,6 @@ static const struct regulator_init_data *da9063_get_regulator_initdata(
return NULL;
}
-#ifdef CONFIG_OF
static struct of_regulator_match da9063_matches[] = {
[DA9063_ID_BCORE1] = { .name = "bcore1" },
[DA9063_ID_BCORE2] = { .name = "bcore2" },
@@ -670,20 +711,10 @@ static struct da9063_regulators_pdata *da9063_parse_regulators_dt(
*da9063_reg_matches = da9063_matches;
return pdata;
}
-#else
-static struct da9063_regulators_pdata *da9063_parse_regulators_dt(
- struct platform_device *pdev,
- struct of_regulator_match **da9063_reg_matches)
-{
- *da9063_reg_matches = NULL;
- return ERR_PTR(-ENODEV);
-}
-#endif
static int da9063_regulator_probe(struct platform_device *pdev)
{
struct da9063 *da9063 = dev_get_drvdata(pdev->dev.parent);
- struct da9063_pdata *da9063_pdata = dev_get_platdata(da9063->dev);
struct of_regulator_match *da9063_reg_matches = NULL;
struct da9063_regulators_pdata *regl_pdata;
const struct da9063_dev_model *model;
@@ -693,11 +724,7 @@ static int da9063_regulator_probe(struct platform_device *pdev)
bool bcores_merged, bmem_bio_merged;
int id, irq, n, n_regulators, ret, val;
- regl_pdata = da9063_pdata ? da9063_pdata->regulators_pdata : NULL;
-
- if (!regl_pdata)
- regl_pdata = da9063_parse_regulators_dt(pdev,
- &da9063_reg_matches);
+ regl_pdata = da9063_parse_regulators_dt(pdev, &da9063_reg_matches);
if (IS_ERR(regl_pdata) || regl_pdata->n_regulators == 0) {
dev_err(&pdev->dev,
diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c
index da37b4ccd834..0309823d2c72 100644
--- a/drivers/regulator/da9211-regulator.c
+++ b/drivers/regulator/da9211-regulator.c
@@ -289,6 +289,8 @@ static struct da9211_pdata *da9211_parse_regulators_dt(
0,
GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE,
"da9211-enable");
+ if (IS_ERR(pdata->gpiod_ren[n]))
+ pdata->gpiod_ren[n] = NULL;
n++;
}
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index b9ae45d2d199..4986cc5064a1 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -1,10 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * helpers.c -- Voltage/Current Regulator framework helper functions.
- *
- * Copyright 2007, 2008 Wolfson Microelectronics PLC.
- * Copyright 2008 SlimLogic Ltd.
- */
+//
+// helpers.c -- Voltage/Current Regulator framework helper functions.
+//
+// Copyright 2007, 2008 Wolfson Microelectronics PLC.
+// Copyright 2008 SlimLogic Ltd.
#include <linux/kernel.h>
#include <linux/err.h>
diff --git a/drivers/regulator/lm363x-regulator.c b/drivers/regulator/lm363x-regulator.c
index 60f15a722760..5647e2f97ff8 100644
--- a/drivers/regulator/lm363x-regulator.c
+++ b/drivers/regulator/lm363x-regulator.c
@@ -34,6 +34,11 @@
#define LM3632_VBOOST_MIN 4500000
#define LM3632_VLDO_MIN 4000000
+/* LM36274 */
+#define LM36274_BOOST_VSEL_MAX 0x3f
+#define LM36274_LDO_VSEL_MAX 0x34
+#define LM36274_VOLTAGE_MIN 4000000
+
/* Common */
#define LM363X_STEP_50mV 50000
#define LM363X_STEP_500mV 500000
@@ -214,6 +219,51 @@ static const struct regulator_desc lm363x_regulator_desc[] = {
.enable_reg = LM3632_REG_BIAS_CONFIG,
.enable_mask = LM3632_EN_VNEG_MASK,
},
+
+ /* LM36274 */
+ {
+ .name = "vboost",
+ .of_match = "vboost",
+ .id = LM36274_BOOST,
+ .ops = &lm363x_boost_voltage_table_ops,
+ .n_voltages = LM36274_BOOST_VSEL_MAX,
+ .min_uV = LM36274_VOLTAGE_MIN,
+ .uV_step = LM363X_STEP_50mV,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+ .vsel_reg = LM36274_REG_VOUT_BOOST,
+ .vsel_mask = LM36274_VOUT_MASK,
+ },
+ {
+ .name = "ldo_vpos",
+ .of_match = "vpos",
+ .id = LM36274_LDO_POS,
+ .ops = &lm363x_regulator_voltage_table_ops,
+ .n_voltages = LM36274_LDO_VSEL_MAX,
+ .min_uV = LM36274_VOLTAGE_MIN,
+ .uV_step = LM363X_STEP_50mV,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+ .vsel_reg = LM36274_REG_VOUT_POS,
+ .vsel_mask = LM36274_VOUT_MASK,
+ .enable_reg = LM36274_REG_BIAS_CONFIG_1,
+ .enable_mask = LM36274_EN_VPOS_MASK,
+ },
+ {
+ .name = "ldo_vneg",
+ .of_match = "vneg",
+ .id = LM36274_LDO_NEG,
+ .ops = &lm363x_regulator_voltage_table_ops,
+ .n_voltages = LM36274_LDO_VSEL_MAX,
+ .min_uV = LM36274_VOLTAGE_MIN,
+ .uV_step = LM363X_STEP_50mV,
+ .type = REGULATOR_VOLTAGE,
+ .owner = THIS_MODULE,
+ .vsel_reg = LM36274_REG_VOUT_NEG,
+ .vsel_mask = LM36274_VOUT_MASK,
+ .enable_reg = LM36274_REG_BIAS_CONFIG_1,
+ .enable_mask = LM36274_EN_VNEG_MASK,
+ },
};
static struct gpio_desc *lm363x_regulator_of_get_enable_gpio(struct device *dev, int id)
@@ -226,9 +276,11 @@ static struct gpio_desc *lm363x_regulator_of_get_enable_gpio(struct device *dev,
*/
switch (id) {
case LM3632_LDO_POS:
+ case LM36274_LDO_POS:
return gpiod_get_index_optional(dev, "enable", 0,
GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE);
case LM3632_LDO_NEG:
+ case LM36274_LDO_NEG:
return gpiod_get_index_optional(dev, "enable", 1,
GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE);
default:
@@ -236,6 +288,27 @@ static struct gpio_desc *lm363x_regulator_of_get_enable_gpio(struct device *dev,
}
}
+static int lm363x_regulator_set_ext_en(struct regmap *regmap, int id)
+{
+ int ext_en_mask = 0;
+
+ switch (id) {
+ case LM3632_LDO_POS:
+ case LM3632_LDO_NEG:
+ ext_en_mask = LM3632_EXT_EN_MASK;
+ break;
+ case LM36274_LDO_POS:
+ case LM36274_LDO_NEG:
+ ext_en_mask = LM36274_EXT_EN_MASK;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ return regmap_update_bits(regmap, lm363x_regulator_desc[id].enable_reg,
+ ext_en_mask, ext_en_mask);
+}
+
static int lm363x_regulator_probe(struct platform_device *pdev)
{
struct ti_lmu *lmu = dev_get_drvdata(pdev->dev.parent);
@@ -260,10 +333,7 @@ static int lm363x_regulator_probe(struct platform_device *pdev)
if (gpiod) {
cfg.ena_gpiod = gpiod;
-
- ret = regmap_update_bits(regmap, LM3632_REG_BIAS_CONFIG,
- LM3632_EXT_EN_MASK,
- LM3632_EXT_EN_MASK);
+ ret = lm363x_regulator_set_ext_en(regmap, id);
if (ret) {
gpiod_put(gpiod);
dev_err(dev, "External pin err: %d\n", ret);
diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 0db367b54ae7..8d9731e4052b 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -467,7 +467,7 @@ static int max77620_regulator_is_enabled(struct regulator_dev *rdev)
{
struct max77620_regulator *pmic = rdev_get_drvdata(rdev);
int id = rdev_get_id(rdev);
- int ret = 1;
+ int ret;
if (pmic->active_fps_src[id] != MAX77620_FPS_SRC_NONE)
return 1;
@@ -758,6 +758,24 @@ static struct max77620_regulator_info max20024_regs_info[MAX77620_NUM_REGS] = {
RAIL_LDO(LDO8, ldo8, "in-ldo7-8", N, 800000, 3950000, 50000),
};
+static struct max77620_regulator_info max77663_regs_info[MAX77620_NUM_REGS] = {
+ RAIL_SD(SD0, sd0, "in-sd0", SD0, 600000, 3387500, 12500, 0xFF, NONE),
+ RAIL_SD(SD1, sd1, "in-sd1", SD1, 800000, 1587500, 12500, 0xFF, NONE),
+ RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE),
+ RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE),
+ RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE),
+
+ RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000),
+ RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000),
+ RAIL_LDO(LDO2, ldo2, "in-ldo2", P, 800000, 3950000, 50000),
+ RAIL_LDO(LDO3, ldo3, "in-ldo3-5", P, 800000, 3950000, 50000),
+ RAIL_LDO(LDO4, ldo4, "in-ldo4-6", P, 800000, 1587500, 12500),
+ RAIL_LDO(LDO5, ldo5, "in-ldo3-5", P, 800000, 3950000, 50000),
+ RAIL_LDO(LDO6, ldo6, "in-ldo4-6", P, 800000, 3950000, 50000),
+ RAIL_LDO(LDO7, ldo7, "in-ldo7-8", N, 800000, 3950000, 50000),
+ RAIL_LDO(LDO8, ldo8, "in-ldo7-8", N, 800000, 3950000, 50000),
+};
+
static int max77620_regulator_probe(struct platform_device *pdev)
{
struct max77620_chip *max77620_chip = dev_get_drvdata(pdev->dev.parent);
@@ -782,9 +800,14 @@ static int max77620_regulator_probe(struct platform_device *pdev)
case MAX77620:
rinfo = max77620_regs_info;
break;
- default:
+ case MAX20024:
rinfo = max20024_regs_info;
break;
+ case MAX77663:
+ rinfo = max77663_regs_info;
+ break;
+ default:
+ return -EINVAL;
}
config.regmap = pmic->rmap;
@@ -878,6 +901,7 @@ static const struct dev_pm_ops max77620_regulator_pm_ops = {
static const struct platform_device_id max77620_regulator_devtype[] = {
{ .name = "max77620-pmic", },
{ .name = "max20024-pmic", },
+ { .name = "max77663-pmic", },
{},
};
MODULE_DEVICE_TABLE(platform, max77620_regulator_devtype);
diff --git a/drivers/regulator/max77650-regulator.c b/drivers/regulator/max77650-regulator.c
index 5c4f86c98510..e57fc9197d62 100644
--- a/drivers/regulator/max77650-regulator.c
+++ b/drivers/regulator/max77650-regulator.c
@@ -20,6 +20,8 @@
#define MAX77650_REGULATOR_V_LDO_MASK GENMASK(6, 0)
#define MAX77650_REGULATOR_V_SBB_MASK GENMASK(5, 0)
+#define MAX77651_REGULATOR_V_SBB1_MASK GENMASK(5, 2)
+#define MAX77651_REGULATOR_V_SBB1_RANGE_MASK GENMASK(1, 0)
#define MAX77650_REGULATOR_AD_MASK BIT(3)
#define MAX77650_REGULATOR_AD_DISABLED 0x00
@@ -41,43 +43,22 @@ struct max77650_regulator_desc {
unsigned int regB;
};
-static const unsigned int max77651_sbb1_regulator_volt_table[] = {
- 2400000, 3200000, 4000000, 4800000,
- 2450000, 3250000, 4050000, 4850000,
- 2500000, 3300000, 4100000, 4900000,
- 2550000, 3350000, 4150000, 4950000,
- 2600000, 3400000, 4200000, 5000000,
- 2650000, 3450000, 4250000, 5050000,
- 2700000, 3500000, 4300000, 5100000,
- 2750000, 3550000, 4350000, 5150000,
- 2800000, 3600000, 4400000, 5200000,
- 2850000, 3650000, 4450000, 5250000,
- 2900000, 3700000, 4500000, 0,
- 2950000, 3750000, 4550000, 0,
- 3000000, 3800000, 4600000, 0,
- 3050000, 3850000, 4650000, 0,
- 3100000, 3900000, 4700000, 0,
- 3150000, 3950000, 4750000, 0,
+static struct max77650_regulator_desc max77651_SBB1_desc;
+
+static const unsigned int max77651_sbb1_volt_range_sel[] = {
+ 0x0, 0x1, 0x2, 0x3
};
-#define MAX77651_REGULATOR_SBB1_SEL_DEC(_val) \
- (((_val & 0x3c) >> 2) | ((_val & 0x03) << 4))
-#define MAX77651_REGULATOR_SBB1_SEL_ENC(_val) \
- (((_val & 0x30) >> 4) | ((_val & 0x0f) << 2))
-
-#define MAX77650_REGULATOR_SBB1_SEL_DECR(_val) \
- do { \
- _val = MAX77651_REGULATOR_SBB1_SEL_DEC(_val); \
- _val--; \
- _val = MAX77651_REGULATOR_SBB1_SEL_ENC(_val); \
- } while (0)
-
-#define MAX77650_REGULATOR_SBB1_SEL_INCR(_val) \
- do { \
- _val = MAX77651_REGULATOR_SBB1_SEL_DEC(_val); \
- _val++; \
- _val = MAX77651_REGULATOR_SBB1_SEL_ENC(_val); \
- } while (0)
+static const struct regulator_linear_range max77651_sbb1_volt_ranges[] = {
+ /* range index 0 */
+ REGULATOR_LINEAR_RANGE(2400000, 0x00, 0x0f, 50000),
+ /* range index 1 */
+ REGULATOR_LINEAR_RANGE(3200000, 0x00, 0x0f, 50000),
+ /* range index 2 */
+ REGULATOR_LINEAR_RANGE(4000000, 0x00, 0x0f, 50000),
+ /* range index 3 */
+ REGULATOR_LINEAR_RANGE(4800000, 0x00, 0x09, 50000),
+};
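For reference, the pickable encoding above resolves a raw CNFG_SBB1_A value in two parts: bits [1:0] select one of the four base voltages and bits [5:2] select the 50mV step within it (range index 3 only spans selectors 0x0-0x9, giving the 58 set points declared below). A minimal stand-alone sketch of the decode, assuming the masks above; the helper name and user-space form are illustrative, not driver code:

#include <stdio.h>

/* Decode a raw CNFG_SBB1_A value per the masks and ranges above. */
static int sbb1_reg_to_uv(unsigned int reg)
{
	static const int base_uv[] = { 2400000, 3200000, 4000000, 4800000 };

	/* bits [1:0]: range selector; bits [5:2]: 50 mV step within it */
	return base_uv[reg & 0x03] + ((reg & 0x3c) >> 2) * 50000;
}

int main(void)
{
	printf("%d\n", sbb1_reg_to_uv(0x09));	/* prints 3300000 */
	return 0;
}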
static const unsigned int max77650_current_limit_table[] = {
1000000, 866000, 707000, 500000,
@@ -127,96 +108,6 @@ static int max77650_regulator_disable(struct regulator_dev *rdev)
MAX77650_REGULATOR_DISABLED);
}
-static int max77650_regulator_set_voltage_sel(struct regulator_dev *rdev,
- unsigned int sel)
-{
- int rv = 0, curr, diff;
- bool ascending;
-
- /*
- * If the regulator is disabled, we can program the desired
- * voltage right away.
- */
- if (!max77650_regulator_is_enabled(rdev))
- return regulator_set_voltage_sel_regmap(rdev, sel);
-
- /*
- * Otherwise we need to manually ramp the output voltage up/down
- * one step at a time.
- */
-
- curr = regulator_get_voltage_sel_regmap(rdev);
- if (curr < 0)
- return curr;
-
- diff = curr - sel;
- if (diff == 0)
- return 0; /* Already there. */
- else if (diff > 0)
- ascending = false;
- else
- ascending = true;
-
- /*
- * Make sure we'll get to the right voltage and break the loop even if
- * the selector equals 0.
- */
- for (ascending ? curr++ : curr--;; ascending ? curr++ : curr--) {
- rv = regulator_set_voltage_sel_regmap(rdev, curr);
- if (rv)
- return rv;
-
- if (curr == sel)
- break;
- }
-
- return 0;
-}
-
-/*
- * Special case: non-linear voltage table for max77651 SBB1 - software
- * must ensure the voltage is ramped in 50mV increments.
- */
-static int max77651_regulator_sbb1_set_voltage_sel(struct regulator_dev *rdev,
- unsigned int sel)
-{
- int rv = 0, curr, vcurr, vdest, vdiff;
-
- /*
- * If the regulator is disabled, we can program the desired
- * voltage right away.
- */
- if (!max77650_regulator_is_enabled(rdev))
- return regulator_set_voltage_sel_regmap(rdev, sel);
-
- curr = regulator_get_voltage_sel_regmap(rdev);
- if (curr < 0)
- return curr;
-
- if (curr == sel)
- return 0; /* Already there. */
-
- vcurr = max77651_sbb1_regulator_volt_table[curr];
- vdest = max77651_sbb1_regulator_volt_table[sel];
- vdiff = vcurr - vdest;
-
- for (;;) {
- if (vdiff > 0)
- MAX77650_REGULATOR_SBB1_SEL_DECR(curr);
- else
- MAX77650_REGULATOR_SBB1_SEL_INCR(curr);
-
- rv = regulator_set_voltage_sel_regmap(rdev, curr);
- if (rv)
- return rv;
-
- if (curr == sel)
- break;
- };
-
- return 0;
-}
-
static const struct regulator_ops max77650_regulator_LDO_ops = {
.is_enabled = max77650_regulator_is_enabled,
.enable = max77650_regulator_enable,
@@ -224,7 +115,7 @@ static const struct regulator_ops max77650_regulator_LDO_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
- .set_voltage_sel = max77650_regulator_set_voltage_sel,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_active_discharge = regulator_set_active_discharge_regmap,
};
@@ -235,20 +126,20 @@ static const struct regulator_ops max77650_regulator_SBB_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
- .set_voltage_sel = max77650_regulator_set_voltage_sel,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
.get_current_limit = regulator_get_current_limit_regmap,
.set_current_limit = regulator_set_current_limit_regmap,
.set_active_discharge = regulator_set_active_discharge_regmap,
};
-/* Special case for max77651 SBB1 - non-linear voltage mapping. */
+/* Special case for max77651 SBB1 - pickable linear-range voltage mapping. */
static const struct regulator_ops max77651_SBB1_regulator_ops = {
.is_enabled = max77650_regulator_is_enabled,
.enable = max77650_regulator_enable,
.disable = max77650_regulator_disable,
- .list_voltage = regulator_list_voltage_table,
- .get_voltage_sel = regulator_get_voltage_sel_regmap,
- .set_voltage_sel = max77651_regulator_sbb1_set_voltage_sel,
+ .list_voltage = regulator_list_voltage_pickable_linear_range,
+ .get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_pickable_regmap,
.get_current_limit = regulator_get_current_limit_regmap,
.set_current_limit = regulator_set_current_limit_regmap,
.set_active_discharge = regulator_set_active_discharge_regmap,
@@ -265,6 +156,7 @@ static struct max77650_regulator_desc max77650_LDO_desc = {
.min_uV = 1350000,
.uV_step = 12500,
.n_voltages = 128,
+ .vsel_step = 1,
.vsel_mask = MAX77650_REGULATOR_V_LDO_MASK,
.vsel_reg = MAX77650_REG_CNFG_LDO_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
@@ -290,6 +182,7 @@ static struct max77650_regulator_desc max77650_SBB0_desc = {
.min_uV = 800000,
.uV_step = 25000,
.n_voltages = 64,
+ .vsel_step = 1,
.vsel_mask = MAX77650_REGULATOR_V_SBB_MASK,
.vsel_reg = MAX77650_REG_CNFG_SBB0_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
@@ -319,6 +212,7 @@ static struct max77650_regulator_desc max77650_SBB1_desc = {
.min_uV = 800000,
.uV_step = 12500,
.n_voltages = 64,
+ .vsel_step = 1,
.vsel_mask = MAX77650_REGULATOR_V_SBB_MASK,
.vsel_reg = MAX77650_REG_CNFG_SBB1_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
@@ -345,9 +239,14 @@ static struct max77650_regulator_desc max77651_SBB1_desc = {
.supply_name = "in-sbb1",
.id = MAX77650_REGULATOR_ID_SBB1,
.ops = &max77651_SBB1_regulator_ops,
- .volt_table = max77651_sbb1_regulator_volt_table,
- .n_voltages = ARRAY_SIZE(max77651_sbb1_regulator_volt_table),
- .vsel_mask = MAX77650_REGULATOR_V_SBB_MASK,
+ .linear_range_selectors = max77651_sbb1_volt_range_sel,
+ .linear_ranges = max77651_sbb1_volt_ranges,
+ .n_linear_ranges = ARRAY_SIZE(max77651_sbb1_volt_ranges),
+ .n_voltages = 58,
+ .vsel_step = 1,
+ .vsel_range_mask = MAX77651_REGULATOR_V_SBB1_RANGE_MASK,
+ .vsel_range_reg = MAX77650_REG_CNFG_SBB1_A,
+ .vsel_mask = MAX77651_REGULATOR_V_SBB1_MASK,
.vsel_reg = MAX77650_REG_CNFG_SBB1_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
.active_discharge_on = MAX77650_REGULATOR_AD_ENABLED,
@@ -376,6 +275,7 @@ static struct max77650_regulator_desc max77650_SBB2_desc = {
.min_uV = 800000,
.uV_step = 50000,
.n_voltages = 64,
+ .vsel_step = 1,
.vsel_mask = MAX77650_REGULATOR_V_SBB_MASK,
.vsel_reg = MAX77650_REG_CNFG_SBB2_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
@@ -405,6 +305,7 @@ static struct max77650_regulator_desc max77651_SBB2_desc = {
.min_uV = 2400000,
.uV_step = 50000,
.n_voltages = 64,
+ .vsel_step = 1,
.vsel_mask = MAX77650_REGULATOR_V_SBB_MASK,
.vsel_reg = MAX77650_REG_CNFG_SBB2_A,
.active_discharge_off = MAX77650_REGULATOR_AD_DISABLED,
@@ -496,3 +397,4 @@ module_platform_driver(max77650_regulator_driver);
MODULE_DESCRIPTION("MAXIM 77650/77651 regulator driver");
MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:max77650-regulator");
diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index ea7b50397300..7b8ec8c0bd15 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -14,9 +14,7 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/slab.h>
-#include <linux/gpio/consumer.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/regulator/driver.h>
diff --git a/drivers/regulator/max8952.c b/drivers/regulator/max8952.c
index 2a123b87d9f2..ccd5da63cdf2 100644
--- a/drivers/regulator/max8952.c
+++ b/drivers/regulator/max8952.c
@@ -13,11 +13,9 @@
#include <linux/platform_device.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/max8952.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/io.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/regulator/of_regulator.h>
#include <linux/slab.h>
@@ -37,7 +35,8 @@ enum {
struct max8952_data {
struct i2c_client *client;
struct max8952_platform_data *pdata;
-
+ struct gpio_desc *vid0_gpiod;
+ struct gpio_desc *vid1_gpiod;
bool vid0;
bool vid1;
};
@@ -87,16 +86,15 @@ static int max8952_set_voltage_sel(struct regulator_dev *rdev,
{
struct max8952_data *max8952 = rdev_get_drvdata(rdev);
- if (!gpio_is_valid(max8952->pdata->gpio_vid0) ||
- !gpio_is_valid(max8952->pdata->gpio_vid1)) {
+ if (!max8952->vid0_gpiod || !max8952->vid1_gpiod) {
/* DVS not supported */
return -EPERM;
}
max8952->vid0 = selector & 0x1;
max8952->vid1 = (selector >> 1) & 0x1;
- gpio_set_value(max8952->pdata->gpio_vid0, max8952->vid0);
- gpio_set_value(max8952->pdata->gpio_vid1, max8952->vid1);
+ gpiod_set_value(max8952->vid0_gpiod, max8952->vid0);
+ gpiod_set_value(max8952->vid1_gpiod, max8952->vid1);
return 0;
}
@@ -134,9 +132,6 @@ static struct max8952_platform_data *max8952_parse_dt(struct device *dev)
if (!pd)
return NULL;
- pd->gpio_vid0 = of_get_named_gpio(np, "max8952,vid-gpios", 0);
- pd->gpio_vid1 = of_get_named_gpio(np, "max8952,vid-gpios", 1);
-
if (of_property_read_u32(np, "max8952,default-mode", &pd->default_mode))
dev_warn(dev, "Default mode not specified, assuming 0\n");
@@ -179,7 +174,7 @@ static struct max8952_platform_data *max8952_parse_dt(struct device *dev)
static int max8952_pmic_probe(struct i2c_client *client,
const struct i2c_device_id *i2c_id)
{
- struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct i2c_adapter *adapter = client->adapter;
struct max8952_platform_data *pdata = dev_get_platdata(&client->dev);
struct regulator_config config = { };
struct max8952_data *max8952;
@@ -187,7 +182,7 @@ static int max8952_pmic_probe(struct i2c_client *client,
struct gpio_desc *gpiod;
enum gpiod_flags gflags;
- int ret = 0, err = 0;
+ int ret = 0;
if (client->dev.of_node)
pdata = max8952_parse_dt(&client->dev);
@@ -240,32 +235,31 @@ static int max8952_pmic_probe(struct i2c_client *client,
max8952->vid0 = pdata->default_mode & 0x1;
max8952->vid1 = (pdata->default_mode >> 1) & 0x1;
- if (gpio_is_valid(pdata->gpio_vid0) &&
- gpio_is_valid(pdata->gpio_vid1)) {
- unsigned long gpio_flags;
-
- gpio_flags = max8952->vid0 ?
- GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
- if (devm_gpio_request_one(&client->dev, pdata->gpio_vid0,
- gpio_flags, "MAX8952 VID0"))
- err = 1;
-
- gpio_flags = max8952->vid1 ?
- GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
- if (devm_gpio_request_one(&client->dev, pdata->gpio_vid1,
- gpio_flags, "MAX8952 VID1"))
- err = 2;
- } else
- err = 3;
-
- if (err) {
+ /* Fetch vid0 and vid1 GPIOs if available */
+ gflags = max8952->vid0 ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+ max8952->vid0_gpiod = devm_gpiod_get_index_optional(&client->dev,
+ "max8952,vid",
+ 0, gflags);
+ if (IS_ERR(max8952->vid0_gpiod))
+ return PTR_ERR(max8952->vid0_gpiod);
+ gflags = max8952->vid1 ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+ max8952->vid1_gpiod = devm_gpiod_get_index_optional(&client->dev,
+ "max8952,vid",
+ 1, gflags);
+ if (IS_ERR(max8952->vid1_gpiod))
+ return PTR_ERR(max8952->vid1_gpiod);
+
+ /* If either VID GPIO is missing, just disable DVS */
+ if (!max8952->vid0_gpiod || !max8952->vid1_gpiod) {
dev_warn(&client->dev, "VID0/1 gpio invalid: "
- "DVS not available.\n");
+ "DVS not available.\n");
max8952->vid0 = 0;
max8952->vid1 = 0;
- /* Mark invalid */
- pdata->gpio_vid0 = -1;
- pdata->gpio_vid1 = -1;
+ /* Make sure any descriptors we do have are driven low */
+ if (max8952->vid0_gpiod)
+ gpiod_set_value(max8952->vid0_gpiod, 0);
+ if (max8952->vid1_gpiod)
+ gpiod_set_value(max8952->vid1_gpiod, 0);
/* Disable Pulldown of EN only */
max8952_write_reg(max8952, MAX8952_REG_CONTROL, 0x60);
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 0ead1164e4d6..397918ebba55 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -21,7 +21,8 @@ static const char *const regulator_states[PM_SUSPEND_MAX + 1] = {
[PM_SUSPEND_MAX] = "regulator-state-disk",
};
-static void of_get_regulation_constraints(struct device_node *np,
+static int of_get_regulation_constraints(struct device *dev,
+ struct device_node *np,
struct regulator_init_data **init_data,
const struct regulator_desc *desc)
{
@@ -30,8 +31,13 @@ static void of_get_regulation_constraints(struct device_node *np,
struct device_node *suspend_np;
unsigned int mode;
int ret, i, len;
+ int n_phandles;
u32 pval;
+ n_phandles = of_count_phandle_with_args(np, "regulator-coupled-with",
+ NULL);
+ n_phandles = max(n_phandles, 0);
+
constraints->name = of_get_property(np, "regulator-name", NULL);
if (!of_property_read_u32(np, "regulator-min-microvolt", &pval))
@@ -163,9 +169,17 @@ static void of_get_regulation_constraints(struct device_node *np,
if (!of_property_read_u32(np, "regulator-system-load", &pval))
constraints->system_load = pval;
- if (!of_property_read_u32(np, "regulator-coupled-max-spread",
- &pval))
- constraints->max_spread = pval;
+ if (n_phandles) {
+ constraints->max_spread = devm_kzalloc(dev,
+ sizeof(*constraints->max_spread) * n_phandles,
+ GFP_KERNEL);
+
+ if (!constraints->max_spread)
+ return -ENOMEM;
+
+ of_property_read_u32_array(np, "regulator-coupled-max-spread",
+ constraints->max_spread, n_phandles);
+ }
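For example, a node with regulator-coupled-with = <&buck1 &buck2> and regulator-coupled-max-spread = <100000 200000> now yields a two-entry max_spread array, one entry per phandle in order: 100 mV of spread tolerated against buck1 and 200 mV against buck2 (node names illustrative).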
if (!of_property_read_u32(np, "regulator-max-step-microvolt",
&pval))
@@ -242,6 +256,8 @@ static void of_get_regulation_constraints(struct device_node *np,
suspend_state = NULL;
suspend_np = NULL;
}
+
+ return 0;
}
/**
@@ -267,7 +283,9 @@ struct regulator_init_data *of_get_regulator_init_data(struct device *dev,
if (!init_data)
return NULL; /* Out of memory? */
- of_get_regulation_constraints(node, &init_data, desc);
+ if (of_get_regulation_constraints(dev, node, &init_data, desc))
+ return NULL;
+
return init_data;
}
EXPORT_SYMBOL_GPL(of_get_regulator_init_data);
@@ -473,7 +491,8 @@ int of_get_n_coupled(struct regulator_dev *rdev)
/* Looks for "to_find" device_node in src's "regulator-coupled-with" property */
static bool of_coupling_find_node(struct device_node *src,
- struct device_node *to_find)
+ struct device_node *to_find,
+ int *index)
{
int n_phandles, i;
bool found = false;
@@ -495,8 +514,10 @@ static bool of_coupling_find_node(struct device_node *src,
of_node_put(tmp);
- if (found)
+ if (found) {
+ *index = i;
break;
+ }
}
return found;
@@ -517,22 +538,23 @@ static bool of_coupling_find_node(struct device_node *src,
*/
bool of_check_coupling_data(struct regulator_dev *rdev)
{
- int max_spread = rdev->constraints->max_spread;
struct device_node *node = rdev->dev.of_node;
int n_phandles = of_get_n_coupled(rdev);
struct device_node *c_node;
+ int index;
int i;
bool ret = true;
- if (max_spread <= 0) {
- dev_err(&rdev->dev, "max_spread value invalid\n");
- return false;
- }
-
/* iterate over rdev's phandles */
for (i = 0; i < n_phandles; i++) {
+ int max_spread = rdev->constraints->max_spread[i];
int c_max_spread, c_n_phandles;
+ if (max_spread <= 0) {
+ dev_err(&rdev->dev, "max_spread value invalid\n");
+ return false;
+ }
+
c_node = of_parse_phandle(node,
"regulator-coupled-with", i);
@@ -549,22 +571,23 @@ bool of_check_coupling_data(struct regulator_dev *rdev)
goto clean;
}
- if (of_property_read_u32(c_node, "regulator-coupled-max-spread",
- &c_max_spread)) {
+ if (!of_coupling_find_node(c_node, node, &index)) {
+ dev_err(&rdev->dev, "missing 2-way linking for coupled regulators\n");
ret = false;
goto clean;
}
- if (c_max_spread != max_spread) {
- dev_err(&rdev->dev,
- "coupled regulators max_spread mismatch\n");
+ if (of_property_read_u32_index(c_node, "regulator-coupled-max-spread",
+ index, &c_max_spread)) {
ret = false;
goto clean;
}
- if (!of_coupling_find_node(c_node, node)) {
- dev_err(&rdev->dev, "missing 2-way linking for coupled regulators\n");
+ if (c_max_spread != max_spread) {
+ dev_err(&rdev->dev,
+ "coupled regulators max_spread mismatch\n");
ret = false;
+ goto clean;
}
clean:
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index 6dfc9e176360..7f51c5fc8194 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -96,6 +96,8 @@ enum spmi_regulator_logical_type {
SPMI_REGULATOR_LOGICAL_TYPE_ULT_LO_SMPS,
SPMI_REGULATOR_LOGICAL_TYPE_ULT_HO_SMPS,
SPMI_REGULATOR_LOGICAL_TYPE_ULT_LDO,
+ SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS426,
+ SPMI_REGULATOR_LOGICAL_TYPE_HFS430,
};
enum spmi_regulator_type {
@@ -142,11 +144,13 @@ enum spmi_regulator_subtype {
SPMI_REGULATOR_SUBTYPE_5V_BOOST = 0x01,
SPMI_REGULATOR_SUBTYPE_FTS_CTL = 0x08,
SPMI_REGULATOR_SUBTYPE_FTS2p5_CTL = 0x09,
+ SPMI_REGULATOR_SUBTYPE_FTS426_CTL = 0x0a,
SPMI_REGULATOR_SUBTYPE_BB_2A = 0x01,
SPMI_REGULATOR_SUBTYPE_ULT_HF_CTL1 = 0x0d,
SPMI_REGULATOR_SUBTYPE_ULT_HF_CTL2 = 0x0e,
SPMI_REGULATOR_SUBTYPE_ULT_HF_CTL3 = 0x0f,
SPMI_REGULATOR_SUBTYPE_ULT_HF_CTL4 = 0x10,
+ SPMI_REGULATOR_SUBTYPE_HFS430 = 0x0a,
};
enum spmi_common_regulator_registers {
@@ -162,6 +166,18 @@ enum spmi_common_regulator_registers {
SPMI_COMMON_REG_STEP_CTRL = 0x61,
};
+/*
+ * Second common register layout, used by newer devices starting with ftsmps426.
+ * Note that some of the registers from the first common layout remain
+ * unchanged and their definition is not duplicated.
+ */
+enum spmi_ftsmps426_regulator_registers {
+ SPMI_FTSMPS426_REG_VOLTAGE_LSB = 0x40,
+ SPMI_FTSMPS426_REG_VOLTAGE_MSB = 0x41,
+ SPMI_FTSMPS426_REG_VOLTAGE_ULS_LSB = 0x68,
+ SPMI_FTSMPS426_REG_VOLTAGE_ULS_MSB = 0x69,
+};
+
enum spmi_vs_registers {
SPMI_VS_REG_OCP = 0x4a,
SPMI_VS_REG_SOFT_START = 0x4c,
@@ -221,6 +237,14 @@ enum spmi_common_control_register_index {
#define SPMI_COMMON_MODE_FOLLOW_HW_EN0_MASK 0x01
#define SPMI_COMMON_MODE_FOLLOW_ALL_MASK 0x1f
+#define SPMI_FTSMPS426_MODE_BYPASS_MASK 3
+#define SPMI_FTSMPS426_MODE_RETENTION_MASK 4
+#define SPMI_FTSMPS426_MODE_LPM_MASK 5
+#define SPMI_FTSMPS426_MODE_AUTO_MASK 6
+#define SPMI_FTSMPS426_MODE_HPM_MASK 7
+
+#define SPMI_FTSMPS426_MODE_MASK 0x07
+
/* Common regulator pull down control register layout */
#define SPMI_COMMON_PULL_DOWN_ENABLE_MASK 0x80
@@ -266,6 +290,25 @@ enum spmi_common_control_register_index {
#define SPMI_FTSMPS_STEP_MARGIN_NUM 4
#define SPMI_FTSMPS_STEP_MARGIN_DEN 5
+#define SPMI_FTSMPS426_STEP_CTRL_DELAY_MASK 0x03
+#define SPMI_FTSMPS426_STEP_CTRL_DELAY_SHIFT 0
+
+/* Clock rate in kHz of the FTSMPS426 regulator reference clock. */
+#define SPMI_FTSMPS426_CLOCK_RATE 4800
+
+#define SPMI_HFS430_CLOCK_RATE 1600
+
+/* Minimum voltage stepper delay for each step. */
+#define SPMI_FTSMPS426_STEP_DELAY 2
+
+/*
+ * The ratio SPMI_FTSMPS426_STEP_MARGIN_NUM/SPMI_FTSMPS426_STEP_MARGIN_DEN is
+ * used to adjust the step rate in order to account for oscillator variance.
+ */
+#define SPMI_FTSMPS426_STEP_MARGIN_NUM 10
+#define SPMI_FTSMPS426_STEP_MARGIN_DEN 11
+
+
/* VSET value to decide the range of ULT SMPS */
#define ULT_SMPS_RANGE_SPLIT 0x60
@@ -439,6 +482,10 @@ static struct spmi_voltage_range ftsmps2p5_ranges[] = {
SPMI_VOLTAGE_RANGE(1, 160000, 1360000, 2200000, 2200000, 10000),
};
+static struct spmi_voltage_range ftsmps426_ranges[] = {
+ SPMI_VOLTAGE_RANGE(0, 0, 320000, 1352000, 1352000, 4000),
+};
+
static struct spmi_voltage_range boost_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 4000000, 4000000, 5550000, 5550000, 50000),
};
@@ -464,6 +511,10 @@ static struct spmi_voltage_range ult_pldo_ranges[] = {
SPMI_VOLTAGE_RANGE(0, 1750000, 1750000, 3337500, 3337500, 12500),
};
+static struct spmi_voltage_range hfs430_ranges[] = {
+ SPMI_VOLTAGE_RANGE(0, 320000, 320000, 2040000, 2040000, 8000),
+};
+
static DEFINE_SPMI_SET_POINTS(pldo);
static DEFINE_SPMI_SET_POINTS(nldo1);
static DEFINE_SPMI_SET_POINTS(nldo2);
@@ -472,12 +523,14 @@ static DEFINE_SPMI_SET_POINTS(ln_ldo);
static DEFINE_SPMI_SET_POINTS(smps);
static DEFINE_SPMI_SET_POINTS(ftsmps);
static DEFINE_SPMI_SET_POINTS(ftsmps2p5);
+static DEFINE_SPMI_SET_POINTS(ftsmps426);
static DEFINE_SPMI_SET_POINTS(boost);
static DEFINE_SPMI_SET_POINTS(boost_byp);
static DEFINE_SPMI_SET_POINTS(ult_lo_smps);
static DEFINE_SPMI_SET_POINTS(ult_ho_smps);
static DEFINE_SPMI_SET_POINTS(ult_nldo);
static DEFINE_SPMI_SET_POINTS(ult_pldo);
+static DEFINE_SPMI_SET_POINTS(hfs430);
static inline int spmi_vreg_read(struct spmi_regulator *vreg, u16 addr, u8 *buf,
int len)
@@ -739,18 +792,31 @@ spmi_regulator_common_set_voltage(struct regulator_dev *rdev, unsigned selector)
return spmi_vreg_write(vreg, SPMI_COMMON_REG_VOLTAGE_RANGE, buf, 2);
}
+static int spmi_regulator_common_list_voltage(struct regulator_dev *rdev,
+ unsigned selector);
+
+static int spmi_regulator_ftsmps426_set_voltage(struct regulator_dev *rdev,
+ unsigned selector)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+ u8 buf[2];
+ int mV;
+
+ mV = spmi_regulator_common_list_voltage(rdev, selector) / 1000;
+
+ buf[0] = mV & 0xff;
+ buf[1] = mV >> 8;
+ return spmi_vreg_write(vreg, SPMI_FTSMPS426_REG_VOLTAGE_LSB, buf, 2);
+}
+
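Worked example for the split above: selecting 1352000 uV gives mV = 1352 = 0x548, so 0x48 is written to SPMI_FTSMPS426_REG_VOLTAGE_LSB and 0x05 to the MSB register.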
static int spmi_regulator_set_voltage_time_sel(struct regulator_dev *rdev,
unsigned int old_selector, unsigned int new_selector)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
- const struct spmi_voltage_range *range;
int diff_uV;
- range = spmi_regulator_find_range(vreg);
- if (!range)
- return -EINVAL;
-
- diff_uV = abs(new_selector - old_selector) * range->step_uV;
+ diff_uV = abs(spmi_regulator_common_list_voltage(rdev, new_selector) -
+ spmi_regulator_common_list_voltage(rdev, old_selector));
return DIV_ROUND_UP(diff_uV, vreg->slew_rate);
}
@@ -770,6 +836,21 @@ static int spmi_regulator_common_get_voltage(struct regulator_dev *rdev)
return spmi_hw_selector_to_sw(vreg, voltage_sel, range);
}
+static int spmi_regulator_ftsmps426_get_voltage(struct regulator_dev *rdev)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+ const struct spmi_voltage_range *range;
+ u8 buf[2];
+ int uV;
+
+ spmi_vreg_read(vreg, SPMI_FTSMPS426_REG_VOLTAGE_LSB, buf, 2);
+
+ uV = (((unsigned int)buf[1] << 8) | (unsigned int)buf[0]) * 1000;
+ range = vreg->set_points->range;
+
+ return (uV - range->set_point_min_uV) / range->step_uV;
+}
+
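Going the other way with the ftsmps426 range defined above (320000 uV set-point base, 4000 uV step), a readback of 0x48/0x05 decodes to 1352000 uV, i.e. selector (1352000 - 320000) / 4000 = 258, the top of the range.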
static int spmi_regulator_single_map_voltage(struct regulator_dev *rdev,
int min_uV, int max_uV)
{
@@ -903,13 +984,33 @@ static unsigned int spmi_regulator_common_get_mode(struct regulator_dev *rdev)
spmi_vreg_read(vreg, SPMI_COMMON_REG_MODE, &reg, 1);
- if (reg & SPMI_COMMON_MODE_HPM_MASK)
- return REGULATOR_MODE_NORMAL;
+ reg &= SPMI_COMMON_MODE_HPM_MASK | SPMI_COMMON_MODE_AUTO_MASK;
- if (reg & SPMI_COMMON_MODE_AUTO_MASK)
+ switch (reg) {
+ case SPMI_COMMON_MODE_HPM_MASK:
+ return REGULATOR_MODE_NORMAL;
+ case SPMI_COMMON_MODE_AUTO_MASK:
return REGULATOR_MODE_FAST;
+ default:
+ return REGULATOR_MODE_IDLE;
+ }
+}
- return REGULATOR_MODE_IDLE;
+static unsigned int spmi_regulator_ftsmps426_get_mode(struct regulator_dev *rdev)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+ u8 reg;
+
+ spmi_vreg_read(vreg, SPMI_COMMON_REG_MODE, &reg, 1);
+
+ switch (reg) {
+ case SPMI_FTSMPS426_MODE_HPM_MASK:
+ return REGULATOR_MODE_NORMAL;
+ case SPMI_FTSMPS426_MODE_AUTO_MASK:
+ return REGULATOR_MODE_FAST;
+ default:
+ return REGULATOR_MODE_IDLE;
+ }
}
static int
@@ -917,12 +1018,43 @@ spmi_regulator_common_set_mode(struct regulator_dev *rdev, unsigned int mode)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
u8 mask = SPMI_COMMON_MODE_HPM_MASK | SPMI_COMMON_MODE_AUTO_MASK;
- u8 val = 0;
+ u8 val;
- if (mode == REGULATOR_MODE_NORMAL)
+ switch (mode) {
+ case REGULATOR_MODE_NORMAL:
val = SPMI_COMMON_MODE_HPM_MASK;
- else if (mode == REGULATOR_MODE_FAST)
+ break;
+ case REGULATOR_MODE_FAST:
val = SPMI_COMMON_MODE_AUTO_MASK;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+
+ return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_MODE, val, mask);
+}
+
+static int
+spmi_regulator_ftsmps426_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+ u8 mask = SPMI_FTSMPS426_MODE_MASK;
+ u8 val;
+
+ switch (mode) {
+ case REGULATOR_MODE_NORMAL:
+ val = SPMI_FTSMPS426_MODE_HPM_MASK;
+ break;
+ case REGULATOR_MODE_FAST:
+ val = SPMI_FTSMPS426_MODE_AUTO_MASK;
+ break;
+ case REGULATOR_MODE_IDLE:
+ val = SPMI_FTSMPS426_MODE_LPM_MASK;
+ break;
+ default:
+ return -EINVAL;
+ }
return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_MODE, val, mask);
}
@@ -1256,12 +1388,41 @@ static struct regulator_ops spmi_ult_ldo_ops = {
.set_soft_start = spmi_regulator_common_set_soft_start,
};
+static struct regulator_ops spmi_ftsmps426_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = spmi_regulator_ftsmps426_set_voltage,
+ .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
+ .get_voltage_sel = spmi_regulator_ftsmps426_get_voltage,
+ .map_voltage = spmi_regulator_single_map_voltage,
+ .list_voltage = spmi_regulator_common_list_voltage,
+ .set_mode = spmi_regulator_ftsmps426_set_mode,
+ .get_mode = spmi_regulator_ftsmps426_get_mode,
+ .set_load = spmi_regulator_common_set_load,
+ .set_pull_down = spmi_regulator_common_set_pull_down,
+};
+
+static struct regulator_ops spmi_hfs430_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = spmi_regulator_ftsmps426_set_voltage,
+ .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
+ .get_voltage_sel = spmi_regulator_ftsmps426_get_voltage,
+ .map_voltage = spmi_regulator_single_map_voltage,
+ .list_voltage = spmi_regulator_common_list_voltage,
+ .set_mode = spmi_regulator_ftsmps426_set_mode,
+ .get_mode = spmi_regulator_ftsmps426_get_mode,
+};
+
/* Maximum possible digital major revision value */
#define INF 0xFF
static const struct spmi_regulator_mapping supported_regulators[] = {
/* type subtype dig_min dig_max ltype ops setpoints hpm_min */
SPMI_VREG(BUCK, GP_CTL, 0, INF, SMPS, smps, smps, 100000),
+ SPMI_VREG(BUCK, HFS430, 0, INF, HFS430, hfs430, hfs430, 10000),
SPMI_VREG(LDO, N300, 0, INF, LDO, ldo, nldo1, 10000),
SPMI_VREG(LDO, N600, 0, 0, LDO, ldo, nldo2, 10000),
SPMI_VREG(LDO, N1200, 0, 0, LDO, ldo, nldo2, 10000),
@@ -1291,6 +1452,7 @@ static const struct spmi_regulator_mapping supported_regulators[] = {
SPMI_VREG(BOOST, 5V_BOOST, 0, INF, BOOST, boost, boost, 0),
SPMI_VREG(FTS, FTS_CTL, 0, INF, FTSMPS, ftsmps, ftsmps, 100000),
SPMI_VREG(FTS, FTS2p5_CTL, 0, INF, FTSMPS, ftsmps, ftsmps2p5, 100000),
+ SPMI_VREG(FTS, FTS426_CTL, 0, INF, FTSMPS426, ftsmps426, ftsmps426, 100000),
SPMI_VREG(BOOST_BYP, BB_2A, 0, INF, BOOST_BYP, boost, boost_byp, 0),
SPMI_VREG(ULT_BUCK, ULT_HF_CTL1, 0, INF, ULT_LO_SMPS, ult_lo_smps,
ult_lo_smps, 100000),
@@ -1428,6 +1590,35 @@ static int spmi_regulator_init_slew_rate(struct spmi_regulator *vreg)
return ret;
}
+static int spmi_regulator_init_slew_rate_ftsmps426(struct spmi_regulator *vreg,
+ int clock_rate)
+{
+ int ret;
+ u8 reg = 0;
+ int delay, slew_rate;
+ const struct spmi_voltage_range *range = &vreg->set_points->range[0];
+
+ ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_STEP_CTRL, &reg, 1);
+ if (ret) {
+ dev_err(vreg->dev, "spmi read failed, ret=%d\n", ret);
+ return ret;
+ }
+
+ delay = reg & SPMI_FTSMPS426_STEP_CTRL_DELAY_MASK;
+ delay >>= SPMI_FTSMPS426_STEP_CTRL_DELAY_SHIFT;
+
+ /* slew_rate has units of uV/us */
+ slew_rate = clock_rate * range->step_uV;
+ slew_rate /= 1000 * (SPMI_FTSMPS426_STEP_DELAY << delay);
+ slew_rate *= SPMI_FTSMPS426_STEP_MARGIN_NUM;
+ slew_rate /= SPMI_FTSMPS426_STEP_MARGIN_DEN;
+
+ /* Ensure that the slew rate is greater than 0 */
+ vreg->slew_rate = max(slew_rate, 1);
+
+ return ret;
+}
+
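Plugging in the FTSMPS426 numbers above: with clock_rate = 4800 kHz, step_uV = 4000 and a step-delay field of 0, slew_rate = 4800 * 4000 / (1000 * 2) = 9600, then scaled by 10/11 to 8727 uV/us; each increment of the delay field halves the result.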
static int spmi_regulator_init_registers(struct spmi_regulator *vreg,
const struct spmi_regulator_init_data *data)
{
@@ -1567,6 +1758,19 @@ static int spmi_regulator_of_parse(struct device_node *node,
ret = spmi_regulator_init_slew_rate(vreg);
if (ret)
return ret;
+ break;
+ case SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS426:
+ ret = spmi_regulator_init_slew_rate_ftsmps426(vreg,
+ SPMI_FTSMPS426_CLOCK_RATE);
+ if (ret)
+ return ret;
+ break;
+ case SPMI_REGULATOR_LOGICAL_TYPE_HFS430:
+ ret = spmi_regulator_init_slew_rate_ftsmps426(vreg,
+ SPMI_HFS430_CLOCK_RATE);
+ if (ret)
+ return ret;
+ break;
default:
break;
}
@@ -1723,12 +1927,27 @@ static const struct spmi_regulator_data pmi8994_regulators[] = {
{ }
};
+static const struct spmi_regulator_data pm8005_regulators[] = {
+ { "s1", 0x1400, "vdd_s1", },
+ { "s2", 0x1700, "vdd_s2", },
+ { "s3", 0x1a00, "vdd_s3", },
+ { "s4", 0x1d00, "vdd_s4", },
+ { }
+};
+
+static const struct spmi_regulator_data pms405_regulators[] = {
+ { "s3", 0x1a00, "vdd_s3"},
+ { }
+};
+
static const struct of_device_id qcom_spmi_regulator_match[] = {
+ { .compatible = "qcom,pm8005-regulators", .data = &pm8005_regulators },
{ .compatible = "qcom,pm8841-regulators", .data = &pm8841_regulators },
{ .compatible = "qcom,pm8916-regulators", .data = &pm8916_regulators },
{ .compatible = "qcom,pm8941-regulators", .data = &pm8941_regulators },
{ .compatible = "qcom,pm8994-regulators", .data = &pm8994_regulators },
{ .compatible = "qcom,pmi8994-regulators", .data = &pmi8994_regulators },
+ { .compatible = "qcom,pms405-regulators", .data = &pms405_regulators },
{ }
};
MODULE_DEVICE_TABLE(of, qcom_spmi_regulator_match);
@@ -1736,6 +1955,7 @@ MODULE_DEVICE_TABLE(of, qcom_spmi_regulator_match);
static int qcom_spmi_regulator_probe(struct platform_device *pdev)
{
const struct spmi_regulator_data *reg;
+ const struct spmi_voltage_range *range;
const struct of_device_id *match;
struct regulator_config config = { };
struct regulator_dev *rdev;
@@ -1825,6 +2045,12 @@ static int qcom_spmi_regulator_probe(struct platform_device *pdev)
}
}
+ if (vreg->set_points && vreg->set_points->count == 1) {
+ /* there is only one range, so its step size applies to all set points */
+ range = vreg->set_points->range;
+ vreg->desc.uV_step = range->step_uV;
+ }
+
config.dev = dev;
config.driver_data = vreg;
config.regmap = regmap;
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 134c62db36c5..054baaadfdfd 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -34,7 +34,7 @@ struct s2mps11_info {
enum sec_device_type dev_type;
/*
- * One bit for each S2MPS13/S2MPS14/S2MPU02 regulator whether
+ * One bit for each S2MPS11/S2MPS13/S2MPS14/S2MPU02 regulator, tracking whether
* the suspend mode was enabled.
*/
DECLARE_BITMAP(suspend_state, S2MPS_REGULATOR_MAX);
@@ -70,10 +70,11 @@ static int s2mps11_regulator_set_voltage_time_sel(struct regulator_dev *rdev,
unsigned int new_selector)
{
struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
+ int rdev_id = rdev_get_id(rdev);
unsigned int ramp_delay = 0;
int old_volt, new_volt;
- switch (rdev_get_id(rdev)) {
+ switch (rdev_id) {
case S2MPS11_BUCK2:
ramp_delay = s2mps11->ramp_delay2;
break;
@@ -111,9 +112,10 @@ static int s2mps11_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
unsigned int ramp_val, ramp_shift, ramp_reg = S2MPS11_REG_RAMP_BUCK;
unsigned int ramp_enable = 1, enable_shift = 0;
+ int rdev_id = rdev_get_id(rdev);
int ret;
- switch (rdev_get_id(rdev)) {
+ switch (rdev_id) {
case S2MPS11_BUCK1:
if (ramp_delay > s2mps11->ramp_delay16)
s2mps11->ramp_delay16 = ramp_delay;
@@ -203,9 +205,8 @@ static int s2mps11_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
goto ramp_disable;
/* Ramp delay can be enabled/disabled only for buck[2346] */
- if ((rdev_get_id(rdev) >= S2MPS11_BUCK2 &&
- rdev_get_id(rdev) <= S2MPS11_BUCK4) ||
- rdev_get_id(rdev) == S2MPS11_BUCK6) {
+ if ((rdev_id >= S2MPS11_BUCK2 && rdev_id <= S2MPS11_BUCK4) ||
+ rdev_id == S2MPS11_BUCK6) {
ret = regmap_update_bits(rdev->regmap, S2MPS11_REG_RAMP,
1 << enable_shift, 1 << enable_shift);
if (ret) {
@@ -224,27 +225,133 @@ ramp_disable:
1 << enable_shift, 0);
}
+static int s2mps11_regulator_enable(struct regulator_dev *rdev)
+{
+ struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
+ int rdev_id = rdev_get_id(rdev);
+ unsigned int val;
+
+ switch (s2mps11->dev_type) {
+ case S2MPS11X:
+ if (test_bit(rdev_id, s2mps11->suspend_state))
+ val = S2MPS14_ENABLE_SUSPEND;
+ else
+ val = rdev->desc->enable_mask;
+ break;
+ case S2MPS13X:
+ case S2MPS14X:
+ if (test_bit(rdev_id, s2mps11->suspend_state))
+ val = S2MPS14_ENABLE_SUSPEND;
+ else if (s2mps11->ext_control_gpiod[rdev_id])
+ val = S2MPS14_ENABLE_EXT_CONTROL;
+ else
+ val = rdev->desc->enable_mask;
+ break;
+ case S2MPU02:
+ if (test_bit(rdev_id, s2mps11->suspend_state))
+ val = S2MPU02_ENABLE_SUSPEND;
+ else
+ val = rdev->desc->enable_mask;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+ rdev->desc->enable_mask, val);
+}
+
+static int s2mps11_regulator_set_suspend_disable(struct regulator_dev *rdev)
+{
+ int ret;
+ unsigned int val, state;
+ struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
+ int rdev_id = rdev_get_id(rdev);
+
+ /* The LDOs below should be always-on or do not support suspend mode. */
+ switch (s2mps11->dev_type) {
+ case S2MPS11X:
+ switch (rdev_id) {
+ case S2MPS11_LDO2:
+ case S2MPS11_LDO36:
+ case S2MPS11_LDO37:
+ case S2MPS11_LDO38:
+ return 0;
+ default:
+ state = S2MPS14_ENABLE_SUSPEND;
+ break;
+ }
+ break;
+ case S2MPS13X:
+ case S2MPS14X:
+ switch (rdev_id) {
+ case S2MPS14_LDO3:
+ return 0;
+ default:
+ state = S2MPS14_ENABLE_SUSPEND;
+ break;
+ }
+ break;
+ case S2MPU02:
+ switch (rdev_id) {
+ case S2MPU02_LDO13:
+ case S2MPU02_LDO14:
+ case S2MPU02_LDO15:
+ case S2MPU02_LDO17:
+ case S2MPU02_BUCK7:
+ state = S2MPU02_DISABLE_SUSPEND;
+ break;
+ default:
+ state = S2MPU02_ENABLE_SUSPEND;
+ break;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = regmap_read(rdev->regmap, rdev->desc->enable_reg, &val);
+ if (ret < 0)
+ return ret;
+
+ set_bit(rdev_id, s2mps11->suspend_state);
+ /*
+ * Don't enable suspend mode if the regulator is already disabled,
+ * because that would briefly turn the regulator back on after resume.
+ * However, we still want to set the suspend_state bit for the
+ * regulator in case it gets enabled again before the system suspends.
+ */
+ if (!(val & rdev->desc->enable_mask))
+ return 0;
+
+ return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+ rdev->desc->enable_mask, state);
+}
+
static const struct regulator_ops s2mps11_ldo_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.is_enabled = regulator_is_enabled_regmap,
- .enable = regulator_enable_regmap,
+ .enable = s2mps11_regulator_enable,
.disable = regulator_disable_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
+ .set_suspend_disable = s2mps11_regulator_set_suspend_disable,
};
static const struct regulator_ops s2mps11_buck_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.is_enabled = regulator_is_enabled_regmap,
- .enable = regulator_enable_regmap,
+ .enable = s2mps11_regulator_enable,
.disable = regulator_disable_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_voltage_time_sel = s2mps11_regulator_set_voltage_time_sel,
.set_ramp_delay = s2mps11_set_ramp_delay,
+ .set_suspend_disable = s2mps11_regulator_set_suspend_disable,
};
#define regulator_desc_s2mps11_ldo(num, step) { \
@@ -269,9 +376,10 @@ static const struct regulator_ops s2mps11_buck_ops = {
.ops = &s2mps11_buck_ops, \
.type = REGULATOR_VOLTAGE, \
.owner = THIS_MODULE, \
- .min_uV = MIN_600_MV, \
+ .min_uV = MIN_650_MV, \
.uV_step = STEP_6_25_MV, \
- .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \
+ .linear_min_sel = 8, \
+ .n_voltages = S2MPS11_BUCK12346_N_VOLTAGES, \
.ramp_delay = S2MPS11_RAMP_DELAY, \
.vsel_reg = S2MPS11_REG_B1CTRL2 + (num - 1) * 2, \
.vsel_mask = S2MPS11_BUCK_VSEL_MASK, \
@@ -285,9 +393,10 @@ static const struct regulator_ops s2mps11_buck_ops = {
.ops = &s2mps11_buck_ops, \
.type = REGULATOR_VOLTAGE, \
.owner = THIS_MODULE, \
- .min_uV = MIN_600_MV, \
+ .min_uV = MIN_650_MV, \
.uV_step = STEP_6_25_MV, \
- .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \
+ .linear_min_sel = 8, \
+ .n_voltages = S2MPS11_BUCK5_N_VOLTAGES, \
.ramp_delay = S2MPS11_RAMP_DELAY, \
.vsel_reg = S2MPS11_REG_B5CTRL2, \
.vsel_mask = S2MPS11_BUCK_VSEL_MASK, \
@@ -295,7 +404,7 @@ static const struct regulator_ops s2mps11_buck_ops = {
.enable_mask = S2MPS11_ENABLE_MASK \
}
-#define regulator_desc_s2mps11_buck67810(num, min, step) { \
+#define regulator_desc_s2mps11_buck67810(num, min, step, min_sel, voltages) { \
.name = "BUCK"#num, \
.id = S2MPS11_BUCK##num, \
.ops = &s2mps11_buck_ops, \
@@ -303,7 +412,8 @@ static const struct regulator_ops s2mps11_buck_ops = {
.owner = THIS_MODULE, \
.min_uV = min, \
.uV_step = step, \
- .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \
+ .linear_min_sel = min_sel, \
+ .n_voltages = voltages, \
.ramp_delay = S2MPS11_RAMP_DELAY, \
.vsel_reg = S2MPS11_REG_B6CTRL2 + (num - 6) * 2, \
.vsel_mask = S2MPS11_BUCK_VSEL_MASK, \
@@ -371,11 +481,15 @@ static const struct regulator_desc s2mps11_regulators[] = {
regulator_desc_s2mps11_buck1_4(3),
regulator_desc_s2mps11_buck1_4(4),
regulator_desc_s2mps11_buck5,
- regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV),
- regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_12_5_MV),
- regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_12_5_MV),
+ regulator_desc_s2mps11_buck67810(6, MIN_650_MV, STEP_6_25_MV, 8,
+ S2MPS11_BUCK12346_N_VOLTAGES),
+ regulator_desc_s2mps11_buck67810(7, MIN_750_MV, STEP_12_5_MV, 0,
+ S2MPS11_BUCK7810_N_VOLTAGES),
+ regulator_desc_s2mps11_buck67810(8, MIN_750_MV, STEP_12_5_MV, 0,
+ S2MPS11_BUCK7810_N_VOLTAGES),
regulator_desc_s2mps11_buck9,
- regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV),
+ regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV, 0,
+ S2MPS11_BUCK7810_N_VOLTAGES),
};
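The arithmetic behind the MIN_650_MV/linear_min_sel change for bucks 1-6: selector 0x08 encoded 650000 uV before (600000 + 8 * 6250) and still does now, so the register mapping is unchanged and only selectors 0-7 become invalid.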
static const struct regulator_ops s2mps14_reg_ops;
@@ -500,101 +614,16 @@ static const struct regulator_desc s2mps13_regulators[] = {
regulator_desc_s2mps13_buck8_10(10, MIN_500_MV, STEP_6_25_MV, 0x10),
};
-static int s2mps14_regulator_enable(struct regulator_dev *rdev)
-{
- struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
- unsigned int val;
-
- switch (s2mps11->dev_type) {
- case S2MPS13X:
- case S2MPS14X:
- if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state))
- val = S2MPS14_ENABLE_SUSPEND;
- else if (s2mps11->ext_control_gpiod[rdev_get_id(rdev)])
- val = S2MPS14_ENABLE_EXT_CONTROL;
- else
- val = rdev->desc->enable_mask;
- break;
- case S2MPU02:
- if (test_bit(rdev_get_id(rdev), s2mps11->suspend_state))
- val = S2MPU02_ENABLE_SUSPEND;
- else
- val = rdev->desc->enable_mask;
- break;
- default:
- return -EINVAL;
- }
-
- return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
- rdev->desc->enable_mask, val);
-}
-
-static int s2mps14_regulator_set_suspend_disable(struct regulator_dev *rdev)
-{
- int ret;
- unsigned int val, state;
- struct s2mps11_info *s2mps11 = rdev_get_drvdata(rdev);
- int rdev_id = rdev_get_id(rdev);
-
- /* Below LDO should be always on or does not support suspend mode. */
- switch (s2mps11->dev_type) {
- case S2MPS13X:
- case S2MPS14X:
- switch (rdev_id) {
- case S2MPS14_LDO3:
- return 0;
- default:
- state = S2MPS14_ENABLE_SUSPEND;
- break;
- }
- break;
- case S2MPU02:
- switch (rdev_id) {
- case S2MPU02_LDO13:
- case S2MPU02_LDO14:
- case S2MPU02_LDO15:
- case S2MPU02_LDO17:
- case S2MPU02_BUCK7:
- state = S2MPU02_DISABLE_SUSPEND;
- break;
- default:
- state = S2MPU02_ENABLE_SUSPEND;
- break;
- }
- break;
- default:
- return -EINVAL;
- }
-
- ret = regmap_read(rdev->regmap, rdev->desc->enable_reg, &val);
- if (ret < 0)
- return ret;
-
- set_bit(rdev_get_id(rdev), s2mps11->suspend_state);
- /*
- * Don't enable suspend mode if regulator is already disabled because
- * this would effectively for a short time turn on the regulator after
- * resuming.
- * However we still want to toggle the suspend_state bit for regulator
- * in case if it got enabled before suspending the system.
- */
- if (!(val & rdev->desc->enable_mask))
- return 0;
-
- return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
- rdev->desc->enable_mask, state);
-}
-
static const struct regulator_ops s2mps14_reg_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.is_enabled = regulator_is_enabled_regmap,
- .enable = s2mps14_regulator_enable,
+ .enable = s2mps11_regulator_enable,
.disable = regulator_disable_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
- .set_suspend_disable = s2mps14_regulator_set_suspend_disable,
+ .set_suspend_disable = s2mps11_regulator_set_suspend_disable,
};
#define regulator_desc_s2mps14_ldo(num, min, step) { \
@@ -821,9 +850,12 @@ static void s2mps14_pmic_dt_parse_ext_control_gpio(struct platform_device *pdev,
0,
GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE,
"s2mps11-regulator");
- if (IS_ERR(gpio[reg])) {
+ if (PTR_ERR(gpio[reg]) == -ENOENT)
+ gpio[reg] = NULL;
+ else if (IS_ERR(gpio[reg])) {
dev_err(&pdev->dev, "Failed to get control GPIO for %d/%s\n",
reg, rdata[reg].name);
+ gpio[reg] = NULL;
continue;
}
if (gpio[reg])
@@ -856,8 +888,9 @@ static int s2mps11_pmic_dt_parse(struct platform_device *pdev,
static int s2mpu02_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
{
unsigned int ramp_val, ramp_shift, ramp_reg;
+ int rdev_id = rdev_get_id(rdev);
- switch (rdev_get_id(rdev)) {
+ switch (rdev_id) {
case S2MPU02_BUCK1:
ramp_shift = S2MPU02_BUCK1_RAMP_SHIFT;
break;
@@ -885,24 +918,24 @@ static const struct regulator_ops s2mpu02_ldo_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.is_enabled = regulator_is_enabled_regmap,
- .enable = s2mps14_regulator_enable,
+ .enable = s2mps11_regulator_enable,
.disable = regulator_disable_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
- .set_suspend_disable = s2mps14_regulator_set_suspend_disable,
+ .set_suspend_disable = s2mps11_regulator_set_suspend_disable,
};
static const struct regulator_ops s2mpu02_buck_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.is_enabled = regulator_is_enabled_regmap,
- .enable = s2mps14_regulator_enable,
+ .enable = s2mps11_regulator_enable,
.disable = regulator_disable_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
- .set_suspend_disable = s2mps14_regulator_set_suspend_disable,
+ .set_suspend_disable = s2mps11_regulator_set_suspend_disable,
.set_ramp_delay = s2mpu02_set_ramp_delay,
};
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index bb9d1a083299..6ca27e9d5ef7 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -574,7 +574,9 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
0,
GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE,
"s5m8767");
- if (IS_ERR(rdata->ext_control_gpiod))
+ if (PTR_ERR(rdata->ext_control_gpiod) == -ENOENT)
+ rdata->ext_control_gpiod = NULL;
+ else if (IS_ERR(rdata->ext_control_gpiod))
return PTR_ERR(rdata->ext_control_gpiod);
rdata->id = i;
diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c
new file mode 100644
index 000000000000..04b732991d69
--- /dev/null
+++ b/drivers/regulator/slg51000-regulator.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// SLG51000 High PSRR, Multi-Output Regulators
+// Copyright (C) 2019 Dialog Semiconductor
+//
+// Author: Eric Jeong <eric.jeong.opensource@diasemi.com>
+
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+#include "slg51000-regulator.h"
+
+#define SLG51000_SCTL_EVT 7
+#define SLG51000_MAX_EVT_REGISTER 8
+#define SLG51000_LDOHP_LV_MIN 1200000
+#define SLG51000_LDOHP_HV_MIN 2400000
+
+enum slg51000_regulators {
+ SLG51000_REGULATOR_LDO1 = 0,
+ SLG51000_REGULATOR_LDO2,
+ SLG51000_REGULATOR_LDO3,
+ SLG51000_REGULATOR_LDO4,
+ SLG51000_REGULATOR_LDO5,
+ SLG51000_REGULATOR_LDO6,
+ SLG51000_REGULATOR_LDO7,
+ SLG51000_MAX_REGULATORS,
+};
+
+struct slg51000 {
+ struct device *dev;
+ struct regmap *regmap;
+ struct regulator_desc *rdesc[SLG51000_MAX_REGULATORS];
+ struct regulator_dev *rdev[SLG51000_MAX_REGULATORS];
+ struct gpio_desc *cs_gpiod;
+ int chip_irq;
+};
+
+struct slg51000_evt_sta {
+ unsigned int ereg;
+ unsigned int sreg;
+};
+
+static const struct slg51000_evt_sta es_reg[SLG51000_MAX_EVT_REGISTER] = {
+ {SLG51000_LDO1_EVENT, SLG51000_LDO1_STATUS},
+ {SLG51000_LDO2_EVENT, SLG51000_LDO2_STATUS},
+ {SLG51000_LDO3_EVENT, SLG51000_LDO3_STATUS},
+ {SLG51000_LDO4_EVENT, SLG51000_LDO4_STATUS},
+ {SLG51000_LDO5_EVENT, SLG51000_LDO5_STATUS},
+ {SLG51000_LDO6_EVENT, SLG51000_LDO6_STATUS},
+ {SLG51000_LDO7_EVENT, SLG51000_LDO7_STATUS},
+ {SLG51000_SYSCTL_EVENT, SLG51000_SYSCTL_STATUS},
+};
+
+static const struct regmap_range slg51000_writeable_ranges[] = {
+ regmap_reg_range(SLG51000_SYSCTL_MATRIX_CONF_A,
+ SLG51000_SYSCTL_MATRIX_CONF_A),
+ regmap_reg_range(SLG51000_LDO1_VSEL, SLG51000_LDO1_VSEL),
+ regmap_reg_range(SLG51000_LDO1_MINV, SLG51000_LDO1_MAXV),
+ regmap_reg_range(SLG51000_LDO1_IRQ_MASK, SLG51000_LDO1_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO2_VSEL, SLG51000_LDO2_VSEL),
+ regmap_reg_range(SLG51000_LDO2_MINV, SLG51000_LDO2_MAXV),
+ regmap_reg_range(SLG51000_LDO2_IRQ_MASK, SLG51000_LDO2_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO3_VSEL, SLG51000_LDO3_VSEL),
+ regmap_reg_range(SLG51000_LDO3_MINV, SLG51000_LDO3_MAXV),
+ regmap_reg_range(SLG51000_LDO3_IRQ_MASK, SLG51000_LDO3_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO4_VSEL, SLG51000_LDO4_VSEL),
+ regmap_reg_range(SLG51000_LDO4_MINV, SLG51000_LDO4_MAXV),
+ regmap_reg_range(SLG51000_LDO4_IRQ_MASK, SLG51000_LDO4_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO5_VSEL, SLG51000_LDO5_VSEL),
+ regmap_reg_range(SLG51000_LDO5_MINV, SLG51000_LDO5_MAXV),
+ regmap_reg_range(SLG51000_LDO5_IRQ_MASK, SLG51000_LDO5_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO6_VSEL, SLG51000_LDO6_VSEL),
+ regmap_reg_range(SLG51000_LDO6_MINV, SLG51000_LDO6_MAXV),
+ regmap_reg_range(SLG51000_LDO6_IRQ_MASK, SLG51000_LDO6_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO7_VSEL, SLG51000_LDO7_VSEL),
+ regmap_reg_range(SLG51000_LDO7_MINV, SLG51000_LDO7_MAXV),
+ regmap_reg_range(SLG51000_LDO7_IRQ_MASK, SLG51000_LDO7_IRQ_MASK),
+ regmap_reg_range(SLG51000_OTP_IRQ_MASK, SLG51000_OTP_IRQ_MASK),
+};
+
+static const struct regmap_range slg51000_readable_ranges[] = {
+ regmap_reg_range(SLG51000_SYSCTL_PATN_ID_B0,
+ SLG51000_SYSCTL_PATN_ID_B2),
+ regmap_reg_range(SLG51000_SYSCTL_SYS_CONF_A,
+ SLG51000_SYSCTL_SYS_CONF_A),
+ regmap_reg_range(SLG51000_SYSCTL_SYS_CONF_D,
+ SLG51000_SYSCTL_MATRIX_CONF_B),
+ regmap_reg_range(SLG51000_SYSCTL_REFGEN_CONF_C,
+ SLG51000_SYSCTL_UVLO_CONF_A),
+ regmap_reg_range(SLG51000_SYSCTL_FAULT_LOG1, SLG51000_SYSCTL_IRQ_MASK),
+ regmap_reg_range(SLG51000_IO_GPIO1_CONF, SLG51000_IO_GPIO_STATUS),
+ regmap_reg_range(SLG51000_LUTARRAY_LUT_VAL_0,
+ SLG51000_LUTARRAY_LUT_VAL_11),
+ regmap_reg_range(SLG51000_MUXARRAY_INPUT_SEL_0,
+ SLG51000_MUXARRAY_INPUT_SEL_63),
+ regmap_reg_range(SLG51000_PWRSEQ_RESOURCE_EN_0,
+ SLG51000_PWRSEQ_INPUT_SENSE_CONF_B),
+ regmap_reg_range(SLG51000_LDO1_VSEL, SLG51000_LDO1_VSEL),
+ regmap_reg_range(SLG51000_LDO1_MINV, SLG51000_LDO1_MAXV),
+ regmap_reg_range(SLG51000_LDO1_MISC1, SLG51000_LDO1_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO1_EVENT, SLG51000_LDO1_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO2_VSEL, SLG51000_LDO2_VSEL),
+ regmap_reg_range(SLG51000_LDO2_MINV, SLG51000_LDO2_MAXV),
+ regmap_reg_range(SLG51000_LDO2_MISC1, SLG51000_LDO2_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO2_EVENT, SLG51000_LDO2_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO3_VSEL, SLG51000_LDO3_VSEL),
+ regmap_reg_range(SLG51000_LDO3_MINV, SLG51000_LDO3_MAXV),
+ regmap_reg_range(SLG51000_LDO3_CONF1, SLG51000_LDO3_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO3_EVENT, SLG51000_LDO3_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO4_VSEL, SLG51000_LDO4_VSEL),
+ regmap_reg_range(SLG51000_LDO4_MINV, SLG51000_LDO4_MAXV),
+ regmap_reg_range(SLG51000_LDO4_CONF1, SLG51000_LDO4_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO4_EVENT, SLG51000_LDO4_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO5_VSEL, SLG51000_LDO5_VSEL),
+ regmap_reg_range(SLG51000_LDO5_MINV, SLG51000_LDO5_MAXV),
+ regmap_reg_range(SLG51000_LDO5_TRIM2, SLG51000_LDO5_TRIM2),
+ regmap_reg_range(SLG51000_LDO5_CONF1, SLG51000_LDO5_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO5_EVENT, SLG51000_LDO5_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO6_VSEL, SLG51000_LDO6_VSEL),
+ regmap_reg_range(SLG51000_LDO6_MINV, SLG51000_LDO6_MAXV),
+ regmap_reg_range(SLG51000_LDO6_TRIM2, SLG51000_LDO6_TRIM2),
+ regmap_reg_range(SLG51000_LDO6_CONF1, SLG51000_LDO6_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO6_EVENT, SLG51000_LDO6_IRQ_MASK),
+ regmap_reg_range(SLG51000_LDO7_VSEL, SLG51000_LDO7_VSEL),
+ regmap_reg_range(SLG51000_LDO7_MINV, SLG51000_LDO7_MAXV),
+ regmap_reg_range(SLG51000_LDO7_CONF1, SLG51000_LDO7_VSEL_ACTUAL),
+ regmap_reg_range(SLG51000_LDO7_EVENT, SLG51000_LDO7_IRQ_MASK),
+ regmap_reg_range(SLG51000_OTP_EVENT, SLG51000_OTP_EVENT),
+ regmap_reg_range(SLG51000_OTP_IRQ_MASK, SLG51000_OTP_IRQ_MASK),
+ regmap_reg_range(SLG51000_OTP_LOCK_OTP_PROG, SLG51000_OTP_LOCK_CTRL),
+ regmap_reg_range(SLG51000_LOCK_GLOBAL_LOCK_CTRL1,
+ SLG51000_LOCK_GLOBAL_LOCK_CTRL1),
+};
+
+static const struct regmap_range slg51000_volatile_ranges[] = {
+ regmap_reg_range(SLG51000_SYSCTL_FAULT_LOG1, SLG51000_SYSCTL_STATUS),
+ regmap_reg_range(SLG51000_IO_GPIO_STATUS, SLG51000_IO_GPIO_STATUS),
+ regmap_reg_range(SLG51000_LDO1_EVENT, SLG51000_LDO1_STATUS),
+ regmap_reg_range(SLG51000_LDO2_EVENT, SLG51000_LDO2_STATUS),
+ regmap_reg_range(SLG51000_LDO3_EVENT, SLG51000_LDO3_STATUS),
+ regmap_reg_range(SLG51000_LDO4_EVENT, SLG51000_LDO4_STATUS),
+ regmap_reg_range(SLG51000_LDO5_EVENT, SLG51000_LDO5_STATUS),
+ regmap_reg_range(SLG51000_LDO6_EVENT, SLG51000_LDO6_STATUS),
+ regmap_reg_range(SLG51000_LDO7_EVENT, SLG51000_LDO7_STATUS),
+ regmap_reg_range(SLG51000_OTP_EVENT, SLG51000_OTP_EVENT),
+};
+
+static const struct regmap_access_table slg51000_writeable_table = {
+ .yes_ranges = slg51000_writeable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(slg51000_writeable_ranges),
+};
+
+static const struct regmap_access_table slg51000_readable_table = {
+ .yes_ranges = slg51000_readable_ranges,
+ .n_yes_ranges = ARRAY_SIZE(slg51000_readable_ranges),
+};
+
+static const struct regmap_access_table slg51000_volatile_table = {
+ .yes_ranges = slg51000_volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(slg51000_volatile_ranges),
+};
+
+static const struct regmap_config slg51000_regmap_config = {
+ .reg_bits = 16,
+ .val_bits = 8,
+ .max_register = 0x8000,
+ .wr_table = &slg51000_writeable_table,
+ .rd_table = &slg51000_readable_table,
+ .volatile_table = &slg51000_volatile_table,
+};
+
+static const struct regulator_ops slg51000_regl_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+};
+
+static const struct regulator_ops slg51000_switch_ops = {
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+};
+
+static int slg51000_of_parse_cb(struct device_node *np,
+ const struct regulator_desc *desc,
+ struct regulator_config *config)
+{
+ struct slg51000 *chip = config->driver_data;
+ struct gpio_desc *ena_gpiod;
+ enum gpiod_flags gflags = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE;
+
+ ena_gpiod = devm_gpiod_get_from_of_node(chip->dev, np,
+ "enable-gpios", 0,
+ gflags, "gpio-en-ldo");
+ if (ena_gpiod) {
+ config->ena_gpiod = ena_gpiod;
+ devm_gpiod_unhinge(chip->dev, config->ena_gpiod);
+ }
+
+ return 0;
+}
+
+#define SLG51000_REGL_DESC(_id, _name, _s_name, _min, _step) \
+ [SLG51000_REGULATOR_##_id] = { \
+ .name = #_name, \
+ .supply_name = _s_name, \
+ .id = SLG51000_REGULATOR_##_id, \
+ .of_match = of_match_ptr(#_name), \
+ .of_parse_cb = slg51000_of_parse_cb, \
+ .ops = &slg51000_regl_ops, \
+ .regulators_node = of_match_ptr("regulators"), \
+ .n_voltages = 256, \
+ .min_uV = _min, \
+ .uV_step = _step, \
+ .linear_min_sel = 0, \
+ .vsel_mask = SLG51000_VSEL_MASK, \
+ .vsel_reg = SLG51000_##_id##_VSEL, \
+ .enable_reg = SLG51000_SYSCTL_MATRIX_CONF_A, \
+ .enable_mask = BIT(SLG51000_REGULATOR_##_id), \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ }
+
+static struct regulator_desc regls_desc[SLG51000_MAX_REGULATORS] = {
+ SLG51000_REGL_DESC(LDO1, ldo1, NULL, 2400000, 5000),
+ SLG51000_REGL_DESC(LDO2, ldo2, NULL, 2400000, 5000),
+ SLG51000_REGL_DESC(LDO3, ldo3, "vin3", 1200000, 10000),
+ SLG51000_REGL_DESC(LDO4, ldo4, "vin4", 1200000, 10000),
+ SLG51000_REGL_DESC(LDO5, ldo5, "vin5", 400000, 5000),
+ SLG51000_REGL_DESC(LDO6, ldo6, "vin6", 400000, 5000),
+ SLG51000_REGL_DESC(LDO7, ldo7, "vin7", 1200000, 10000),
+};
+
+static int slg51000_regulator_init(struct slg51000 *chip)
+{
+ struct regulator_config config = { };
+ struct regulator_desc *rdesc;
+ unsigned int reg, val;
+ u8 vsel_range[2];
+ int id, ret = 0;
+ const unsigned int min_regs[SLG51000_MAX_REGULATORS] = {
+ SLG51000_LDO1_MINV, SLG51000_LDO2_MINV, SLG51000_LDO3_MINV,
+ SLG51000_LDO4_MINV, SLG51000_LDO5_MINV, SLG51000_LDO6_MINV,
+ SLG51000_LDO7_MINV,
+ };
+
+ for (id = 0; id < SLG51000_MAX_REGULATORS; id++) {
+ chip->rdesc[id] = &regls_desc[id];
+ rdesc = chip->rdesc[id];
+ config.regmap = chip->regmap;
+ config.dev = chip->dev;
+ config.driver_data = chip;
+
+ ret = regmap_bulk_read(chip->regmap, min_regs[id],
+ vsel_range, 2);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read the MIN register\n");
+ return ret;
+ }
+
+ switch (id) {
+ case SLG51000_REGULATOR_LDO1:
+ case SLG51000_REGULATOR_LDO2:
+ if (id == SLG51000_REGULATOR_LDO1)
+ reg = SLG51000_LDO1_MISC1;
+ else
+ reg = SLG51000_LDO2_MISC1;
+
+ ret = regmap_read(chip->regmap, reg, &val);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read voltage range of ldo%d\n",
+ id + 1);
+ return ret;
+ }
+
+ rdesc->linear_min_sel = vsel_range[0];
+ rdesc->n_voltages = vsel_range[1] + 1;
+ if (val & SLG51000_SEL_VRANGE_MASK)
+ rdesc->min_uV = SLG51000_LDOHP_HV_MIN
+ + (vsel_range[0]
+ * rdesc->uV_step);
+ else
+ rdesc->min_uV = SLG51000_LDOHP_LV_MIN
+ + (vsel_range[0]
+ * rdesc->uV_step);
+ break;
+
+ case SLG51000_REGULATOR_LDO5:
+ case SLG51000_REGULATOR_LDO6:
+ if (id == SLG51000_REGULATOR_LDO5)
+ reg = SLG51000_LDO5_TRIM2;
+ else
+ reg = SLG51000_LDO6_TRIM2;
+
+ ret = regmap_read(chip->regmap, reg, &val);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read LDO mode register\n");
+ return ret;
+ }
+
+ if (val & SLG51000_SEL_BYP_MODE_MASK) {
+ rdesc->ops = &slg51000_switch_ops;
+ rdesc->n_voltages = 0;
+ rdesc->min_uV = 0;
+ rdesc->uV_step = 0;
+ rdesc->linear_min_sel = 0;
+ break;
+ }
+ /* Fall through - to the check below. */
+
+ default:
+ rdesc->linear_min_sel = vsel_range[0];
+ rdesc->n_voltages = vsel_range[1] + 1;
+ rdesc->min_uV = rdesc->min_uV
+ + (vsel_range[0] * rdesc->uV_step);
+ break;
+ }
+
+ chip->rdev[id] = devm_regulator_register(chip->dev, rdesc,
+ &config);
+ if (IS_ERR(chip->rdev[id])) {
+ ret = PTR_ERR(chip->rdev[id]);
+ dev_err(chip->dev,
+ "Failed to register regulator(%s):%d\n",
+ chip->rdesc[id]->name, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static irqreturn_t slg51000_irq_handler(int irq, void *data)
+{
+ struct slg51000 *chip = data;
+ struct regmap *regmap = chip->regmap;
+ enum { R0 = 0, R1, R2, REG_MAX };
+ u8 evt[SLG51000_MAX_EVT_REGISTER][REG_MAX];
+ int ret, i, handled = IRQ_NONE;
+ unsigned int evt_otp, mask_otp;
+
+ /* Read the event[R0], status[R1] and mask[R2] registers */
+ for (i = 0; i < SLG51000_MAX_EVT_REGISTER; i++) {
+ ret = regmap_bulk_read(regmap, es_reg[i].ereg, evt[i], REG_MAX);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read event registers(%d)\n", ret);
+ return IRQ_NONE;
+ }
+ }
+
+ ret = regmap_read(regmap, SLG51000_OTP_EVENT, &evt_otp);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read otp event registers(%d)\n", ret);
+ return IRQ_NONE;
+ }
+
+ ret = regmap_read(regmap, SLG51000_OTP_IRQ_MASK, &mask_otp);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to read otp mask register(%d)\n", ret);
+ return IRQ_NONE;
+ }
+
+ if ((evt_otp & SLG51000_EVT_CRC_MASK) &&
+ !(mask_otp & SLG51000_IRQ_CRC_MASK)) {
+ dev_info(chip->dev,
+ "OTP has been read or OTP crc is not zero\n");
+ handled = IRQ_HANDLED;
+ }
+
+ for (i = 0; i < SLG51000_MAX_REGULATORS; i++) {
+ if (!(evt[i][R2] & SLG51000_IRQ_ILIM_FLAG_MASK) &&
+ (evt[i][R0] & SLG51000_EVT_ILIM_FLAG_MASK)) {
+ regulator_lock(chip->rdev[i]);
+ regulator_notifier_call_chain(chip->rdev[i],
+ REGULATOR_EVENT_OVER_CURRENT, NULL);
+ regulator_unlock(chip->rdev[i]);
+
+ if (evt[i][R1] & SLG51000_STA_ILIM_FLAG_MASK)
+ dev_warn(chip->dev,
+ "Over-current limit(ldo%d)\n", i + 1);
+ handled = IRQ_HANDLED;
+ }
+ }
+
+ if (!(evt[SLG51000_SCTL_EVT][R2] & SLG51000_IRQ_HIGH_TEMP_WARN_MASK) &&
+ (evt[SLG51000_SCTL_EVT][R0] & SLG51000_EVT_HIGH_TEMP_WARN_MASK)) {
+ for (i = 0; i < SLG51000_MAX_REGULATORS; i++) {
+ if (!(evt[i][R1] & SLG51000_STA_ILIM_FLAG_MASK) &&
+ (evt[i][R1] & SLG51000_STA_VOUT_OK_FLAG_MASK)) {
+ regulator_lock(chip->rdev[i]);
+ regulator_notifier_call_chain(chip->rdev[i],
+ REGULATOR_EVENT_OVER_TEMP, NULL);
+ regulator_unlock(chip->rdev[i]);
+ }
+ }
+ handled = IRQ_HANDLED;
+ if (evt[SLG51000_SCTL_EVT][R1] &
+ SLG51000_STA_HIGH_TEMP_WARN_MASK)
+ dev_warn(chip->dev, "High temperature warning!\n");
+ }
+
+ return handled;
+}
+
+static void slg51000_clear_fault_log(struct slg51000 *chip)
+{
+ unsigned int val = 0;
+ int ret = 0;
+
+ ret = regmap_read(chip->regmap, SLG51000_SYSCTL_FAULT_LOG1, &val);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to read Fault log register\n");
+ return;
+ }
+
+ if (val & SLG51000_FLT_OVER_TEMP_MASK)
+ dev_dbg(chip->dev, "Fault log: FLT_OVER_TEMP\n");
+ if (val & SLG51000_FLT_POWER_SEQ_CRASH_REQ_MASK)
+ dev_dbg(chip->dev, "Fault log: FLT_POWER_SEQ_CRASH_REQ\n");
+ if (val & SLG51000_FLT_RST_MASK)
+ dev_dbg(chip->dev, "Fault log: FLT_RST\n");
+ if (val & SLG51000_FLT_POR_MASK)
+ dev_dbg(chip->dev, "Fault log: FLT_POR\n");
+}
+
+static int slg51000_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct device *dev = &client->dev;
+ struct slg51000 *chip;
+ struct gpio_desc *cs_gpiod = NULL;
+ int error, ret;
+
+ chip = devm_kzalloc(dev, sizeof(struct slg51000), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ cs_gpiod = devm_gpiod_get_from_of_node(dev, dev->of_node,
+ "dlg,cs-gpios", 0,
+ GPIOD_OUT_HIGH
+ | GPIOD_FLAGS_BIT_NONEXCLUSIVE,
+ "slg51000-cs");
+ if (cs_gpiod) {
+ dev_info(dev, "Found chip selector property\n");
+ chip->cs_gpiod = cs_gpiod;
+ }
+
+ i2c_set_clientdata(client, chip);
+ chip->chip_irq = client->irq;
+ chip->dev = dev;
+ chip->regmap = devm_regmap_init_i2c(client, &slg51000_regmap_config);
+ if (IS_ERR(chip->regmap)) {
+ error = PTR_ERR(chip->regmap);
+ dev_err(dev, "Failed to allocate register map: %d\n",
+ error);
+ return error;
+ }
+
+ ret = slg51000_regulator_init(chip);
+ if (ret < 0) {
+ dev_err(chip->dev, "Failed to init regulator(%d)\n", ret);
+ return ret;
+ }
+
+ slg51000_clear_fault_log(chip);
+
+ if (chip->chip_irq) {
+ ret = devm_request_threaded_irq(dev, chip->chip_irq, NULL,
+ slg51000_irq_handler,
+ (IRQF_TRIGGER_HIGH |
+ IRQF_ONESHOT),
+ "slg51000-irq", chip);
+ if (ret != 0) {
+ dev_err(dev, "Failed to request IRQ: %d\n",
+ chip->chip_irq);
+ return ret;
+ }
+ } else {
+ dev_info(dev, "No IRQ configured\n");
+ }
+
+ return ret;
+}
+
+static const struct i2c_device_id slg51000_i2c_id[] = {
+ {"slg51000", 0},
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, slg51000_i2c_id);
+
+static struct i2c_driver slg51000_regulator_driver = {
+ .driver = {
+ .name = "slg51000-regulator",
+ },
+ .probe = slg51000_i2c_probe,
+ .id_table = slg51000_i2c_id,
+};
+
+module_i2c_driver(slg51000_regulator_driver);
+
+MODULE_AUTHOR("Eric Jeong <eric.jeong.opensource@diasemi.com>");
+MODULE_DESCRIPTION("SLG51000 regulator driver");
+MODULE_LICENSE("GPL");
+
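The access tables above are what let regmap police the register map: any read or write outside the declared ranges fails before any bus traffic is generated. A minimal sketch of that behaviour, assuming SLG51000_SYSCTL_STATUS (0x1117) is absent from the writeable ranges (it is declared readable and volatile in the tables above); demo_access() is illustrative and not part of the driver:

#include <linux/err.h>
#include <linux/regmap.h>

static int demo_access(struct regmap *map)
{
	unsigned int val;
	int ret;

	/* 0x1117 is in the readable and volatile tables, so this read
	 * bypasses any cache and goes straight to the device. */
	ret = regmap_read(map, 0x1117, &val);
	if (ret)
		return ret;

	/* Assuming the register is not in the writeable table, the
	 * regmap core rejects this with -EIO before touching I2C. */
	return regmap_write(map, 0x1117, 0xff);
}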
diff --git a/drivers/regulator/slg51000-regulator.h b/drivers/regulator/slg51000-regulator.h
new file mode 100644
index 000000000000..20feb7f91942
--- /dev/null
+++ b/drivers/regulator/slg51000-regulator.h
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SLG51000 High PSRR, Multi-Output Regulators
+ * Copyright (C) 2019 Dialog Semiconductor
+ *
+ * Author: Eric Jeong <eric.jeong.opensource@diasemi.com>
+ */
+
+#ifndef __SLG51000_REGISTERS_H__
+#define __SLG51000_REGISTERS_H__
+
+/* Registers */
+
+#define SLG51000_SYSCTL_PATN_ID_B0 0x1105
+#define SLG51000_SYSCTL_PATN_ID_B1 0x1106
+#define SLG51000_SYSCTL_PATN_ID_B2 0x1107
+#define SLG51000_SYSCTL_SYS_CONF_A 0x1109
+#define SLG51000_SYSCTL_SYS_CONF_D 0x110c
+#define SLG51000_SYSCTL_MATRIX_CONF_A 0x110d
+#define SLG51000_SYSCTL_MATRIX_CONF_B 0x110e
+#define SLG51000_SYSCTL_REFGEN_CONF_C 0x1111
+#define SLG51000_SYSCTL_UVLO_CONF_A 0x1112
+#define SLG51000_SYSCTL_FAULT_LOG1 0x1115
+#define SLG51000_SYSCTL_EVENT 0x1116
+#define SLG51000_SYSCTL_STATUS 0x1117
+#define SLG51000_SYSCTL_IRQ_MASK 0x1118
+#define SLG51000_IO_GPIO1_CONF 0x1500
+#define SLG51000_IO_GPIO2_CONF 0x1501
+#define SLG51000_IO_GPIO3_CONF 0x1502
+#define SLG51000_IO_GPIO4_CONF 0x1503
+#define SLG51000_IO_GPIO5_CONF 0x1504
+#define SLG51000_IO_GPIO6_CONF 0x1505
+#define SLG51000_IO_GPIO_STATUS 0x1506
+#define SLG51000_LUTARRAY_LUT_VAL_0 0x1600
+#define SLG51000_LUTARRAY_LUT_VAL_1 0x1601
+#define SLG51000_LUTARRAY_LUT_VAL_2 0x1602
+#define SLG51000_LUTARRAY_LUT_VAL_3 0x1603
+#define SLG51000_LUTARRAY_LUT_VAL_4 0x1604
+#define SLG51000_LUTARRAY_LUT_VAL_5 0x1605
+#define SLG51000_LUTARRAY_LUT_VAL_6 0x1606
+#define SLG51000_LUTARRAY_LUT_VAL_7 0x1607
+#define SLG51000_LUTARRAY_LUT_VAL_8 0x1608
+#define SLG51000_LUTARRAY_LUT_VAL_9 0x1609
+#define SLG51000_LUTARRAY_LUT_VAL_10 0x160a
+#define SLG51000_LUTARRAY_LUT_VAL_11 0x160b
+#define SLG51000_MUXARRAY_INPUT_SEL_0 0x1700
+#define SLG51000_MUXARRAY_INPUT_SEL_1 0x1701
+#define SLG51000_MUXARRAY_INPUT_SEL_2 0x1702
+#define SLG51000_MUXARRAY_INPUT_SEL_3 0x1703
+#define SLG51000_MUXARRAY_INPUT_SEL_4 0x1704
+#define SLG51000_MUXARRAY_INPUT_SEL_5 0x1705
+#define SLG51000_MUXARRAY_INPUT_SEL_6 0x1706
+#define SLG51000_MUXARRAY_INPUT_SEL_7 0x1707
+#define SLG51000_MUXARRAY_INPUT_SEL_8 0x1708
+#define SLG51000_MUXARRAY_INPUT_SEL_9 0x1709
+#define SLG51000_MUXARRAY_INPUT_SEL_10 0x170a
+#define SLG51000_MUXARRAY_INPUT_SEL_11 0x170b
+#define SLG51000_MUXARRAY_INPUT_SEL_12 0x170c
+#define SLG51000_MUXARRAY_INPUT_SEL_13 0x170d
+#define SLG51000_MUXARRAY_INPUT_SEL_14 0x170e
+#define SLG51000_MUXARRAY_INPUT_SEL_15 0x170f
+#define SLG51000_MUXARRAY_INPUT_SEL_16 0x1710
+#define SLG51000_MUXARRAY_INPUT_SEL_17 0x1711
+#define SLG51000_MUXARRAY_INPUT_SEL_18 0x1712
+#define SLG51000_MUXARRAY_INPUT_SEL_19 0x1713
+#define SLG51000_MUXARRAY_INPUT_SEL_20 0x1714
+#define SLG51000_MUXARRAY_INPUT_SEL_21 0x1715
+#define SLG51000_MUXARRAY_INPUT_SEL_22 0x1716
+#define SLG51000_MUXARRAY_INPUT_SEL_23 0x1717
+#define SLG51000_MUXARRAY_INPUT_SEL_24 0x1718
+#define SLG51000_MUXARRAY_INPUT_SEL_25 0x1719
+#define SLG51000_MUXARRAY_INPUT_SEL_26 0x171a
+#define SLG51000_MUXARRAY_INPUT_SEL_27 0x171b
+#define SLG51000_MUXARRAY_INPUT_SEL_28 0x171c
+#define SLG51000_MUXARRAY_INPUT_SEL_29 0x171d
+#define SLG51000_MUXARRAY_INPUT_SEL_30 0x171e
+#define SLG51000_MUXARRAY_INPUT_SEL_31 0x171f
+#define SLG51000_MUXARRAY_INPUT_SEL_32 0x1720
+#define SLG51000_MUXARRAY_INPUT_SEL_33 0x1721
+#define SLG51000_MUXARRAY_INPUT_SEL_34 0x1722
+#define SLG51000_MUXARRAY_INPUT_SEL_35 0x1723
+#define SLG51000_MUXARRAY_INPUT_SEL_36 0x1724
+#define SLG51000_MUXARRAY_INPUT_SEL_37 0x1725
+#define SLG51000_MUXARRAY_INPUT_SEL_38 0x1726
+#define SLG51000_MUXARRAY_INPUT_SEL_39 0x1727
+#define SLG51000_MUXARRAY_INPUT_SEL_40 0x1728
+#define SLG51000_MUXARRAY_INPUT_SEL_41 0x1729
+#define SLG51000_MUXARRAY_INPUT_SEL_42 0x172a
+#define SLG51000_MUXARRAY_INPUT_SEL_43 0x172b
+#define SLG51000_MUXARRAY_INPUT_SEL_44 0x172c
+#define SLG51000_MUXARRAY_INPUT_SEL_45 0x172d
+#define SLG51000_MUXARRAY_INPUT_SEL_46 0x172e
+#define SLG51000_MUXARRAY_INPUT_SEL_47 0x172f
+#define SLG51000_MUXARRAY_INPUT_SEL_48 0x1730
+#define SLG51000_MUXARRAY_INPUT_SEL_49 0x1731
+#define SLG51000_MUXARRAY_INPUT_SEL_50 0x1732
+#define SLG51000_MUXARRAY_INPUT_SEL_51 0x1733
+#define SLG51000_MUXARRAY_INPUT_SEL_52 0x1734
+#define SLG51000_MUXARRAY_INPUT_SEL_53 0x1735
+#define SLG51000_MUXARRAY_INPUT_SEL_54 0x1736
+#define SLG51000_MUXARRAY_INPUT_SEL_55 0x1737
+#define SLG51000_MUXARRAY_INPUT_SEL_56 0x1738
+#define SLG51000_MUXARRAY_INPUT_SEL_57 0x1739
+#define SLG51000_MUXARRAY_INPUT_SEL_58 0x173a
+#define SLG51000_MUXARRAY_INPUT_SEL_59 0x173b
+#define SLG51000_MUXARRAY_INPUT_SEL_60 0x173c
+#define SLG51000_MUXARRAY_INPUT_SEL_61 0x173d
+#define SLG51000_MUXARRAY_INPUT_SEL_62 0x173e
+#define SLG51000_MUXARRAY_INPUT_SEL_63 0x173f
+#define SLG51000_PWRSEQ_RESOURCE_EN_0 0x1900
+#define SLG51000_PWRSEQ_RESOURCE_EN_1 0x1901
+#define SLG51000_PWRSEQ_RESOURCE_EN_2 0x1902
+#define SLG51000_PWRSEQ_RESOURCE_EN_3 0x1903
+#define SLG51000_PWRSEQ_RESOURCE_EN_4 0x1904
+#define SLG51000_PWRSEQ_RESOURCE_EN_5 0x1905
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP0 0x1906
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN0 0x1907
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP1 0x1908
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN1 0x1909
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP2 0x190a
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN2 0x190b
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP3 0x190c
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN3 0x190d
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP4 0x190e
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN4 0x190f
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_UP5 0x1910
+#define SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN5 0x1911
+#define SLG51000_PWRSEQ_SLOT_TIME_MAX_CONF_A 0x1912
+#define SLG51000_PWRSEQ_SLOT_TIME_MAX_CONF_B 0x1913
+#define SLG51000_PWRSEQ_SLOT_TIME_MAX_CONF_C 0x1914
+#define SLG51000_PWRSEQ_INPUT_SENSE_CONF_A 0x1915
+#define SLG51000_PWRSEQ_INPUT_SENSE_CONF_B 0x1916
+#define SLG51000_LDO1_VSEL 0x2000
+#define SLG51000_LDO1_MINV 0x2060
+#define SLG51000_LDO1_MAXV 0x2061
+#define SLG51000_LDO1_MISC1 0x2064
+#define SLG51000_LDO1_VSEL_ACTUAL 0x2065
+#define SLG51000_LDO1_EVENT 0x20c0
+#define SLG51000_LDO1_STATUS 0x20c1
+#define SLG51000_LDO1_IRQ_MASK 0x20c2
+#define SLG51000_LDO2_VSEL 0x2200
+#define SLG51000_LDO2_MINV 0x2260
+#define SLG51000_LDO2_MAXV 0x2261
+#define SLG51000_LDO2_MISC1 0x2264
+#define SLG51000_LDO2_VSEL_ACTUAL 0x2265
+#define SLG51000_LDO2_EVENT 0x22c0
+#define SLG51000_LDO2_STATUS 0x22c1
+#define SLG51000_LDO2_IRQ_MASK 0x22c2
+#define SLG51000_LDO3_VSEL 0x2300
+#define SLG51000_LDO3_MINV 0x2360
+#define SLG51000_LDO3_MAXV 0x2361
+#define SLG51000_LDO3_CONF1 0x2364
+#define SLG51000_LDO3_CONF2 0x2365
+#define SLG51000_LDO3_VSEL_ACTUAL 0x2366
+#define SLG51000_LDO3_EVENT 0x23c0
+#define SLG51000_LDO3_STATUS 0x23c1
+#define SLG51000_LDO3_IRQ_MASK 0x23c2
+#define SLG51000_LDO4_VSEL 0x2500
+#define SLG51000_LDO4_MINV 0x2560
+#define SLG51000_LDO4_MAXV 0x2561
+#define SLG51000_LDO4_CONF1 0x2564
+#define SLG51000_LDO4_CONF2 0x2565
+#define SLG51000_LDO4_VSEL_ACTUAL 0x2566
+#define SLG51000_LDO4_EVENT 0x25c0
+#define SLG51000_LDO4_STATUS 0x25c1
+#define SLG51000_LDO4_IRQ_MASK 0x25c2
+#define SLG51000_LDO5_VSEL 0x2700
+#define SLG51000_LDO5_MINV 0x2760
+#define SLG51000_LDO5_MAXV 0x2761
+#define SLG51000_LDO5_TRIM2 0x2763
+#define SLG51000_LDO5_CONF1 0x2765
+#define SLG51000_LDO5_CONF2 0x2766
+#define SLG51000_LDO5_VSEL_ACTUAL 0x2767
+#define SLG51000_LDO5_EVENT 0x27c0
+#define SLG51000_LDO5_STATUS 0x27c1
+#define SLG51000_LDO5_IRQ_MASK 0x27c2
+#define SLG51000_LDO6_VSEL 0x2900
+#define SLG51000_LDO6_MINV 0x2960
+#define SLG51000_LDO6_MAXV 0x2961
+#define SLG51000_LDO6_TRIM2 0x2963
+#define SLG51000_LDO6_CONF1 0x2965
+#define SLG51000_LDO6_CONF2 0x2966
+#define SLG51000_LDO6_VSEL_ACTUAL 0x2967
+#define SLG51000_LDO6_EVENT 0x29c0
+#define SLG51000_LDO6_STATUS 0x29c1
+#define SLG51000_LDO6_IRQ_MASK 0x29c2
+#define SLG51000_LDO7_VSEL 0x3100
+#define SLG51000_LDO7_MINV 0x3160
+#define SLG51000_LDO7_MAXV 0x3161
+#define SLG51000_LDO7_CONF1 0x3164
+#define SLG51000_LDO7_CONF2 0x3165
+#define SLG51000_LDO7_VSEL_ACTUAL 0x3166
+#define SLG51000_LDO7_EVENT 0x31c0
+#define SLG51000_LDO7_STATUS 0x31c1
+#define SLG51000_LDO7_IRQ_MASK 0x31c2
+#define SLG51000_OTP_EVENT 0x782b
+#define SLG51000_OTP_IRQ_MASK 0x782d
+#define SLG51000_OTP_LOCK_OTP_PROG 0x78fe
+#define SLG51000_OTP_LOCK_CTRL 0x78ff
+#define SLG51000_LOCK_GLOBAL_LOCK_CTRL1 0x8000
+
+/* Register Bit Fields */
+
+/* SLG51000_SYSCTL_PATTERN_ID_BYTE0 = 0x1105 */
+#define SLG51000_PATTERN_ID_BYTE0_SHIFT 0
+#define SLG51000_PATTERN_ID_BYTE0_MASK (0xff << 0)
+
+/* SLG51000_SYSCTL_PATTERN_ID_BYTE1 = 0x1106 */
+#define SLG51000_PATTERN_ID_BYTE1_SHIFT 0
+#define SLG51000_PATTERN_ID_BYTE1_MASK (0xff << 0)
+
+/* SLG51000_SYSCTL_PATTERN_ID_BYTE2 = 0x1107 */
+#define SLG51000_PATTERN_ID_BYTE2_SHIFT 0
+#define SLG51000_PATTERN_ID_BYTE2_MASK (0xff << 0)
+
+/* SLG51000_SYSCTL_SYS_CONF_A = 0x1109 */
+#define SLG51000_I2C_ADDRESS_SHIFT 0
+#define SLG51000_I2C_ADDRESS_MASK (0x7f << 0)
+#define SLG51000_I2C_DISABLE_SHIFT 7
+#define SLG51000_I2C_DISABLE_MASK (0x01 << 7)
+
+/* SLG51000_SYSCTL_SYS_CONF_D = 0x110c */
+#define SLG51000_CS_T_DEB_SHIFT 6
+#define SLG51000_CS_T_DEB_MASK (0x03 << 6)
+#define SLG51000_I2C_CLR_MODE_SHIFT 5
+#define SLG51000_I2C_CLR_MODE_MASK (0x01 << 5)
+
+/* SLG51000_SYSCTL_MATRIX_CTRL_CONF_A = 0x110d */
+#define SLG51000_RESOURCE_CTRL_SHIFT 0
+#define SLG51000_RESOURCE_CTRL_MASK (0xff << 0)
+
+/* SLG51000_SYSCTL_MATRIX_CTRL_CONF_B = 0x110e */
+#define SLG51000_MATRIX_EVENT_SENSE_SHIFT 0
+#define SLG51000_MATRIX_EVENT_SENSE_MASK (0x07 << 0)
+
+/* SLG51000_SYSCTL_REFGEN_CONF_C = 0x1111 */
+#define SLG51000_REFGEN_SEL_TEMP_WARN_DEBOUNCE_SHIFT 2
+#define SLG51000_REFGEN_SEL_TEMP_WARN_DEBOUNCE_MASK (0x03 << 2)
+#define SLG51000_REFGEN_SEL_TEMP_WARN_THR_SHIFT 0
+#define SLG51000_REFGEN_SEL_TEMP_WARN_THR_MASK (0x03 << 0)
+
+/* SLG51000_SYSCTL_UVLO_CONF_A = 0x1112 */
+#define SLG51000_VMON_UVLO_SEL_THR_SHIFT 0
+#define SLG51000_VMON_UVLO_SEL_THR_MASK (0x1f << 0)
+
+/* SLG51000_SYSCTL_FAULT_LOG1 = 0x1115 */
+#define SLG51000_FLT_POR_SHIFT 5
+#define SLG51000_FLT_POR_MASK (0x01 << 5)
+#define SLG51000_FLT_RST_SHIFT 4
+#define SLG51000_FLT_RST_MASK (0x01 << 4)
+#define SLG51000_FLT_POWER_SEQ_CRASH_REQ_SHIFT 2
+#define SLG51000_FLT_POWER_SEQ_CRASH_REQ_MASK (0x01 << 2)
+#define SLG51000_FLT_OVER_TEMP_SHIFT 1
+#define SLG51000_FLT_OVER_TEMP_MASK (0x01 << 1)
+
+/* SLG51000_SYSCTL_EVENT = 0x1116 */
+#define SLG51000_EVT_MATRIX_SHIFT 1
+#define SLG51000_EVT_MATRIX_MASK (0x01 << 1)
+#define SLG51000_EVT_HIGH_TEMP_WARN_SHIFT 0
+#define SLG51000_EVT_HIGH_TEMP_WARN_MASK (0x01 << 0)
+
+/* SLG51000_SYSCTL_STATUS = 0x1117 */
+#define SLG51000_STA_MATRIX_SHIFT 1
+#define SLG51000_STA_MATRIX_MASK (0x01 << 1)
+#define SLG51000_STA_HIGH_TEMP_WARN_SHIFT 0
+#define SLG51000_STA_HIGH_TEMP_WARN_MASK (0x01 << 0)
+
+/* SLG51000_SYSCTL_IRQ_MASK = 0x1118 */
+#define SLG51000_IRQ_MATRIX_SHIFT 1
+#define SLG51000_IRQ_MATRIX_MASK (0x01 << 1)
+#define SLG51000_IRQ_HIGH_TEMP_WARN_SHIFT 0
+#define SLG51000_IRQ_HIGH_TEMP_WARN_MASK (0x01 << 0)
+
+/* SLG51000_IO_GPIO1_CONF ~ SLG51000_IO_GPIO5_CONF =
+ * 0x1500, 0x1501, 0x1502, 0x1503, 0x1504
+ */
+#define SLG51000_GPIO_DIR_SHIFT 7
+#define SLG51000_GPIO_DIR_MASK (0x01 << 7)
+#define SLG51000_GPIO_SENS_SHIFT 5
+#define SLG51000_GPIO_SENS_MASK (0x03 << 5)
+#define SLG51000_GPIO_INVERT_SHIFT 4
+#define SLG51000_GPIO_INVERT_MASK (0x01 << 4)
+#define SLG51000_GPIO_BYP_SHIFT 3
+#define SLG51000_GPIO_BYP_MASK (0x01 << 3)
+#define SLG51000_GPIO_T_DEB_SHIFT 1
+#define SLG51000_GPIO_T_DEB_MASK (0x03 << 1)
+#define SLG51000_GPIO_LEVEL_SHIFT 0
+#define SLG51000_GPIO_LEVEL_MASK (0x01 << 0)
+
+/* SLG51000_IO_GPIO6_CONF = 0x1505 */
+#define SLG51000_GPIO6_SENS_SHIFT 5
+#define SLG51000_GPIO6_SENS_MASK (0x03 << 5)
+#define SLG51000_GPIO6_INVERT_SHIFT 4
+#define SLG51000_GPIO6_INVERT_MASK (0x01 << 4)
+#define SLG51000_GPIO6_T_DEB_SHIFT 1
+#define SLG51000_GPIO6_T_DEB_MASK (0x03 << 1)
+#define SLG51000_GPIO6_LEVEL_SHIFT 0
+#define SLG51000_GPIO6_LEVEL_MASK (0x01 << 0)
+
+/* SLG51000_IO_GPIO_STATUS = 0x1506 */
+#define SLG51000_GPIO6_STATUS_SHIFT 5
+#define SLG51000_GPIO6_STATUS_MASK (0x01 << 5)
+#define SLG51000_GPIO5_STATUS_SHIFT 4
+#define SLG51000_GPIO5_STATUS_MASK (0x01 << 4)
+#define SLG51000_GPIO4_STATUS_SHIFT 3
+#define SLG51000_GPIO4_STATUS_MASK (0x01 << 3)
+#define SLG51000_GPIO3_STATUS_SHIFT 2
+#define SLG51000_GPIO3_STATUS_MASK (0x01 << 2)
+#define SLG51000_GPIO2_STATUS_SHIFT 1
+#define SLG51000_GPIO2_STATUS_MASK (0x01 << 1)
+#define SLG51000_GPIO1_STATUS_SHIFT 0
+#define SLG51000_GPIO1_STATUS_MASK (0x01 << 0)
+
+/* SLG51000_LUTARRAY_LUT_VAL_0 ~ SLG51000_LUTARRAY_LUT_VAL_11
+ * 0x1600, 0x1601, 0x1602, 0x1603, 0x1604, 0x1605,
+ * 0x1606, 0x1607, 0x1608, 0x1609, 0x160a, 0x160b
+ */
+#define SLG51000_LUT_VAL_SHIFT 0
+#define SLG51000_LUT_VAL_MASK (0xff << 0)
+
+/* SLG51000_MUXARRAY_INPUT_SEL_0 ~ SLG51000_MUXARRAY_INPUT_SEL_63
+ * 0x1700, 0x1701, 0x1702, 0x1703, 0x1704, 0x1705,
+ * 0x1706, 0x1707, 0x1708, 0x1709, 0x170a, 0x170b,
+ * 0x170c, 0x170d, 0x170e, 0x170f, 0x1710, 0x1711,
+ * 0x1712, 0x1713, 0x1714, 0x1715, 0x1716, 0x1717,
+ * 0x1718, 0x1719, 0x171a, 0x171b, 0x171c, 0x171d,
+ * 0x171e, 0x171f, 0x1720, 0x1721, 0x1722, 0x1723,
+ * 0x1724, 0x1725, 0x1726, 0x1727, 0x1728, 0x1729,
+ * 0x172a, 0x172b, 0x172c, 0x172d, 0x172e, 0x172f,
+ * 0x1730, 0x1731, 0x1732, 0x1733, 0x1734, 0x1735,
+ * 0x1736, 0x1737, 0x1738, 0x1739, 0x173a, 0x173b,
+ * 0x173c, 0x173d, 0x173e, 0x173f
+ */
+#define SLG51000_INPUT_SEL_SHIFT 0
+#define SLG51000_INPUT_SEL_MASK (0x3f << 0)
+
+/* SLG51000_PWRSEQ_RESOURCE_EN_0 ~ SLG51000_PWRSEQ_RESOURCE_EN_5
+ * 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905
+ */
+#define SLG51000_RESOURCE_EN_DOWN0_SHIFT 4
+#define SLG51000_RESOURCE_EN_DOWN0_MASK (0x07 << 4)
+#define SLG51000_RESOURCE_EN_UP0_SHIFT 0
+#define SLG51000_RESOURCE_EN_UP0_MASK (0x07 << 0)
+
+/* SLG51000_PWRSEQ_SLOT_TIME_MIN_UP0 ~ SLG51000_PWRSEQ_SLOT_TIME_MIN_UP5
+ * 0x1906, 0x1908, 0x190a, 0x190c, 0x190e, 0x1910
+ */
+#define SLG51000_SLOT_TIME_MIN_UP_SHIFT 0
+#define SLG51000_SLOT_TIME_MIN_UP_MASK (0xff << 0)
+
+/* SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN0 ~ SLG51000_PWRSEQ_SLOT_TIME_MIN_DOWN5
+ * 0x1907, 0x1909, 0x190b, 0x190d, 0x190f, 0x1911
+ */
+#define SLG51000_SLOT_TIME_MIN_DOWN_SHIFT 0
+#define SLG51000_SLOT_TIME_MIN_DOWN_MASK (0xff << 0)
+
+/* SLG51000_PWRSEQ_SLOT_TIME_MAX_CONF_A ~ SLG51000_PWRSEQ_SLOT_TIME_MAX_CONF_C
+ * 0x1912, 0x1913, 0x1914
+ */
+#define SLG51000_SLOT_TIME_MAX_DOWN1_SHIFT 6
+#define SLG51000_SLOT_TIME_MAX_DOWN1_MASK (0x03 << 6)
+#define SLG51000_SLOT_TIME_MAX_UP1_SHIFT 4
+#define SLG51000_SLOT_TIME_MAX_UP1_MASK (0x03 << 4)
+#define SLG51000_SLOT_TIME_MAX_DOWN0_SHIFT 2
+#define SLG51000_SLOT_TIME_MAX_DOWN0_MASK (0x03 << 2)
+#define SLG51000_SLOT_TIME_MAX_UP0_SHIFT 0
+#define SLG51000_SLOT_TIME_MAX_UP0_MASK (0x03 << 0)
+
+/* SLG51000_PWRSEQ_INPUT_SENSE_CONF_A = 0x1915 */
+#define SLG51000_TRIG_UP_SENSE_SHIFT 6
+#define SLG51000_TRIG_UP_SENSE_MASK (0x01 << 6)
+#define SLG51000_UP_EN_SENSE5_SHIFT 5
+#define SLG51000_UP_EN_SENSE5_MASK (0x01 << 5)
+#define SLG51000_UP_EN_SENSE4_SHIFT 4
+#define SLG51000_UP_EN_SENSE4_MASK (0x01 << 4)
+#define SLG51000_UP_EN_SENSE3_SHIFT 3
+#define SLG51000_UP_EN_SENSE3_MASK (0x01 << 3)
+#define SLG51000_UP_EN_SENSE2_SHIFT 2
+#define SLG51000_UP_EN_SENSE2_MASK (0x01 << 2)
+#define SLG51000_UP_EN_SENSE1_SHIFT 1
+#define SLG51000_UP_EN_SENSE1_MASK (0x01 << 1)
+#define SLG51000_UP_EN_SENSE0_SHIFT 0
+#define SLG51000_UP_EN_SENSE0_MASK (0x01 << 0)
+
+/* SLG51000_PWRSEQ_INPUT_SENSE_CONF_B = 0x1916 */
+#define SLG51000_CRASH_DETECT_SENSE_SHIFT 7
+#define SLG51000_CRASH_DETECT_SENSE_MASK (0x01 << 7)
+#define SLG51000_TRIG_DOWN_SENSE_SHIFT 6
+#define SLG51000_TRIG_DOWN_SENSE_MASK (0x01 << 6)
+#define SLG51000_DOWN_EN_SENSE5_SHIFT 5
+#define SLG51000_DOWN_EN_SENSE5_MASK (0x01 << 5)
+#define SLG51000_DOWN_EN_SENSE4_SHIFT 4
+#define SLG51000_DOWN_EN_SENSE4_MASK (0x01 << 4)
+#define SLG51000_DOWN_EN_SENSE3_SHIFT 3
+#define SLG51000_DOWN_EN_SENSE3_MASK (0x01 << 3)
+#define SLG51000_DOWN_EN_SENSE2_SHIFT 2
+#define SLG51000_DOWN_EN_SENSE2_MASK (0x01 << 2)
+#define SLG51000_DOWN_EN_SENSE1_SHIFT 1
+#define SLG51000_DOWN_EN_SENSE1_MASK (0x01 << 1)
+#define SLG51000_DOWN_EN_SENSE0_SHIFT 0
+#define SLG51000_DOWN_EN_SENSE0_MASK (0x01 << 0)
+
+/* SLG51000_LDO1_VSEL ~ SLG51000_LDO7_VSEL =
+ * 0x2000, 0x2200, 0x2300, 0x2500, 0x2700, 0x2900, 0x3100
+ */
+#define SLG51000_VSEL_SHIFT 0
+#define SLG51000_VSEL_MASK (0xff << 0)
+
+/* SLG51000_LDO1_MINV ~ SLG51000_LDO7_MINV =
+ * 0x2060, 0x2260, 0x2360, 0x2560, 0x2760, 0x2960, 0x3160
+ */
+#define SLG51000_MINV_SHIFT 0
+#define SLG51000_MINV_MASK (0xff << 0)
+
+/* SLG51000_LDO1_MAXV ~ SLG51000_LDO7_MAXV =
+ * 0x2061, 0x2261, 0x2361, 0x2561, 0x2761, 0x2961, 0x3161
+ */
+#define SLG51000_MAXV_SHIFT 0
+#define SLG51000_MAXV_MASK (0xff << 0)
+
+/* SLG51000_LDO1_MISC1 = 0x2064, SLG51000_LDO2_MISC1 = 0x2264 */
+#define SLG51000_SEL_VRANGE_SHIFT 0
+#define SLG51000_SEL_VRANGE_MASK (0x01 << 0)
+
+/* SLG51000_LDO1_VSEL_ACTUAL ~ SLG51000_LDO7_VSEL_ACTUAL =
+ * 0x2065, 0x2265, 0x2366, 0x2566, 0x2767, 0x2967, 0x3166
+ */
+#define SLG51000_VSEL_ACTUAL_SHIFT 0
+#define SLG51000_VSEL_ACTUAL_MASK (0xff << 0)
+
+/* SLG51000_LDO1_EVENT ~ SLG51000_LDO7_EVENT =
+ * 0x20c0, 0x22c0, 0x23c0, 0x25c0, 0x27c0, 0x29c0, 0x31c0
+ */
+#define SLG51000_EVT_ILIM_FLAG_SHIFT 0
+#define SLG51000_EVT_ILIM_FLAG_MASK (0x01 << 0)
+#define SLG51000_EVT_VOUT_OK_FLAG_SHIFT 1
+#define SLG51000_EVT_VOUT_OK_FLAG_MASK (0x01 << 1)
+
+/* SLG51000_LDO1_STATUS ~ SLG51000_LDO7_STATUS =
+ * 0x20c1, 0x22c1, 0x23c1, 0x25c1, 0x27c1, 0x29c1, 0x31c1
+ */
+#define SLG51000_STA_ILIM_FLAG_SHIFT 0
+#define SLG51000_STA_ILIM_FLAG_MASK (0x01 << 0)
+#define SLG51000_STA_VOUT_OK_FLAG_SHIFT 1
+#define SLG51000_STA_VOUT_OK_FLAG_MASK (0x01 << 1)
+
+/* SLG51000_LDO1_IRQ_MASK ~ SLG51000_LDO7_IRQ_MASK =
+ * 0x20c2, 0x22c2, 0x23c2, 0x25c2, 0x27c2, 0x29c2, 0x31c2
+ */
+#define SLG51000_IRQ_ILIM_FLAG_SHIFT 0
+#define SLG51000_IRQ_ILIM_FLAG_MASK (0x01 << 0)
+
+/* SLG51000_LDO3_CONF1 ~ SLG51000_LDO7_CONF1 =
+ * 0x2364, 0x2564, 0x2765, 0x2965, 0x3164
+ */
+#define SLG51000_SEL_START_ILIM_SHIFT 0
+#define SLG51000_SEL_START_ILIM_MASK (0x7f << 0)
+
+/* SLG51000_LDO3_CONF2 ~ SLG51000_LDO7_CONF2 =
+ * 0x2365, 0x2565, 0x2766, 0x2966, 0x3165
+ */
+#define SLG51000_SEL_FUNC_ILIM_SHIFT 0
+#define SLG51000_SEL_FUNC_ILIM_MASK (0x7f << 0)
+
+/* SLG51000_LDO5_TRIM2 = 0x2763, SLG51000_LDO6_TRIM2 = 0x2963 */
+#define SLG51000_SEL_BYP_SLEW_RATE_SHIFT 2
+#define SLG51000_SEL_BYP_SLEW_RATE_MASK (0x03 << 2)
+#define SLG51000_SEL_BYP_VGATE_SHIFT 1
+#define SLG51000_SEL_BYP_VGATE_MASK (0x01 << 1)
+#define SLG51000_SEL_BYP_MODE_SHIFT 0
+#define SLG51000_SEL_BYP_MODE_MASK (0x01 << 0)
+
+/* SLG51000_OTP_EVENT = 0x782b */
+#define SLG51000_EVT_CRC_SHIFT 0
+#define SLG51000_EVT_CRC_MASK (0x01 << 0)
+
+/* SLG51000_OTP_IRQ_MASK = 0x782d */
+#define SLG51000_IRQ_CRC_SHIFT 0
+#define SLG51000_IRQ_CRC_MASK (0x01 << 0)
+
+/* SLG51000_OTP_LOCK_OTP_PROG = 0x78fe */
+#define SLG51000_LOCK_OTP_PROG_SHIFT 0
+#define SLG51000_LOCK_OTP_PROG_MASK (0x01 << 0)
+
+/* SLG51000_OTP_LOCK_CTRL = 0x78ff */
+#define SLG51000_LOCK_DFT_SHIFT 1
+#define SLG51000_LOCK_DFT_MASK (0x01 << 1)
+#define SLG51000_LOCK_RWT_SHIFT 0
+#define SLG51000_LOCK_RWT_MASK (0x01 << 0)
+
+/* SLG51000_LOCK_GLOBAL_LOCK_CTRL1 = 0x8000 */
+#define SLG51000_LDO7_LOCK_SHIFT 7
+#define SLG51000_LDO7_LOCK_MASK (0x01 << 7)
+#define SLG51000_LDO6_LOCK_SHIFT 6
+#define SLG51000_LDO6_LOCK_MASK (0x01 << 6)
+#define SLG51000_LDO5_LOCK_SHIFT 5
+#define SLG51000_LDO5_LOCK_MASK (0x01 << 5)
+#define SLG51000_LDO4_LOCK_SHIFT 4
+#define SLG51000_LDO4_LOCK_MASK (0x01 << 4)
+#define SLG51000_LDO3_LOCK_SHIFT 3
+#define SLG51000_LDO3_LOCK_MASK (0x01 << 3)
+#define SLG51000_LDO2_LOCK_SHIFT 2
+#define SLG51000_LDO2_LOCK_MASK (0x01 << 2)
+#define SLG51000_LDO1_LOCK_SHIFT 1
+#define SLG51000_LDO1_LOCK_MASK (0x01 << 1)
+
+#endif /* __SLG51000_REGISTERS_H__ */
+
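Every register above comes with a _SHIFT/_MASK pair; a driver reads the register, masks, and shifts to recover a field. A self-contained sketch of the voltage-range selection seen in slg51000_regulator_init(), with the *_MIN and step values invented for illustration (the real constants live in the driver source, not this header):

#include <stdio.h>

#define SEL_VRANGE_MASK	0x01		/* bit 0 of LDOx_MISC1 */
#define LDOHP_LV_MIN	1200000		/* assumed uV, illustrative only */
#define LDOHP_HV_MIN	2400000		/* assumed uV, illustrative only */
#define UV_STEP		5000

int main(void)
{
	unsigned int misc1 = 0x01;	/* pretend MISC1 readback */
	unsigned int vsel_min = 10;	/* pretend MINV readback */
	unsigned int base;

	base = (misc1 & SEL_VRANGE_MASK) ? LDOHP_HV_MIN : LDOHP_LV_MIN;
	printf("min_uV = %u\n", base + vsel_min * UV_STEP);
	return 0;
}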
diff --git a/drivers/regulator/stm32-booster.c b/drivers/regulator/stm32-booster.c
new file mode 100644
index 000000000000..2a897666c650
--- /dev/null
+++ b/drivers/regulator/stm32-booster.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) STMicroelectronics 2019
+// Author(s): Fabrice Gasnier <fabrice.gasnier@st.com>.
+
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+
+/* STM32H7 SYSCFG register */
+#define STM32H7_SYSCFG_PMCR 0x04
+#define STM32H7_SYSCFG_BOOSTE_MASK BIT(8)
+
+/* STM32MP1 SYSCFG has set and clear registers */
+#define STM32MP1_SYSCFG_PMCSETR 0x04
+#define STM32MP1_SYSCFG_PMCCLRR 0x44
+#define STM32MP1_SYSCFG_EN_BOOSTER_MASK BIT(8)
+
+static const struct regulator_ops stm32h7_booster_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+};
+
+static const struct regulator_desc stm32h7_booster_desc = {
+ .name = "booster",
+ .supply_name = "vdda",
+ .n_voltages = 1,
+ .type = REGULATOR_VOLTAGE,
+ .min_uV = 3300000,
+ .fixed_uV = 3300000,
+ .ramp_delay = 66000, /* up to 50us to stabilize */
+ .ops = &stm32h7_booster_ops,
+ .enable_reg = STM32H7_SYSCFG_PMCR,
+ .enable_mask = STM32H7_SYSCFG_BOOSTE_MASK,
+ .owner = THIS_MODULE,
+};
+
+static int stm32mp1_booster_enable(struct regulator_dev *rdev)
+{
+ return regmap_write(rdev->regmap, STM32MP1_SYSCFG_PMCSETR,
+ STM32MP1_SYSCFG_EN_BOOSTER_MASK);
+}
+
+static int stm32mp1_booster_disable(struct regulator_dev *rdev)
+{
+ return regmap_write(rdev->regmap, STM32MP1_SYSCFG_PMCCLRR,
+ STM32MP1_SYSCFG_EN_BOOSTER_MASK);
+}
+
+static const struct regulator_ops stm32mp1_booster_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .enable = stm32mp1_booster_enable,
+ .disable = stm32mp1_booster_disable,
+ .is_enabled = regulator_is_enabled_regmap,
+};
+
+static const struct regulator_desc stm32mp1_booster_desc = {
+ .name = "booster",
+ .supply_name = "vdda",
+ .n_voltages = 1,
+ .type = REGULATOR_VOLTAGE,
+ .min_uV = 3300000,
+ .fixed_uV = 3300000,
+ .ramp_delay = 66000,
+ .ops = &stm32mp1_booster_ops,
+ .enable_reg = STM32MP1_SYSCFG_PMCSETR,
+ .enable_mask = STM32MP1_SYSCFG_EN_BOOSTER_MASK,
+ .owner = THIS_MODULE,
+};
+
+static int stm32_booster_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = pdev->dev.of_node;
+ struct regulator_config config = { };
+ const struct regulator_desc *desc;
+ struct regulator_dev *rdev;
+ struct regmap *regmap;
+ int ret;
+
+ regmap = syscon_regmap_lookup_by_phandle(np, "st,syscfg");
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ desc = (const struct regulator_desc *)
+ of_match_device(dev->driver->of_match_table, dev)->data;
+
+ config.regmap = regmap;
+ config.dev = dev;
+ config.of_node = np;
+ config.init_data = of_get_regulator_init_data(dev, np, desc);
+
+ rdev = devm_regulator_register(dev, desc, &config);
+ if (IS_ERR(rdev)) {
+ ret = PTR_ERR(rdev);
+ dev_err(dev, "register failed with error %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id stm32_booster_of_match[] = {
+ {
+ .compatible = "st,stm32h7-booster",
+ .data = (void *)&stm32h7_booster_desc
+ }, {
+ .compatible = "st,stm32mp1-booster",
+ .data = (void *)&stm32mp1_booster_desc
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, stm32_booster_of_match);
+
+static struct platform_driver stm32_booster_driver = {
+ .probe = stm32_booster_probe,
+ .driver = {
+ .name = "stm32-booster",
+ .of_match_table = of_match_ptr(stm32_booster_of_match),
+ },
+};
+module_platform_driver(stm32_booster_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Fabrice Gasnier <fabrice.gasnier@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 booster regulator driver");
+MODULE_ALIAS("platform:stm32-booster");
diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index ca39b3d55123..10ea4b5a0f55 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -371,11 +371,12 @@ static struct tps65090_platform_data *tps65090_parse_dt_reg_data(
"dcdc-ext-control-gpios", 0,
gflags,
"tps65090");
- if (IS_ERR(rpdata->gpiod))
- return ERR_CAST(rpdata->gpiod);
- if (!rpdata->gpiod)
+ if (PTR_ERR(rpdata->gpiod) == -ENOENT) {
dev_err(&pdev->dev,
"could not find DCDC external control GPIO\n");
+ rpdata->gpiod = NULL;
+ } else if (IS_ERR(rpdata->gpiod))
+ return ERR_CAST(rpdata->gpiod);
}
if (of_property_read_u32(tps65090_matches[idx].of_node,
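The tps65090 hunk above demotes -ENOENT (the property is simply absent) to a warning and keeps every other error, including probe deferral, fatal. gpiolib offers a helper with exactly these semantics; a sketch, where the "demo" con_id is illustrative:

#include <linux/err.h>
#include <linux/gpio/consumer.h>

static int demo_get_gpio(struct device *dev, struct gpio_desc **out)
{
	struct gpio_desc *gpiod;

	/* Returns NULL if the property is absent, ERR_PTR() otherwise,
	 * including -EPROBE_DEFER, which must stay fatal. */
	gpiod = devm_gpiod_get_optional(dev, "demo", GPIOD_OUT_LOW);
	if (IS_ERR(gpiod))
		return PTR_ERR(gpiod);

	*out = gpiod;	/* may be NULL: run without the GPIO */
	return 0;
}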
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index b422eef97b77..018dbbd96771 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -15,7 +15,7 @@
#include <linux/platform_device.h>
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/slab.h>
#include <linux/mfd/wm831x/core.h>
@@ -50,7 +50,7 @@ struct wm831x_dcdc {
int base;
struct wm831x *wm831x;
struct regulator_dev *regulator;
- int dvs_gpio;
+ struct gpio_desc *dvs_gpiod;
int dvs_gpio_state;
int on_vsel;
int dvs_vsel;
@@ -217,7 +217,7 @@ static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state)
return 0;
dcdc->dvs_gpio_state = state;
- gpio_set_value(dcdc->dvs_gpio, state);
+ gpiod_set_value(dcdc->dvs_gpiod, state);
/* Should wait for DVS state change to be asserted if we have
* a GPIO for it, for now assume the device is configured
@@ -237,10 +237,10 @@ static int wm831x_buckv_set_voltage_sel(struct regulator_dev *rdev,
int ret;
/* If this value is already set then do a GPIO update if we can */
- if (dcdc->dvs_gpio && dcdc->on_vsel == vsel)
+ if (dcdc->dvs_gpiod && dcdc->on_vsel == vsel)
return wm831x_buckv_set_dvs(rdev, 0);
- if (dcdc->dvs_gpio && dcdc->dvs_vsel == vsel)
+ if (dcdc->dvs_gpiod && dcdc->dvs_vsel == vsel)
return wm831x_buckv_set_dvs(rdev, 1);
/* Always set the ON status to the minimum voltage */
@@ -249,7 +249,7 @@ static int wm831x_buckv_set_voltage_sel(struct regulator_dev *rdev,
return ret;
dcdc->on_vsel = vsel;
- if (!dcdc->dvs_gpio)
+ if (!dcdc->dvs_gpiod)
return ret;
/* Kick the voltage transition now */
@@ -296,7 +296,7 @@ static int wm831x_buckv_get_voltage_sel(struct regulator_dev *rdev)
{
struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
- if (dcdc->dvs_gpio && dcdc->dvs_gpio_state)
+ if (dcdc->dvs_gpiod && dcdc->dvs_gpio_state)
return dcdc->dvs_vsel;
else
return dcdc->on_vsel;
@@ -337,7 +337,7 @@ static void wm831x_buckv_dvs_init(struct platform_device *pdev,
int ret;
u16 ctrl;
- if (!pdata || !pdata->dvs_gpio)
+ if (!pdata)
return;
/* gpiolib won't let us read the GPIO status so pick the higher
@@ -345,17 +345,14 @@ static void wm831x_buckv_dvs_init(struct platform_device *pdev,
*/
dcdc->dvs_gpio_state = pdata->dvs_init_state;
- ret = devm_gpio_request_one(&pdev->dev, pdata->dvs_gpio,
- dcdc->dvs_gpio_state ? GPIOF_INIT_HIGH : 0,
- "DCDC DVS");
- if (ret < 0) {
- dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n",
- dcdc->name, ret);
+ dcdc->dvs_gpiod = devm_gpiod_get(&pdev->dev, "dvs",
+ dcdc->dvs_gpio_state ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW);
+ if (IS_ERR(dcdc->dvs_gpiod)) {
+ dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %ld\n",
+ dcdc->name, PTR_ERR(dcdc->dvs_gpiod));
return;
}
- dcdc->dvs_gpio = pdata->dvs_gpio;
-
switch (pdata->dvs_control_src) {
case 1:
ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 9ac7574e3cfb..a8682f69effc 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -38,7 +38,7 @@ config DASD_PROFILE
depends on DASD
help
Enable this option if you want to see profiling information
- in /proc/dasd/statistics.
+ in /proc/dasd/statistics.
config DASD_ECKD
def_tristate y
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index fab35c6170cc..245f33c2f71e 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -203,7 +203,7 @@ static int __init dasd_feature_list(char *str)
else if (len == 8 && !strncmp(str, "failfast", 8))
features |= DASD_FEATURE_FAILFAST;
else {
- pr_warn("%*s is not a supported device option\n",
+ pr_warn("%.*s is not a supported device option\n",
len, str);
rc = -EINVAL;
}
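The dasd_devmap fix above hinges on a one-character printf difference: "%*s" sets a minimum field width, while "%.*s" caps how many bytes of the argument are printed, which is what a token that is not NUL-terminated at len needs. A standalone demonstration:

#include <stdio.h>

int main(void)
{
	const char *str = "failfast,erplog";	/* rest of the option string */
	int len = 8;				/* length of "failfast" */

	printf("[%*s]\n", len, str);	/* width only: whole string printed */
	printf("[%.*s]\n", len, str);	/* precision: just "failfast" */
	return 0;
}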
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index ab0b243a947d..6cc4b19acf85 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -79,27 +79,6 @@ config SCLP_VT220_CONSOLE
Include support for using an IBM SCLP VT220-compatible terminal as a
Linux system console.
-config SCLP_ASYNC
- def_tristate m
- prompt "Support for Call Home via Asynchronous SCLP Records"
- depends on S390
- help
- This option enables the call home function, which is able to inform
- the service element and connected organisations about a kernel panic.
- You should only select this option if you know what you are doing,
- want for inform other people about your kernel panics,
- need this feature and intend to run your kernel in LPAR.
-
-config SCLP_ASYNC_ID
- string "Component ID for Call Home"
- depends on SCLP_ASYNC
- default "000000000"
- help
- The Component ID for Call Home is used to identify the correct
- problem reporting queue the call home records should be sent to.
-
- If your are unsure, please use the default value "000000000".
-
config HMC_DRV
def_tristate m
prompt "Support for file transfers from HMC drive CD/DVD-ROM"
@@ -205,4 +184,3 @@ config S390_VMUR
depends on S390
help
Character device driver for z/VM reader, puncher and printer.
-
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index 3072b89785dd..b8a8816d94e7 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_TN3215) += con3215.o
obj-$(CONFIG_SCLP_TTY) += sclp_tty.o
obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
-obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
obj-$(CONFIG_PCI) += sclp_pci.o
diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c
deleted file mode 100644
index e69b12a40636..000000000000
--- a/drivers/s390/char/sclp_async.c
+++ /dev/null
@@ -1,189 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Enable Asynchronous Notification via SCLP.
- *
- * Copyright IBM Corp. 2009
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/sysctl.h>
-#include <linux/utsname.h>
-#include "sclp.h"
-
-static int callhome_enabled;
-static struct sclp_req *request;
-static struct sclp_async_sccb *sccb;
-static int sclp_async_send_wait(char *message);
-static struct ctl_table_header *callhome_sysctl_header;
-static DEFINE_SPINLOCK(sclp_async_lock);
-#define SCLP_NORMAL_WRITE 0x00
-
-struct async_evbuf {
- struct evbuf_header header;
- u64 reserved;
- u8 rflags;
- u8 empty;
- u8 rtype;
- u8 otype;
- char comp_id[12];
- char data[3000]; /* there is still some space left */
-} __attribute__((packed));
-
-struct sclp_async_sccb {
- struct sccb_header header;
- struct async_evbuf evbuf;
-} __attribute__((packed));
-
-static struct sclp_register sclp_async_register = {
- .send_mask = EVTYP_ASYNC_MASK,
-};
-
-static int call_home_on_panic(struct notifier_block *self,
- unsigned long event, void *data)
-{
- strncat(data, init_utsname()->nodename,
- sizeof(init_utsname()->nodename));
- sclp_async_send_wait(data);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block call_home_panic_nb = {
- .notifier_call = call_home_on_panic,
- .priority = INT_MAX,
-};
-
-static int zero;
-static int one = 1;
-
-static struct ctl_table callhome_table[] = {
- {
- .procname = "callhome",
- .data = &callhome_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
- {}
-};
-
-static struct ctl_table kern_dir_table[] = {
- {
- .procname = "kernel",
- .maxlen = 0,
- .mode = 0555,
- .child = callhome_table,
- },
- {}
-};
-
-/*
- * Function used to transfer asynchronous notification
- * records which waits for send completion
- */
-static int sclp_async_send_wait(char *message)
-{
- struct async_evbuf *evb;
- int rc;
- unsigned long flags;
-
- if (!callhome_enabled)
- return 0;
- sccb->evbuf.header.type = EVTYP_ASYNC;
- sccb->evbuf.rtype = 0xA5;
- sccb->evbuf.otype = 0x00;
- evb = &sccb->evbuf;
- request->command = SCLP_CMDW_WRITE_EVENT_DATA;
- request->sccb = sccb;
- request->status = SCLP_REQ_FILLED;
- strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
- /*
- * Retain Queue
- * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
- */
- strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
- sizeof(sccb->evbuf.comp_id));
- sccb->evbuf.header.length = sizeof(sccb->evbuf);
- sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
- sccb->header.function_code = SCLP_NORMAL_WRITE;
- rc = sclp_add_request(request);
- if (rc)
- return rc;
- spin_lock_irqsave(&sclp_async_lock, flags);
- while (request->status != SCLP_REQ_DONE &&
- request->status != SCLP_REQ_FAILED) {
- sclp_sync_wait();
- }
- spin_unlock_irqrestore(&sclp_async_lock, flags);
- if (request->status != SCLP_REQ_DONE)
- return -EIO;
- rc = ((struct sclp_async_sccb *)
- request->sccb)->header.response_code;
- if (rc != 0x0020)
- return -EIO;
- if (evb->header.flags != 0x80)
- return -EIO;
- return rc;
-}
-
-static int __init sclp_async_init(void)
-{
- int rc;
-
- rc = sclp_register(&sclp_async_register);
- if (rc)
- return rc;
- rc = -EOPNOTSUPP;
- if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK))
- goto out_sclp;
- rc = -ENOMEM;
- callhome_sysctl_header = register_sysctl_table(kern_dir_table);
- if (!callhome_sysctl_header)
- goto out_sclp;
- request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
- sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!request || !sccb)
- goto out_mem;
- rc = atomic_notifier_chain_register(&panic_notifier_list,
- &call_home_panic_nb);
- if (!rc)
- goto out;
-out_mem:
- kfree(request);
- free_page((unsigned long) sccb);
- unregister_sysctl_table(callhome_sysctl_header);
-out_sclp:
- sclp_unregister(&sclp_async_register);
-out:
- return rc;
-}
-module_init(sclp_async_init);
-
-static void __exit sclp_async_exit(void)
-{
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &call_home_panic_nb);
- unregister_sysctl_table(callhome_sysctl_header);
- sclp_unregister(&sclp_async_register);
- free_page((unsigned long) sccb);
- kfree(request);
-}
-module_exit(sclp_async_exit);
-
-MODULE_AUTHOR("Copyright IBM Corp. 2009");
-MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 405a60538630..08f812475f5e 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -4,7 +4,7 @@
* dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
* dump format as s390 standalone dumps.
*
- * For more information please refer to Documentation/s390/zfcpdump.txt
+ * For more information please refer to Documentation/s390/zfcpdump.rst
*
* Copyright IBM Corp. 2003, 2008
* Author(s): Michael Holzheu
diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c
index 4534afc63591..427b2e24a8ce 100644
--- a/drivers/s390/cio/airq.c
+++ b/drivers/s390/cio/airq.c
@@ -16,9 +16,11 @@
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/slab.h>
+#include <linux/dmapool.h>
#include <asm/airq.h>
#include <asm/isc.h>
+#include <asm/cio.h>
#include "cio.h"
#include "cio_debug.h"
@@ -27,7 +29,7 @@
static DEFINE_SPINLOCK(airq_lists_lock);
static struct hlist_head airq_lists[MAX_ISC+1];
-static struct kmem_cache *airq_iv_cache;
+static struct dma_pool *airq_iv_cache;
/**
* register_adapter_interrupt() - register adapter interrupt handler
@@ -115,6 +117,11 @@ void __init init_airq_interrupts(void)
setup_irq(THIN_INTERRUPT, &airq_interrupt);
}
+static inline unsigned long iv_size(unsigned long bits)
+{
+ return BITS_TO_LONGS(bits) * sizeof(unsigned long);
+}
+
/**
* airq_iv_create - create an interrupt vector
* @bits: number of bits in the interrupt vector
@@ -132,17 +139,19 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
goto out;
iv->bits = bits;
iv->flags = flags;
- size = BITS_TO_LONGS(bits) * sizeof(unsigned long);
+ size = iv_size(bits);
if (flags & AIRQ_IV_CACHELINE) {
- if ((cache_line_size() * BITS_PER_BYTE) < bits)
+ if ((cache_line_size() * BITS_PER_BYTE) < bits
+ || !airq_iv_cache)
goto out_free;
- iv->vector = kmem_cache_zalloc(airq_iv_cache, GFP_KERNEL);
+ iv->vector = dma_pool_zalloc(airq_iv_cache, GFP_KERNEL,
+ &iv->vector_dma);
if (!iv->vector)
goto out_free;
} else {
- iv->vector = kzalloc(size, GFP_KERNEL);
+ iv->vector = cio_dma_zalloc(size);
if (!iv->vector)
goto out_free;
}
@@ -178,10 +187,10 @@ out_free:
kfree(iv->ptr);
kfree(iv->bitlock);
kfree(iv->avail);
- if (iv->flags & AIRQ_IV_CACHELINE)
- kmem_cache_free(airq_iv_cache, iv->vector);
+ if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
+ dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
- kfree(iv->vector);
+ cio_dma_free(iv->vector, size);
kfree(iv);
out:
return NULL;
@@ -198,9 +207,9 @@ void airq_iv_release(struct airq_iv *iv)
kfree(iv->ptr);
kfree(iv->bitlock);
if (iv->flags & AIRQ_IV_CACHELINE)
- kmem_cache_free(airq_iv_cache, iv->vector);
+ dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
- kfree(iv->vector);
+ cio_dma_free(iv->vector, iv_size(iv->bits));
kfree(iv->avail);
kfree(iv);
}
@@ -295,12 +304,12 @@ unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
}
EXPORT_SYMBOL(airq_iv_scan);
-static int __init airq_init(void)
+int __init airq_init(void)
{
- airq_iv_cache = kmem_cache_create("airq_iv_cache", cache_line_size(),
- cache_line_size(), 0, NULL);
+ airq_iv_cache = dma_pool_create("airq_iv_cache", cio_get_dma_css_dev(),
+ cache_line_size(),
+ cache_line_size(), PAGE_SIZE);
if (!airq_iv_cache)
return -ENOMEM;
return 0;
}
-subsys_initcall(airq_init);
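airq_init() above trades a kmem_cache for a dma_pool so the interrupt vectors come out of coherent DMA memory. The pool life cycle, sketched with illustrative sizes:

#include <linux/device.h>
#include <linux/dmapool.h>

static int demo_pool(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t dma;
	void *vec;

	/* name, device, block size, alignment, boundary */
	pool = dma_pool_create("demo", dev, 256, 256, PAGE_SIZE);
	if (!pool)
		return -ENOMEM;

	/* vec is the CPU address, dma the matching bus address */
	vec = dma_pool_zalloc(pool, GFP_KERNEL, &dma);
	if (!vec) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	dma_pool_free(pool, vec, dma);
	dma_pool_destroy(pool);
	return 0;
}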
diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
index 603268a33ea1..73582a0a2622 100644
--- a/drivers/s390/cio/ccwreq.c
+++ b/drivers/s390/cio/ccwreq.c
@@ -63,7 +63,7 @@ static void ccwreq_stop(struct ccw_device *cdev, int rc)
return;
req->done = 1;
ccw_device_set_timeout(cdev, 0);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
if (rc && rc != -ENODEV && req->drc)
rc = req->drc;
req->callback(cdev, req->data, rc);
@@ -86,7 +86,7 @@ static void ccwreq_do(struct ccw_device *cdev)
continue;
}
/* Perform start function. */
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
rc = cio_start(sch, cp, (u8) req->mask);
if (rc == 0) {
/* I/O started successfully. */
@@ -169,7 +169,7 @@ int ccw_request_cancel(struct ccw_device *cdev)
*/
static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
{
- struct irb *irb = &cdev->private->irb;
+ struct irb *irb = &cdev->private->dma_area->irb;
struct cmd_scsw *scsw = &irb->scsw.cmd;
enum uc_todo todo;
@@ -187,7 +187,8 @@ static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
CIO_TRACE_EVENT(2, "sensedata");
CIO_HEX_EVENT(2, &cdev->private->dev_id,
sizeof(struct ccw_dev_id));
- CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT);
+ CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw,
+ SENSE_MAX_COUNT);
/* Check for command reject. */
if (irb->ecw[0] & SNS0_CMD_REJECT)
return IO_REJECTED;
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index a835b31aad99..6392a1b95b02 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -323,36 +323,6 @@ struct chsc_sei {
} __packed __aligned(PAGE_SIZE);
/*
- * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
- */
-
-#define ND_VALIDITY_VALID 0
-#define ND_VALIDITY_OUTDATED 1
-#define ND_VALIDITY_INVALID 2
-
-struct node_descriptor {
- /* Flags. */
- union {
- struct {
- u32 validity:3;
- u32 reserved:5;
- } __packed;
- u8 byte0;
- } __packed;
-
- /* Node parameters. */
- u32 params:24;
-
- /* Node ID. */
- char type[6];
- char model[3];
- char manufacturer[3];
- char plant[2];
- char seq[12];
- u16 tag;
-} __packed;
-
-/*
* Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
*/
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 06a91743335a..ba7d2480613b 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -113,6 +113,7 @@ struct subchannel {
enum sch_todo todo;
struct work_struct todo_work;
struct schib_config config;
+ char *driver_override; /* Driver name to force a match */
} __attribute__ ((aligned(8)));
DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb);
@@ -135,6 +136,8 @@ extern int cio_commit_config(struct subchannel *sch);
int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
int cio_tm_intrg(struct subchannel *sch);
+extern int __init airq_init(void);
+
/* Use with care. */
#ifdef CONFIG_CCW_CONSOLE
extern struct subchannel *cio_probe_console(void);
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index aea502922646..e1f2d0eed544 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -20,6 +20,8 @@
#include <linux/reboot.h>
#include <linux/suspend.h>
#include <linux/proc_fs.h>
+#include <linux/genalloc.h>
+#include <linux/dma-mapping.h>
#include <asm/isc.h>
#include <asm/crw.h>
@@ -165,6 +167,7 @@ static void css_subchannel_release(struct device *dev)
sch->config.intparm = 0;
cio_commit_config(sch);
+ kfree(sch->driver_override);
kfree(sch->lock);
kfree(sch);
}
@@ -224,6 +227,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
INIT_WORK(&sch->todo_work, css_sch_todo);
sch->dev.release = &css_subchannel_release;
device_initialize(&sch->dev);
+ /*
+ * The physical addresses of some of the dma structures that can
+ * belong to a subchannel need to fit 31 bit width (e.g. ccw).
+ */
+ sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
+ sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
return sch;
err:
@@ -315,9 +324,57 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RO(modalias);
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ char *driver_override, *old, *cp;
+
+ /* We need to keep extra room for a newline */
+ if (count >= (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ driver_override = kstrndup(buf, count, GFP_KERNEL);
+ if (!driver_override)
+ return -ENOMEM;
+
+ cp = strchr(driver_override, '\n');
+ if (cp)
+ *cp = '\0';
+
+ device_lock(dev);
+ old = sch->driver_override;
+ if (strlen(driver_override)) {
+ sch->driver_override = driver_override;
+ } else {
+ kfree(driver_override);
+ sch->driver_override = NULL;
+ }
+ device_unlock(dev);
+
+ kfree(old);
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", sch->driver_override);
+ device_unlock(dev);
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
static struct attribute *subch_attrs[] = {
&dev_attr_type.attr,
&dev_attr_modalias.attr,
+ &dev_attr_driver_override.attr,
NULL,
};
@@ -899,6 +956,13 @@ static int __init setup_css(int nr)
dev_set_name(&css->device, "css%x", nr);
css->device.groups = cssdev_attr_groups;
css->device.release = channel_subsystem_release;
+ /*
+ * We currently allocate notifier bits with this (using
+ * css->device as the device argument with the DMA API)
+ * and are fine with 64 bit addresses.
+ */
+ css->device.coherent_dma_mask = DMA_BIT_MASK(64);
+ css->device.dma_mask = &css->device.coherent_dma_mask;
mutex_init(&css->mutex);
css->cssid = chsc_get_cssid(nr);
@@ -1018,6 +1082,111 @@ static struct notifier_block css_power_notifier = {
.notifier_call = css_power_event,
};
+#define CIO_DMA_GFP (GFP_KERNEL | __GFP_ZERO)
+static struct gen_pool *cio_dma_pool;
+
+/* Currently cio supports only a single css */
+struct device *cio_get_dma_css_dev(void)
+{
+ return &channel_subsystems[0]->device;
+}
+
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages)
+{
+ struct gen_pool *gp_dma;
+ void *cpu_addr;
+ dma_addr_t dma_addr;
+ int i;
+
+ gp_dma = gen_pool_create(3, -1);
+ if (!gp_dma)
+ return NULL;
+ for (i = 0; i < nr_pages; ++i) {
+ cpu_addr = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr,
+ CIO_DMA_GFP);
+ if (!cpu_addr)
+ return gp_dma;
+ gen_pool_add_virt(gp_dma, (unsigned long) cpu_addr,
+ dma_addr, PAGE_SIZE, -1);
+ }
+ return gp_dma;
+}
+
+static void __gp_dma_free_dma(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk, void *data)
+{
+ size_t chunk_size = chunk->end_addr - chunk->start_addr + 1;
+
+ dma_free_coherent((struct device *) data, chunk_size,
+ (void *) chunk->start_addr,
+ (dma_addr_t) chunk->phys_addr);
+}
+
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev)
+{
+ if (!gp_dma)
+ return;
+ /* this is quite ugly but there is no better way to walk the chunks */
+ gen_pool_for_each_chunk(gp_dma, __gp_dma_free_dma, dma_dev);
+ gen_pool_destroy(gp_dma);
+}
+
+static int cio_dma_pool_init(void)
+{
+ /* No need to free up the resources: compiled in */
+ cio_dma_pool = cio_gp_dma_create(cio_get_dma_css_dev(), 1);
+ if (!cio_dma_pool)
+ return -ENOMEM;
+ return 0;
+}
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+ size_t size)
+{
+ dma_addr_t dma_addr;
+ unsigned long addr;
+ size_t chunk_size;
+
+ if (!gp_dma)
+ return NULL;
+ addr = gen_pool_alloc(gp_dma, size);
+ while (!addr) {
+ chunk_size = round_up(size, PAGE_SIZE);
+ addr = (unsigned long) dma_alloc_coherent(dma_dev,
+ chunk_size, &dma_addr, CIO_DMA_GFP);
+ if (!addr)
+ return NULL;
+ gen_pool_add_virt(gp_dma, addr, dma_addr, chunk_size, -1);
+ addr = gen_pool_alloc(gp_dma, size);
+ }
+ return (void *) addr;
+}
+
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size)
+{
+ if (!cpu_addr)
+ return;
+ memset(cpu_addr, 0, size);
+ gen_pool_free(gp_dma, (unsigned long) cpu_addr, size);
+}
+
+/*
+ * Allocate dma memory from the css global pool. Intended for memory not
+ * specific to any single device within the css. The allocated memory
+ * is not guaranteed to be 31-bit addressable.
+ *
+ * Caution: Not suitable for early stuff like console.
+ */
+void *cio_dma_zalloc(size_t size)
+{
+ return cio_gp_dma_zalloc(cio_dma_pool, cio_get_dma_css_dev(), size);
+}
+
+void cio_dma_free(void *cpu_addr, size_t size)
+{
+ cio_gp_dma_free(cio_dma_pool, cpu_addr, size);
+}
+
/*
* Now that the driver core is running, we can setup our channel subsystem.
* The struct subchannel's are created during probing.
@@ -1059,16 +1228,22 @@ static int __init css_bus_init(void)
if (ret)
goto out_unregister;
ret = register_pm_notifier(&css_power_notifier);
- if (ret) {
- unregister_reboot_notifier(&css_reboot_notifier);
- goto out_unregister;
- }
+ if (ret)
+ goto out_unregister_rn;
+ ret = cio_dma_pool_init();
+ if (ret)
+ goto out_unregister_pmn;
+ airq_init();
css_init_done = 1;
/* Enable default isc for I/O subchannels. */
isc_register(IO_SCH_ISC);
return 0;
+out_unregister_pmn:
+ unregister_pm_notifier(&css_power_notifier);
+out_unregister_rn:
+ unregister_reboot_notifier(&css_reboot_notifier);
out_unregister:
while (i-- > 0) {
struct channel_subsystem *css = channel_subsystems[i];
@@ -1222,6 +1397,10 @@ static int css_bus_match(struct device *dev, struct device_driver *drv)
struct css_driver *driver = to_cssdriver(drv);
struct css_device_id *id;
+ /* When driver_override is set, only bind to the matching driver */
+ if (sch->driver_override && strcmp(sch->driver_override, drv->name))
+ return 0;
+
for (id = driver->subchannel_type; id->match_flags; id++) {
if (sch->st == id->type)
return 1;
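For consumers, the new css helpers reduce to an alloc/free pair backed by the gen_pool above; a sketch, assuming the prototypes are exported via asm/cio.h as the airq.c include earlier in this series suggests:

#include <asm/cio.h>

static void *demo_buf;

static int demo_setup(void)
{
	/* 64-bit addressable, zeroed, from the css-global pool */
	demo_buf = cio_dma_zalloc(64);
	return demo_buf ? 0 : -ENOMEM;
}

static void demo_teardown(void)
{
	cio_dma_free(demo_buf, 64);	/* zeroes before returning to pool */
}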
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 1540229a37bb..9985b7484a6b 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -24,6 +24,7 @@
#include <linux/timer.h>
#include <linux/kernel_stat.h>
#include <linux/sched/signal.h>
+#include <linux/dma-mapping.h>
#include <asm/ccwdev.h>
#include <asm/cio.h>
@@ -687,6 +688,9 @@ ccw_device_release(struct device *dev)
struct ccw_device *cdev;
cdev = to_ccwdev(dev);
+ cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
+ sizeof(*cdev->private->dma_area));
+ cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
/* Release reference of parent subchannel. */
put_device(cdev->dev.parent);
kfree(cdev->private);
@@ -696,15 +700,33 @@ ccw_device_release(struct device *dev)
static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
{
struct ccw_device *cdev;
+ struct gen_pool *dma_pool;
cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
- if (cdev) {
- cdev->private = kzalloc(sizeof(struct ccw_device_private),
- GFP_KERNEL | GFP_DMA);
- if (cdev->private)
- return cdev;
- }
+ if (!cdev)
+ goto err_cdev;
+ cdev->private = kzalloc(sizeof(struct ccw_device_private),
+ GFP_KERNEL | GFP_DMA);
+ if (!cdev->private)
+ goto err_priv;
+ cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
+ cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
+ dma_pool = cio_gp_dma_create(&cdev->dev, 1);
+ if (!dma_pool)
+ goto err_dma_pool;
+ cdev->private->dma_pool = dma_pool;
+ cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
+ sizeof(*cdev->private->dma_area));
+ if (!cdev->private->dma_area)
+ goto err_dma_area;
+ return cdev;
+err_dma_area:
+ cio_gp_dma_destroy(dma_pool, &cdev->dev);
+err_dma_pool:
+ kfree(cdev->private);
+err_priv:
kfree(cdev);
+err_cdev:
return ERR_PTR(-ENOMEM);
}
@@ -884,7 +906,7 @@ io_subchannel_recog_done(struct ccw_device *cdev)
wake_up(&ccw_device_init_wq);
break;
case DEV_STATE_OFFLINE:
- /*
+ /*
* We can't register the device in interrupt context so
* we schedule a work item.
*/
@@ -1062,6 +1084,14 @@ static int io_subchannel_probe(struct subchannel *sch)
if (!io_priv)
goto out_schedule;
+ io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+ sizeof(*io_priv->dma_area),
+ &io_priv->dma_area_dma, GFP_KERNEL);
+ if (!io_priv->dma_area) {
+ kfree(io_priv);
+ goto out_schedule;
+ }
+
set_io_private(sch, io_priv);
css_schedule_eval(sch->schid);
return 0;
@@ -1088,6 +1118,8 @@ static int io_subchannel_remove(struct subchannel *sch)
set_io_private(sch, NULL);
spin_unlock_irq(sch->lock);
out_free:
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
return 0;
@@ -1593,13 +1625,19 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
return ERR_CAST(sch);
io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
- if (!io_priv) {
- put_device(&sch->dev);
- return ERR_PTR(-ENOMEM);
- }
+ if (!io_priv)
+ goto err_priv;
+ io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+ sizeof(*io_priv->dma_area),
+ &io_priv->dma_area_dma, GFP_KERNEL);
+ if (!io_priv->dma_area)
+ goto err_dma_area;
set_io_private(sch, io_priv);
cdev = io_subchannel_create_ccwdev(sch);
if (IS_ERR(cdev)) {
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
+ set_io_private(sch, NULL);
put_device(&sch->dev);
kfree(io_priv);
return cdev;
@@ -1607,6 +1645,12 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
cdev->drv = drv;
ccw_device_set_int_class(cdev);
return cdev;
+
+err_dma_area:
+ kfree(io_priv);
+err_priv:
+ put_device(&sch->dev);
+ return ERR_PTR(-ENOMEM);
}
void __init ccw_device_destroy_console(struct ccw_device *cdev)
@@ -1617,6 +1661,8 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev)
set_io_private(sch, NULL);
put_device(&sch->dev);
put_device(&cdev->dev);
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
}
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 9169af7dbb43..8fc267324ebb 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -67,8 +67,10 @@ static void ccw_timeout_log(struct ccw_device *cdev)
sizeof(struct tcw), 0);
} else {
printk(KERN_WARNING "cio: orb indicates command mode\n");
- if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw ||
- (void *)(addr_t)orb->cmd.cpa == cdev->private->iccws)
+ if ((void *)(addr_t)orb->cmd.cpa ==
+ &private->dma_area->sense_ccw ||
+ (void *)(addr_t)orb->cmd.cpa ==
+ cdev->private->dma_area->iccws)
printk(KERN_WARNING "cio: last channel program "
"(intern):\n");
else
@@ -143,18 +145,22 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
void ccw_device_update_sense_data(struct ccw_device *cdev)
{
memset(&cdev->id, 0, sizeof(cdev->id));
- cdev->id.cu_type = cdev->private->senseid.cu_type;
- cdev->id.cu_model = cdev->private->senseid.cu_model;
- cdev->id.dev_type = cdev->private->senseid.dev_type;
- cdev->id.dev_model = cdev->private->senseid.dev_model;
+ cdev->id.cu_type = cdev->private->dma_area->senseid.cu_type;
+ cdev->id.cu_model = cdev->private->dma_area->senseid.cu_model;
+ cdev->id.dev_type = cdev->private->dma_area->senseid.dev_type;
+ cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
}
int ccw_device_test_sense_data(struct ccw_device *cdev)
{
- return cdev->id.cu_type == cdev->private->senseid.cu_type &&
- cdev->id.cu_model == cdev->private->senseid.cu_model &&
- cdev->id.dev_type == cdev->private->senseid.dev_type &&
- cdev->id.dev_model == cdev->private->senseid.dev_model;
+ return cdev->id.cu_type ==
+ cdev->private->dma_area->senseid.cu_type &&
+ cdev->id.cu_model ==
+ cdev->private->dma_area->senseid.cu_model &&
+ cdev->id.dev_type ==
+ cdev->private->dma_area->senseid.dev_type &&
+ cdev->id.dev_model ==
+ cdev->private->dma_area->senseid.dev_model;
}
/*
@@ -342,7 +348,7 @@ ccw_device_done(struct ccw_device *cdev, int state)
cio_disable_subchannel(sch);
/* Reset device status. */
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
cdev->private->state = state;
@@ -509,13 +515,14 @@ callback:
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
- create_fake_irb(&cdev->private->irb,
+ create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- &cdev->private->irb);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ &cdev->private->dma_area->irb);
+ memset(&cdev->private->dma_area->irb, 0,
+ sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
ccw_device_handle_broken_paths(cdev);
@@ -672,7 +679,8 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
if (scsw_actl(&sch->schib.scsw) != 0 ||
(scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
- (scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
+ (scsw_stctl(&cdev->private->dma_area->irb.scsw) &
+ SCSW_STCTL_STATUS_PEND)) {
/*
* No final status yet or final status not yet delivered
* to the device driver. Can't do path verification now,
@@ -719,7 +727,7 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
* - fast notification was requested (primary status)
* - unsolicited interrupts
*/
- stctl = scsw_stctl(&cdev->private->irb.scsw);
+ stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
(stctl == SCSW_STCTL_STATUS_PEND);
@@ -735,9 +743,9 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- &cdev->private->irb);
+ &cdev->private->dma_area->irb);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
return 1;
}
@@ -759,7 +767,8 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
/* Unit check but no sense data. Need basic sense. */
if (ccw_device_do_sense(cdev, irb) != 0)
goto call_handler_unsol;
- memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+ memcpy(&cdev->private->dma_area->irb, irb,
+ sizeof(struct irb));
cdev->private->state = DEV_STATE_W4SENSE;
cdev->private->intparm = 0;
return;
@@ -842,7 +851,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
if (scsw_fctl(&irb->scsw) &
(SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
cdev->private->flags.dosense = 0;
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
ccw_device_accumulate_irb(cdev, irb);
goto call_handler;
}
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index f6df83a9dfbb..740996d0dc8c 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -99,7 +99,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
static int diag210_get_dev_info(struct ccw_device *cdev)
{
struct ccw_dev_id *dev_id = &cdev->private->dev_id;
- struct senseid *senseid = &cdev->private->senseid;
+ struct senseid *senseid = &cdev->private->dma_area->senseid;
struct diag210 diag_data;
int rc;
@@ -134,8 +134,10 @@ err_failed:
static void snsid_init(struct ccw_device *cdev)
{
cdev->private->flags.esid = 0;
- memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid));
- cdev->private->senseid.cu_type = 0xffff;
+
+ memset(&cdev->private->dma_area->senseid, 0,
+ sizeof(cdev->private->dma_area->senseid));
+ cdev->private->dma_area->senseid.cu_type = 0xffff;
}
/*
@@ -143,16 +145,16 @@ static void snsid_init(struct ccw_device *cdev)
*/
static int snsid_check(struct ccw_device *cdev, void *data)
{
- struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd;
+ struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
int len = sizeof(struct senseid) - scsw->count;
/* Check for incomplete SENSE ID data. */
if (len < SENSE_ID_MIN_LEN)
goto out_restart;
- if (cdev->private->senseid.cu_type == 0xffff)
+ if (cdev->private->dma_area->senseid.cu_type == 0xffff)
goto out_restart;
/* Check for incompatible SENSE ID data. */
- if (cdev->private->senseid.reserved != 0xff)
+ if (cdev->private->dma_area->senseid.reserved != 0xff)
return -EOPNOTSUPP;
/* Check for extended-identification information. */
if (len > SENSE_ID_BASIC_LEN)
@@ -170,7 +172,7 @@ out_restart:
static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
{
struct ccw_dev_id *id = &cdev->private->dev_id;
- struct senseid *senseid = &cdev->private->senseid;
+ struct senseid *senseid = &cdev->private->dma_area->senseid;
int vm = 0;
if (rc && MACHINE_IS_VM) {
@@ -200,7 +202,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
{
struct subchannel *sch = to_subchannel(cdev->dev.parent);
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
CIO_TRACE_EVENT(4, "snsid");
CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
@@ -208,7 +210,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
snsid_init(cdev);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_ID;
- cp->cda = (u32) (addr_t) &cdev->private->senseid;
+ cp->cda = (u32) (addr_t) &cdev->private->dma_area->senseid;
cp->count = sizeof(struct senseid);
cp->flags = CCW_FLAG_SLI;
/* Request setup. */
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 4435ae0b3027..d722458c5928 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -429,8 +429,8 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct)
if (cdev->private->flags.esid == 0)
return NULL;
for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
- if (cdev->private->senseid.ciw[ciw_cnt].ct == ct)
- return cdev->private->senseid.ciw + ciw_cnt;
+ if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
+ return cdev->private->dma_area->senseid.ciw + ciw_cnt;
return NULL;
}
@@ -699,6 +699,23 @@ void ccw_device_get_schid(struct ccw_device *cdev, struct subchannel_id *schid)
}
EXPORT_SYMBOL_GPL(ccw_device_get_schid);
+/*
+ * Allocate zeroed dma coherent 31 bit addressable memory using
+ * the subchannel's dma pool. The maximal supported allocation
+ * size is PAGE_SIZE.
+ */
+void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
+{
+ return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_zalloc);
+
+void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
+{
+ cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_free);
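
A sketch of how a CCW device driver might consume these exported helpers (hypothetical caller, not part of this patch; the buffer is 31-bit addressable since the per-device pool inherits the subchannel's DMA mask):

	/* Hypothetical example: a two-CCW channel-program buffer. */
	static int example_build_cp(struct ccw_device *cdev)
	{
		struct ccw1 *cp = ccw_device_dma_zalloc(cdev, 2 * sizeof(*cp));

		if (!cp)
			return -ENOMEM;
		/* ... fill in the CCWs and start the channel program ... */
		ccw_device_dma_free(cdev, cp, 2 * sizeof(*cp));
		return 0;
	}
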
+
EXPORT_SYMBOL(ccw_device_set_options_mask);
EXPORT_SYMBOL(ccw_device_set_options);
EXPORT_SYMBOL(ccw_device_clear_options);
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index d30a3babf176..767a85635a0f 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -57,7 +57,7 @@ out:
static void nop_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
cp->cmd_code = CCW_CMD_NOOP;
cp->cda = 0;
@@ -134,9 +134,9 @@ err:
static void spid_build_cp(struct ccw_device *cdev, u8 fn)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
- struct pgid *pgid = &cdev->private->pgid[i];
+ struct pgid *pgid = &cdev->private->dma_area->pgid[i];
pgid->inf.fc = fn;
cp->cmd_code = CCW_CMD_SET_PGID;
@@ -300,7 +300,7 @@ static int pgid_cmp(struct pgid *p1, struct pgid *p2)
static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
int *mismatch, u8 *reserved, u8 *reset)
{
- struct pgid *pgid = &cdev->private->pgid[0];
+ struct pgid *pgid = &cdev->private->dma_area->pgid[0];
struct pgid *first = NULL;
int lpm;
int i;
@@ -342,7 +342,7 @@ static u8 pgid_to_donepm(struct ccw_device *cdev)
lpm = 0x80 >> i;
if ((cdev->private->pgid_valid_mask & lpm) == 0)
continue;
- pgid = &cdev->private->pgid[i];
+ pgid = &cdev->private->dma_area->pgid[i];
if (sch->opm & lpm) {
if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
continue;
@@ -368,7 +368,8 @@ static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid)
int i;
for (i = 0; i < 8; i++)
- memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid));
+ memcpy(&cdev->private->dma_area->pgid[i], pgid,
+ sizeof(struct pgid));
}
/*
@@ -435,12 +436,12 @@ out:
static void snid_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_PGID;
- cp->cda = (u32) (addr_t) &cdev->private->pgid[i];
+ cp->cda = (u32) (addr_t) &cdev->private->dma_area->pgid[i];
cp->count = sizeof(struct pgid);
cp->flags = CCW_FLAG_SLI;
req->cp = cp;
@@ -516,7 +517,8 @@ static void verify_start(struct ccw_device *cdev)
sch->lpm = sch->schib.pmcw.pam;
/* Initialize PGID data. */
- memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid));
+ memset(cdev->private->dma_area->pgid, 0,
+ sizeof(cdev->private->dma_area->pgid));
cdev->private->pgid_valid_mask = 0;
cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
cdev->private->path_notoper_mask = 0;
@@ -626,7 +628,7 @@ struct stlck_data {
static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
cp[0].cmd_code = CCW_CMD_STLCK;
cp[0].cda = (u32) (addr_t) buf1;
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 7d5c7892b2c4..0bd8f2642732 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -79,15 +79,15 @@ ccw_device_accumulate_ecw(struct ccw_device *cdev, struct irb *irb)
* are conditions that have to be met for the extended control
* bit to have meaning. Sick.
*/
- cdev->private->irb.scsw.cmd.ectl = 0;
+ cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
!(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
- cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
+ cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
/* Check if extended control word is valid. */
- if (!cdev->private->irb.scsw.cmd.ectl)
+ if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
return;
/* Copy concurrent sense / model dependent information. */
- memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw));
+ memcpy(&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof(irb->ecw));
}
/*
@@ -118,7 +118,7 @@ ccw_device_accumulate_esw(struct ccw_device *cdev, struct irb *irb)
if (!ccw_device_accumulate_esw_valid(irb))
return;
- cdev_irb = &cdev->private->irb;
+ cdev_irb = &cdev->private->dma_area->irb;
/* Copy last path used mask. */
cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum;
@@ -210,7 +210,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
ccw_device_path_notoper(cdev);
/* No irb accumulation for transport mode irbs. */
if (scsw_is_tm(&irb->scsw)) {
- memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+ memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
return;
}
/*
@@ -219,7 +219,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
if (!scsw_is_solicited(&irb->scsw))
return;
- cdev_irb = &cdev->private->irb;
+ cdev_irb = &cdev->private->dma_area->irb;
/*
* If the clear function had been performed, all formerly pending
@@ -227,7 +227,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
* intermediate accumulated status to the device driver.
*/
if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
/* Copy bits which are valid only for the start function. */
if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) {
@@ -329,9 +329,9 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb)
/*
* We have ending status but no sense information. Do a basic sense.
*/
- sense_ccw = &to_io_private(sch)->sense_ccw;
+ sense_ccw = &to_io_private(sch)->dma_area->sense_ccw;
sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE;
- sense_ccw->cda = (__u32) __pa(cdev->private->irb.ecw);
+ sense_ccw->cda = (__u32) __pa(cdev->private->dma_area->irb.ecw);
sense_ccw->count = SENSE_MAX_COUNT;
sense_ccw->flags = CCW_FLAG_SLI;
@@ -364,7 +364,7 @@ ccw_device_accumulate_basic_sense(struct ccw_device *cdev, struct irb *irb)
if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) &&
(irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) {
- cdev->private->irb.esw.esw0.erw.cons = 1;
+ cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
}
/* Check if path verification is required. */
@@ -386,7 +386,7 @@ ccw_device_accumulate_and_sense(struct ccw_device *cdev, struct irb *irb)
/* Check for basic sense. */
if (cdev->private->flags.dosense &&
!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) {
- cdev->private->irb.esw.esw0.erw.cons = 1;
+ cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
return 0;
}
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 90e4e3a7841b..c03b4a19974e 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -9,15 +9,20 @@
#include "css.h"
#include "orb.h"
+struct io_subchannel_dma_area {
+ struct ccw1 sense_ccw; /* static ccw for sense command */
+};
+
struct io_subchannel_private {
union orb orb; /* operation request block */
- struct ccw1 sense_ccw; /* static ccw for sense command */
struct ccw_device *cdev;/* pointer to the child ccw device */
struct {
unsigned int suspend:1; /* allow suspend */
unsigned int prefetch:1;/* deny prefetch */
unsigned int inter:1; /* suppress intermediate interrupts */
} __packed options;
+ struct io_subchannel_dma_area *dma_area;
+ dma_addr_t dma_area_dma;
} __aligned(8);
#define to_io_private(n) ((struct io_subchannel_private *) \
@@ -115,6 +120,13 @@ enum cdev_todo {
#define FAKE_CMD_IRB 1
#define FAKE_TM_IRB 2
+struct ccw_device_dma_area {
+ struct senseid senseid; /* SenseID info */
+ struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
+ struct irb irb; /* device status */
+ struct pgid pgid[8]; /* path group IDs per chpid*/
+};
+
struct ccw_device_private {
struct ccw_device *cdev;
struct subchannel *sch;
@@ -156,11 +168,7 @@ struct ccw_device_private {
} __attribute__((packed)) flags;
unsigned long intparm; /* user interruption parameter */
struct qdio_irq *qdio_data;
- struct irb irb; /* device status */
int async_kill_io_rc;
- struct senseid senseid; /* SenseID info */
- struct pgid pgid[8]; /* path group IDs per chpid*/
- struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
struct work_struct todo_work;
enum cdev_todo todo;
wait_queue_head_t wait_q;
@@ -169,6 +177,8 @@ struct ccw_device_private {
struct list_head cmb_list; /* list of measured devices */
u64 cmb_start_time; /* clock value of cmb reset */
void *cmb_wait; /* deferred cmb enable/disable */
+ struct gen_pool *dma_pool;
+ struct ccw_device_dma_area *dma_area;
enum interruption_class int_class;
};
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 7b7620de2acd..730c4e68094b 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -736,6 +736,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start)
switch (state) {
case SLSB_P_OUTPUT_EMPTY:
+ case SLSB_P_OUTPUT_PENDING:
/* the adapter got it */
DBF_DEV_EVENT(DBF_INFO, q->irq_ptr,
"out empty:%1d %02x", q->nr, count);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 99d7d2566a3a..d4101cecdc8d 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -150,6 +150,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
return -ENOMEM;
}
irq_ptr_qs[i] = q;
+ INIT_LIST_HEAD(&q->entry);
}
return 0;
}
@@ -178,6 +179,7 @@ static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr,
q->mask = 1 << (31 - i);
q->nr = i;
q->handler = handler;
+ INIT_LIST_HEAD(&q->entry);
}
static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,
diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c
index 28d59ac2204c..93ee067c10ca 100644
--- a/drivers/s390/cio/qdio_thinint.c
+++ b/drivers/s390/cio/qdio_thinint.c
@@ -79,7 +79,6 @@ void tiqdio_add_input_queues(struct qdio_irq *irq_ptr)
mutex_lock(&tiq_list_lock);
list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list);
mutex_unlock(&tiq_list_lock);
- xchg(irq_ptr->dsci, 1 << 7);
}
void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
@@ -87,14 +86,14 @@ void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
struct qdio_q *q;
q = irq_ptr->input_qs[0];
- /* if establish triggered an error */
- if (!q || !q->entry.prev || !q->entry.next)
+ if (!q)
return;
mutex_lock(&tiq_list_lock);
list_del_rcu(&q->entry);
mutex_unlock(&tiq_list_lock);
synchronize_rcu();
+ INIT_LIST_HEAD(&q->entry);
}
static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr)
@@ -178,6 +177,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq)
/**
* tiqdio_thinint_handler - thin interrupt handler for qdio
* @airq: pointer to adapter interrupt descriptor
+ * @floating: flag to recognize floating vs. directed interrupts (unused)
*/
static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
{
diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 0e79799e9a71..1d4c893ead23 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -16,12 +16,6 @@
#include "vfio_ccw_cp.h"
-/*
- * Max length for ccw chain.
- * XXX: Limit to 256, need to check more?
- */
-#define CCWCHAIN_LEN_MAX 256
-
struct pfn_array {
/* Starting guest physical I/O address. */
unsigned long pa_iova;
@@ -33,11 +27,6 @@ struct pfn_array {
int pa_nr;
};
-struct pfn_array_table {
- struct pfn_array *pat_pa;
- int pat_nr;
-};
-
struct ccwchain {
struct list_head next;
struct ccw1 *ch_ccw;
@@ -46,35 +35,29 @@ struct ccwchain {
/* Count of the valid ccws in chain. */
int ch_len;
/* Pinned PAGEs for the original data. */
- struct pfn_array_table *ch_pat;
+ struct pfn_array *ch_pa;
};
/*
- * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory
+ * pfn_array_alloc() - alloc memory for PFNs
* @pa: pfn_array on which to perform the operation
- * @mdev: the mediated device to perform pin/unpin operations
* @iova: target guest physical address
* @len: number of bytes that should be pinned from @iova
*
- * Attempt to allocate memory for PFNs, and pin user pages in memory.
+ * Attempt to allocate memory for PFNs.
*
* Usage of pfn_array:
* We expect (pa_nr == 0) and (pa_iova_pfn == NULL); all fields in
* this structure will be filled in by this function.
*
* Returns:
- * Number of pages pinned on success.
- * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially,
- * returns -EINVAL.
- * If no pages were pinned, returns -errno.
+ * 0 if PFNs are allocated
+ * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
+ * -ENOMEM if alloc failed
*/
-static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
- u64 iova, unsigned int len)
+static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
{
- int i, ret = 0;
-
- if (!len)
- return 0;
+ int i;
if (pa->pa_nr || pa->pa_iova_pfn)
return -EINVAL;
@@ -94,8 +77,27 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
- for (i = 1; i < pa->pa_nr; i++)
+ pa->pa_pfn[0] = -1ULL;
+ for (i = 1; i < pa->pa_nr; i++) {
pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
+ pa->pa_pfn[i] = -1ULL;
+ }
+
+ return 0;
+}
+
+/*
+ * pfn_array_pin() - Pin user pages in memory
+ * @pa: pfn_array on which to perform the operation
+ * @mdev: the mediated device to perform pin operations
+ *
+ * Returns number of pages pinned upon success.
+ * If the pin request partially succeeds, or fails completely,
+ * all pages are left unpinned and a negative error value is returned.
+ */
+static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+{
+ int ret = 0;
ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
@@ -112,8 +114,6 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
err_out:
pa->pa_nr = 0;
- kfree(pa->pa_iova_pfn);
- pa->pa_iova_pfn = NULL;
return ret;
}
@@ -121,60 +121,30 @@ err_out:
/* Unpin the pages before releasing the memory. */
static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
{
- vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+ /* Only unpin if any pages were pinned to begin with */
+ if (pa->pa_nr)
+ vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
pa->pa_nr = 0;
kfree(pa->pa_iova_pfn);
}
-static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
-{
- pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
- if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
- pat->pat_nr = 0;
- return -ENOMEM;
- }
-
- pat->pat_nr = nr;
-
- return 0;
-}
-
-static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
- struct device *mdev)
+static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova)
{
- int i;
-
- for (i = 0; i < pat->pat_nr; i++)
- pfn_array_unpin_free(pat->pat_pa + i, mdev);
-
- if (pat->pat_nr) {
- kfree(pat->pat_pa);
- pat->pat_pa = NULL;
- pat->pat_nr = 0;
- }
-}
-
-static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
- unsigned long iova)
-{
- struct pfn_array *pa = pat->pat_pa;
unsigned long iova_pfn = iova >> PAGE_SHIFT;
- int i, j;
+ int i;
- for (i = 0; i < pat->pat_nr; i++, pa++)
- for (j = 0; j < pa->pa_nr; j++)
- if (pa->pa_iova_pfn[j] == iova_pfn)
- return true;
+ for (i = 0; i < pa->pa_nr; i++)
+ if (pa->pa_iova_pfn[i] == iova_pfn)
+ return true;
return false;
}
-/* Create the list idal words for a pfn_array_table. */
-static inline void pfn_array_table_idal_create_words(
- struct pfn_array_table *pat,
+/* Create the list of IDAL words for a pfn_array. */
+static inline void pfn_array_idal_create_words(
+ struct pfn_array *pa,
unsigned long *idaws)
{
- struct pfn_array *pa;
- int i, j, k;
+ int i;
/*
* Idal words (except the first one) rely on the memory being 4k
@@ -183,19 +153,36 @@ static inline void pfn_array_table_idal_create_words(
* there will be no problem here to simply use the phys to create an
* idaw.
*/
- k = 0;
- for (i = 0; i < pat->pat_nr; i++) {
- pa = pat->pat_pa + i;
- for (j = 0; j < pa->pa_nr; j++) {
- idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
- if (k == 0)
- idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
- k++;
+
+ for (i = 0; i < pa->pa_nr; i++)
+ idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT;
+
+ /* Adjust the first IDAW, since it may not start on a page boundary */
+ idaws[0] += pa->pa_iova & (PAGE_SIZE - 1);
+}
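
A worked example of the offset handling above, with invented values: a transfer of 0x1800 bytes starting at IOVA 0x10a00 touches three 4K pages, so three IDAWs are created and only the first carries the intra-page offset:

	/*
	 * Hypothetical result (host PFNs 0x5432..0x5434 after pinning):
	 *   idaws[0] = 0x5432a00  (first page plus the 0xa00 offset)
	 *   idaws[1] = 0x5433000  (4K-aligned)
	 *   idaws[2] = 0x5434000  (4K-aligned)
	 */
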
+
+static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
+{
+ struct ccw0 ccw0;
+ struct ccw1 *pccw1 = source;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ ccw0 = *(struct ccw0 *)pccw1;
+ if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
+ pccw1->cmd_code = CCW_CMD_TIC;
+ pccw1->flags = 0;
+ pccw1->count = 0;
+ } else {
+ pccw1->cmd_code = ccw0.cmd_code;
+ pccw1->flags = ccw0.flags;
+ pccw1->count = ccw0.count;
}
+ pccw1->cda = ccw0.cda;
+ pccw1++;
}
}
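
For reference, both CCW formats occupy one doubleword; the conversion above just moves each field into its format-1 position (layout abbreviated, per the Principles of Operation):

	/*
	 * Format-0: cmd(8) | data address(24) | flags(8) | unused | count(16)
	 * Format-1: cmd(8) | flags(8) | count(16) | 0 | data address(31)
	 * TICs are normalized separately, since their flags and count
	 * fields are ignored by the channel subsystem.
	 */
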
-
/*
* Within the domain (@mdev), copy @n bytes from a guest physical
* address (@iova) to a host physical address (@to).
@@ -209,9 +196,15 @@ static long copy_from_iova(struct device *mdev,
int i, ret;
unsigned long l, m;
- ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
- if (ret <= 0)
+ ret = pfn_array_alloc(&pa, iova, n);
+ if (ret < 0)
+ return ret;
+
+ ret = pfn_array_pin(&pa, mdev);
+ if (ret < 0) {
+ pfn_array_unpin_free(&pa, mdev);
return ret;
+ }
l = n;
for (i = 0; i < pa.pa_nr; i++) {
@@ -235,55 +228,60 @@ static long copy_from_iova(struct device *mdev,
return l;
}
-static long copy_ccw_from_iova(struct channel_program *cp,
- struct ccw1 *to, u64 iova,
- unsigned long len)
-{
- struct ccw0 ccw0;
- struct ccw1 *pccw1;
- int ret;
- int i;
-
- ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
- if (ret)
- return ret;
-
- if (!cp->orb.cmd.fmt) {
- pccw1 = to;
- for (i = 0; i < len; i++) {
- ccw0 = *(struct ccw0 *)pccw1;
- if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
- pccw1->cmd_code = CCW_CMD_TIC;
- pccw1->flags = 0;
- pccw1->count = 0;
- } else {
- pccw1->cmd_code = ccw0.cmd_code;
- pccw1->flags = ccw0.flags;
- pccw1->count = ccw0.count;
- }
- pccw1->cda = ccw0.cda;
- pccw1++;
- }
- }
-
- return ret;
-}
-
/*
* Helpers to operate on a ccwchain.
*/
-#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
+#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
+#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
+#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
-
+#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
/*
+ * ccw_does_data_transfer()
+ *
+ * Determine whether a CCW will move any data, such that the guest pages
+ * would need to be pinned before performing the I/O.
+ *
+ * Returns 1 if yes, 0 if no.
+ */
+static inline int ccw_does_data_transfer(struct ccw1 *ccw)
+{
+ /* If the count field is zero, then no data will be transferred */
+ if (ccw->count == 0)
+ return 0;
+
+ /* If the command is a NOP, then no data will be transferred */
+ if (ccw_is_noop(ccw))
+ return 0;
+
+ /* If the skip flag is off, then data will be transferred */
+ if (!ccw_is_skip(ccw))
+ return 1;
+
+ /*
+ * If the skip flag is on, it is only meaningful if the command
+ * code is a read, read backward, sense, or sense ID. In those
+ * cases, no data will be transferred.
+ */
+ if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
+ return 0;
+
+ if (ccw_is_sense(ccw))
+ return 0;
+
+ /* The skip flag is on, but it is ignored for this command code. */
+ return 1;
+}
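
For illustration, two hypothetical CCWs and the helper's verdict on each (invented values, not part of this patch):

	struct ccw1 read_skip = {
		.cmd_code = 0x02, /* READ */
		.flags = CCW_FLAG_SKIP,
		.count = 0x1000,
	};
	struct ccw1 write_skip = {
		.cmd_code = 0x01, /* WRITE */
		.flags = CCW_FLAG_SKIP,
		.count = 0x1000,
	};

	ccw_does_data_transfer(&read_skip);  /* 0: skip honored for reads */
	ccw_does_data_transfer(&write_skip); /* 1: skip ignored for writes */
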
+
+/*
* is_cpa_within_range()
*
* @cpa: channel program address being questioned
@@ -319,7 +317,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
/* Make ccw address aligned to 8. */
size = ((sizeof(*chain) + 7L) & -8L) +
sizeof(*chain->ch_ccw) * len +
- sizeof(*chain->ch_pat) * len;
+ sizeof(*chain->ch_pa) * len;
chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
if (!chain)
return NULL;
@@ -328,7 +326,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
chain->ch_ccw = (struct ccw1 *)data;
data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
- chain->ch_pat = (struct pfn_array_table *)data;
+ chain->ch_pa = (struct pfn_array *)data;
chain->ch_len = len;
@@ -348,31 +346,12 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
struct ccw1 *ccw = chain->ch_ccw + idx;
- if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
- return;
- if (!ccw->count)
+ if (ccw_is_tic(ccw))
return;
kfree((void *)(u64)ccw->cda);
}
-/* Unpin the pages then free the memory resources. */
-static void cp_unpin_free(struct channel_program *cp)
-{
- struct ccwchain *chain, *temp;
- int i;
-
- cp->initialized = false;
- list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
- for (i = 0; i < chain->ch_len; i++) {
- pfn_array_table_unpin_free(chain->ch_pat + i,
- cp->mdev);
- ccwchain_cda_free(chain, i);
- }
- ccwchain_free(chain);
- }
-}
-
/**
* ccwchain_calc_length - calculate the length of the ccw chain.
* @iova: guest physical address of the target ccw chain
@@ -388,25 +367,9 @@ static void cp_unpin_free(struct channel_program *cp)
*/
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
- struct ccw1 *ccw, *p;
- int cnt;
-
- /*
- * Copy current chain from guest to host kernel.
- * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
- * So copying 2K is enough (safe).
- */
- p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
- if (!ccw)
- return -ENOMEM;
-
- cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
- if (cnt) {
- kfree(ccw);
- return cnt;
- }
+ struct ccw1 *ccw = cp->guest_cp;
+ int cnt = 0;
- cnt = 0;
do {
cnt++;
@@ -415,10 +378,8 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
* orb specified one of the unsupported formats, we defer
* checking for IDAWs in unsupported formats to here.
*/
- if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
- kfree(p);
+ if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
return -EOPNOTSUPP;
- }
/*
* We want to keep counting if the current CCW has the
@@ -437,7 +398,6 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
if (cnt == CCWCHAIN_LEN_MAX + 1)
cnt = -EINVAL;
- kfree(p);
return cnt;
}
@@ -458,17 +418,23 @@ static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
static int ccwchain_loop_tic(struct ccwchain *chain,
struct channel_program *cp);
-static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
+static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
struct ccwchain *chain;
- int len, ret;
+ int len;
- /* May transfer to an existing chain. */
- if (tic_target_chain_exists(tic, cp))
- return 0;
+ /* Copy 2K (the most we support today) of possible CCWs */
+ len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
+ CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
+ if (len)
+ return len;
- /* Get chain length. */
- len = ccwchain_calc_length(tic->cda, cp);
+ /* Convert any Format-0 CCWs to Format-1 */
+ if (!cp->orb.cmd.fmt)
+ convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
+
+ /* Count the CCWs in the current chain */
+ len = ccwchain_calc_length(cda, cp);
if (len < 0)
return len;
@@ -476,14 +442,10 @@ static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
chain = ccwchain_alloc(cp, len);
if (!chain)
return -ENOMEM;
- chain->ch_iova = tic->cda;
+ chain->ch_iova = cda;
- /* Copy the new chain from user. */
- ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
- if (ret) {
- ccwchain_free(chain);
- return ret;
- }
+ /* Copy the actual CCWs into the new chain */
+ memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
/* Loop for tics on this new chain. */
return ccwchain_loop_tic(chain, cp);
@@ -501,7 +463,12 @@ static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
if (!ccw_is_tic(tic))
continue;
- ret = ccwchain_handle_tic(tic, cp);
+ /* May transfer to an existing chain. */
+ if (tic_target_chain_exists(tic, cp))
+ continue;
+
+ /* Build a ccwchain for the next segment */
+ ret = ccwchain_handle_ccw(tic->cda, cp);
if (ret)
return ret;
}
@@ -534,115 +501,90 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
struct channel_program *cp)
{
struct ccw1 *ccw;
- struct pfn_array_table *pat;
+ struct pfn_array *pa;
+ u64 iova;
unsigned long *idaws;
int ret;
+ int bytes = 1;
+ int idaw_nr, idal_len;
+ int i;
ccw = chain->ch_ccw + idx;
- if (!ccw->count) {
- /*
- * We just want the translation result of any direct ccw
- * to be an IDA ccw, so let's add the IDA flag for it.
- * Although the flag will be ignored by firmware.
- */
- ccw->flags |= CCW_FLAG_IDA;
- return 0;
- }
-
- /*
- * Pin data page(s) in memory.
- * The number of pages actually is the count of the idaws which will be
- * needed when translating a direct ccw to a idal ccw.
- */
- pat = chain->ch_pat + idx;
- ret = pfn_array_table_init(pat, 1);
- if (ret)
- goto out_init;
-
- ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
- if (ret < 0)
- goto out_unpin;
+ if (ccw->count)
+ bytes = ccw->count;
- /* Translate this direct ccw to a idal ccw. */
- idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
- if (!idaws) {
- ret = -ENOMEM;
- goto out_unpin;
+ /* Calculate size of IDAL */
+ if (ccw_is_idal(ccw)) {
+ /* Read the first IDAW to see if it's 4K-aligned or not; */
+ /* all subsequent IDAWs will be 4K-aligned. */
+ ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
+ if (ret)
+ return ret;
+ } else {
+ iova = ccw->cda;
}
- ccw->cda = (__u32) virt_to_phys(idaws);
- ccw->flags |= CCW_FLAG_IDA;
-
- pfn_array_table_idal_create_words(pat, idaws);
-
- return 0;
-
-out_unpin:
- pfn_array_table_unpin_free(pat, cp->mdev);
-out_init:
- ccw->cda = 0;
- return ret;
-}
-
-static int ccwchain_fetch_idal(struct ccwchain *chain,
- int idx,
- struct channel_program *cp)
-{
- struct ccw1 *ccw;
- struct pfn_array_table *pat;
- unsigned long *idaws;
- u64 idaw_iova;
- unsigned int idaw_nr, idaw_len;
- int i, ret;
-
- ccw = chain->ch_ccw + idx;
-
- if (!ccw->count)
- return 0;
-
- /* Calculate size of idaws. */
- ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
- if (ret)
- return ret;
- idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
- idaw_len = idaw_nr * sizeof(*idaws);
-
- /* Pin data page(s) in memory. */
- pat = chain->ch_pat + idx;
- ret = pfn_array_table_init(pat, idaw_nr);
- if (ret)
- goto out_init;
+ idaw_nr = idal_nr_words((void *)iova, bytes);
+ idal_len = idaw_nr * sizeof(*idaws);
- /* Translate idal ccw to use new allocated idaws. */
- idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
+ /* Allocate an IDAL from host storage */
+ idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
if (!idaws) {
ret = -ENOMEM;
- goto out_unpin;
+ goto out_init;
}
- ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
- if (ret)
+ /*
+ * Allocate an array of PFNs for pages to pin/translate.
+ * The number of pages is actually the count of the IDAWs
+ * required for the data transfer, since we only support
+ * 4K IDAWs today.
+ */
+ pa = chain->ch_pa + idx;
+ ret = pfn_array_alloc(pa, iova, bytes);
+ if (ret < 0)
goto out_free_idaws;
- ccw->cda = virt_to_phys(idaws);
+ if (ccw_is_idal(ccw)) {
+ /* Copy guest IDAL into host IDAL */
+ ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
+ if (ret)
+ goto out_unpin;
- for (i = 0; i < idaw_nr; i++) {
- idaw_iova = *(idaws + i);
+ /*
+ * Copy guest IDAWs into pfn_array, in case the memory they
+ * occupy is not contiguous.
+ */
+ for (i = 0; i < idaw_nr; i++)
+ pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT;
+ } else {
+ /*
+ * No action is required here; the iova addresses in pfn_array
+ * were initialized sequentially in pfn_array_alloc() beginning
+ * with the contents of ccw->cda.
+ */
+ }
- ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
- idaw_iova, 1);
+ if (ccw_does_data_transfer(ccw)) {
+ ret = pfn_array_pin(pa, cp->mdev);
if (ret < 0)
- goto out_free_idaws;
+ goto out_unpin;
+ } else {
+ pa->pa_nr = 0;
}
- pfn_array_table_idal_create_words(pat, idaws);
+ ccw->cda = (__u32) virt_to_phys(idaws);
+ ccw->flags |= CCW_FLAG_IDA;
+
+ /* Populate the IDAL with pinned/translated addresses from pfn */
+ pfn_array_idal_create_words(pa, idaws);
return 0;
+out_unpin:
+ pfn_array_unpin_free(pa, cp->mdev);
out_free_idaws:
kfree(idaws);
-out_unpin:
- pfn_array_table_unpin_free(pat, cp->mdev);
out_init:
ccw->cda = 0;
return ret;
@@ -660,15 +602,9 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
{
struct ccw1 *ccw = chain->ch_ccw + idx;
- if (ccw_is_test(ccw) || ccw_is_noop(ccw))
- return 0;
-
if (ccw_is_tic(ccw))
return ccwchain_fetch_tic(chain, idx, cp);
- if (ccw_is_idal(ccw))
- return ccwchain_fetch_idal(chain, idx, cp);
-
return ccwchain_fetch_direct(chain, idx, cp);
}
@@ -691,9 +627,7 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
*/
int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
{
- u64 iova = orb->cmd.cpa;
- struct ccwchain *chain;
- int len, ret;
+ int ret;
/*
* XXX:
@@ -706,28 +640,11 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
memcpy(&cp->orb, orb, sizeof(*orb));
cp->mdev = mdev;
- /* Get chain length. */
- len = ccwchain_calc_length(iova, cp);
- if (len < 0)
- return len;
-
- /* Alloc mem for the head chain. */
- chain = ccwchain_alloc(cp, len);
- if (!chain)
- return -ENOMEM;
- chain->ch_iova = iova;
-
- /* Copy the head chain from guest. */
- ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
- if (ret) {
- ccwchain_free(chain);
- return ret;
- }
-
- /* Now loop for its TICs. */
- ret = ccwchain_loop_tic(chain, cp);
+ /* Build a ccwchain for the first CCW segment */
+ ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
if (ret)
- cp_unpin_free(cp);
+ cp_free(cp);
+
/* It is safe to force: if the format bit is not set but idals
* are used, ccwchain_calc_length returns an error.
*/
@@ -750,8 +667,20 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
*/
void cp_free(struct channel_program *cp)
{
- if (cp->initialized)
- cp_unpin_free(cp);
+ struct ccwchain *chain, *temp;
+ int i;
+
+ if (!cp->initialized)
+ return;
+
+ cp->initialized = false;
+ list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
+ for (i = 0; i < chain->ch_len; i++) {
+ pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
+ ccwchain_cda_free(chain, i);
+ }
+ ccwchain_free(chain);
+ }
}
/**
@@ -886,7 +815,11 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
*/
list_for_each_entry(chain, &cp->ccwchain_list, next) {
ccw_head = (u32)(u64)chain->ch_ccw;
- if (is_cpa_within_range(cpa, ccw_head, chain->ch_len)) {
+ /*
+ * On successful execution, cpa points just beyond the end
+ * of the chain.
+ */
+ if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
/*
* (cpa - ccw_head) is the offset value of the host
* physical ccw to its chain head.
@@ -919,8 +852,7 @@ bool cp_iova_pinned(struct channel_program *cp, u64 iova)
list_for_each_entry(chain, &cp->ccwchain_list, next) {
for (i = 0; i < chain->ch_len; i++)
- if (pfn_array_table_iova_pinned(chain->ch_pat + i,
- iova))
+ if (pfn_array_iova_pinned(chain->ch_pa + i, iova))
return true;
}
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
index 3c20cd208da5..7cdc38049033 100644
--- a/drivers/s390/cio/vfio_ccw_cp.h
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -16,6 +16,12 @@
#include "orb.h"
+/*
+ * Max length for ccw chain.
+ * XXX: Limit to 256, need to check more?
+ */
+#define CCWCHAIN_LEN_MAX 256
+
/**
* struct channel_program - manage information for channel program
* @ccwchain_list: list head of ccwchains
@@ -32,6 +38,7 @@ struct channel_program {
union orb orb;
struct device *mdev;
bool initialized;
+ struct ccw1 *guest_cp;
};
extern int cp_init(struct channel_program *cp, struct device *mdev,
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 9125f7f4e64c..2b90a5ecaeb9 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -95,11 +95,11 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
mutex_unlock(&private->io_mutex);
- if (private->io_trigger)
- eventfd_signal(private->io_trigger, 1);
-
if (private->mdev && is_final)
private->state = VFIO_CCW_STATE_IDLE;
+
+ if (private->io_trigger)
+ eventfd_signal(private->io_trigger, 1);
}
/*
@@ -129,6 +129,11 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
if (!private)
return -ENOMEM;
+ private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
+ GFP_KERNEL);
+ if (!private->cp.guest_cp)
+ goto out_free;
+
private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
GFP_KERNEL | GFP_DMA);
if (!private->io_region)
@@ -169,6 +174,7 @@ out_free:
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
if (private->io_region)
kmem_cache_free(vfio_ccw_io_region, private->io_region);
+ kfree(private->cp.guest_cp);
kfree(private);
return ret;
}
@@ -185,6 +191,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
kmem_cache_free(vfio_ccw_io_region, private->io_region);
+ kfree(private->cp.guest_cp);
kfree(private);
return 0;
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 45eb0c14b880..7f418d2d8cdf 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -690,7 +690,7 @@ int pkey_clr2protkey(u32 keytype,
*/
if (!cpacf_test_func(&pckmo_functions, fc)) {
DEBUG_ERR("%s pckmo functions not available\n", __func__);
- return -EOPNOTSUPP;
+ return -ENODEV;
}
/* prepare param block */
@@ -1695,15 +1695,15 @@ static int __init pkey_init(void)
* are able to work with protected keys.
*/
if (!cpacf_query(CPACF_PCKMO, &pckmo_functions))
- return -EOPNOTSUPP;
+ return -ENODEV;
/* check for kmc instructions available */
if (!cpacf_query(CPACF_KMC, &kmc_functions))
- return -EOPNOTSUPP;
+ return -ENODEV;
if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256))
- return -EOPNOTSUPP;
+ return -ENODEV;
pkey_debug_init();
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index e9824c35c34f..003662aa8060 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -5,6 +5,7 @@
* Copyright IBM Corp. 2018
*
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ * Pierre Morel <pmorel@linux.ibm.com>
*/
#include <linux/module.h>
@@ -40,14 +41,45 @@ static struct ap_device_id ap_queue_ids[] = {
MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
+/**
+ * vfio_ap_queue_dev_probe:
+ *
+ * Allocate a vfio_ap_queue structure and associate it
+ * with the device as driver_data.
+ */
static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
{
+ struct vfio_ap_queue *q;
+
+ q = kzalloc(sizeof(*q), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+ dev_set_drvdata(&apdev->device, q);
+ q->apqn = to_ap_queue(&apdev->device)->qid;
+ q->saved_isc = VFIO_AP_ISC_INVALID;
return 0;
}
+/**
+ * vfio_ap_queue_dev_remove:
+ *
+ * Takes the matrix lock to avoid actions on this device while removing.
+ * Frees the associated vfio_ap_queue structure.
+ */
static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
{
- /* Nothing to do yet */
+ struct vfio_ap_queue *q;
+ int apid, apqi;
+
+ mutex_lock(&matrix_dev->lock);
+ q = dev_get_drvdata(&apdev->device);
+ dev_set_drvdata(&apdev->device, NULL);
+ apid = AP_QID_CARD(q->apqn);
+ apqi = AP_QID_QUEUE(q->apqn);
+ vfio_ap_mdev_reset_queue(apid, apqi, 1);
+ vfio_ap_irq_disable(q);
+ kfree(q);
+ mutex_unlock(&matrix_dev->lock);
}
static void vfio_ap_matrix_dev_release(struct device *dev)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 900b9cf20ca5..2c9fb1423a39 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -24,6 +24,296 @@
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
+static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
+
+static int match_apqn(struct device *dev, void *data)
+{
+ struct vfio_ap_queue *q = dev_get_drvdata(dev);
+
+ return (q->apqn == *(int *)(data)) ? 1 : 0;
+}
+
+/**
+ * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
+ * @matrix_mdev: the associated mediated matrix
+ * @apqn: The queue APQN
+ *
+ * Retrieve a queue with a specific APQN from the list of the
+ * devices of the vfio_ap_drv.
+ * Verify that the APID and the APQI are set in the matrix.
+ *
+ * Returns the pointer to the associated vfio_ap_queue
+ */
+static struct vfio_ap_queue *vfio_ap_get_queue(
+ struct ap_matrix_mdev *matrix_mdev,
+ int apqn)
+{
+ struct vfio_ap_queue *q;
+ struct device *dev;
+
+ if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
+ return NULL;
+ if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
+ return NULL;
+
+ dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
+ &apqn, match_apqn);
+ if (!dev)
+ return NULL;
+ q = dev_get_drvdata(dev);
+ q->matrix_mdev = matrix_mdev;
+ put_device(dev);
+
+ return q;
+}
+
+/**
+ * vfio_ap_wait_for_irqclear
+ * @apqn: The AP Queue number
+ *
+ * Checks the IRQ bit for the status of this APQN using ap_tapq.
+ * Returns once ap_tapq succeeds and the bit is clear, or once ap_tapq
+ * fails because the AP is invalid, deconfigured or checkstopped.
+ * Otherwise retries up to 5 times after waiting 20ms.
+ *
+ */
+static void vfio_ap_wait_for_irqclear(int apqn)
+{
+ struct ap_queue_status status;
+ int retry = 5;
+
+ do {
+ status = ap_tapq(apqn, NULL);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ if (!status.irq_enabled)
+ return;
+ /* Fall through */
+ case AP_RESPONSE_BUSY:
+ msleep(20);
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ default:
+ WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
+ status.response_code, apqn);
+ return;
+ }
+ } while (--retry);
+
+ WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
+ __func__, status.response_code, apqn);
+}
+
+/**
+ * vfio_ap_free_aqic_resources
+ * @q: The vfio_ap_queue
+ *
+ * Unregisters the ISC in the GIB when the saved ISC is not invalid.
+ * Unpins the guest's page holding the NIB when it exists.
+ * Resets the saved_pfn and saved_isc to invalid values.
+ * Clears the pointer to the matrix mediated device.
+ *
+ */
+static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
+{
+ if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev)
+ kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
+ if (q->saved_pfn && q->matrix_mdev)
+ vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
+ &q->saved_pfn, 1);
+ q->saved_pfn = 0;
+ q->saved_isc = VFIO_AP_ISC_INVALID;
+ q->matrix_mdev = NULL;
+}
+
+/**
+ * vfio_ap_irq_disable
+ * @q: The vfio_ap_queue
+ *
+ * Uses ap_aqic to disable the interruption. On success (or when a
+ * reset is in progress, or the IRQ-disable command has already
+ * proceeded), calls vfio_ap_wait_for_irqclear() to wait for the IRQ
+ * bit to clear, and calls vfio_ap_free_aqic_resources() to free the
+ * resources associated with the AP interrupt handling.
+ *
+ * In case the AP is busy, or a reset is in progress, retries after
+ * 20ms, up to 5 times.
+ *
+ * Returns immediately if ap_aqic fails because the AP is invalid,
+ * deconfigured or checkstopped.
+ */
+struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
+{
+ struct ap_qirq_ctrl aqic_gisa = {};
+ struct ap_queue_status status;
+ int retries = 5;
+
+ do {
+ status = ap_aqic(q->apqn, aqic_gisa, NULL);
+ switch (status.response_code) {
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ case AP_RESPONSE_NORMAL:
+ vfio_ap_wait_for_irqclear(q->apqn);
+ goto end_free;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ msleep(20);
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ default:
+ /* All cases in default means AP not operational */
+ WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
+ status.response_code);
+ goto end_free;
+ }
+ } while (retries--);
+
+ WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
+ status.response_code);
+end_free:
+ vfio_ap_free_aqic_resources(q);
+ return status;
+}
+
+/**
+ * vfio_ap_irq_enable: Enable interruption for an APQN
+ *
+ * @q: the vfio_ap_queue holding AQIC parameters
+ * @isc: the guest interruption subclass
+ * @nib: the guest address of the notification indicator byte
+ *
+ * Pin the guest page holding the NIB
+ * Register the guest ISC to GIB interface and retrieve the
+ * host ISC to issue the host side PQAP/AQIC
+ *
+ * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case
+ * vfio_pin_pages() fails.
+ *
+ * Otherwise return the ap_queue_status returned by the ap_aqic(),
+ * all retry handling will be done by the guest.
+ */
+static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
+ int isc,
+ unsigned long nib)
+{
+ struct ap_qirq_ctrl aqic_gisa = {};
+ struct ap_queue_status status = {};
+ struct kvm_s390_gisa *gisa;
+ struct kvm *kvm;
+ unsigned long h_nib, g_pfn, h_pfn;
+ int ret;
+
+ g_pfn = nib >> PAGE_SHIFT;
+ ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
+ IOMMU_READ | IOMMU_WRITE, &h_pfn);
+ switch (ret) {
+ case 1:
+ break;
+ default:
+ status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+ return status;
+ }
+
+ kvm = q->matrix_mdev->kvm;
+ gisa = kvm->arch.gisa_int.origin;
+
+ h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
+ aqic_gisa.gisc = isc;
+ aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
+ aqic_gisa.ir = 1;
+ aqic_gisa.gisa = (uint64_t)gisa >> 4;
+
+ status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ /* See if we did clear older IRQ configuration */
+ vfio_ap_free_aqic_resources(q);
+ q->saved_pfn = g_pfn;
+ q->saved_isc = isc;
+ break;
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ /* We could not modify IRQ settings: clear new configuration */
+ vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
+ kvm_s390_gisc_unregister(kvm, isc);
+ break;
+ default:
+ pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
+ status.response_code);
+ vfio_ap_irq_disable(q);
+ break;
+ }
+
+ return status;
+}
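
To make the NIB translation concrete (invented values): a guest NIB at 0x20018 gives g_pfn = 0x20; if vfio_pin_pages() translates that to host pfn 0x5432, the address handed to ap_aqic() is:

	/* Hypothetical values, not part of this patch: */
	h_nib = (0x5432UL << PAGE_SHIFT) | (0x20018 & ~PAGE_MASK);
	/* == 0x5432018 */
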
+
+/**
+ * handle_pqap: PQAP instruction callback
+ *
+ * @vcpu: The vcpu on which we received the PQAP instruction
+ *
+ * Get the general register contents to initialize internal variables.
+ * REG[0]: APQN
+ * REG[1]: IR and ISC
+ * REG[2]: NIB
+ *
+ * Response.status may be set to one of the following response codes:
+ * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
+ * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
+ * - AP_RESPONSE_NORMAL (0) : in case of success
+ * Check vfio_ap_irq_enable() and vfio_ap_irq_disable() for other
+ * possible response codes.
+ * We take the matrix_dev lock to ensure serialization on queues and
+ * mediated device access.
+ *
+ * Returns 0 if we could handle the request inside KVM.
+ * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
+ */
+static int handle_pqap(struct kvm_vcpu *vcpu)
+{
+ uint64_t status;
+ uint16_t apqn;
+ struct vfio_ap_queue *q;
+ struct ap_queue_status qstatus = {
+ .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
+ struct ap_matrix_mdev *matrix_mdev;
+
+ /* If we do not use the AIV facility just go to userland */
+ if (!(vcpu->arch.sie_block->eca & ECA_AIV))
+ return -EOPNOTSUPP;
+
+ apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
+ mutex_lock(&matrix_dev->lock);
+
+ if (!vcpu->kvm->arch.crypto.pqap_hook)
+ goto out_unlock;
+ matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
+ struct ap_matrix_mdev, pqap_hook);
+
+ q = vfio_ap_get_queue(matrix_mdev, apqn);
+ if (!q)
+ goto out_unlock;
+
+ status = vcpu->run->s.regs.gprs[1];
+
+ /* If IR bit(16) is set we enable the interrupt */
+ if ((status >> (63 - 16)) & 0x01)
+ qstatus = vfio_ap_irq_enable(q, status & 0x07,
+ vcpu->run->s.regs.gprs[2]);
+ else
+ qstatus = vfio_ap_irq_disable(q);
+
+out_unlock:
+ memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
+ vcpu->run->s.regs.gprs[1] >>= 32;
+ mutex_unlock(&matrix_dev->lock);
+ return 0;
+}
+
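A short sketch of the GPR decoding in handle_pqap() above. s390 numbers
register bits from the most-significant end (bit 0 is the MSB of the
64-bit GPR), so architectural bit n is tested as conventional bit
(63 - n); the helpers below are hypothetical, not part of the patch:

	/* IR, architectural bit 16 of GPR 1 */
	static inline int pqap_aqic_ir(uint64_t gpr1)
	{
		return (gpr1 >> (63 - 16)) & 0x01;
	}

	/* guest ISC, architectural bits 61-63 of GPR 1 */
	static inline int pqap_aqic_isc(uint64_t gpr1)
	{
		return gpr1 & 0x07;
	}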
static void vfio_ap_matrix_init(struct ap_config_info *info,
struct ap_matrix *matrix)
{
@@ -45,8 +335,11 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
return -ENOMEM;
}
+ matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
mdev_set_drvdata(mdev, matrix_mdev);
+ matrix_mdev->pqap_hook.hook = handle_pqap;
+ matrix_mdev->pqap_hook.owner = THIS_MODULE;
mutex_lock(&matrix_dev->lock);
list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock);
@@ -62,6 +355,7 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
return -EBUSY;
mutex_lock(&matrix_dev->lock);
+ vfio_ap_mdev_reset_queues(mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->lock);
@@ -754,11 +1048,42 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
}
matrix_mdev->kvm = kvm;
+ kvm_get_kvm(kvm);
+ kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
mutex_unlock(&matrix_dev->lock);
return 0;
}
+/*
+ * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
+ *
+ * @nb: The notifier block
+ * @action: Action to be taken
+ * @data: data associated with the request
+ *
+ * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
+ * pinned before). Other requests are ignored.
+ *
+ */
+static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
+
+ if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+ unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
+
+ vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -790,15 +1115,36 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
- unsigned int retry)
+static void vfio_ap_irq_disable_apqn(int apqn)
+{
+ struct device *dev;
+ struct vfio_ap_queue *q;
+
+ dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
+ &apqn, match_apqn);
+ if (dev) {
+ q = dev_get_drvdata(dev);
+ vfio_ap_irq_disable(q);
+ put_device(dev);
+ }
+}
+
+int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+ unsigned int retry)
{
struct ap_queue_status status;
+ int retry2 = 2;
+ int apqn = AP_MKQID(apid, apqi);
do {
- status = ap_zapq(AP_MKQID(apid, apqi));
+ status = ap_zapq(apqn);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
+ while (!status.queue_empty && retry2--) {
+ msleep(20);
+ status = ap_tapq(apqn, NULL);
+ }
+ WARN_ON_ONCE(retry2 <= 0);
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
@@ -832,6 +1178,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
*/
if (ret)
rc = ret;
+ vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi));
}
}
@@ -858,20 +1205,37 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
return ret;
}
- return 0;
+ matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
+ events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+ ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &events, &matrix_mdev->iommu_notifier);
+ if (!ret)
+ return ret;
+
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+ &matrix_mdev->group_notifier);
+ module_put(THIS_MODULE);
+ return ret;
}
static void vfio_ap_mdev_release(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
- if (matrix_mdev->kvm)
+ mutex_lock(&matrix_dev->lock);
+ if (matrix_mdev->kvm) {
kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+ matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
+ vfio_ap_mdev_reset_queues(mdev);
+ kvm_put_kvm(matrix_mdev->kvm);
+ matrix_mdev->kvm = NULL;
+ }
+ mutex_unlock(&matrix_dev->lock);
- vfio_ap_mdev_reset_queues(mdev);
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &matrix_mdev->iommu_notifier);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
- matrix_mdev->kvm = NULL;
module_put(THIS_MODULE);
}
@@ -900,6 +1264,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
{
int ret;
+ mutex_lock(&matrix_dev->lock);
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
ret = vfio_ap_mdev_get_device_info(arg);
@@ -911,6 +1276,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
ret = -EOPNOTSUPP;
break;
}
+ mutex_unlock(&matrix_dev->lock);
return ret;
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 76b7f98e47e9..f46dde56b464 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -4,6 +4,7 @@
*
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
* Halil Pasic <pasic@linux.ibm.com>
+ * Pierre Morel <pmorel@linux.ibm.com>
*
* Copyright IBM Corp. 2018
*/
@@ -16,6 +17,7 @@
#include <linux/mdev.h>
#include <linux/delay.h>
#include <linux/mutex.h>
+#include <linux/kvm_host.h>
#include "ap_bus.h"
@@ -80,10 +82,23 @@ struct ap_matrix_mdev {
struct list_head node;
struct ap_matrix matrix;
struct notifier_block group_notifier;
+ struct notifier_block iommu_notifier;
struct kvm *kvm;
+ struct kvm_s390_module_hook pqap_hook;
+ struct mdev_device *mdev;
};
extern int vfio_ap_mdev_register(void);
extern void vfio_ap_mdev_unregister(void);
+int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+ unsigned int retry);
+struct vfio_ap_queue {
+ struct ap_matrix_mdev *matrix_mdev;
+ unsigned long saved_pfn;
+ int apqn;
+#define VFIO_AP_ISC_INVALID 0xff
+ unsigned char saved_isc;
+};
+struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q);
#endif /* _VFIO_AP_PRIVATE_H_ */
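A note on the new vfio_ap_queue structure: saved_pfn and saved_isc
record exactly what vfio_ap_irq_enable() pinned and registered, so a
later vfio_ap_irq_disable() or the IOMMU unmap notifier can release
those resources. VFIO_AP_ISC_INVALID (0xff) presumably serves as the
"no ISC registered" sentinel, since valid ISCs occupy only 0-7.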
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 0cbcc238ef98..12fe9deb265e 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -567,6 +567,10 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg,
payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
*fcode = payload_hdr->func_val & 0xFFFF;
+ /* enable special processing based on the CPRB's flags special bit */
+ if (msg->cprbx.flags & 0x20)
+ ap_msg->special = 1;
+
return 0;
}
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index 7c5a25ddf832..ced896d1534a 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -7,10 +7,10 @@ config LCS
prompt "Lan Channel Station Interface"
depends on CCW && NETDEVICES && (ETHERNET || FDDI)
help
- Select this option if you want to use LCS networking on IBM System z.
- This device driver supports FDDI (IEEE 802.7) and Ethernet.
- To compile as a module, choose M. The module name is lcs.
- If you do not know what it is, it's safe to choose Y.
+ Select this option if you want to use LCS networking on IBM System z.
+ This device driver supports FDDI (IEEE 802.7) and Ethernet.
+ To compile as a module, choose M. The module name is lcs.
+ If you do not know what it is, it's safe to choose Y.
config CTCM
def_tristate m
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 6a3076881321..1a55e5942d36 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -46,9 +46,15 @@ struct vq_config_block {
#define VIRTIO_CCW_CONFIG_SIZE 0x100
/* same as PCI config space size, should be enough for all drivers */
+struct vcdev_dma_area {
+ unsigned long indicators;
+ unsigned long indicators2;
+ struct vq_config_block config_block;
+ __u8 status;
+};
+
struct virtio_ccw_device {
struct virtio_device vdev;
- __u8 *status;
__u8 config[VIRTIO_CCW_CONFIG_SIZE];
struct ccw_device *cdev;
__u32 curr_io;
@@ -58,17 +64,24 @@ struct virtio_ccw_device {
spinlock_t lock;
struct mutex io_lock; /* Serializes I/O requests */
struct list_head virtqueues;
- unsigned long indicators;
- unsigned long indicators2;
- struct vq_config_block *config_block;
bool is_thinint;
bool going_away;
bool device_lost;
unsigned int config_ready;
void *airq_info;
- u64 dma_mask;
+ struct vcdev_dma_area *dma_area;
};
+static inline unsigned long *indicators(struct virtio_ccw_device *vcdev)
+{
+ return &vcdev->dma_area->indicators;
+}
+
+static inline unsigned long *indicators2(struct virtio_ccw_device *vcdev)
+{
+ return &vcdev->dma_area->indicators2;
+}
+
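The accessors above go with a consolidation visible throughout this
file: every field the device reads or writes through channel programs
(indicators, indicators2, config_block, status) moves out of struct
virtio_ccw_device into a single block obtained from
ccw_device_dma_zalloc(), i.e. memory the channel subsystem can address.
A minimal sketch of the allocation side, mirroring what
virtio_ccw_online() does further down:

	vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev,
						sizeof(*vcdev->dma_area));
	if (!vcdev->dma_area)
		return -ENOMEM;	/* the real code jumps to out_free */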
struct vq_info_block_legacy {
__u64 queue;
__u32 align;
@@ -127,11 +140,17 @@ static int virtio_ccw_use_airq = 1;
struct airq_info {
rwlock_t lock;
- u8 summary_indicator;
+ u8 summary_indicator_idx;
struct airq_struct airq;
struct airq_iv *aiv;
};
static struct airq_info *airq_areas[MAX_AIRQ_AREAS];
+static u8 *summary_indicators;
+
+static inline u8 *get_summary_indicator(struct airq_info *info)
+{
+ return summary_indicators + info->summary_indicator_idx;
+}
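A sketch of the layout get_summary_indicator() assumes: one shared
DMA-allocated byte array (set up in virtio_ccw_init() at the end of
this file), holding one summary-indicator byte per adapter-interrupt
area:

	/*
	 * summary_indicators: | byte 0 | byte 1 | ... | MAX_AIRQ_AREAS-1 |
	 * airq_areas[i]->summary_indicator_idx == i
	 * lsi_ptr of area i  == summary_indicators + i
	 */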
#define CCW_CMD_SET_VQ 0x13
#define CCW_CMD_VDEV_RESET 0x33
@@ -196,7 +215,7 @@ static void virtio_airq_handler(struct airq_struct *airq, bool floating)
break;
vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai));
}
- info->summary_indicator = 0;
+ *(get_summary_indicator(info)) = 0;
smp_wmb();
/* Walk through indicators field, summary indicator not active. */
for (ai = 0;;) {
@@ -208,7 +227,7 @@ static void virtio_airq_handler(struct airq_struct *airq, bool floating)
read_unlock(&info->lock);
}
-static struct airq_info *new_airq_info(void)
+static struct airq_info *new_airq_info(int index)
{
struct airq_info *info;
int rc;
@@ -217,13 +236,15 @@ static struct airq_info *new_airq_info(void)
if (!info)
return NULL;
rwlock_init(&info->lock);
- info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR);
+ info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR
+ | AIRQ_IV_CACHELINE);
if (!info->aiv) {
kfree(info);
return NULL;
}
info->airq.handler = virtio_airq_handler;
- info->airq.lsi_ptr = &info->summary_indicator;
+ info->summary_indicator_idx = index;
+ info->airq.lsi_ptr = get_summary_indicator(info);
info->airq.lsi_mask = 0xff;
info->airq.isc = VIRTIO_AIRQ_ISC;
rc = register_adapter_interrupt(&info->airq);
@@ -245,7 +266,7 @@ static unsigned long get_airq_indicator(struct virtqueue *vqs[], int nvqs,
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
if (!airq_areas[i])
- airq_areas[i] = new_airq_info();
+ airq_areas[i] = new_airq_info(i);
info = airq_areas[i];
if (!info)
return 0;
@@ -326,29 +347,29 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
struct airq_info *airq_info = vcdev->airq_info;
if (vcdev->is_thinint) {
- thinint_area = kzalloc(sizeof(*thinint_area),
- GFP_DMA | GFP_KERNEL);
+ thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*thinint_area));
if (!thinint_area)
return;
thinint_area->summary_indicator =
- (unsigned long) &airq_info->summary_indicator;
+ (unsigned long) get_summary_indicator(airq_info);
thinint_area->isc = VIRTIO_AIRQ_ISC;
ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
ccw->count = sizeof(*thinint_area);
ccw->cda = (__u32)(unsigned long) thinint_area;
} else {
/* payload is the address of the indicators */
- indicatorp = kmalloc(sizeof(&vcdev->indicators),
- GFP_DMA | GFP_KERNEL);
+ indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(indicators(vcdev)));
if (!indicatorp)
return;
*indicatorp = 0;
ccw->cmd_code = CCW_CMD_SET_IND;
- ccw->count = sizeof(&vcdev->indicators);
+ ccw->count = sizeof(indicators(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
}
/* Deregister indicators from host. */
- vcdev->indicators = 0;
+ *indicators(vcdev) = 0;
ccw->flags = 0;
ret = ccw_io_helper(vcdev, ccw,
vcdev->is_thinint ?
@@ -359,8 +380,8 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
"Failed to deregister indicators (%d)\n", ret);
else if (vcdev->is_thinint)
virtio_ccw_drop_indicators(vcdev);
- kfree(indicatorp);
- kfree(thinint_area);
+ ccw_device_dma_free(vcdev->cdev, indicatorp, sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
}
static inline long __do_kvm_notify(struct subchannel_id schid,
@@ -407,15 +428,15 @@ static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev,
{
int ret;
- vcdev->config_block->index = index;
+ vcdev->dma_area->config_block.index = index;
ccw->cmd_code = CCW_CMD_READ_VQ_CONF;
ccw->flags = 0;
ccw->count = sizeof(struct vq_config_block);
- ccw->cda = (__u32)(unsigned long)(vcdev->config_block);
+ ccw->cda = (__u32)(unsigned long)(&vcdev->dma_area->config_block);
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF);
if (ret)
return ret;
- return vcdev->config_block->num ?: -ENOENT;
+ return vcdev->dma_area->config_block.num ?: -ENOENT;
}
static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
@@ -460,7 +481,8 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
ret, index);
vring_del_virtqueue(vq);
- kfree(info->info_block);
+ ccw_device_dma_free(vcdev->cdev, info->info_block,
+ sizeof(*info->info_block));
kfree(info);
}
@@ -470,7 +492,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
struct ccw1 *ccw;
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
@@ -479,7 +501,7 @@ static void virtio_ccw_del_vqs(struct virtio_device *vdev)
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
virtio_ccw_del_vq(vq, ccw);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
@@ -502,8 +524,8 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
err = -ENOMEM;
goto out_err;
}
- info->info_block = kzalloc(sizeof(*info->info_block),
- GFP_DMA | GFP_KERNEL);
+ info->info_block = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*info->info_block));
if (!info->info_block) {
dev_warn(&vcdev->cdev->dev, "no info block\n");
err = -ENOMEM;
@@ -567,7 +589,8 @@ out_err:
if (vq)
vring_del_virtqueue(vq);
if (info) {
- kfree(info->info_block);
+ ccw_device_dma_free(vcdev->cdev, info->info_block,
+ sizeof(*info->info_block));
}
kfree(info);
return ERR_PTR(err);
@@ -581,7 +604,8 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
struct virtio_thinint_area *thinint_area = NULL;
struct airq_info *info;
- thinint_area = kzalloc(sizeof(*thinint_area), GFP_DMA | GFP_KERNEL);
+ thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*thinint_area));
if (!thinint_area) {
ret = -ENOMEM;
goto out;
@@ -596,7 +620,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
}
info = vcdev->airq_info;
thinint_area->summary_indicator =
- (unsigned long) &info->summary_indicator;
+ (unsigned long) get_summary_indicator(info);
thinint_area->isc = VIRTIO_AIRQ_ISC;
ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
ccw->flags = CCW_FLAG_SLI;
@@ -617,7 +641,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev,
virtio_ccw_drop_indicators(vcdev);
}
out:
- kfree(thinint_area);
+ ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
return ret;
}
@@ -633,7 +657,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
int ret, i, queue_idx = 0;
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
@@ -657,10 +681,11 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
* We need a data area under 2G to communicate. Our payload is
* the address of the indicators.
*/
- indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL);
+ indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(indicators(vcdev)));
if (!indicatorp)
goto out;
- *indicatorp = (unsigned long) &vcdev->indicators;
+ *indicatorp = (unsigned long) indicators(vcdev);
if (vcdev->is_thinint) {
ret = virtio_ccw_register_adapter_ind(vcdev, vqs, nvqs, ccw);
if (ret)
@@ -669,32 +694,36 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
}
if (!vcdev->is_thinint) {
/* Register queue indicators with host. */
- vcdev->indicators = 0;
+ *indicators(vcdev) = 0;
ccw->cmd_code = CCW_CMD_SET_IND;
ccw->flags = 0;
- ccw->count = sizeof(&vcdev->indicators);
+ ccw->count = sizeof(indicators(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND);
if (ret)
goto out;
}
/* Register indicators2 with host for config changes */
- *indicatorp = (unsigned long) &vcdev->indicators2;
- vcdev->indicators2 = 0;
+ *indicatorp = (unsigned long) indicators2(vcdev);
+ *indicators2(vcdev) = 0;
ccw->cmd_code = CCW_CMD_SET_CONF_IND;
ccw->flags = 0;
- ccw->count = sizeof(&vcdev->indicators2);
+ ccw->count = sizeof(indicators2(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND);
if (ret)
goto out;
- kfree(indicatorp);
- kfree(ccw);
+ if (indicatorp)
+ ccw_device_dma_free(vcdev->cdev, indicatorp,
+ sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return 0;
out:
- kfree(indicatorp);
- kfree(ccw);
+ if (indicatorp)
+ ccw_device_dma_free(vcdev->cdev, indicatorp,
+ sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
virtio_ccw_del_vqs(vdev);
return ret;
}
@@ -704,12 +733,12 @@ static void virtio_ccw_reset(struct virtio_device *vdev)
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
/* Zero status bits. */
- *vcdev->status = 0;
+ vcdev->dma_area->status = 0;
/* Send a reset ccw on device. */
ccw->cmd_code = CCW_CMD_VDEV_RESET;
@@ -717,7 +746,7 @@ static void virtio_ccw_reset(struct virtio_device *vdev)
ccw->count = 0;
ccw->cda = 0;
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_RESET);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static u64 virtio_ccw_get_features(struct virtio_device *vdev)
@@ -728,11 +757,11 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev)
u64 rc;
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return 0;
- features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
+ features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
if (!features) {
rc = 0;
goto out_free;
@@ -765,8 +794,8 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev)
rc |= (u64)le32_to_cpu(features->features) << 32;
out_free:
- kfree(features);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return rc;
}
@@ -791,11 +820,11 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev)
return -EINVAL;
}
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
- features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
+ features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
if (!features) {
ret = -ENOMEM;
goto out_free;
@@ -830,8 +859,8 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev)
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT);
out_free:
- kfree(features);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return ret;
}
@@ -845,11 +874,12 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
void *config_area;
unsigned long flags;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
- config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
+ config_area = ccw_device_dma_zalloc(vcdev->cdev,
+ VIRTIO_CCW_CONFIG_SIZE);
if (!config_area)
goto out_free;
@@ -871,8 +901,8 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
memcpy(buf, config_area + offset, len);
out_free:
- kfree(config_area);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static void virtio_ccw_set_config(struct virtio_device *vdev,
@@ -884,11 +914,12 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
void *config_area;
unsigned long flags;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
- config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
+ config_area = ccw_device_dma_zalloc(vcdev->cdev,
+ VIRTIO_CCW_CONFIG_SIZE);
if (!config_area)
goto out_free;
@@ -907,61 +938,61 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG);
out_free:
- kfree(config_area);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static u8 virtio_ccw_get_status(struct virtio_device *vdev)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- u8 old_status = *vcdev->status;
+ u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
if (vcdev->revision < 1)
- return *vcdev->status;
+ return vcdev->dma_area->status;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return old_status;
ccw->cmd_code = CCW_CMD_READ_STATUS;
ccw->flags = 0;
- ccw->count = sizeof(*vcdev->status);
- ccw->cda = (__u32)(unsigned long)vcdev->status;
+ ccw->count = sizeof(vcdev->dma_area->status);
+ ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_STATUS);
/*
* If the channel program failed (should only happen if the device
* was hotunplugged, and then we clean up via the machine check
- * handler anyway), vcdev->status was not overwritten and we just
+ * handler anyway), vcdev->dma_area->status was not overwritten and we just
* return the old status, which is fine.
*/
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
- return *vcdev->status;
+ return vcdev->dma_area->status;
}
static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- u8 old_status = *vcdev->status;
+ u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
int ret;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
/* Write the status to the host. */
- *vcdev->status = status;
+ vcdev->dma_area->status = status;
ccw->cmd_code = CCW_CMD_WRITE_STATUS;
ccw->flags = 0;
ccw->count = sizeof(status);
- ccw->cda = (__u32)(unsigned long)vcdev->status;
+ ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
/* Write failed? We assume status is unchanged. */
if (ret)
- *vcdev->status = old_status;
- kfree(ccw);
+ vcdev->dma_area->status = old_status;
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static const char *virtio_ccw_bus_name(struct virtio_device *vdev)
@@ -994,8 +1025,8 @@ static void virtio_ccw_release_dev(struct device *_d)
struct virtio_device *dev = dev_to_virtio(_d);
struct virtio_ccw_device *vcdev = to_vc_device(dev);
- kfree(vcdev->status);
- kfree(vcdev->config_block);
+ ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
+ sizeof(*vcdev->dma_area));
kfree(vcdev);
}
@@ -1093,17 +1124,17 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
vcdev->err = -EIO;
}
virtio_ccw_check_activity(vcdev, activity);
- for_each_set_bit(i, &vcdev->indicators,
- sizeof(vcdev->indicators) * BITS_PER_BYTE) {
+ for_each_set_bit(i, indicators(vcdev),
+ sizeof(*indicators(vcdev)) * BITS_PER_BYTE) {
/* The bit clear must happen before the vring kick. */
- clear_bit(i, &vcdev->indicators);
+ clear_bit(i, indicators(vcdev));
barrier();
vq = virtio_ccw_vq_by_ind(vcdev, i);
vring_interrupt(0, vq);
}
- if (test_bit(0, &vcdev->indicators2)) {
+ if (test_bit(0, indicators2(vcdev))) {
virtio_config_changed(&vcdev->vdev);
- clear_bit(0, &vcdev->indicators2);
+ clear_bit(0, indicators2(vcdev));
}
}
@@ -1203,12 +1234,12 @@ static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev)
struct ccw1 *ccw;
int ret;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
- rev = kzalloc(sizeof(*rev), GFP_DMA | GFP_KERNEL);
+ rev = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*rev));
if (!rev) {
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return -ENOMEM;
}
@@ -1238,8 +1269,8 @@ static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev)
}
} while (ret == -EOPNOTSUPP);
- kfree(ccw);
- kfree(rev);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
+ ccw_device_dma_free(vcdev->cdev, rev, sizeof(*rev));
return ret;
}
@@ -1255,24 +1286,11 @@ static int virtio_ccw_online(struct ccw_device *cdev)
ret = -ENOMEM;
goto out_free;
}
-
vcdev->vdev.dev.parent = &cdev->dev;
- cdev->dev.dma_mask = &vcdev->dma_mask;
- /* we are fine with common virtio infrastructure using 64 bit DMA */
- ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
- if (ret) {
- dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
- goto out_free;
- }
-
- vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
- GFP_DMA | GFP_KERNEL);
- if (!vcdev->config_block) {
- ret = -ENOMEM;
- goto out_free;
- }
- vcdev->status = kzalloc(sizeof(*vcdev->status), GFP_DMA | GFP_KERNEL);
- if (!vcdev->status) {
+ vcdev->cdev = cdev;
+ vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*vcdev->dma_area));
+ if (!vcdev->dma_area) {
ret = -ENOMEM;
goto out_free;
}
@@ -1281,7 +1299,6 @@ static int virtio_ccw_online(struct ccw_device *cdev)
vcdev->vdev.dev.release = virtio_ccw_release_dev;
vcdev->vdev.config = &virtio_ccw_config_ops;
- vcdev->cdev = cdev;
init_waitqueue_head(&vcdev->wait_q);
INIT_LIST_HEAD(&vcdev->virtqueues);
spin_lock_init(&vcdev->lock);
@@ -1312,8 +1329,8 @@ out_put:
return ret;
out_free:
if (vcdev) {
- kfree(vcdev->status);
- kfree(vcdev->config_block);
+ ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
+ sizeof(*vcdev->dma_area));
}
kfree(vcdev);
return ret;
@@ -1483,8 +1500,17 @@ static void __init no_auto_parse(void)
static int __init virtio_ccw_init(void)
{
+ int rc;
+
/* parse no_auto string before we do anything further */
no_auto_parse();
- return ccw_driver_register(&virtio_ccw_driver);
+
+ summary_indicators = cio_dma_zalloc(MAX_AIRQ_AREAS);
+ if (!summary_indicators)
+ return -ENOMEM;
+ rc = ccw_driver_register(&virtio_ccw_driver);
+ if (rc)
+ cio_dma_free(summary_indicators, MAX_AIRQ_AREAS);
+ return rc;
}
device_initcall(virtio_ccw_init);
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 524ecdc2a9bb..2ec355003524 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/
obj-$(CONFIG_SOC_SAMSUNG) += samsung/
obj-y += sunxi/
obj-$(CONFIG_ARCH_TEGRA) += tegra/
-obj-$(CONFIG_SOC_TI) += ti/
+obj-y += ti/
obj-$(CONFIG_ARCH_U8500) += ux500/
obj-$(CONFIG_PLAT_VERSATILE) += versatile/
obj-y += xilinx/
diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig
index ea0859f7b185..d7d50d48d05d 100644
--- a/drivers/soc/ti/Kconfig
+++ b/drivers/soc/ti/Kconfig
@@ -75,10 +75,10 @@ config TI_SCI_PM_DOMAINS
called ti_sci_pm_domains. Note this is needed early in boot before
rootfs may be available.
+endif # SOC_TI
+
config TI_SCI_INTA_MSI_DOMAIN
bool
select GENERIC_MSI_IRQ_DOMAIN
help
Driver to enable Interrupt Aggregator specific MSI Domain.
-
-endif # SOC_TI
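The Makefile and Kconfig hunks above work together: drivers/soc/ti/ is
now always descended into, and TI_SCI_INTA_MSI_DOMAIN sits outside the
"if SOC_TI" block, so another driver can select it even when SOC_TI is
disabled. A hypothetical fragment of such a user:

	config SOME_TI_IRQCHIP
		bool
		select TI_SCI_INTA_MSI_DOMAIN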
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index b6e4862cc242..51ddca2033e0 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -81,6 +81,12 @@ out:
return CHAP_DIGEST_UNKNOWN;
}
+static void chap_close(struct iscsi_conn *conn)
+{
+ kfree(conn->auth_protocol);
+ conn->auth_protocol = NULL;
+}
+
static struct iscsi_chap *chap_server_open(
struct iscsi_conn *conn,
struct iscsi_node_auth *auth,
@@ -118,7 +124,7 @@ static struct iscsi_chap *chap_server_open(
case CHAP_DIGEST_UNKNOWN:
default:
pr_err("Unsupported CHAP_A value\n");
- kfree(conn->auth_protocol);
+ chap_close(conn);
return NULL;
}
@@ -133,19 +139,13 @@ static struct iscsi_chap *chap_server_open(
* Generate Challenge.
*/
if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) {
- kfree(conn->auth_protocol);
+ chap_close(conn);
return NULL;
}
return chap;
}
-static void chap_close(struct iscsi_conn *conn)
-{
- kfree(conn->auth_protocol);
- conn->auth_protocol = NULL;
-}
-
static int chap_server_compute_md5(
struct iscsi_conn *conn,
struct iscsi_node_auth *auth,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index f4a075303e9a..6949ea8bc387 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -502,7 +502,7 @@ iblock_execute_write_same(struct se_cmd *cmd)
/* Always in 512 byte units for Linux/Block */
block_lba += sg->length >> SECTOR_SHIFT;
- sectors -= 1;
+ sectors -= sg->length >> SECTOR_SHIFT;
}
iblock_submit_bios(&list);
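A worked example of why the decrement changes: with 4K scatterlist
entries, each iteration queues sg->length >> SECTOR_SHIFT = 4096 / 512
= 8 sectors. For a 64-sector WRITE SAME the old code decremented
sectors by 1 and looped 64 times, submitting 64 * 8 = 512 sectors; the
fixed code decrements by 8 per iteration and stops after the requested
64.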
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 319b77126168..e85d54d1cdf3 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -43,7 +43,7 @@ MODULE_PARM_DESC(notify_delay_ms,
*/
#define MAX_NUMBER_OF_TRIPS 2
-struct pkg_device {
+struct zone_device {
int cpu;
bool work_scheduled;
u32 tj_max;
@@ -58,10 +58,10 @@ static struct thermal_zone_params pkg_temp_tz_params = {
.no_hwmon = true,
};
-/* Keep track of how many package pointers we allocated in init() */
-static int max_packages __read_mostly;
-/* Array of package pointers */
-static struct pkg_device **packages;
+/* Keep track of how many zone pointers we allocated in init() */
+static int max_id __read_mostly;
+/* Array of zone pointers */
+static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
@@ -108,12 +108,12 @@ err_out:
*
* - Other callsites: Must hold pkg_temp_lock
*/
-static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
+static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
{
- int pkgid = topology_logical_package_id(cpu);
+ int id = topology_logical_die_id(cpu);
- if (pkgid >= 0 && pkgid < max_packages)
- return packages[pkgid];
+ if (id >= 0 && id < max_id)
+ return zones[id];
return NULL;
}
@@ -138,12 +138,13 @@ static int get_tj_max(int cpu, u32 *tj_max)
static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
- struct pkg_device *pkgdev = tzd->devdata;
+ struct zone_device *zonedev = tzd->devdata;
u32 eax, edx;
- rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
+ rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
+ &eax, &edx);
if (eax & 0x80000000) {
- *temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
+ *temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
pr_debug("sys_get_curr_temp %d\n", *temp);
return 0;
}
@@ -153,7 +154,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
static int sys_get_trip_temp(struct thermal_zone_device *tzd,
int trip, int *temp)
{
- struct pkg_device *pkgdev = tzd->devdata;
+ struct zone_device *zonedev = tzd->devdata;
unsigned long thres_reg_value;
u32 mask, shift, eax, edx;
int ret;
@@ -169,14 +170,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
shift = THERM_SHIFT_THRESHOLD0;
}
- ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
&eax, &edx);
if (ret < 0)
return ret;
thres_reg_value = (eax & mask) >> shift;
if (thres_reg_value)
- *temp = pkgdev->tj_max - thres_reg_value * 1000;
+ *temp = zonedev->tj_max - thres_reg_value * 1000;
else
*temp = 0;
pr_debug("sys_get_trip_temp %d\n", *temp);
@@ -187,14 +188,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
static int
sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
{
- struct pkg_device *pkgdev = tzd->devdata;
+ struct zone_device *zonedev = tzd->devdata;
u32 l, h, mask, shift, intr;
int ret;
- if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
+ if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
return -EINVAL;
- ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
&l, &h);
if (ret < 0)
return ret;
@@ -216,11 +217,12 @@ sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
if (!temp) {
l &= ~intr;
} else {
- l |= (pkgdev->tj_max - temp)/1000 << shift;
+ l |= (zonedev->tj_max - temp)/1000 << shift;
l |= intr;
}
- return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l, h);
}
static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
@@ -275,26 +277,26 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
struct thermal_zone_device *tzone = NULL;
int cpu = smp_processor_id();
- struct pkg_device *pkgdev;
+ struct zone_device *zonedev;
u64 msr_val, wr_val;
mutex_lock(&thermal_zone_mutex);
spin_lock_irq(&pkg_temp_lock);
++pkg_work_cnt;
- pkgdev = pkg_temp_thermal_get_dev(cpu);
- if (!pkgdev) {
+ zonedev = pkg_temp_thermal_get_dev(cpu);
+ if (!zonedev) {
spin_unlock_irq(&pkg_temp_lock);
mutex_unlock(&thermal_zone_mutex);
return;
}
- pkgdev->work_scheduled = false;
+ zonedev->work_scheduled = false;
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
if (wr_val != msr_val) {
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
- tzone = pkgdev->tzone;
+ tzone = zonedev->tzone;
}
enable_pkg_thres_interrupt();
@@ -320,7 +322,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
static int pkg_thermal_notify(u64 msr_val)
{
int cpu = smp_processor_id();
- struct pkg_device *pkgdev;
+ struct zone_device *zonedev;
unsigned long flags;
spin_lock_irqsave(&pkg_temp_lock, flags);
@@ -329,10 +331,10 @@ static int pkg_thermal_notify(u64 msr_val)
disable_pkg_thres_interrupt();
/* Work is per package, so scheduling it once is enough. */
- pkgdev = pkg_temp_thermal_get_dev(cpu);
- if (pkgdev && !pkgdev->work_scheduled) {
- pkgdev->work_scheduled = true;
- pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
+ zonedev = pkg_temp_thermal_get_dev(cpu);
+ if (zonedev && !zonedev->work_scheduled) {
+ zonedev->work_scheduled = true;
+ pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
}
spin_unlock_irqrestore(&pkg_temp_lock, flags);
@@ -341,12 +343,12 @@ static int pkg_thermal_notify(u64 msr_val)
static int pkg_temp_thermal_device_add(unsigned int cpu)
{
- int pkgid = topology_logical_package_id(cpu);
+ int id = topology_logical_die_id(cpu);
u32 tj_max, eax, ebx, ecx, edx;
- struct pkg_device *pkgdev;
+ struct zone_device *zonedev;
int thres_count, err;
- if (pkgid >= max_packages)
+ if (id >= max_id)
return -ENOMEM;
cpuid(6, &eax, &ebx, &ecx, &edx);
@@ -360,51 +362,51 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
if (err)
return err;
- pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
- if (!pkgdev)
+ zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
+ if (!zonedev)
return -ENOMEM;
- INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
- pkgdev->cpu = cpu;
- pkgdev->tj_max = tj_max;
- pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
+ INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
+ zonedev->cpu = cpu;
+ zonedev->tj_max = tj_max;
+ zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
thres_count,
(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
- pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
- if (IS_ERR(pkgdev->tzone)) {
- err = PTR_ERR(pkgdev->tzone);
- kfree(pkgdev);
+ zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
+ if (IS_ERR(zonedev->tzone)) {
+ err = PTR_ERR(zonedev->tzone);
+ kfree(zonedev);
return err;
}
/* Store MSR value for package thermal interrupt, to restore at exit */
- rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
- pkgdev->msr_pkg_therm_high);
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
+ zonedev->msr_pkg_therm_high);
- cpumask_set_cpu(cpu, &pkgdev->cpumask);
+ cpumask_set_cpu(cpu, &zonedev->cpumask);
spin_lock_irq(&pkg_temp_lock);
- packages[pkgid] = pkgdev;
+ zones[id] = zonedev;
spin_unlock_irq(&pkg_temp_lock);
return 0;
}
static int pkg_thermal_cpu_offline(unsigned int cpu)
{
- struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+ struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
bool lastcpu, was_target;
int target;
- if (!pkgdev)
+ if (!zonedev)
return 0;
- target = cpumask_any_but(&pkgdev->cpumask, cpu);
- cpumask_clear_cpu(cpu, &pkgdev->cpumask);
+ target = cpumask_any_but(&zonedev->cpumask, cpu);
+ cpumask_clear_cpu(cpu, &zonedev->cpumask);
lastcpu = target >= nr_cpu_ids;
/*
* Remove the sysfs files, if this is the last cpu in the package
* before doing further cleanups.
*/
if (lastcpu) {
- struct thermal_zone_device *tzone = pkgdev->tzone;
+ struct thermal_zone_device *tzone = zonedev->tzone;
/*
* We must protect against a work function calling
@@ -413,7 +415,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* won't try to call.
*/
mutex_lock(&thermal_zone_mutex);
- pkgdev->tzone = NULL;
+ zonedev->tzone = NULL;
mutex_unlock(&thermal_zone_mutex);
thermal_zone_device_unregister(tzone);
@@ -427,8 +429,8 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* one. When we drop the lock, then the interrupt notify function
* will see the new target.
*/
- was_target = pkgdev->cpu == cpu;
- pkgdev->cpu = target;
+ was_target = zonedev->cpu == cpu;
+ zonedev->cpu = target;
/*
* If this is the last CPU in the package remove the package
@@ -437,23 +439,23 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* worker will see the package anymore.
*/
if (lastcpu) {
- packages[topology_logical_package_id(cpu)] = NULL;
+ zones[topology_logical_die_id(cpu)] = NULL;
/* After this point nothing touches the MSR anymore. */
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
+ zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
}
/*
* Check whether there is work scheduled and whether the work is
* targeted at the outgoing CPU.
*/
- if (pkgdev->work_scheduled && was_target) {
+ if (zonedev->work_scheduled && was_target) {
/*
* To cancel the work we need to drop the lock, otherwise
* we might deadlock if the work needs to be flushed.
*/
spin_unlock_irq(&pkg_temp_lock);
- cancel_delayed_work_sync(&pkgdev->work);
+ cancel_delayed_work_sync(&zonedev->work);
spin_lock_irq(&pkg_temp_lock);
/*
* If this is not the last cpu in the package and the work
@@ -461,21 +463,21 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* need to reschedule the work, otherwise the interrupt
* stays disabled forever.
*/
- if (!lastcpu && pkgdev->work_scheduled)
- pkg_thermal_schedule_work(target, &pkgdev->work);
+ if (!lastcpu && zonedev->work_scheduled)
+ pkg_thermal_schedule_work(target, &zonedev->work);
}
spin_unlock_irq(&pkg_temp_lock);
/* Final cleanup if this is the last cpu */
if (lastcpu)
- kfree(pkgdev);
+ kfree(zonedev);
return 0;
}
static int pkg_thermal_cpu_online(unsigned int cpu)
{
- struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
+ struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
/* Paranoia check */
@@ -483,8 +485,8 @@ static int pkg_thermal_cpu_online(unsigned int cpu)
return -ENODEV;
/* If the package exists, nothing to do */
- if (pkgdev) {
- cpumask_set_cpu(cpu, &pkgdev->cpumask);
+ if (zonedev) {
+ cpumask_set_cpu(cpu, &zonedev->cpumask);
return 0;
}
return pkg_temp_thermal_device_add(cpu);
@@ -503,10 +505,10 @@ static int __init pkg_temp_thermal_init(void)
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
- max_packages = topology_max_packages();
- packages = kcalloc(max_packages, sizeof(struct pkg_device *),
+ max_id = topology_max_packages() * topology_max_die_per_package();
+ zones = kcalloc(max_id, sizeof(struct zone_device *),
GFP_KERNEL);
- if (!packages)
+ if (!zones)
return -ENOMEM;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
@@ -525,7 +527,7 @@ static int __init pkg_temp_thermal_init(void)
return 0;
err:
- kfree(packages);
+ kfree(zones);
return ret;
}
module_init(pkg_temp_thermal_init)
@@ -537,7 +539,7 @@ static void __exit pkg_temp_thermal_exit(void)
cpuhp_remove_state(pkg_thermal_hp_state);
debugfs_remove_recursive(debugfs);
- kfree(packages);
+ kfree(zones);
}
module_exit(pkg_temp_thermal_exit)
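A worked example of the new indexing: on a system where
topology_max_packages() = 2 and topology_max_die_per_package() = 2,
max_id = 2 * 2 = 4 and topology_logical_die_id() yields a unique index
in 0..3 for each die, so every die gets its own thermal zone where the
old code registered one zone per package.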
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index e38f104db174..fde8d4073e74 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -487,7 +487,7 @@ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
{
- lockdep_assert_held_exclusive(&tty->ldisc_sem);
+ lockdep_assert_held_write(&tty->ldisc_sem);
WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
clear_bit(TTY_LDISC_OPEN, &tty->flags);
if (ld->ops->close)
@@ -509,7 +509,7 @@ static int tty_ldisc_failto(struct tty_struct *tty, int ld)
struct tty_ldisc *disc = tty_ldisc_get(tty, ld);
int r;
- lockdep_assert_held_exclusive(&tty->ldisc_sem);
+ lockdep_assert_held_write(&tty->ldisc_sem);
if (IS_ERR(disc))
return PTR_ERR(disc);
tty->ldisc = disc;
@@ -633,7 +633,7 @@ EXPORT_SYMBOL_GPL(tty_set_ldisc);
*/
static void tty_ldisc_kill(struct tty_struct *tty)
{
- lockdep_assert_held_exclusive(&tty->ldisc_sem);
+ lockdep_assert_held_write(&tty->ldisc_sem);
if (!tty->ldisc)
return;
/*
@@ -681,7 +681,7 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
struct tty_ldisc *ld;
int retval;
- lockdep_assert_held_exclusive(&tty->ldisc_sem);
+ lockdep_assert_held_write(&tty->ldisc_sem);
ld = tty_ldisc_get(tty, disc);
if (IS_ERR(ld)) {
BUG_ON(disc == N_TTY);
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index fa783531ee88..a02448105527 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -63,7 +63,7 @@ struct usb_dev_state {
unsigned int discsignr;
struct pid *disc_pid;
const struct cred *cred;
- void __user *disccontext;
+ sigval_t disccontext;
unsigned long ifclaimed;
u32 disabled_bulk_eps;
bool privileges_dropped;
@@ -90,6 +90,7 @@ struct async {
unsigned int ifnum;
void __user *userbuffer;
void __user *userurb;
+ sigval_t userurb_sigval;
struct urb *urb;
struct usb_memory *usbm;
unsigned int mem_usage;
@@ -582,22 +583,19 @@ static void async_completed(struct urb *urb)
{
struct async *as = urb->context;
struct usb_dev_state *ps = as->ps;
- struct kernel_siginfo sinfo;
struct pid *pid = NULL;
const struct cred *cred = NULL;
unsigned long flags;
- int signr;
+ sigval_t addr;
+ int signr, errno;
spin_lock_irqsave(&ps->lock, flags);
list_move_tail(&as->asynclist, &ps->async_completed);
as->status = urb->status;
signr = as->signr;
if (signr) {
- clear_siginfo(&sinfo);
- sinfo.si_signo = as->signr;
- sinfo.si_errno = as->status;
- sinfo.si_code = SI_ASYNCIO;
- sinfo.si_addr = as->userurb;
+ errno = as->status;
+ addr = as->userurb_sigval;
pid = get_pid(as->pid);
cred = get_cred(as->cred);
}
@@ -615,7 +613,7 @@ static void async_completed(struct urb *urb)
spin_unlock_irqrestore(&ps->lock, flags);
if (signr) {
- kill_pid_info_as_cred(sinfo.si_signo, &sinfo, pid, cred);
+ kill_pid_usb_asyncio(signr, errno, addr, pid, cred);
put_pid(pid);
put_cred(cred);
}
@@ -1427,7 +1425,7 @@ find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb)
static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb,
struct usbdevfs_iso_packet_desc __user *iso_frame_desc,
- void __user *arg)
+ void __user *arg, sigval_t userurb_sigval)
{
struct usbdevfs_iso_packet_desc *isopkt = NULL;
struct usb_host_endpoint *ep;
@@ -1727,6 +1725,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
isopkt = NULL;
as->ps = ps;
as->userurb = arg;
+ as->userurb_sigval = userurb_sigval;
if (as->usbm) {
unsigned long uurb_start = (unsigned long)uurb->buffer;
@@ -1801,13 +1800,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
static int proc_submiturb(struct usb_dev_state *ps, void __user *arg)
{
struct usbdevfs_urb uurb;
+ sigval_t userurb_sigval;
if (copy_from_user(&uurb, arg, sizeof(uurb)))
return -EFAULT;
+ memset(&userurb_sigval, 0, sizeof(userurb_sigval));
+ userurb_sigval.sival_ptr = arg;
+
return proc_do_submiturb(ps, &uurb,
(((struct usbdevfs_urb __user *)arg)->iso_frame_desc),
- arg);
+ arg, userurb_sigval);
}
static int proc_unlinkurb(struct usb_dev_state *ps, void __user *arg)
@@ -1977,7 +1980,7 @@ static int proc_disconnectsignal_compat(struct usb_dev_state *ps, void __user *a
if (copy_from_user(&ds, arg, sizeof(ds)))
return -EFAULT;
ps->discsignr = ds.signr;
- ps->disccontext = compat_ptr(ds.context);
+ ps->disccontext.sival_int = ds.context;
return 0;
}
@@ -2005,13 +2008,17 @@ static int get_urb32(struct usbdevfs_urb *kurb,
static int proc_submiturb_compat(struct usb_dev_state *ps, void __user *arg)
{
struct usbdevfs_urb uurb;
+ sigval_t userurb_sigval;
if (get_urb32(&uurb, (struct usbdevfs_urb32 __user *)arg))
return -EFAULT;
+ memset(&userurb_sigval, 0, sizeof(userurb_sigval));
+ userurb_sigval.sival_int = ptr_to_compat(arg);
+
return proc_do_submiturb(ps, &uurb,
((struct usbdevfs_urb32 __user *)arg)->iso_frame_desc,
- arg);
+ arg, userurb_sigval);
}
static int processcompl_compat(struct async *as, void __user * __user *arg)
@@ -2092,7 +2099,7 @@ static int proc_disconnectsignal(struct usb_dev_state *ps, void __user *arg)
if (copy_from_user(&ds, arg, sizeof(ds)))
return -EFAULT;
ps->discsignr = ds.signr;
- ps->disccontext = ds.context;
+ ps->disccontext.sival_ptr = ds.context;
return 0;
}
@@ -2614,22 +2621,15 @@ const struct file_operations usbdev_file_operations = {
static void usbdev_remove(struct usb_device *udev)
{
struct usb_dev_state *ps;
- struct kernel_siginfo sinfo;
while (!list_empty(&udev->filelist)) {
ps = list_entry(udev->filelist.next, struct usb_dev_state, list);
destroy_all_async(ps);
wake_up_all(&ps->wait);
list_del_init(&ps->list);
- if (ps->discsignr) {
- clear_siginfo(&sinfo);
- sinfo.si_signo = ps->discsignr;
- sinfo.si_errno = EPIPE;
- sinfo.si_code = SI_ASYNCIO;
- sinfo.si_addr = ps->disccontext;
- kill_pid_info_as_cred(ps->discsignr, &sinfo,
- ps->disc_pid, ps->cred);
- }
+ if (ps->discsignr)
+ kill_pid_usb_asyncio(ps->discsignr, EPIPE, ps->disccontext,
+ ps->disc_pid, ps->cred);
}
}
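An illustrative userspace sketch (not part of the patch) of the other
end: kill_pid_usb_asyncio() raises an SI_ASYNCIO signal whose payload
is the sigval stored above, so a handler reads the URB pointer from
si_value rather than si_addr:

	#include <signal.h>

	static void urb_completed(int sig, siginfo_t *si, void *ucontext)
	{
		/* 64-bit submitters get the usbdevfs_urb pointer here;
		 * 32-bit compat submitters use si->si_value.sival_int. */
		void *userurb = si->si_value.sival_ptr;
		(void)sig; (void)ucontext; (void)userurb;
	}

	/* in main(): */
	struct sigaction sa = { .sa_sigaction = urb_completed,
				.sa_flags = SA_SIGINFO };
	sigaction(SIGRTMIN, &sa, NULL);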
diff --git a/fs/Kconfig b/fs/Kconfig
index f1046cf6ad85..bfb1c6095c7a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -11,7 +11,6 @@ config DCACHE_WORD_ACCESS
config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
- default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 86da532c192f..df415c05939e 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -246,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
_enter("%s", cell->name);
- ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
- &result, _expiry, true);
+ ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len,
+ "srv=1", &result, _expiry, true);
if (ret < 0) {
_leave(" = %d [dns]", ret);
return ERR_PTR(ret);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 9b3b2f1f1fc0..bcd1bafb0278 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -24,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = {
static int afs_probe_cell_name(struct dentry *dentry)
{
struct afs_cell *cell;
+ struct afs_net *net = afs_d2net(dentry);
const char *name = dentry->d_name.name;
size_t len = dentry->d_name.len;
int ret;
@@ -36,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry)
len--;
}
- cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
+ cell = afs_lookup_cell_rcu(net, name, len);
if (!IS_ERR(cell)) {
- afs_put_cell(afs_d2net(dentry), cell);
+ afs_put_cell(net, cell);
return 0;
}
- ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false);
+ ret = dns_query(net->net, "afsdb", name, len, "srv=1",
+ NULL, NULL, false);
if (ret == -ENODATA)
ret = -EDESTADDRREQ;
return ret;
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 71e71c07568f..8866703b2e6c 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -28,7 +28,7 @@ struct key *afs_request_key(struct afs_cell *cell)
_debug("key %s", cell->anonymous_key->description);
key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
- NULL);
+ NULL, NULL);
if (IS_ERR(key)) {
if (PTR_ERR(key) != -ENOKEY) {
_leave(" = %ld", PTR_ERR(key));
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index aae2b8b2adf5..523e9ea78a28 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -10,7 +10,7 @@ config CIFS
select CRYPTO_SHA512
select CRYPTO_CMAC
select CRYPTO_HMAC
- select CRYPTO_ARC4
+ select CRYPTO_LIB_ARC4
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_ECB
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7f01c6e60791..d1b439ad0f1a 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -32,6 +32,25 @@
#include "cifsproto.h"
static const struct cred *spnego_cred;
+static struct key_acl cifs_spnego_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
+static struct key_acl cifs_spnego_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR),
+ }
+};
+
/* create a new cifs key */
static int
cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
@@ -170,7 +189,8 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
cifs_dbg(FYI, "key description = %s\n", description);
saved_cred = override_creds(spnego_cred);
- spnego_key = request_key(&cifs_spnego_key_type, description, "");
+ spnego_key = request_key(&cifs_spnego_key_type, description, "",
+ &cifs_spnego_key_acl);
revert_creds(saved_cred);
#ifdef CONFIG_CIFS_DEBUG2
@@ -207,8 +227,7 @@ init_cifs_spnego(void)
keyring = keyring_alloc(".cifs_spnego",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ,
+ &cifs_spnego_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1d377b7f2860..78eed72f3af0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -33,6 +33,25 @@
#include "cifsproto.h"
#include "cifs_debug.h"
+static struct key_acl cifs_idmap_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
+static struct key_acl cifs_idmap_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
/* security id for everyone/world system group */
static const struct cifs_sid sid_everyone = {
1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -298,7 +317,8 @@ id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid)
rc = 0;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, desc, "");
+ sidkey = request_key(&cifs_idmap_key_type, desc, "",
+ &cifs_idmap_key_acl);
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map %cid %u to a SID\n",
@@ -403,7 +423,8 @@ try_upcall_to_get_id:
return -ENOMEM;
saved_cred = override_creds(root_cred);
- sidkey = request_key(&cifs_idmap_key_type, sidstr, "");
+ sidkey = request_key(&cifs_idmap_key_type, sidstr, "",
+ &cifs_idmap_key_acl);
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n",
@@ -481,8 +502,7 @@ init_cifs_idmap(void)
keyring = keyring_alloc(".cifs_idmap",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ,
+ &cifs_idmap_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d2a05e46d6f5..97b7497c13ef 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -33,7 +33,8 @@
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/highmem.h>
-#include <crypto/skcipher.h>
+#include <linux/fips.h>
+#include <crypto/arc4.h>
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
@@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret:
int
calc_seckey(struct cifs_ses *ses)
{
- int rc;
- struct crypto_skcipher *tfm_arc4;
- struct scatterlist sgin, sgout;
- struct skcipher_request *req;
- unsigned char *sec_key;
+ unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+ struct arc4_ctx *ctx_arc4;
- sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL);
- if (sec_key == NULL)
- return -ENOMEM;
+ if (fips_enabled)
+ return -ENODEV;
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
- tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm_arc4)) {
- rc = PTR_ERR(tfm_arc4);
- cifs_dbg(VFS, "could not allocate crypto API arc4\n");
- goto out;
- }
-
- rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
- CIFS_SESS_KEY_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set response as a key\n",
- __func__);
- goto out_free_cipher;
- }
-
- req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL);
- if (!req) {
- rc = -ENOMEM;
- cifs_dbg(VFS, "could not allocate crypto API arc4 request\n");
- goto out_free_cipher;
+ ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL);
+ if (!ctx_arc4) {
+ cifs_dbg(VFS, "could not allocate arc4 context\n");
+ return -ENOMEM;
}
- sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
- sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
-
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL);
-
- rc = crypto_skcipher_encrypt(req);
- skcipher_request_free(req);
- if (rc) {
- cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc);
- goto out_free_cipher;
- }
+ arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
+ arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
+ CIFS_CPHTXT_SIZE);
/* make secondary_key/nonce as session key */
memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
/* and make len as that of session key only */
ses->auth_key.len = CIFS_SESS_KEY_SIZE;
-out_free_cipher:
- crypto_free_skcipher(tfm_arc4);
-out:
- kfree(sec_key);
- return rc;
+ memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE);
+ kzfree(ctx_arc4);
+ return 0;
}
void
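The conversion above trades the async skcipher interface ("ecb(arc4)",
scatterlists, a heap-allocated request) for the direct library calls
provided by CRYPTO_LIB_ARC4, and bails out early under fips_enabled
since RC4 is not a FIPS-approved cipher. The entire cipher use reduces
to the pair:

	arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE);
	arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key,
		   CIFS_CPHTXT_SIZE);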
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 65d9771e49f9..72db1c89bf5a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1591,7 +1591,6 @@ MODULE_DESCRIPTION
("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
"also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
-MODULE_SOFTDEP("pre: arc4");
MODULE_SOFTDEP("pre: des");
MODULE_SOFTDEP("pre: ecb");
MODULE_SOFTDEP("pre: hmac");
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8dd6637a3cbb..ae6bae2ecb5d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2631,7 +2631,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
task = xchg(&server->tsk, NULL);
if (task)
- force_sig(SIGKILL, task);
+ send_sig(SIGKILL, task, 1);
}
static struct TCP_Server_Info *
@@ -2992,7 +2992,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
}
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "");
+ key = request_key(&key_type_logon, desc, "", NULL);
if (IS_ERR(key)) {
if (!ses->domainName) {
cifs_dbg(FYI, "domainName is NULL\n");
@@ -3003,7 +3003,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
/* didn't work, try to find a domain key */
sprintf(desc, "cifs:d:%s", ses->domainName);
cifs_dbg(FYI, "%s: desc=%s\n", __func__, desc);
- key = request_key(&key_type_logon, desc, "");
+ key = request_key(&key_type_logon, desc, "", NULL);
if (IS_ERR(key)) {
rc = PTR_ERR(key);
goto out_err;
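
Both hunks above reflect the request_key() API change running through this diff: the function gains a fourth argument for an optional struct key_acl, and existing callers pass NULL to keep the default ACL. A sketch of the updated call shape (example_lookup is an illustrative name):

#include <linux/key.h>
#include <keys/user-type.h>

/* Illustrative: the new fourth parameter carries an optional key ACL;
 * NULL preserves the previous default behaviour. */
static struct key *example_lookup(const char *desc)
{
	return request_key(&key_type_logon, desc, "", NULL);
}
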
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 1e21b2528cfb..534cbba72789 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
goto name_is_IP_address;
/* Perform the upcall */
- rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false);
+ rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len,
+ NULL, ip_addr, NULL, false);
if (rc < 0)
cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
__func__, len, len, hostname);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3fdc6a41b304..9fd56b0acd7e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2372,6 +2372,41 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
kfree(dfs_rsp);
return rc;
}
+
+static int
+parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
+ u32 plen, char **target_path,
+ struct cifs_sb_info *cifs_sb)
+{
+ unsigned int sub_len;
+ unsigned int sub_offset;
+
+ /* We only handle Symbolic Link: MS-FSCC 2.1.2.4 */
+ if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) {
+ cifs_dbg(VFS, "srv returned invalid symlink buffer\n");
+ return -EIO;
+ }
+
+ sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
+ sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
+ if (sub_offset + 20 > plen ||
+ sub_offset + sub_len + 20 > plen) {
+ cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
+ return -EIO;
+ }
+
+ *target_path = cifs_strndup_from_utf16(
+ symlink_buf->PathBuffer + sub_offset,
+ sub_len, true, cifs_sb->local_nls);
+ if (!(*target_path))
+ return -ENOMEM;
+
+ convert_delimiter(*target_path, '/');
+ cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+
+ return 0;
+}
+
#define SMB2_SYMLINK_STRUCT_SIZE \
(sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
@@ -2401,11 +2436,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec close_iov[1];
struct smb2_create_rsp *create_rsp;
struct smb2_ioctl_rsp *ioctl_rsp;
- char *ioctl_buf;
+ struct reparse_data_buffer *reparse_buf;
u32 plen;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+ *target_path = NULL;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -2483,17 +2520,36 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
if ((rc == 0) && (is_reparse_point)) {
/* See MS-FSCC 2.3.23 */
- ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset);
+ reparse_buf = (struct reparse_data_buffer *)
+ ((char *)ioctl_rsp +
+ le32_to_cpu(ioctl_rsp->OutputOffset));
plen = le32_to_cpu(ioctl_rsp->OutputCount);
if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
rsp_iov[1].iov_len) {
- cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen);
+ cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n",
+ plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < 8) {
+ cifs_dbg(VFS, "reparse buffer is too small. Must be "
+ "at least 8 bytes but was %d\n", plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) {
+ cifs_dbg(VFS, "srv returned invalid reparse buf "
+ "length: %d\n", plen);
rc = -EIO;
goto querty_exit;
}
- /* Do stuff with ioctl_buf/plen */
+ rc = parse_reparse_symlink(
+ (struct reparse_symlink_data_buffer *)reparse_buf,
+ plen, target_path, cifs_sb);
goto querty_exit;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index c7d5813bebd8..858353d20c39 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -914,7 +914,19 @@ struct reparse_mount_point_data_buffer {
__u8 PathBuffer[0]; /* Variable Length */
} __packed;
-/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
+struct reparse_symlink_data_buffer {
+ __le32 ReparseTag;
+ __le16 ReparseDataLength;
+ __u16 Reserved;
+ __le16 SubstituteNameOffset;
+ __le16 SubstituteNameLength;
+ __le16 PrintNameOffset;
+ __le16 PrintNameLength;
+ __le32 Flags;
+ __u8 PathBuffer[0]; /* Variable Length */
+} __packed;
/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
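
The fixed header of the structure above, ReparseTag through Flags, is 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 = 20 bytes, which is where the literal 20 in parse_reparse_symlink()'s bounds checks comes from. A compile-time assertion (not part of the patch) would make that dependency explicit:

#include <linux/build_bug.h>
#include <linux/stddef.h>

/* Hypothetical check tying the literal 20 in parse_reparse_symlink()
 * to the layout of the __packed structure above. */
static_assert(offsetof(struct reparse_symlink_data_buffer, PathBuffer) == 20);
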
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index dcd91a3fbe49..4f85af8ab239 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -92,7 +92,7 @@ find_and_lock_process_key(const char *prefix,
if (!description)
return ERR_PTR(-ENOMEM);
- key = request_key(&key_type_logon, description, NULL);
+ key = request_key(&key_type_logon, description, NULL, NULL);
kfree(description);
if (IS_ERR(key))
return key;
diff --git a/fs/dax.c b/fs/dax.c
index 2e48c7ebb973..fe5e33810cd4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -720,12 +720,11 @@ static void *dax_insert_entry(struct xa_state *xas,
xas_reset(xas);
xas_lock_irq(xas);
- if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ void *old;
+
dax_disassociate_entry(entry, mapping, false);
dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
- }
-
- if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -734,7 +733,7 @@ static void *dax_insert_entry(struct xa_state *xas,
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
- void *old = dax_lock_entry(xas, new_entry);
+ old = dax_lock_entry(xas, new_entry);
WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
DAX_LOCKED));
entry = new_entry;
@@ -1188,7 +1187,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
unsigned flags = 0;
if (iov_iter_rw(iter) == WRITE) {
- lockdep_assert_held_exclusive(&inode->i_rwsem);
+ lockdep_assert_held_write(&inode->i_rwsem);
flags |= IOMAP_WRITE;
} else {
lockdep_assert_held(&inode->i_rwsem);
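
The second dax.c hunk is part of a tree-wide rename: lockdep_assert_held_exclusive() becomes lockdep_assert_held_write(), matching rwsem read/write terminology with no semantic change. A sketch of the assertion in a hypothetical write path:

#include <linux/fs.h>
#include <linux/lockdep.h>

/* Illustrative only: document (and, with lockdep enabled, verify) that
 * i_rwsem is held for write before touching write-side state. */
static void example_write_path(struct inode *inode)
{
	lockdep_assert_held_write(&inode->i_rwsem);
	/* ... modify state protected by the write lock ... */
}
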
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 1c1a56be7ea2..67844fe41a61 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -91,7 +91,7 @@ ecryptfs_get_encrypted_key_payload_data(struct key *key)
static inline struct key *ecryptfs_get_encrypted_key(char *sig)
{
- return request_key(&key_type_encrypted, sig, NULL);
+ return request_key(&key_type_encrypted, sig, NULL, NULL);
}
#else
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 9536e592e25a..ba382f135918 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1610,7 +1610,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
{
int rc = 0;
- (*auth_tok_key) = request_key(&key_type_user, sig, NULL);
+ (*auth_tok_key) = request_key(&key_type_user, sig, NULL, NULL);
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
(*auth_tok_key) = ecryptfs_get_encrypted_key(sig);
if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
diff --git a/fs/exec.c b/fs/exec.c
index 89a500bb897a..c71cbfe6826a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1663,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm)
if (retval < 0 && !bprm->mm) {
/* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock);
- force_sigsegv(SIGSEGV, current);
+ force_sigsegv(SIGSEGV);
return retval;
}
if (retval != -ENOEXEC || !bprm->file) {
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 72ebfe578f40..67b7bda5647a 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -317,7 +317,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
const char *buf;
int len;
- key = request_key(&key_type_user, "fscache:objlist", NULL);
+ key = request_key(&key_type_user, "fscache:objlist", NULL, NULL);
if (IS_ERR(key))
goto no_config;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7660c2749c96..6fbc9126367a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2596,11 +2596,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
if (!check_mnt(p))
goto out;
- /* The thing moved should be either ours or completely unattached. */
- if (attached && !check_mnt(old))
+ /* The thing moved must be mounted... */
+ if (!is_mounted(&old->mnt))
goto out;
- if (!attached && !(ns && is_anon_ns(ns)))
+ /* ... and either ours or the root of anon namespace */
+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
if (old->mnt.mnt_flags & MNT_LOCKED)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e6a700f01452..aec769a500a1 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
char *ip_addr = NULL;
int ip_len;
- ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false);
+ ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL,
+ false);
if (ip_len > 0)
ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 4884fdae28fb..69679f4f2e6c 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -72,6 +72,25 @@ struct idmap {
const struct cred *cred;
};
+static struct key_acl nfs_idmap_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
+static struct key_acl nfs_idmap_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
static struct user_namespace *idmap_userns(const struct idmap *idmap)
{
if (idmap && idmap->cred)
@@ -208,8 +227,7 @@ int nfs_idmap_init(void)
keyring = keyring_alloc(".id_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ,
+ &nfs_idmap_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
@@ -287,11 +305,13 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
return ERR_PTR(ret);
if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
- rkey = request_key(&key_type_id_resolver, desc, "");
+ rkey = request_key(&key_type_id_resolver, desc, "",
+ &nfs_idmap_key_acl);
if (IS_ERR(rkey)) {
mutex_lock(&idmap->idmap_mutex);
rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
- desc, "", 0, idmap);
+ desc, NULL, "", 0, idmap,
+ &nfs_idmap_key_acl);
mutex_unlock(&idmap->idmap_mutex);
}
if (!IS_ERR(rkey))
@@ -320,8 +340,6 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
}
rcu_read_lock();
- rkey->perm |= KEY_USR_VIEW;
-
ret = key_validate(rkey);
if (ret < 0)
goto out_up;
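
The two static key_acl objects above show the shape of the ACL model that replaces the old KEY_POS_*/KEY_USR_* permission masks: a refcounted header plus an array of ACEs, each granting a set of KEY_ACE_* rights to a subject (possessor, owner, and so on). A minimal sketch with illustrative names:

#include <linux/key.h>

/* Hypothetical ACL granting the possessor search+read and the owner
 * view only; it is passed where a permission mask used to go, e.g.
 * keyring_alloc(..., &example_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL). */
static struct key_acl example_acl = {
	.usage	= REFCOUNT_INIT(1),
	.nr_ace	= 2,
	.aces	= {
		KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
		KEY_OWNER_ACE(KEY_ACE_VIEW),
	}
};
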
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 618e66078ee5..1a0cdeb3b875 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1563,7 +1563,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
- avail = clamp_t(int, avail, slotsize, total_avail/3);
+ avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 62ee41b4bbd0..4c3dcb718961 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -98,3 +98,7 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+ def_bool n
+ depends on PROC_FS
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 55180501b915..46dcb6f0eccf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 255f6754c70d..c40fca98f2b7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3061,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3448,6 +3451,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
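
An architecture that selects PROC_PID_ARCH_STATUS is expected to supply proc_pid_arch_status() with the same signature as the other ONE() entries in these tables. A placeholder sketch (the body is illustrative, not the real x86 implementation):

#include <linux/pid_namespace.h>
#include <linux/sched.h>
#include <linux/seq_file.h>

/* Illustrative stub: emit one arch-specific status line for the task. */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	seq_printf(m, "ExampleArchField:\t%d\n", 0);	/* placeholder value */
	return 0;
}
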
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 60f43b93d06e..38718026ad0b 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -227,7 +227,7 @@ int ubifs_init_authentication(struct ubifs_info *c)
snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
c->auth_hash_name);
- keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL);
+ keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL, NULL);
if (IS_ERR(keyring_key)) {
ubifs_err(c, "Failed to request key: %ld",
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ae0b8b5f69e6..ccbdbd62f0d8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -40,6 +40,16 @@ enum userfaultfd_state {
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
+ *
+ * Locking order:
+ * fd_wqh.lock
+ * fault_pending_wqh.lock
+ * fault_wqh.lock
+ * event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
TASK_KILLABLE;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* __add_wait_queue.
*/
set_current_state(blocking_state);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vmf->vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
* kernel stack can be released after the list_del_init.
*/
if (!list_empty_careful(&uwq.wq.entry)) {
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
list_del(&uwq.wq.entry);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
/*
@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
init_waitqueue_entry(&ewq->wq, current);
release_new_ctx = NULL;
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
break;
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
}
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
if (release_new_ctx) {
struct vm_area_struct *vma;
@@ -918,10 +928,10 @@ wakeup:
* the last page faults that may have been already waiting on
* the fault_*wqh.
*/
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
wake_up_all(&ctx->event_wqh);
@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
if (!list_empty(&fork_event)) {
/*
* The fork thread didn't abort, so we can
@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (ret)
userfaultfd_ctx_put(fork_nctx);
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
}
return ret;
@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
static void __wake_userfault(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range *range)
{
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/* wake all in the range and autoremove */
if (waitqueue_active(&ctx->fault_pending_wqh))
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
wait_queue_entry_t *wq;
unsigned long pending = 0, total = 0;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
pending++;
total++;
@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
total++;
}
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/*
* If more protocols will be added, there will be all shown
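
Every hunk in this file applies the rule stated in the new locking-order comment: because aio_poll() can take fd_wqh.lock while holding a lock that is also taken in IRQ context, all of these waitqueue locks must now be taken with IRQs disabled. The recurring before/after pattern, as a sketch:

#include <linux/spinlock.h>
#include <linux/wait.h>

/* Illustrative: spin_lock() on these waitqueue locks becomes
 * spin_lock_irq(), keeping IRQs off for the whole critical section. */
static void example_wake_waiters(wait_queue_head_t *wqh)
{
	spin_lock_irq(&wqh->lock);
	/* ... walk the queue or wake waiters with IRQs disabled ... */
	spin_unlock_irq(&wqh->lock);
}
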
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index d7a15096fb3b..370f01d4450f 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -10,24 +10,24 @@
#include <linux/types.h>
typedef struct {
- long long counter;
+ s64 counter;
} atomic64_t;
#define ATOMIC64_INIT(i) { (i) }
-extern long long atomic64_read(const atomic64_t *v);
-extern void atomic64_set(atomic64_t *v, long long i);
+extern s64 atomic64_read(const atomic64_t *v);
+extern void atomic64_set(atomic64_t *v, s64 i);
#define atomic64_set_release(v, i) atomic64_set((v), (i))
#define ATOMIC64_OP(op) \
-extern void atomic64_##op(long long a, atomic64_t *v);
+extern void atomic64_##op(s64 a, atomic64_t *v);
#define ATOMIC64_OP_RETURN(op) \
-extern long long atomic64_##op##_return(long long a, atomic64_t *v);
+extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
#define ATOMIC64_FETCH_OP(op) \
-extern long long atomic64_fetch_##op(long long a, atomic64_t *v);
+extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
@@ -46,11 +46,11 @@ ATOMIC64_OPS(xor)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-extern long long atomic64_dec_if_positive(atomic64_t *v);
+extern s64 atomic64_dec_if_positive(atomic64_t *v);
#define atomic64_dec_if_positive atomic64_dec_if_positive
-extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
-extern long long atomic64_xchg(atomic64_t *v, long long new);
-extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
+extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
+extern s64 atomic64_xchg(atomic64_t *v, s64 new);
+extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
#define atomic64_fetch_add_unless atomic64_fetch_add_unless
#endif /* _ASM_GENERIC_ATOMIC64_H */
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
new file mode 100644
index 000000000000..e94b19782c92
--- /dev/null
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_VSYSCALL_H
+#define __ASM_GENERIC_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#ifndef __arch_get_k_vdso_data
+static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
+{
+ return NULL;
+}
+#endif /* __arch_get_k_vdso_data */
+
+#ifndef __arch_update_vdso_data
+static __always_inline int __arch_update_vdso_data(void)
+{
+ return 0;
+}
+#endif /* __arch_update_vdso_data */
+
+#ifndef __arch_get_clock_mode
+static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
+{
+ return 0;
+}
+#endif /* __arch_get_clock_mode */
+
+#ifndef __arch_use_vsyscall
+static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
+{
+ return 1;
+}
+#endif /* __arch_use_vsyscall */
+
+#ifndef __arch_update_vsyscall
+static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+}
+#endif /* __arch_update_vsyscall */
+
+#ifndef __arch_sync_vdso_data
+static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata)
+{
+}
+#endif /* __arch_sync_vdso_data */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_GENERIC_VSYSCALL_H */
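
Each hook in this header is wrapped in #ifndef, so an architecture overrides one by defining a macro of the same name in its asm/vdso/vsyscall.h before including the generic file. A sketch of the arch side (symbol names are illustrative; _vdso_data is assumed to be the arch's vDSO data page):

/* In a hypothetical arch/xxx/include/asm/vdso/vsyscall.h: */
static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
{
	return _vdso_data;	/* assumed arch-provided vDSO data page */
}
#define __arch_get_k_vdso_data __arch_get_k_vdso_data

#include <asm-generic/vdso/vsyscall.h>	/* generic defaults fill the rest */
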
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
new file mode 100644
index 000000000000..a821deb8ecb2
--- /dev/null
+++ b/include/clocksource/hyperv_timer.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Definitions for the clocksource provided by the Hyper-V
+ * hypervisor to guest VMs, as described in the Hyper-V Top
+ * Level Functional Spec (TLFS).
+ *
+ * Copyright (C) 2019, Microsoft, Inc.
+ *
+ * Author: Michael Kelley <mikelley@microsoft.com>
+ */
+
+#ifndef __CLKSOURCE_HYPERV_TIMER_H
+#define __CLKSOURCE_HYPERV_TIMER_H
+
+#include <linux/clocksource.h>
+#include <linux/math64.h>
+#include <asm/mshyperv.h>
+
+#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
+#define HV_MIN_DELTA_TICKS 1
+
+/* Routines called by the VMbus driver */
+extern int hv_stimer_alloc(int sint);
+extern void hv_stimer_free(void);
+extern void hv_stimer_init(unsigned int cpu);
+extern void hv_stimer_cleanup(unsigned int cpu);
+extern void hv_stimer_global_cleanup(void);
+extern void hv_stimer0_isr(void);
+
+#if IS_ENABLED(CONFIG_HYPERV)
+extern struct clocksource *hyperv_cs;
+extern void hv_init_clocksource(void);
+#endif /* CONFIG_HYPERV */
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+
+static inline notrace u64
+hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
+{
+ u64 scale, offset;
+ u32 sequence;
+
+ /*
+ * The protocol for reading Hyper-V TSC page is specified in Hypervisor
+ * Top-Level Functional Specification ver. 3.0 and above. To get the
+ * reference time we must do the following:
+ * - READ ReferenceTscSequence
+ * A special '0' value indicates the time source is unreliable and we
+ * need to use something else. The currently published specification
+ * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
+ * instead of '0' as the special value, see commit c35b82ef0294.
+ * - ReferenceTime =
+ * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
+ * - READ ReferenceTscSequence again. In case its value has changed
+ * since our first reading we need to discard ReferenceTime and repeat
+ * the whole sequence as the hypervisor was updating the page in
+ * between.
+ */
+ do {
+ sequence = READ_ONCE(tsc_pg->tsc_sequence);
+ if (!sequence)
+ return U64_MAX;
+ /*
+ * Make sure we read sequence before we read other values from
+ * TSC page.
+ */
+ smp_rmb();
+
+ scale = READ_ONCE(tsc_pg->tsc_scale);
+ offset = READ_ONCE(tsc_pg->tsc_offset);
+ *cur_tsc = hv_get_raw_timer();
+
+ /*
+ * Make sure we read sequence after we read all other values
+ * from TSC page.
+ */
+ smp_rmb();
+
+ } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
+
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+}
+
+static inline notrace u64
+hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 cur_tsc;
+
+ return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
+}
+
+#else /* CONFIG_HYPERV_TSCPAGE */
+static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+ return NULL;
+}
+
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
+{
+ return U64_MAX;
+}
+#endif /* CONFIG_HYPERV_TSCPAGE */
+
+#endif
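
hv_read_tsc_page_tsc() implements the TLFS sequence-lock protocol described in the comment above: retry while the sequence changes, and treat sequence 0 as "unreliable". A hedged sketch of a consumer (the in-tree clocksource instead falls back to the partition reference MSR rather than returning 0):

#include <clocksource/hyperv_timer.h>

/* Illustrative read callback; error handling simplified. */
static u64 example_read_hv_clock(struct clocksource *cs)
{
	const struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
	u64 now;

	if (!tsc_pg)		/* TSC page not available */
		return 0;

	now = hv_read_tsc_page(tsc_pg);
	return now == U64_MAX ? 0 : now;	/* U64_MAX: sequence was 0 */
}
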
diff --git a/include/clocksource/timer-davinci.h b/include/clocksource/timer-davinci.h
new file mode 100644
index 000000000000..1dcc1333fbc8
--- /dev/null
+++ b/include/clocksource/timer-davinci.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * TI DaVinci clocksource driver
+ *
+ * Copyright (C) 2019 Texas Instruments
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ */
+
+#ifndef __TIMER_DAVINCI_H__
+#define __TIMER_DAVINCI_H__
+
+#include <linux/clk.h>
+#include <linux/ioport.h>
+
+enum {
+ DAVINCI_TIMER_CLOCKEVENT_IRQ,
+ DAVINCI_TIMER_CLOCKSOURCE_IRQ,
+ DAVINCI_TIMER_NUM_IRQS,
+};
+
+/**
+ * struct davinci_timer_cfg - davinci clocksource driver configuration struct
+ * @reg: register range resource
+ * @irq: clockevent and clocksource interrupt resources
+ * @cmp_off: if set - it specifies the compare register used for clockevent
+ *
+ * Note: if the compare register is specified, the driver will use the bottom
+ * clock half for both clocksource and clockevent and the compare register
+ * to generate event irqs. The user must supply the correct compare register
+ * interrupt number.
+ *
+ * This is only used by da830, whose DSP uses the top half. The timer
+ * driver still configures the top half to run in free-run mode.
+ */
+struct davinci_timer_cfg {
+ struct resource reg;
+ struct resource irq[DAVINCI_TIMER_NUM_IRQS];
+ unsigned int cmp_off;
+};
+
+int __init davinci_timer_register(struct clk *clk,
+ const struct davinci_timer_cfg *data);
+
+#endif /* __TIMER_DAVINCI_H__ */
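
Platform code fills a davinci_timer_cfg and hands it to davinci_timer_register() together with the timer clock. An illustrative registration (the base address and IRQ numbers are placeholders, not taken from a real board file):

#include <linux/sizes.h>
#include <clocksource/timer-davinci.h>

static const struct davinci_timer_cfg example_timer_cfg = {
	.reg = DEFINE_RES_MEM(0x01c21400, SZ_4K),	/* placeholder base */
	.irq = {
		[DAVINCI_TIMER_CLOCKEVENT_IRQ]	= DEFINE_RES_IRQ(21),
		[DAVINCI_TIMER_CLOCKSOURCE_IRQ] = DEFINE_RES_IRQ(22),
	},
	/* .cmp_off left 0: both clock halves used as usual (non-da830) */
};

/* In __init timer code: davinci_timer_register(clk, &example_timer_cfg); */
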
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 61bb10490492..3c245b1859e7 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -317,21 +317,7 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
*
* Return: 0 if the cipher operation was successful; < 0 if an error occurred
*/
-static inline int crypto_aead_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct crypto_alg *alg = aead->base.__crt_alg;
- unsigned int cryptlen = req->cryptlen;
- int ret;
-
- crypto_stats_get(alg);
- if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
- ret = -ENOKEY;
- else
- ret = crypto_aead_alg(aead)->encrypt(req);
- crypto_stats_aead_encrypt(cryptlen, alg, ret);
- return ret;
-}
+int crypto_aead_encrypt(struct aead_request *req);
/**
* crypto_aead_decrypt() - decrypt ciphertext
@@ -355,23 +341,7 @@ static inline int crypto_aead_encrypt(struct aead_request *req)
* integrity of the ciphertext or the associated data was violated);
* < 0 if an error occurred.
*/
-static inline int crypto_aead_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct crypto_alg *alg = aead->base.__crt_alg;
- unsigned int cryptlen = req->cryptlen;
- int ret;
-
- crypto_stats_get(alg);
- if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
- ret = -ENOKEY;
- else if (req->cryptlen < crypto_aead_authsize(aead))
- ret = -EINVAL;
- else
- ret = crypto_aead_alg(aead)->decrypt(req);
- crypto_stats_aead_decrypt(cryptlen, alg, ret);
- return ret;
-}
+int crypto_aead_decrypt(struct aead_request *req);
/**
* DOC: Asynchronous AEAD Request Handle
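
crypto_aead_encrypt() and crypto_aead_decrypt() move from inline wrappers to regular exported functions (the bodies land in crypto/aead.c); callers remain source-compatible. The usual request pattern, as a sketch (synchronous style, async completion not shown; the tfm is assumed to already have its key set):

#include <crypto/aead.h>
#include <linux/scatterlist.h>

/* Illustrative: seal @cryptlen bytes in @sg in place, with @assoclen
 * bytes of associated data ahead of them and @iv as the nonce. */
static int example_aead_seal(struct crypto_aead *tfm, struct scatterlist *sg,
			     unsigned int assoclen, unsigned int cryptlen,
			     u8 *iv)
{
	struct aead_request *req = aead_request_alloc(tfm, GFP_KERNEL);
	int err;

	if (!req)
		return -ENOMEM;

	aead_request_set_callback(req, 0, NULL, NULL);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, sg, sg, cryptlen, iv);
	err = crypto_aead_encrypt(req);
	aead_request_free(req);
	return err;
}
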
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 743d626479ef..dc1106af95c3 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -189,7 +189,6 @@ void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
int crypto_enqueue_request(struct crypto_queue *queue,
struct crypto_async_request *request);
struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue);
-int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm);
static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
{
return queue->qlen;
@@ -371,12 +370,6 @@ static inline void *ablkcipher_request_ctx(struct ablkcipher_request *req)
return req->__ctx;
}
-static inline int ablkcipher_tfm_in_queue(struct crypto_queue *queue,
- struct crypto_ablkcipher *tfm)
-{
- return crypto_tfm_in_queue(queue, crypto_ablkcipher_tfm(tfm));
-}
-
static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb,
u32 type, u32 mask)
{
diff --git a/include/crypto/arc4.h b/include/crypto/arc4.h
index 5b2c24ab0139..f3c22fe01704 100644
--- a/include/crypto/arc4.h
+++ b/include/crypto/arc4.h
@@ -6,8 +6,18 @@
#ifndef _CRYPTO_ARC4_H
#define _CRYPTO_ARC4_H
+#include <linux/types.h>
+
#define ARC4_MIN_KEY_SIZE 1
#define ARC4_MAX_KEY_SIZE 256
#define ARC4_BLOCK_SIZE 1
+struct arc4_ctx {
+ u32 S[256];
+ u32 x, y;
+};
+
+int arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len);
+void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len);
+
#endif /* _CRYPTO_ARC4_H */
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index 1fc70a69d550..d1e723c6a37d 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -41,7 +41,7 @@ static inline void chacha20_block(u32 *state, u8 *stream)
}
void hchacha_block(const u32 *in, u32 *out, int nrounds);
-void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv);
+void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keysize);
diff --git a/include/crypto/crypto_wq.h b/include/crypto/crypto_wq.h
deleted file mode 100644
index 23114746ac08..000000000000
--- a/include/crypto/crypto_wq.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef CRYPTO_WQ_H
-#define CRYPTO_WQ_H
-
-#include <linux/workqueue.h>
-
-extern struct workqueue_struct *kcrypto_wq;
-#endif
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 3fb581bf3b87..8c9af21efce1 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -129,6 +129,8 @@ struct drbg_state {
bool seeded; /* DRBG fully seeded? */
bool pr; /* Prediction resistance enabled? */
+ bool fips_primed; /* Continuous test primed? */
+ unsigned char *prev; /* FIPS 140-2 continuous test value */
struct work_struct seed_work; /* asynchronous seeding support */
struct crypto_rng *jent;
const struct drbg_state_ops *d_ops;
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 31e0662fa429..bfc9db7b100d 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -196,12 +196,6 @@ static inline struct ahash_request *ahash_dequeue_request(
return ahash_request_cast(crypto_dequeue_request(queue));
}
-static inline int ahash_tfm_in_queue(struct crypto_queue *queue,
- struct crypto_ahash *tfm)
-{
- return crypto_tfm_in_queue(queue, crypto_ahash_tfm(tfm));
-}
-
static inline void *crypto_shash_ctx(struct crypto_shash *tfm)
{
return crypto_tfm_ctx(&tfm->base);
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index fe0376d5a471..d68faa5759ad 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -200,6 +200,66 @@ static inline unsigned int crypto_skcipher_alg_max_keysize(
return alg->max_keysize;
}
+static inline unsigned int crypto_skcipher_alg_chunksize(
+ struct skcipher_alg *alg)
+{
+ if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+ CRYPTO_ALG_TYPE_BLKCIPHER)
+ return alg->base.cra_blocksize;
+
+ if (alg->base.cra_ablkcipher.encrypt)
+ return alg->base.cra_blocksize;
+
+ return alg->chunksize;
+}
+
+static inline unsigned int crypto_skcipher_alg_walksize(
+ struct skcipher_alg *alg)
+{
+ if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+ CRYPTO_ALG_TYPE_BLKCIPHER)
+ return alg->base.cra_blocksize;
+
+ if (alg->base.cra_ablkcipher.encrypt)
+ return alg->base.cra_blocksize;
+
+ return alg->walksize;
+}
+
+/**
+ * crypto_skcipher_chunksize() - obtain chunk size
+ * @tfm: cipher handle
+ *
+ * The block size is set to one for ciphers such as CTR. However,
+ * you still need to provide incremental updates in multiples of
+ * the underlying block size as the IV does not have sub-block
+ * granularity. This is known in this API as the chunk size.
+ *
+ * Return: chunk size in bytes
+ */
+static inline unsigned int crypto_skcipher_chunksize(
+ struct crypto_skcipher *tfm)
+{
+ return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm));
+}
+
+/**
+ * crypto_skcipher_walksize() - obtain walk size
+ * @tfm: cipher handle
+ *
+ * In some cases, algorithms can only perform optimally when operating on
+ * multiple blocks in parallel. This is reflected by the walksize, which
+ * must be a multiple of the chunksize (or equal if the concern does not
+ * apply).
+ *
+ * Return: walk size in bytes
+ */
+static inline unsigned int crypto_skcipher_walksize(
+ struct crypto_skcipher *tfm)
+{
+ return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm));
+}
+
/* Helpers for simple block cipher modes of operation */
struct skcipher_ctx_simple {
struct crypto_cipher *cipher; /* underlying block cipher */
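
The chunksize comment above encodes a real caller obligation: stream ciphers advertise a block size of 1, yet incremental updates still have to arrive in multiples of the chunk size so the IV stays aligned. A sketch of how a caller might size each partial update (example_step is hypothetical):

#include <crypto/internal/skcipher.h>

/* Illustrative: pick the largest chunk-aligned piece of @remaining;
 * only the final piece may be a partial chunk. */
static unsigned int example_step(struct crypto_skcipher *tfm,
				 unsigned int remaining)
{
	unsigned int chunk = crypto_skcipher_chunksize(tfm);

	if (remaining < chunk)
		return remaining;		/* final partial piece */
	return remaining - (remaining % chunk);	/* keep IV block-aligned */
}
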
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index ce7fa0973580..37c164234d97 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -288,66 +288,6 @@ static inline unsigned int crypto_sync_skcipher_ivsize(
return crypto_skcipher_ivsize(&tfm->base);
}
-static inline unsigned int crypto_skcipher_alg_chunksize(
- struct skcipher_alg *alg)
-{
- if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
- CRYPTO_ALG_TYPE_BLKCIPHER)
- return alg->base.cra_blocksize;
-
- if (alg->base.cra_ablkcipher.encrypt)
- return alg->base.cra_blocksize;
-
- return alg->chunksize;
-}
-
-static inline unsigned int crypto_skcipher_alg_walksize(
- struct skcipher_alg *alg)
-{
- if ((alg->base.cra_flags & CRYPTO_ALG_TYPE_MASK) ==
- CRYPTO_ALG_TYPE_BLKCIPHER)
- return alg->base.cra_blocksize;
-
- if (alg->base.cra_ablkcipher.encrypt)
- return alg->base.cra_blocksize;
-
- return alg->walksize;
-}
-
-/**
- * crypto_skcipher_chunksize() - obtain chunk size
- * @tfm: cipher handle
- *
- * The block size is set to one for ciphers such as CTR. However,
- * you still need to provide incremental updates in multiples of
- * the underlying block size as the IV does not have sub-block
- * granularity. This is known in this API as the chunk size.
- *
- * Return: chunk size in bytes
- */
-static inline unsigned int crypto_skcipher_chunksize(
- struct crypto_skcipher *tfm)
-{
- return crypto_skcipher_alg_chunksize(crypto_skcipher_alg(tfm));
-}
-
-/**
- * crypto_skcipher_walksize() - obtain walk size
- * @tfm: cipher handle
- *
- * In some cases, algorithms can only perform optimally when operating on
- * multiple blocks in parallel. This is reflected by the walksize, which
- * must be a multiple of the chunksize (or equal if the concern does not
- * apply)
- *
- * Return: walk size in bytes
- */
-static inline unsigned int crypto_skcipher_walksize(
- struct crypto_skcipher *tfm)
-{
- return crypto_skcipher_alg_walksize(crypto_skcipher_alg(tfm));
-}
-
/**
* crypto_skcipher_blocksize() - obtain block size of cipher
* @tfm: cipher handle
@@ -479,21 +419,7 @@ static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm(
*
* Return: 0 if the cipher operation was successful; < 0 if an error occurred
*/
-static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_alg *alg = tfm->base.__crt_alg;
- unsigned int cryptlen = req->cryptlen;
- int ret;
-
- crypto_stats_get(alg);
- if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
- ret = -ENOKEY;
- else
- ret = tfm->encrypt(req);
- crypto_stats_skcipher_encrypt(cryptlen, ret, alg);
- return ret;
-}
+int crypto_skcipher_encrypt(struct skcipher_request *req);
/**
* crypto_skcipher_decrypt() - decrypt ciphertext
@@ -506,21 +432,7 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
*
* Return: 0 if the cipher operation was successful; < 0 if an error occurred
*/
-static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_alg *alg = tfm->base.__crt_alg;
- unsigned int cryptlen = req->cryptlen;
- int ret;
-
- crypto_stats_get(alg);
- if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
- ret = -ENOKEY;
- else
- ret = tfm->decrypt(req);
- crypto_stats_skcipher_decrypt(cryptlen, ret, alg);
- return ret;
-}
+int crypto_skcipher_decrypt(struct skcipher_request *req);
/**
* DOC: Symmetric Key Cipher Request Handle
diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h
index 20485ca481f4..36b89a933310 100644
--- a/include/keys/request_key_auth-type.h
+++ b/include/keys/request_key_auth-type.h
@@ -14,6 +14,7 @@
* Authorisation record for request_key().
*/
struct request_key_auth {
+ struct rcu_head rcu;
struct key *target_key;
struct key *dest_keyring;
const struct cred *cred;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d315d86844e4..469be6844703 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/ioport.h> /* for struct resource */
+#include <linux/irqdomain.h>
#include <linux/resource_ext.h>
#include <linux/device.h>
#include <linux/property.h>
@@ -314,6 +315,12 @@ int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
void acpi_set_irq_model(enum acpi_irq_model_id model,
struct fwnode_handle *fwnode);
+struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
+ unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data);
+
#ifdef CONFIG_X86_IO_APIC
extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
#else
@@ -1303,6 +1310,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
#ifdef CONFIG_ACPI_PPTT
int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_package(unsigned int cpu);
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
#else
static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
@@ -1313,6 +1321,10 @@ static inline int find_acpi_cpu_topology_package(unsigned int cpu)
{
return -EINVAL;
}
+static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+ return -EINVAL;
+}
static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
{
return -EINVAL;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index d9bdc1a7f4e7..1cfe05ea1d89 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_scale);
struct sched_domain;
static inline
-unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu)
+unsigned long topology_get_cpu_scale(int cpu)
{
return per_cpu(cpu_scale, cpu);
}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3a4f2415bb7c..97d0925454df 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -182,6 +182,9 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
}
extern u32 audit_enabled;
+
+extern int audit_signal_info(int sig, struct task_struct *t);
+
#else /* CONFIG_AUDIT */
static inline __printf(4, 5)
void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
@@ -235,6 +238,12 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
}
#define audit_enabled AUDIT_OFF
+
+static inline int audit_signal_info(int sig, struct task_struct *t)
+{
+ return 0;
+}
+
#endif /* CONFIG_AUDIT */
#ifdef CONFIG_AUDIT_COMPAT_GENERIC
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 70e19bc6cc9f..46b92cd61d0c 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -17,6 +17,8 @@ enum cache_type {
CACHE_TYPE_UNIFIED = BIT(2),
};
+extern unsigned int coherency_max_size;
+
/**
* struct cacheinfo - represent a cache leaf node
* @id: This cache's id. It is unique among caches with the same (type, level).
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index b4e766e93f6e..c5311935239d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -624,7 +624,7 @@ struct cftype {
/*
* Control Group subsystem type.
- * See Documentation/cgroup-v1/cgroups.txt for details
+ * See Documentation/cgroup-v1/cgroups.rst for details
*/
struct cgroup_subsys {
struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0297f930a56e..3745ecdad925 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -131,6 +131,8 @@ void cgroup_free(struct task_struct *p);
int cgroup_init_early(void);
int cgroup_init(void);
+int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v);
+
/*
* Iteration helpers and macros.
*/
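
cgroup_parse_float() parses a decimal string into a fixed-point s64 with dec_shift decimal places, for interface files that accept fractional values. A hedged usage sketch:

#include <linux/cgroup.h>

/* Illustrative: with dec_shift == 2, "2.5" parses to 250 and "0.05"
 * to 5; returns 0 on success, -errno otherwise (assumed semantics). */
static int example_parse(const char *buf, s64 *v)
{
	return cgroup_parse_float(buf, 2, v);
}
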
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 5c6062206760..87c211adf49e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -116,10 +116,10 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_ACPI_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
+ CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
- CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
CPUHP_AP_QCOM_TIMER_STARTING,
CPUHP_AP_TEGRA_TIMER_STARTING,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 9cf8f3ce0e50..19ea3a371d7b 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -49,7 +49,6 @@
#define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b
#define CRYPTO_ALG_TYPE_RNG 0x0000000c
#define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d
-#define CRYPTO_ALG_TYPE_DIGEST 0x0000000e
#define CRYPTO_ALG_TYPE_HASH 0x0000000e
#define CRYPTO_ALG_TYPE_SHASH 0x0000000e
#define CRYPTO_ALG_TYPE_AHASH 0x0000000f
@@ -323,6 +322,17 @@ struct cipher_alg {
void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
};
+/**
+ * struct compress_alg - compression/decompression algorithm
+ * @coa_compress: Compress a buffer of specified length, storing the resulting
+ * data in the specified buffer. Return the length of the
+ * compressed data in dlen.
+ * @coa_decompress: Decompress the source buffer, storing the uncompressed
+ * data in the specified buffer. The length of the data is
+ * returned in dlen.
+ *
+ * All fields are mandatory.
+ */
struct compress_alg {
int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
unsigned int slen, u8 *dst, unsigned int *dlen);
diff --git a/include/linux/device.h b/include/linux/device.h
index 848fc71c6ba6..4a295e324ac5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -704,7 +704,8 @@ extern unsigned long devm_get_free_pages(struct device *dev,
gfp_t gfp_mask, unsigned int order);
extern void devm_free_pages(struct device *dev, unsigned long addr);
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
+void __iomem *devm_ioremap_resource(struct device *dev,
+ const struct resource *res);
void __iomem *devm_of_iomap(struct device *dev,
struct device_node *node, int index,
diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index f2b3ae22e6b7..976cbbdb2832 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -26,7 +26,8 @@
#include <uapi/linux/dns_resolver.h>
-extern int dns_query(const char *type, const char *name, size_t namelen,
+struct net;
+extern int dns_query(struct net *net, const char *type, const char *name, size_t namelen,
const char *options, char **_result, time64_t *_expiry,
bool invalidate);
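
This is the prototype behind the cifs and nfs call-site changes earlier in the diff: dns_query() now takes the network namespace to resolve in. A sketch of the updated call, scoped to the caller's namespace as cifs does:

#include <linux/dns_resolver.h>
#include <linux/nsproxy.h>
#include <linux/sched.h>

/* Illustrative wrapper resolving @name in the current task's netns. */
static int example_resolve(const char *name, size_t len, char **ip_addr)
{
	return dns_query(current->nsproxy->net_ns, NULL, name, len,
			 NULL, ip_addr, NULL, false);
}
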
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6ebc2098cfe1..f87fabea4a85 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -689,6 +689,7 @@ void efi_native_runtime_setup(void);
#define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
#define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
#define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa)
+#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
#define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
typedef struct {
@@ -996,6 +997,7 @@ extern struct efi {
unsigned long mem_attr_table; /* memory attributes table */
unsigned long rng_seed; /* UEFI firmware random seed */
unsigned long tpm_log; /* TPM2 Event Log table */
+ unsigned long tpm_final_log; /* TPM2 Final Events Log table */
unsigned long mem_reserve; /* Linux EFI memreserve table */
efi_get_time_t *get_time;
efi_set_time_t *set_time;
@@ -1706,12 +1708,20 @@ struct linux_efi_random_seed {
struct linux_efi_tpm_eventlog {
u32 size;
+ u32 final_events_preboot_size;
u8 version;
u8 log[];
};
extern int efi_tpm_eventlog_init(void);
+struct efi_tcg2_final_events_table {
+ u64 version;
+ u64 nr_events;
+ u8 events[];
+};
+extern int efi_tpm_final_log_size;
+
/*
* efi_runtime_service() function identifiers.
* "NONE" is used by efi_recover_from_page_fault() to check if the page
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index aa027f7bcb3e..73f8c3cb9588 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -89,7 +89,7 @@ static inline unsigned long em_pd_energy(struct em_perf_domain *pd,
* like schedutil.
*/
cpu = cpumask_first(to_cpumask(pd->cpus));
- scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ scale_cpu = arch_scale_cpu_capacity(cpu);
cs = &pd->table[pd->nr_cap_states - 1];
freq = map_util_freq(max_util, cs->frequency, scale_cpu);
diff --git a/include/linux/fmc-sdb.h b/include/linux/fmc-sdb.h
deleted file mode 100644
index bec899f0867c..000000000000
--- a/include/linux/fmc-sdb.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This file is separate from sdb.h, because I want that one to remain
- * unchanged (as far as possible) from the official sdb distribution
- *
- * This file and associated functionality are a playground for me to
- * understand stuff which will later be implemented in more generic places.
- */
-#include <linux/sdb.h>
-
-/* This is the union of all currently defined types */
-union sdb_record {
- struct sdb_interconnect ic;
- struct sdb_device dev;
- struct sdb_bridge bridge;
- struct sdb_integration integr;
- struct sdb_empty empty;
- struct sdb_synthesis synthesis;
- struct sdb_repo_url repo_url;
-};
-
-struct fmc_device;
-
-/* Every sdb table is turned into this structure */
-struct sdb_array {
- int len;
- int level;
- unsigned long baseaddr;
- struct fmc_device *fmc; /* the device that hosts it */
- struct sdb_array *parent; /* NULL at root */
- union sdb_record *record; /* copies of the struct */
- struct sdb_array **subtree; /* only valid for bridge items */
-};
-
-extern int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address);
-extern void fmc_show_sdb_tree(const struct fmc_device *fmc);
-extern signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor,
- uint32_t device, unsigned long *sz);
-extern int fmc_free_sdb_tree(struct fmc_device *fmc);
diff --git a/include/linux/fmc.h b/include/linux/fmc.h
deleted file mode 100644
index b355f3806f3f..000000000000
--- a/include/linux/fmc.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2012 CERN (www.cern.ch)
- * Author: Alessandro Rubini <rubini@gnudd.com>
- *
- * This work is part of the White Rabbit project, a research effort led
- * by CERN, the European Institute for Nuclear Research.
- */
-#ifndef __LINUX_FMC_H__
-#define __LINUX_FMC_H__
-#include <linux/types.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-
-struct fmc_device;
-struct fmc_driver;
-
-/*
- * This bus abstraction is developed separately from drivers, so we need
- * to check the version of the data structures we receive.
- */
-
-#define FMC_MAJOR 3
-#define FMC_MINOR 0
-#define FMC_VERSION ((FMC_MAJOR << 16) | FMC_MINOR)
-#define __FMC_MAJOR(x) ((x) >> 16)
-#define __FMC_MINOR(x) ((x) & 0xffff)
-
-/*
- * The device identification, as defined by the IPMI FRU (Field Replaceable
- * Unit) includes four different strings to describe the device. Here we
- * only match the "Board Manufacturer" and the "Board Product Name",
- * ignoring the "Board Serial Number" and "Board Part Number". All 4 are
- * expected to be strings, so they are treated as zero-terminated C strings.
- * Unspecified string (NULL) means "any", so if both are unspecified this
- * is a catch-all driver. So null entries are allowed and we use array
- * and length. This is unlike pci and usb that use null-terminated arrays
- */
-struct fmc_fru_id {
- char *manufacturer;
- char *product_name;
-};
-
-/*
- * If the FPGA is already programmed (think Etherbone or the second
- * SVEC slot), we can match on SDB devices in the memory image. This
- * match uses an array of devices that must all be present, and the
- * match is based on vendor and device only. Further checks are expected
- * to happen in the probe function. Zero means "any" and catch-all is allowed.
- */
-struct fmc_sdb_one_id {
- uint64_t vendor;
- uint32_t device;
-};
-struct fmc_sdb_id {
- struct fmc_sdb_one_id *cores;
- int cores_nr;
-};
-
-struct fmc_device_id {
- struct fmc_fru_id *fru_id;
- int fru_id_nr;
- struct fmc_sdb_id *sdb_id;
- int sdb_id_nr;
-};
-
-/* This sizes the module_param_array used by generic module parameters */
-#define FMC_MAX_CARDS 32
-
-/* The driver is a pretty simple thing */
-struct fmc_driver {
- unsigned long version;
- struct device_driver driver;
- int (*probe)(struct fmc_device *);
- int (*remove)(struct fmc_device *);
- const struct fmc_device_id id_table;
- /* What follows is for generic module parameters */
- int busid_n;
- int busid_val[FMC_MAX_CARDS];
- int gw_n;
- char *gw_val[FMC_MAX_CARDS];
-};
-#define to_fmc_driver(x) container_of((x), struct fmc_driver, driver)
-
-/* These are the generic parameters, that drivers may instantiate */
-#define FMC_PARAM_BUSID(_d) \
- module_param_array_named(busid, _d.busid_val, int, &_d.busid_n, 0444)
-#define FMC_PARAM_GATEWARE(_d) \
- module_param_array_named(gateware, _d.gw_val, charp, &_d.gw_n, 0444)
-
-/*
- * Drivers may need to configure gpio pins in the carrier. To read input
- * (a very uncommon operation, and definitely not in the hot paths), just
- * configure one gpio only and get 0 or 1 as retval of the config method
- */
-struct fmc_gpio {
- char *carrier_name; /* name or NULL for virtual pins */
- int gpio;
- int _gpio; /* internal use by the carrier */
- int mode; /* GPIOF_DIR_OUT etc, from <linux/gpio.h> */
- int irqmode; /* IRQF_TRIGGER_LOW and so on */
-};
-
-/* The numbering of gpio pins allows access to raw pins or virtual roles */
-#define FMC_GPIO_RAW(x) (x) /* 4096 of them */
-#define __FMC_GPIO_IS_RAW(x) ((x) < 0x1000)
-#define FMC_GPIO_IRQ(x) ((x) + 0x1000) /* 256 of them */
-#define FMC_GPIO_LED(x) ((x) + 0x1100) /* 256 of them */
-#define FMC_GPIO_KEY(x) ((x) + 0x1200) /* 256 of them */
-#define FMC_GPIO_TP(x) ((x) + 0x1300) /* 256 of them */
-#define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */
-/* We may add SCL and SDA, or other roles if the need arises */
-
-/* GPIOF_DIR_IN etc are missing before 3.0. copy from <linux/gpio.h> */
-#ifndef GPIOF_DIR_IN
-# define GPIOF_DIR_OUT (0 << 0)
-# define GPIOF_DIR_IN (1 << 0)
-# define GPIOF_INIT_LOW (0 << 1)
-# define GPIOF_INIT_HIGH (1 << 1)
-#endif
-
-/*
- * The operations are offered by each carrier and should make driver
- * design completely independent of the carrier. Named GPIO pins may be
- * the exception.
- */
-struct fmc_operations {
- uint32_t (*read32)(struct fmc_device *fmc, int offset);
- void (*write32)(struct fmc_device *fmc, uint32_t value, int offset);
- int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv);
- int (*reprogram_raw)(struct fmc_device *f, struct fmc_driver *d,
- void *gw, unsigned long len);
- int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw);
- int (*irq_request)(struct fmc_device *fmc, irq_handler_t h,
- char *name, int flags);
- void (*irq_ack)(struct fmc_device *fmc);
- int (*irq_free)(struct fmc_device *fmc);
- int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio,
- int ngpio);
- int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l);
- int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l);
-};
-
-/* Prefer this helper rather than calling of fmc->reprogram directly */
-int fmc_reprogram_raw(struct fmc_device *fmc, struct fmc_driver *d,
- void *gw, unsigned long len, int sdb_entry);
-extern int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw,
- int sdb_entry);
-
-/*
- * The device reports all information needed to access hw.
- *
- * If we have eeprom_len and not contents, the core reads it.
- * Then, parsing of identifiers is done by the core which fills fmc_fru_id..
- * Similarly a device that must be matched based on SDB cores must
- * fill the entry point and the core will scan the bus (FIXME: sdb match)
- */
-struct fmc_device {
- unsigned long version;
- unsigned long flags;
- struct module *owner; /* char device must pin it */
- struct fmc_fru_id id; /* for EEPROM-based match */
- struct fmc_operations *op; /* carrier-provided */
- int irq; /* according to host bus. 0 == none */
- int eeprom_len; /* Usually 8kB, may be less */
- int eeprom_addr; /* 0x50, 0x52 etc */
- uint8_t *eeprom; /* Full contents or leading part */
- char *carrier_name; /* "SPEC" or similar, for special use */
- void *carrier_data; /* "struct spec *" or equivalent */
- __iomem void *fpga_base; /* May be NULL (Etherbone) */
- __iomem void *slot_base; /* Set by the driver */
- struct fmc_device **devarray; /* Allocated by the bus */
- int slot_id; /* Index in the slot array */
- int nr_slots; /* Number of slots in this carrier */
- unsigned long memlen; /* Used for the char device */
- struct device dev; /* For Linux use */
- struct device *hwdev; /* The underlying hardware device */
- unsigned long sdbfs_entry;
- struct sdb_array *sdb;
- uint32_t device_id; /* Filled by the device */
- char *mezzanine_name; /* Defaults to ``fmc'' */
- void *mezzanine_data;
-
- struct dentry *dbg_dir;
- struct dentry *dbg_sdb_dump;
-};
-#define to_fmc_device(x) container_of((x), struct fmc_device, dev)
-
-#define FMC_DEVICE_HAS_GOLDEN 1
-#define FMC_DEVICE_HAS_CUSTOM 2
-#define FMC_DEVICE_NO_MEZZANINE 4
-#define FMC_DEVICE_MATCH_SDB 8 /* fmc-core must scan sdb in fpga */
-
-/*
- * If fpga_base can be used, the carrier offers no readl/writel methods, and
- * this expands to a single, fast, I/O access.
- */
-static inline uint32_t fmc_readl(struct fmc_device *fmc, int offset)
-{
- if (unlikely(fmc->op->read32))
- return fmc->op->read32(fmc, offset);
- return readl(fmc->fpga_base + offset);
-}
-static inline void fmc_writel(struct fmc_device *fmc, uint32_t val, int off)
-{
- if (unlikely(fmc->op->write32))
- fmc->op->write32(fmc, val, off);
- else
- writel(val, fmc->fpga_base + off);
-}
-
-/* pci-like naming */
-static inline void *fmc_get_drvdata(const struct fmc_device *fmc)
-{
- return dev_get_drvdata(&fmc->dev);
-}
-
-static inline void fmc_set_drvdata(struct fmc_device *fmc, void *data)
-{
- dev_set_drvdata(&fmc->dev, data);
-}
-
-struct fmc_gateware {
- void *bitstream;
- unsigned long len;
-};
-
-/* The 5 access points */
-extern int fmc_driver_register(struct fmc_driver *drv);
-extern void fmc_driver_unregister(struct fmc_driver *drv);
-extern int fmc_device_register(struct fmc_device *tdev);
-extern int fmc_device_register_gw(struct fmc_device *tdev,
- struct fmc_gateware *gw);
-extern void fmc_device_unregister(struct fmc_device *tdev);
-
-/* Three more for device sets, all driven by the same FPGA */
-extern int fmc_device_register_n(struct fmc_device **devs, int n);
-extern int fmc_device_register_n_gw(struct fmc_device **devs, int n,
- struct fmc_gateware *gw);
-extern void fmc_device_unregister_n(struct fmc_device **devs, int n);
-
-/* Internal cross-calls between files; not exported to other modules */
-extern int fmc_match(struct device *dev, struct device_driver *drv);
-extern int fmc_fill_id_info(struct fmc_device *fmc);
-extern void fmc_free_id_info(struct fmc_device *fmc);
-extern void fmc_dump_eeprom(const struct fmc_device *fmc);
-
-/* helpers for FMC operations */
-extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h,
- char *name, int flags);
-extern void fmc_irq_free(struct fmc_device *fmc);
-extern void fmc_irq_ack(struct fmc_device *fmc);
-extern int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv);
-extern int fmc_gpio_config(struct fmc_device *fmc, struct fmc_gpio *gpio,
- int ngpio);
-extern int fmc_read_ee(struct fmc_device *fmc, int pos, void *d, int l);
-extern int fmc_write_ee(struct fmc_device *fmc, int pos, const void *d, int l);
-
-#endif /* __LINUX_FMC_H__ */
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index a1d273c96016..8d58386aadd5 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -18,6 +18,7 @@ struct seq_file;
struct gpio_device;
struct module;
enum gpiod_flags;
+enum gpio_lookup_flags;
#ifdef CONFIG_GPIOLIB
@@ -102,13 +103,6 @@ struct gpio_irq_chip {
unsigned int num_parents;
/**
- * @parent_irq:
- *
- * For use by gpiochip_set_cascaded_irqchip()
- */
- unsigned int parent_irq;
-
- /**
* @parents:
*
* A list of interrupt parents of a GPIO chip. This is owned by the
@@ -167,7 +161,7 @@ struct gpio_irq_chip {
*/
void (*irq_disable)(struct irq_data *data);
};
-#endif
+#endif /* CONFIG_GPIOLIB_IRQCHIP */
/**
* struct gpio_chip - abstract a GPIO controller
@@ -200,6 +194,8 @@ struct gpio_irq_chip {
* @dbg_show: optional routine to show contents in debugfs; default code
* will be used when this is omitted, but custom code can show extra
* state (such as pullup/pulldown configuration).
+ * @init_valid_mask: optional routine to initialize @valid_mask, to be used if
+ * not all GPIOs are valid.
* @base: identifies the first GPIO number handled by this chip;
* or, if negative during registration, requests dynamic ID allocation.
* DEPRECATION: providing anything non-negative and nailing the base
@@ -307,7 +303,7 @@ struct gpio_chip {
spinlock_t bgpio_lock;
unsigned long bgpio_data;
unsigned long bgpio_dir;
-#endif
+#endif /* CONFIG_GPIO_GENERIC */
#ifdef CONFIG_GPIOLIB_IRQCHIP
/*
@@ -322,7 +318,7 @@ struct gpio_chip {
* used to handle IRQs for most practical cases.
*/
struct gpio_irq_chip irq;
-#endif
+#endif /* CONFIG_GPIOLIB_IRQCHIP */
/**
* @need_valid_mask:
@@ -369,7 +365,7 @@ struct gpio_chip {
*/
int (*of_xlate)(struct gpio_chip *gc,
const struct of_phandle_args *gpiospec, u32 *flags);
-#endif
+#endif /* CONFIG_OF_GPIO */
};
extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -412,7 +408,7 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
})
#else
#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
-#endif
+#endif /* CONFIG_LOCKDEP */
static inline int gpiochip_add(struct gpio_chip *chip)
{
@@ -467,7 +463,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
#define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */
#define BGPIOF_NO_OUTPUT BIT(5) /* only input */
-#endif
+#endif /* CONFIG_GPIO_GENERIC */
#ifdef CONFIG_GPIOLIB_IRQCHIP
@@ -537,7 +533,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
handler, type, true,
&lock_key, &request_key);
}
-#else
+#else /* ! CONFIG_LOCKDEP */
static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
struct irq_chip *irqchip,
unsigned int first_irq,
@@ -588,7 +584,7 @@ int gpiochip_add_pingroup_range(struct gpio_chip *chip,
unsigned int gpio_offset, const char *pin_group);
void gpiochip_remove_pin_ranges(struct gpio_chip *chip);
-#else
+#else /* ! CONFIG_PINCTRL */
static inline int
gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
@@ -614,7 +610,8 @@ gpiochip_remove_pin_ranges(struct gpio_chip *chip)
struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum,
const char *label,
- enum gpiod_flags flags);
+ enum gpio_lookup_flags lflags,
+ enum gpiod_flags dflags);
void gpiochip_free_own_desc(struct gpio_desc *desc);
void devprop_gpiochip_set_names(struct gpio_chip *chip,
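The user-visible change in this hunk is the split of the request flags: lookup flags (polarity, open drain) and direction flags now travel separately through gpiochip_request_own_desc(). A minimal sketch of an updated in-driver call, assuming an illustrative hwnum 0 and label:

/* Hedged sketch only: hwnum 0 and the "my-signal" label are illustrative. */
struct gpio_desc *desc;

desc = gpiochip_request_own_desc(chip, 0, "my-signal",
				 GPIO_ACTIVE_HIGH,	/* enum gpio_lookup_flags */
				 GPIOD_OUT_LOW);	/* enum gpiod_flags */
if (IS_ERR(desc))
	return PTR_ERR(desc);

/* ... use desc ... */
gpiochip_free_own_desc(desc);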
diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h
index 5c6efd394cb0..39b888c40b39 100644
--- a/include/linux/gpio/gpio-reg.h
+++ b/include/linux/gpio/gpio-reg.h
@@ -11,4 +11,4 @@ struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg,
int gpio_reg_resume(struct gpio_chip *gc);
-#endif
+#endif /* GPIO_REG_H */
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index 35f299d1f6a7..1ebe5be05d5f 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -97,7 +97,7 @@ void gpiod_add_lookup_table(struct gpiod_lookup_table *table);
void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n);
void gpiod_remove_lookup_table(struct gpiod_lookup_table *table);
void gpiod_add_hogs(struct gpiod_hog *hogs);
-#else
+#else /* ! CONFIG_GPIOLIB */
static inline
void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {}
static inline
@@ -105,6 +105,6 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {}
static inline
void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {}
static inline void gpiod_add_hogs(struct gpiod_hog *hogs) {}
-#endif
+#endif /* CONFIG_GPIOLIB */
#endif /* __LINUX_GPIO_MACHINE_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2e8957eac4d4..4971100a8cab 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -12,8 +12,8 @@
#ifndef _LINUX_HRTIMER_H
#define _LINUX_HRTIMER_H
+#include <linux/hrtimer_defs.h>
#include <linux/rbtree.h>
-#include <linux/ktime.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu.h>
@@ -298,26 +298,12 @@ struct clock_event_device;
extern void hrtimer_interrupt(struct clock_event_device *dev);
-/*
- * The resolution of the clocks. The resolution value is returned in
- * the clock_getres() system call to give application programmers an
- * idea of the (in)accuracy of timers. Timer values are rounded up to
- * this resolution values.
- */
-# define HIGH_RES_NSEC 1
-# define KTIME_HIGH_RES (HIGH_RES_NSEC)
-# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
-# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
-
extern void clock_was_set_delayed(void);
extern unsigned int hrtimer_resolution;
#else
-# define MONOTONIC_RES_NSEC LOW_RES_NSEC
-# define KTIME_MONOTONIC_RES KTIME_LOW_RES
-
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
static inline void clock_was_set_delayed(void) { }
diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
new file mode 100644
index 000000000000..2d3e3c5fb946
--- /dev/null
+++ b/include/linux/hrtimer_defs.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HRTIMER_DEFS_H
+#define _LINUX_HRTIMER_DEFS_H
+
+#include <linux/ktime.h>
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * The resolution of the clocks. The resolution value is returned in
+ * the clock_getres() system call to give application programmers an
+ * idea of the (in)accuracy of timers. Timer values are rounded up to
+ * this resolution.
+ */
+# define HIGH_RES_NSEC 1
+# define KTIME_HIGH_RES (HIGH_RES_NSEC)
+# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
+# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
+
+#else
+
+# define MONOTONIC_RES_NSEC LOW_RES_NSEC
+# define KTIME_MONOTONIC_RES KTIME_LOW_RES
+
+#endif
+
+#endif
diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index f13fd8b1dd79..1f08fa8d69d2 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -48,7 +48,7 @@ struct i3c_i2c_dev_desc {
#define I3C_LVR_I2C_INDEX(x) ((x) << 5)
#define I3C_LVR_I2C_FM_MODE BIT(4)
-#define I2C_MAX_ADDR GENMASK(9, 0)
+#define I2C_MAX_ADDR GENMASK(6, 0)
/**
* struct i2c_dev_boardinfo - I2C device board information
@@ -250,12 +250,17 @@ struct i3c_device {
* the bus. The only impact in this mode is that the
* high SCL pulse has to stay below 50ns to trick I2C
* devices when transmitting I3C frames
+ * @I3C_BUS_MODE_MIXED_LIMITED: I2C devices without 50ns spike filter are
+ * present on the bus. However, they tolerate SCL
+ * rates up to the maximum SDR frequency.
* @I3C_BUS_MODE_MIXED_SLOW: I2C devices without 50ns spike filter are present
* on the bus
*/
enum i3c_bus_mode {
I3C_BUS_MODE_PURE,
I3C_BUS_MODE_MIXED_FAST,
+ I3C_BUS_MODE_MIXED_LIMITED,
I3C_BUS_MODE_MIXED_SLOW,
};
@@ -390,8 +395,6 @@ struct i3c_bus {
* and i2c_put_dma_safe_msg_buf() helpers provided by the I2C
* framework.
* This method is mandatory.
- * @i2c_funcs: expose the supported I2C functionalities.
- * This method is mandatory.
* @request_ibi: attach an IBI handler to an I3C device. This implies defining
* an IBI handler and the constraints of the IBI (maximum payload
* length and number of pre-allocated slots).
@@ -437,7 +440,6 @@ struct i3c_master_controller_ops {
void (*detach_i2c_dev)(struct i2c_dev_desc *dev);
int (*i2c_xfers)(struct i2c_dev_desc *dev,
const struct i2c_msg *xfers, int nxfers);
- u32 (*i2c_funcs)(struct i3c_master_controller *master);
int (*request_ibi)(struct i3c_dev_desc *dev,
const struct i3c_ibi_setup *req);
void (*free_ibi)(struct i3c_dev_desc *dev);
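With I2C_MAX_ADDR corrected from a 10-bit to a 7-bit mask, validating against it now rejects addresses that the old GENMASK(9, 0) silently accepted. A hedged sketch of such a check; the helper name is hypothetical:

/* Hypothetical validation helper, assuming 7-bit I2C addressing. */
static int example_check_i2c_addr(u16 addr)
{
	if (addr > I2C_MAX_ADDR)	/* 0x7f after this fix */
		return -EINVAL;
	return 0;
}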
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 00036d2f57c3..a20ad398d260 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -23,6 +23,7 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
enum kernel_read_file_id id);
extern void ima_post_path_mknod(struct dentry *dentry);
+extern void ima_kexec_cmdline(const void *buf, int size);
#ifdef CONFIG_IMA_KEXEC
extern void ima_add_kexec_buffer(struct kimage *image);
@@ -89,6 +90,7 @@ static inline void ima_post_path_mknod(struct dentry *dentry)
return;
}
+static inline void ima_kexec_cmdline(const void *buf, int size) {}
#endif /* CONFIG_IMA */
#ifndef CONFIG_IMA_KEXEC
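The new hook lets the kexec_file_load() path hand the next kernel's command line to IMA for measurement, and the CONFIG_IMA=n stub keeps the call site unconditional. A hedged sketch; the field names follow struct kimage and are used illustratively:

/* Sketch only: cmdline_buf/cmdline_buf_len are illustrative kimage fields. */
if (image->cmdline_buf_len)
	ima_kexec_cmdline(image->cmdline_buf, image->cmdline_buf_len);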
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index 626283858563..b9850f5f1906 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -36,4 +36,9 @@ struct gic_kvm_info {
const struct gic_kvm_info *gic_get_kvm_info(void);
+struct irq_domain;
+struct fwnode_handle;
+int gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent);
+
#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 316087da1d09..5686711b0f40 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -157,9 +157,6 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq);
*/
void gic_init(void __iomem *dist , void __iomem *cpu);
-int gicv2m_init(struct fwnode_handle *parent_handle,
- struct irq_domain *parent);
-
void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
int gic_get_cpu_id(unsigned int cpu);
void gic_migrate_target(unsigned int new_cpu_id);
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 3e113a1fa0f1..3526c0aee954 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -215,6 +215,9 @@ extern void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type);
extern void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type);
+extern bool arch_jump_label_transform_queue(struct jump_entry *entry,
+ enum jump_label_type type);
+extern void arch_jump_label_transform_apply(void);
extern int jump_label_text_reserved(void *start, void *end);
extern void static_key_slow_inc(struct static_key *key);
extern void static_key_slow_dec(struct static_key *key);
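The queue/apply pair enables batched text patching: transforms are queued until the architecture's buffer fills (queue returns false), then flushed in one go. A hedged sketch of the expected calling pattern, based only on the declarations above:

/* Sketch of a batch update loop; entry and type come from the caller. */
if (!arch_jump_label_transform_queue(entry, type)) {
	arch_jump_label_transform_apply();		/* buffer full: flush */
	arch_jump_label_transform_queue(entry, type);	/* then retry */
}
/* ... after queueing the last entry ... */
arch_jump_label_transform_apply();			/* flush the remainder */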
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 331cab70db09..4ded94bcf274 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -70,6 +70,9 @@ struct key_type {
*/
size_t def_datalen;
+ unsigned int flags;
+#define KEY_TYPE_NET_DOMAIN 0x00000001 /* Keys of this type have a net namespace domain */
+
/* vet a description */
int (*vet_description)(const char *description);
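A key type opts in to per-network-namespace key domains through the new flags field. A minimal hedged sketch; the type name is illustrative and the operations are left at their NULL defaults:

/* Hedged sketch: a key type whose keys are matched per-netns. */
static struct key_type key_type_example = {
	.name	= "example",
	.flags	= KEY_TYPE_NET_DOMAIN,	/* keys live in a netns domain */
};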
diff --git a/include/linux/key.h b/include/linux/key.h
index 1c8b88b455ef..6fef6684501f 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -27,48 +27,14 @@
/* key handle serial number */
typedef int32_t key_serial_t;
-/* key handle permissions mask */
-typedef uint32_t key_perm_t;
-
struct key;
+struct net;
#ifdef CONFIG_KEYS
-#undef KEY_DEBUGGING
+#include <linux/keyctl.h>
-#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */
-#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */
-#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */
-#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */
-#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */
-#define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */
-#define KEY_POS_ALL 0x3f000000
-
-#define KEY_USR_VIEW 0x00010000 /* user permissions... */
-#define KEY_USR_READ 0x00020000
-#define KEY_USR_WRITE 0x00040000
-#define KEY_USR_SEARCH 0x00080000
-#define KEY_USR_LINK 0x00100000
-#define KEY_USR_SETATTR 0x00200000
-#define KEY_USR_ALL 0x003f0000
-
-#define KEY_GRP_VIEW 0x00000100 /* group permissions... */
-#define KEY_GRP_READ 0x00000200
-#define KEY_GRP_WRITE 0x00000400
-#define KEY_GRP_SEARCH 0x00000800
-#define KEY_GRP_LINK 0x00001000
-#define KEY_GRP_SETATTR 0x00002000
-#define KEY_GRP_ALL 0x00003f00
-
-#define KEY_OTH_VIEW 0x00000001 /* third party permissions... */
-#define KEY_OTH_READ 0x00000002
-#define KEY_OTH_WRITE 0x00000004
-#define KEY_OTH_SEARCH 0x00000008
-#define KEY_OTH_LINK 0x00000010
-#define KEY_OTH_SETATTR 0x00000020
-#define KEY_OTH_ALL 0x0000003f
-
-#define KEY_PERM_UNDEF 0xffffffff
+#undef KEY_DEBUGGING
struct seq_file;
struct user_struct;
@@ -77,13 +43,34 @@ struct cred;
struct key_type;
struct key_owner;
+struct key_tag;
struct keyring_list;
struct keyring_name;
+struct key_tag {
+ struct rcu_head rcu;
+ refcount_t usage;
+ bool removed; /* true when subject removed */
+};
+
struct keyring_index_key {
+ /* [!] If this structure is altered, the union in struct key must change too! */
+ unsigned long hash; /* Hash value */
+ union {
+ struct {
+#ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */
+ u8 desc_len;
+ char desc[sizeof(long) - 1]; /* First few chars of description */
+#else
+ char desc[sizeof(long) - 1]; /* First few chars of description */
+ u8 desc_len;
+#endif
+ };
+ unsigned long x;
+ };
struct key_type *type;
+ struct key_tag *domain_tag; /* Domain of operation */
const char *description;
- size_t desc_len;
};
union key_payload {
@@ -91,6 +78,36 @@ union key_payload {
void *data[4];
};
+struct key_ace {
+ unsigned int type;
+ unsigned int perm;
+ union {
+ kuid_t uid;
+ kgid_t gid;
+ unsigned int subject_id;
+ };
+};
+
+struct key_acl {
+ refcount_t usage;
+ unsigned short nr_ace;
+ bool possessor_viewable;
+ struct rcu_head rcu;
+ struct key_ace aces[];
+};
+
+#define KEY_POSSESSOR_ACE(perms) { \
+ .type = KEY_ACE_SUBJ_STANDARD, \
+ .perm = perms, \
+ .subject_id = KEY_ACE_POSSESSOR \
+ }
+
+#define KEY_OWNER_ACE(perms) { \
+ .type = KEY_ACE_SUBJ_STANDARD, \
+ .perm = perms, \
+ .subject_id = KEY_ACE_OWNER \
+ }
+
/*****************************************************************************/
/*
* key reference with possession attribute handling
@@ -157,6 +174,7 @@ struct key {
struct rw_semaphore sem; /* change vs change sem */
struct key_user *user; /* owner of this key */
void *security; /* security data for this key */
+ struct key_acl __rcu *acl;
union {
time64_t expiry; /* time at which key expires (or 0) */
time64_t revoked_at; /* time at which key was revoked */
@@ -164,7 +182,6 @@ struct key {
time64_t last_used_at; /* last time used for LRU keyring discard */
kuid_t uid;
kgid_t gid;
- key_perm_t perm; /* access permissions */
unsigned short quotalen; /* length added to quota */
unsigned short datalen; /* payload data length
* - may not match RCU dereferenced payload
@@ -188,6 +205,7 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
+#define KEY_FLAG_HAS_ACL 10 /* Set if KEYCTL_SETACL called on key */
/* the key type and key description string
* - the desc is used to match a key against search criteria
@@ -197,7 +215,10 @@ struct key {
union {
struct keyring_index_key index_key;
struct {
+ unsigned long hash;
+ unsigned long len_desc;
struct key_type *type; /* type of key */
+ struct key_tag *domain_tag; /* Domain of operation */
char *description;
};
};
@@ -233,7 +254,7 @@ extern struct key *key_alloc(struct key_type *type,
const char *desc,
kuid_t uid, kgid_t gid,
const struct cred *cred,
- key_perm_t perm,
+ struct key_acl *acl,
unsigned long flags,
struct key_restriction *restrict_link);
@@ -248,6 +269,8 @@ extern struct key *key_alloc(struct key_type *type,
extern void key_revoke(struct key *key);
extern void key_invalidate(struct key *key);
extern void key_put(struct key *key);
+extern bool key_put_tag(struct key_tag *tag);
+extern void key_remove_domain(struct key_tag *domain_tag);
static inline struct key *__key_get(struct key *key)
{
@@ -265,26 +288,61 @@ static inline void key_ref_put(key_ref_t key_ref)
key_put(key_ref_to_ptr(key_ref));
}
-extern struct key *request_key(struct key_type *type,
- const char *description,
- const char *callout_info);
+extern struct key *request_key_tag(struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag,
+ const char *callout_info,
+ struct key_acl *acl);
+
+extern struct key *request_key_rcu(struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag);
extern struct key *request_key_with_auxdata(struct key_type *type,
const char *description,
+ struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux);
+ void *aux,
+ struct key_acl *acl);
-extern struct key *request_key_async(struct key_type *type,
- const char *description,
- const void *callout_info,
- size_t callout_len);
+/**
+ * request_key - Request a key and wait for construction
+ * @type: Type of key.
+ * @description: The searchable description of the key.
+ * @callout_info: The data to pass to the instantiation upcall (or NULL).
+ * @acl: The ACL to attach to a new key (or NULL).
+ *
+ * As for request_key_tag(), but with the default global domain tag.
+ */
+static inline struct key *request_key(struct key_type *type,
+ const char *description,
+ const char *callout_info,
+ struct key_acl *acl)
+{
+ return request_key_tag(type, description, NULL, callout_info, acl);
+}
-extern struct key *request_key_async_with_auxdata(struct key_type *type,
- const char *description,
- const void *callout_info,
- size_t callout_len,
- void *aux);
+#ifdef CONFIG_NET
+/**
+ * request_key_net - Request a key for a net namespace and wait for construction
+ * @type: Type of key.
+ * @description: The searchable description of the key.
+ * @net: The network namespace that is the key's domain of operation.
+ * @callout_info: The data to pass to the instantiation upcall (or NULL).
+ * @acl: The ACL to attach to a new key (or NULL).
+ *
+ * As for request_key() except that it does not add the returned key to a
+ * keyring if found, new keys are always allocated in the user's quota, the
+ * callout_info must be a NUL-terminated string and no auxiliary data can be
+ * passed. Only keys that operate in the specified network namespace are used.
+ *
+ * Furthermore, it then works as wait_for_key_construction() to wait for the
+ * completion of keys undergoing construction with a non-interruptible wait.
+ */
+#define request_key_net(type, description, net, callout_info, acl) \
+ request_key_tag(type, description, net->key_domain, callout_info, acl)
+#endif /* CONFIG_NET */
extern int wait_for_key_construction(struct key *key, bool intr);
@@ -295,7 +353,7 @@ extern key_ref_t key_create_or_update(key_ref_t keyring,
const char *description,
const void *payload,
size_t plen,
- key_perm_t perm,
+ struct key_acl *acl,
unsigned long flags);
extern int key_update(key_ref_t key,
@@ -305,12 +363,17 @@ extern int key_update(key_ref_t key,
extern int key_link(struct key *keyring,
struct key *key);
+extern int key_move(struct key *key,
+ struct key *from_keyring,
+ struct key *to_keyring,
+ unsigned int flags);
+
extern int key_unlink(struct key *keyring,
struct key *key);
extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
const struct cred *cred,
- key_perm_t perm,
+ struct key_acl *acl,
unsigned long flags,
struct key_restriction *restrict_link,
struct key *dest);
@@ -324,7 +387,8 @@ extern int keyring_clear(struct key *keyring);
extern key_ref_t keyring_search(key_ref_t keyring,
struct key_type *type,
- const char *description);
+ const char *description,
+ bool recurse);
extern int keyring_add_key(struct key *keyring,
struct key *key);
@@ -342,18 +406,29 @@ static inline key_serial_t key_serial(const struct key *key)
extern void key_set_timeout(struct key *, unsigned);
extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
- key_perm_t perm);
+ u32 desired_perm);
+extern void key_free_user_ns(struct user_namespace *);
/*
* The permissions required on a key that we're looking up.
*/
-#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */
-#define KEY_NEED_READ 0x02 /* Require permission to read content */
-#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */
-#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */
-#define KEY_NEED_LINK 0x10 /* Require permission to link */
-#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */
-#define KEY_NEED_ALL 0x3f /* All the above permissions */
+#define KEY_NEED_VIEW 0x001 /* Require permission to view attributes */
+#define KEY_NEED_READ 0x002 /* Require permission to read content */
+#define KEY_NEED_WRITE 0x004 /* Require permission to update / modify */
+#define KEY_NEED_SEARCH 0x008 /* Require permission to search (keyring) or find (key) */
+#define KEY_NEED_LINK 0x010 /* Require permission to link */
+#define KEY_NEED_SETSEC 0x020 /* Require permission to set owner, group, ACL */
+#define KEY_NEED_INVAL 0x040 /* Require permission to invalidate key */
+#define KEY_NEED_REVOKE 0x080 /* Require permission to revoke key */
+#define KEY_NEED_JOIN 0x100 /* Require permission to join keyring as session */
+#define KEY_NEED_CLEAR 0x200 /* Require permission to clear a keyring */
+#define KEY_NEED_ALL 0x3ff
+
+#define OLD_KEY_NEED_SETATTR 0x20 /* Was: require permission to change attributes */
+
+extern struct key_acl internal_key_acl;
+extern struct key_acl internal_keyring_acl;
+extern struct key_acl internal_writable_keyring_acl;
static inline short key_read_state(const struct key *key)
{
@@ -397,8 +472,8 @@ extern struct ctl_table key_sysctls[];
* the userspace interface
*/
extern int install_thread_keyring_to_cred(struct cred *cred);
-extern void key_fsuid_changed(struct task_struct *tsk);
-extern void key_fsgid_changed(struct task_struct *tsk);
+extern void key_fsuid_changed(struct cred *new_cred);
+extern void key_fsgid_changed(struct cred *new_cred);
extern void key_init(void);
#else /* CONFIG_KEYS */
@@ -413,9 +488,11 @@ extern void key_init(void);
#define make_key_ref(k, p) NULL
#define key_ref_to_ptr(k) NULL
#define is_key_possessed(k) 0
-#define key_fsuid_changed(t) do { } while(0)
-#define key_fsgid_changed(t) do { } while(0)
+#define key_fsuid_changed(c) do { } while(0)
+#define key_fsgid_changed(c) do { } while(0)
#define key_init() do { } while(0)
+#define key_free_user_ns(ns) do { } while(0)
+#define key_remove_domain(d) do { } while(0)
#endif /* CONFIG_KEYS */
#endif /* __KERNEL__ */
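Callers now supply a struct key_acl (or NULL for the type default) wherever a key_perm_t mask used to go. A hedged sketch of an ACL roughly mirroring the old possessor-search/owner-view masks; the KEY_ACE_* permission constants are assumed to come from <linux/keyctl.h>, per the #include added above:

/* Sketch only: the grants are illustrative, not a recommended policy. */
static struct key_acl example_acl = {
	.usage	= REFCOUNT_INIT(1),
	.nr_ace	= 2,
	.aces	= {
		KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
		KEY_OWNER_ACE(KEY_ACE_VIEW),
	},
};

struct key *key = request_key(&key_type_user, "example:desc",
			      NULL, &example_acl);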
diff --git a/include/linux/leds-ti-lmu-common.h b/include/linux/leds-ti-lmu-common.h
new file mode 100644
index 000000000000..5eb111f38803
--- /dev/null
+++ b/include/linux/leds-ti-lmu-common.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// TI LMU Common Core
+// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/
+
+#ifndef _TI_LMU_COMMON_H_
+#define _TI_LMU_COMMON_H_
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <uapi/linux/uleds.h>
+
+#define LMU_11BIT_LSB_MASK (BIT(0) | BIT(1) | BIT(2))
+#define LMU_11BIT_MSB_SHIFT 3
+
+#define MAX_BRIGHTNESS_8BIT 255
+#define MAX_BRIGHTNESS_11BIT 2047
+
+struct ti_lmu_bank {
+ struct regmap *regmap;
+
+ int max_brightness;
+
+ u8 lsb_brightness_reg;
+ u8 msb_brightness_reg;
+
+ u8 runtime_ramp_reg;
+ u32 ramp_up_usec;
+ u32 ramp_down_usec;
+};
+
+int ti_lmu_common_set_brightness(struct ti_lmu_bank *lmu_bank, int brightness);
+
+int ti_lmu_common_set_ramp(struct ti_lmu_bank *lmu_bank);
+
+int ti_lmu_common_get_ramp_params(struct device *dev,
+ struct fwnode_handle *child,
+ struct ti_lmu_bank *lmu_data);
+
+int ti_lmu_common_get_brt_res(struct device *dev, struct fwnode_handle *child,
+ struct ti_lmu_bank *lmu_data);
+
+#endif /* _TI_LMU_COMMON_H_ */
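A LED driver is expected to fill a ti_lmu_bank and let the common core split an 11-bit brightness across the LSB/MSB registers via LMU_11BIT_LSB_MASK and LMU_11BIT_MSB_SHIFT. A hedged sketch; the register offsets are illustrative, not taken from a datasheet:

/* Sketch: regmap comes from the parent MFD; the offsets are made up. */
struct ti_lmu_bank bank = {
	.regmap			= regmap,
	.max_brightness		= MAX_BRIGHTNESS_11BIT,
	.lsb_brightness_reg	= 0x04,
	.msb_brightness_reg	= 0x05,
};

int ret = ti_lmu_common_set_brightness(&bank, 1023);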
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6e2377e6c1d6..57baa27f238c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -203,11 +203,17 @@ struct lock_list {
struct lock_list *parent;
};
-/*
- * We record lock dependency chains, so that we can cache them:
+/**
+ * struct lock_chain - lock dependency chain record
+ *
+ * @irq_context: the same as irq_context in held_lock below
+ * @depth: the number of held locks in this chain
+ * @base: the index in chain_hlocks for this chain
+ * @entry: entry in the lock_chain hash list, linking chains that collide
+ * @chain_key: the hash key of this lock_chain
*/
struct lock_chain {
- /* see BUILD_BUG_ON()s in lookup_chain_cache() */
+ /* see BUILD_BUG_ON()s in add_chain_cache() */
unsigned int irq_context : 2,
depth : 6,
base : 24;
@@ -217,12 +223,8 @@ struct lock_chain {
};
#define MAX_LOCKDEP_KEYS_BITS 13
-/*
- * Subtract one because we offset hlock->class_idx by 1 in order
- * to make 0 mean no class. This avoids overflowing the class_idx
- * bitfield and hitting the BUG in hlock_class().
- */
-#define MAX_LOCKDEP_KEYS ((1UL << MAX_LOCKDEP_KEYS_BITS) - 1)
+#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
+#define INITIAL_CHAIN_KEY -1
struct held_lock {
/*
@@ -247,6 +249,11 @@ struct held_lock {
u64 waittime_stamp;
u64 holdtime_stamp;
#endif
+ /*
+ * class_idx is zero-indexed; it points to the element in
+ * lock_classes this held lock instance belongs to. class_idx is in
+ * the range from 0 to (MAX_LOCKDEP_KEYS-1) inclusive.
+ */
unsigned int class_idx:MAX_LOCKDEP_KEYS_BITS;
/*
* The lock-stack is unified in that the lock chains of interrupt
@@ -281,6 +288,8 @@ extern void lockdep_free_key_range(void *start, unsigned long size);
extern asmlinkage void lockdep_sys_exit(void);
extern void lockdep_set_selftest_task(struct task_struct *task);
+extern void lockdep_init_task(struct task_struct *task);
+
extern void lockdep_off(void);
extern void lockdep_on(void);
@@ -385,7 +394,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
WARN_ON(debug_locks && !lockdep_is_held(l)); \
} while (0)
-#define lockdep_assert_held_exclusive(l) do { \
+#define lockdep_assert_held_write(l) do { \
WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \
} while (0)
@@ -405,6 +414,10 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
#else /* !CONFIG_LOCKDEP */
+static inline void lockdep_init_task(struct task_struct *task)
+{
+}
+
static inline void lockdep_off(void)
{
}
@@ -466,7 +479,7 @@ struct lockdep_map { };
#define lockdep_is_held_type(l, r) (1)
#define lockdep_assert_held(l) do { (void)(l); } while (0)
-#define lockdep_assert_held_exclusive(l) do { (void)(l); } while (0)
+#define lockdep_assert_held_write(l) do { (void)(l); } while (0)
#define lockdep_assert_held_read(l) do { (void)(l); } while (0)
#define lockdep_assert_held_once(l) do { (void)(l); } while (0)
@@ -497,7 +510,6 @@ enum xhlock_context_t {
{ .name = (_name), .key = (void *)(_key), }
static inline void lockdep_invariant_state(bool force) {}
-static inline void lockdep_init_task(struct task_struct *task) {}
static inline void lockdep_free_task(struct task_struct *task) {}
#ifdef CONFIG_LOCK_STAT
@@ -632,11 +644,18 @@ do { \
"IRQs not disabled as expected\n"); \
} while (0)
+#define lockdep_assert_in_irq() do { \
+ WARN_ONCE(debug_locks && !current->lockdep_recursion && \
+ !current->hardirq_context, \
+ "Not in hardirq as expected\n"); \
+ } while (0)
+
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
+# define lockdep_assert_in_irq() do { } while (0)
#endif
#ifdef CONFIG_LOCKDEP
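The rename from lockdep_assert_held_exclusive() spells out what is asserted: the lock is held for writing, not merely held. A hedged sketch; the structure and function names are illustrative:

/* Sketch: assert writer ownership before mutating shared state. */
static void example_resize(struct example_table *t)
{
	lockdep_assert_held_write(&t->sem);	/* writer, not a reader */
	/* ... safe to modify t here ... */
}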
diff --git a/include/linux/log2.h b/include/linux/log2.h
index 1aec01365ed4..83a4a3ca3e8a 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -220,4 +220,38 @@ int __order_base_2(unsigned long n)
ilog2((n) - 1) + 1) : \
__order_base_2(n) \
)
+
+static inline __attribute__((const))
+int __bits_per(unsigned long n)
+{
+ if (n < 2)
+ return 1;
+ if (is_power_of_2(n))
+ return order_base_2(n) + 1;
+ return order_base_2(n);
+}
+
+/**
+ * bits_per - calculate the number of bits required for the argument
+ * @n: parameter
+ *
+ * This is constant-capable and can be used for compile-time
+ * initializations, e.g. bitfields.
+ *
+ * The first few values calculated by this routine:
+ *  bits_per(0) = 1
+ *  bits_per(1) = 1
+ *  bits_per(2) = 2
+ *  bits_per(3) = 2
+ *  bits_per(4) = 3
+ * ... and so on.
+ */
+#define bits_per(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ ((n) == 0 || (n) == 1) \
+ ? 1 : ilog2(n) + 1 \
+ ) : \
+ __bits_per(n) \
+)
#endif /* _LINUX_LOG2_H */
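Because bits_per() collapses to a constant for constant input, it can size bitfields at compile time. A small sketch with an illustrative maximum channel number (bits_per(7) == 3):

#define EXAMPLE_MAX_CHAN	7	/* illustrative */

struct example_hdr {
	unsigned int chan : bits_per(EXAMPLE_MAX_CHAN);	/* 3 bits */
	unsigned int len  : 13;
};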
diff --git a/include/linux/mfd/da9062/registers.h b/include/linux/mfd/da9062/registers.h
index fe04b708742b..2906bf6160fb 100644
--- a/include/linux/mfd/da9062/registers.h
+++ b/include/linux/mfd/da9062/registers.h
@@ -797,6 +797,9 @@
#define DA9062AA_BUCK3_SL_A_SHIFT 7
#define DA9062AA_BUCK3_SL_A_MASK BIT(7)
+/* DA9062AA_VLDO[1-4]_A common */
+#define DA9062AA_VLDO_A_MIN_SEL 2
+
/* DA9062AA_VLDO1_A = 0x0A9 */
#define DA9062AA_VLDO1_A_SHIFT 0
#define DA9062AA_VLDO1_A_MASK 0x3f
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 77c566ab96ab..085edbf7601b 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -11,55 +11,6 @@
#ifndef __MFD_DA9063_PDATA_H__
#define __MFD_DA9063_PDATA_H__
-#include <linux/regulator/machine.h>
-
-/*
- * Regulator configuration
- */
-/* DA9063 and DA9063L regulator IDs */
-enum {
- /* BUCKs */
- DA9063_ID_BCORE1,
- DA9063_ID_BCORE2,
- DA9063_ID_BPRO,
- DA9063_ID_BMEM,
- DA9063_ID_BIO,
- DA9063_ID_BPERI,
-
- /* BCORE1 and BCORE2 in merged mode */
- DA9063_ID_BCORES_MERGED,
- /* BMEM and BIO in merged mode */
- DA9063_ID_BMEM_BIO_MERGED,
- /* When two BUCKs are merged, they cannot be reused separately */
-
- /* LDOs on both DA9063 and DA9063L */
- DA9063_ID_LDO3,
- DA9063_ID_LDO7,
- DA9063_ID_LDO8,
- DA9063_ID_LDO9,
- DA9063_ID_LDO11,
-
- /* DA9063-only LDOs */
- DA9063_ID_LDO1,
- DA9063_ID_LDO2,
- DA9063_ID_LDO4,
- DA9063_ID_LDO5,
- DA9063_ID_LDO6,
- DA9063_ID_LDO10,
-};
-
-/* Regulators platform data */
-struct da9063_regulator_data {
- int id;
- struct regulator_init_data *initdata;
-};
-
-struct da9063_regulators_pdata {
- unsigned n_regulators;
- struct da9063_regulator_data *regulator_data;
-};
-
-
/*
* RGB LED configuration
*/
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 3ca17eb89aa2..f1631a39acfc 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -20,6 +20,7 @@
#define MIN_850_MV 850000
#define MIN_800_MV 800000
#define MIN_750_MV 750000
+#define MIN_650_MV 650000
#define MIN_600_MV 600000
#define MIN_500_MV 500000
diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h
index 6e7668a389a1..4805c90609c4 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -170,7 +170,9 @@ enum s2mps11_regulators {
#define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT)
#define S2MPS11_ENABLE_SHIFT 0x06
#define S2MPS11_LDO_N_VOLTAGES (S2MPS11_LDO_VSEL_MASK + 1)
-#define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1)
+#define S2MPS11_BUCK12346_N_VOLTAGES 153
+#define S2MPS11_BUCK5_N_VOLTAGES 216
+#define S2MPS11_BUCK7810_N_VOLTAGES 225
#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1)
#define S2MPS11_RAMP_DELAY 25000 /* uV/us */
@@ -188,4 +190,9 @@ enum s2mps11_regulators {
#define S2MPS11_BUCK6_RAMP_EN_SHIFT 0
#define S2MPS11_PMIC_EN_SHIFT 6
+/*
+ * Bits for "enable suspend" (On/Off controlled by PWREN)
+ * are the same as in S2MPS14: S2MPS14_ENABLE_SUSPEND
+ */
+
#endif /* __LINUX_MFD_S2MPS11_H */
diff --git a/include/linux/mfd/ti-lmu-register.h b/include/linux/mfd/ti-lmu-register.h
index 222cb14c5b0f..116a749e0302 100644
--- a/include/linux/mfd/ti-lmu-register.h
+++ b/include/linux/mfd/ti-lmu-register.h
@@ -187,47 +187,26 @@
#define LM3695_MAX_REG 0x14
-/* LM3697 */
-#define LM3697_REG_HVLED_OUTPUT_CFG 0x10
-#define LM3697_HVLED1_CFG_MASK BIT(0)
-#define LM3697_HVLED2_CFG_MASK BIT(1)
-#define LM3697_HVLED3_CFG_MASK BIT(2)
-#define LM3697_HVLED1_CFG_SHIFT 0
-#define LM3697_HVLED2_CFG_SHIFT 1
-#define LM3697_HVLED3_CFG_SHIFT 2
+/* LM36274 */
+#define LM36274_REG_REV 0x01
+#define LM36274_REG_BL_CFG_1 0x02
+#define LM36274_REG_BL_CFG_2 0x03
+#define LM36274_REG_BRT_LSB 0x04
+#define LM36274_REG_BRT_MSB 0x05
+#define LM36274_REG_BL_EN 0x08
+
+#define LM36274_REG_BIAS_CONFIG_1 0x09
+#define LM36274_EXT_EN_MASK BIT(0)
+#define LM36274_EN_VNEG_MASK BIT(1)
+#define LM36274_EN_VPOS_MASK BIT(2)
+
+#define LM36274_REG_BIAS_CONFIG_2 0x0a
+#define LM36274_REG_BIAS_CONFIG_3 0x0b
+#define LM36274_REG_VOUT_BOOST 0x0c
+#define LM36274_REG_VOUT_POS 0x0d
+#define LM36274_REG_VOUT_NEG 0x0e
+#define LM36274_VOUT_MASK 0x3F
+
+#define LM36274_MAX_REG 0x13
-#define LM3697_REG_BL0_RAMP 0x11
-#define LM3697_REG_BL1_RAMP 0x12
-#define LM3697_RAMPUP_MASK 0xF0
-#define LM3697_RAMPUP_SHIFT 4
-#define LM3697_RAMPDN_MASK 0x0F
-#define LM3697_RAMPDN_SHIFT 0
-
-#define LM3697_REG_RAMP_CONF 0x14
-#define LM3697_RAMP_MASK 0x0F
-#define LM3697_RAMP_EACH 0x05
-
-#define LM3697_REG_PWM_CFG 0x1C
-#define LM3697_PWM_A_MASK BIT(0)
-#define LM3697_PWM_B_MASK BIT(1)
-
-#define LM3697_REG_IMAX_A 0x17
-#define LM3697_REG_IMAX_B 0x18
-
-#define LM3697_REG_FEEDBACK_ENABLE 0x19
-
-#define LM3697_REG_BRT_A_LSB 0x20
-#define LM3697_REG_BRT_A_MSB 0x21
-#define LM3697_REG_BRT_B_LSB 0x22
-#define LM3697_REG_BRT_B_MSB 0x23
-
-#define LM3697_REG_ENABLE 0x24
-
-#define LM3697_REG_OPEN_FAULT_STATUS 0xB0
-
-#define LM3697_REG_SHORT_FAULT_STATUS 0xB2
-
-#define LM3697_REG_MONITOR_ENABLE 0xB4
-
-#define LM3697_MAX_REG 0xB4
#endif
diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h
index 7d1e9c24f818..0bc0e8199798 100644
--- a/include/linux/mfd/ti-lmu.h
+++ b/include/linux/mfd/ti-lmu.h
@@ -23,7 +23,7 @@ enum ti_lmu_id {
LM3632,
LM3633,
LM3695,
- LM3697,
+ LM36274,
LMU_MAX_ID,
};
@@ -65,6 +65,9 @@ enum lm363x_regulator_id {
LM3632_BOOST, /* Boost output */
LM3632_LDO_POS, /* Positive display bias output */
LM3632_LDO_NEG, /* Negative display bias output */
+ LM36274_BOOST, /* Boost output */
+ LM36274_LDO_POS, /* Positive display bias output */
+ LM36274_LDO_NEG, /* Negative display bias output */
};
/**
diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h
index 071cdf3e16cf..986986fe4e4e 100644
--- a/include/linux/mfd/wm831x/pdata.h
+++ b/include/linux/mfd/wm831x/pdata.h
@@ -47,7 +47,6 @@ struct wm831x_battery_pdata {
* I2C or SPI buses.
*/
struct wm831x_buckv_pdata {
- int dvs_gpio; /** CPU GPIO to use for DVS switching */
int dvs_control_src; /** Hardware DVS source to use (1 or 2) */
int dvs_init_state; /** DVS state to expect on startup */
int dvs_state_gpio; /** CPU GPIO to use for monitoring status */
diff --git a/include/linux/module.h b/include/linux/module.h
index 188998d3dca9..1455812dd325 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
+#include <linux/srcu.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -450,6 +451,10 @@ struct module {
unsigned int num_tracepoints;
tracepoint_ptr_t *tracepoints_ptrs;
#endif
+#ifdef CONFIG_TREE_SRCU
+ unsigned int num_srcu_structs;
+ struct srcu_struct **srcu_struct_ptrs;
+#endif
#ifdef CONFIG_BPF_EVENTS
unsigned int num_bpf_raw_events;
struct bpf_raw_event_map *bpf_raw_events;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9ec3544baee2..fe0b29bf2df7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -333,19 +333,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
-static inline struct page *find_subpage(struct page *page, pgoff_t offset)
-{
- unsigned long mask;
-
- if (PageHuge(page))
- return page;
-
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- mask = (1UL << compound_order(page)) - 1;
- return page + (offset & mask);
-}
-
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 03cb4b6f842e..3998cdf9cd14 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -17,14 +17,18 @@ struct percpu_rw_semaphore {
int readers_block;
};
-#define DEFINE_STATIC_PERCPU_RWSEM(name) \
+#define __DEFINE_PERCPU_RWSEM(name, is_static) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
-static struct percpu_rw_semaphore name = { \
- .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
+is_static struct percpu_rw_semaphore name = { \
+ .rss = __RCU_SYNC_INITIALIZER(name.rss), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
}
+#define DEFINE_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, /* not static */)
+#define DEFINE_STATIC_PERCPU_RWSEM(name) \
+ __DEFINE_PERCPU_RWSEM(name, static)
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);
@@ -117,7 +121,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
lock_release(&sem->rw_sem.dep_map, 1, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
- sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN;
+ atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
#endif
}
@@ -127,7 +131,7 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
- sem->rw_sem.owner = current;
+ atomic_long_set(&sem->rw_sem.owner, (long)current);
#endif
}
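The refactored macro pair now covers non-static definitions too. A hedged usage sketch; the semaphore name and the reader body are illustrative:

DEFINE_PERCPU_RWSEM(example_sem);	/* external linkage, unlike the static variant */

static void example_reader(void)
{
	percpu_down_read(&example_sem);	/* per-cpu fast path while no writer is pending */
	/* ... read-side critical section ... */
	percpu_up_read(&example_sem);
}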
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index a9b0ee408fbd..71f525a35ac2 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -171,4 +171,6 @@ void armpmu_free_irq(int irq, int cpu);
#endif /* CONFIG_ARM_PMU */
+#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+
#endif /* __ARM_PMU_H__ */
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index 17edc43201d2..8b30b14b47d3 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -186,7 +186,7 @@ struct omap_gpio_platform_data {
bool is_mpuio; /* whether the bank is of type MPUIO */
u32 non_wakeup_gpios;
- struct omap_gpio_reg_offs *regs;
+ const struct omap_gpio_reg_offs *regs;
/* Return context loss count due to PM states changing */
int (*get_context_loss_count)(struct device *dev);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 52a283ba0465..a705aa2d03f9 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -75,6 +75,15 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
+ * provide proc_pid_arch_status() definition.
+ */
+int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
diff --git a/include/linux/processor.h b/include/linux/processor.h
index dbc952eec869..dc78bdc7079a 100644
--- a/include/linux/processor.h
+++ b/include/linux/processor.h
@@ -32,15 +32,6 @@
#define spin_cpu_relax() cpu_relax()
#endif
-/*
- * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
- * necessary. This should be used if the wait is expected to take longer
- * than context switch overhead, but we can't sleep or do a directed yield.
- */
-#ifndef spin_cpu_yield
-#define spin_cpu_yield() cpu_relax_yield()
-#endif
-
#ifndef spin_end
#define spin_end()
#endif
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index d5084ebd9f03..2a9df80ea887 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -355,7 +355,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
info.si_code = SI_USER;
info.si_pid = 0;
info.si_uid = 0;
- force_sig_info(info.si_signo, &info, current);
+ force_sig_info(&info);
}
#endif
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index eaa5c6e3fc9f..24632a7a7d11 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -405,12 +405,16 @@ struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
const struct of_phandle_args *args);
struct pwm_device *pwm_get(struct device *dev, const char *con_id);
-struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id);
+struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
+ const char *con_id);
void pwm_put(struct pwm_device *pwm);
struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
const char *con_id);
+struct pwm_device *devm_fwnode_pwm_get(struct device *dev,
+ struct fwnode_handle *fwnode,
+ const char *con_id);
void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
#else
static inline struct pwm_device *pwm_request(int pwm_id, const char *label)
@@ -493,7 +497,8 @@ static inline struct pwm_device *pwm_get(struct device *dev,
return ERR_PTR(-ENODEV);
}
-static inline struct pwm_device *of_pwm_get(struct device_node *np,
+static inline struct pwm_device *of_pwm_get(struct device *dev,
+ struct device_node *np,
const char *con_id)
{
return ERR_PTR(-ENODEV);
@@ -516,6 +521,13 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
return ERR_PTR(-ENODEV);
}
+static inline struct pwm_device *
+devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode,
+ const char *con_id)
+{
+ return ERR_PTR(-ENODEV);
+}
+
static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
{
}
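devm_fwnode_pwm_get() lets one probe path serve both DT and ACPI by going through the device's fwnode. A hedged sketch; the "backlight" con_id is illustrative:

/* Sketch: dev is the consumer's struct device * inside probe(). */
struct pwm_device *pwm;

pwm = devm_fwnode_pwm_get(dev, dev_fwnode(dev), "backlight");
if (IS_ERR(pwm))
	return PTR_ERR(pwm);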
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 6fc53a1345b3..9b83865d24f9 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -13,62 +13,44 @@
#include <linux/wait.h>
#include <linux/rcupdate.h>
-enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
-
/* Structure to mediate between updaters and fastpath-using readers. */
struct rcu_sync {
int gp_state;
int gp_count;
wait_queue_head_t gp_wait;
- int cb_state;
struct rcu_head cb_head;
-
- enum rcu_sync_type gp_type;
};
-extern void rcu_sync_lockdep_assert(struct rcu_sync *);
-
/**
* rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
- * Returns true if readers are permitted to use their fastpaths.
- * Must be invoked within an RCU read-side critical section whose
- * flavor matches that of the rcu_sync struture.
+ * Returns true if readers are permitted to use their fastpaths. Must be
+ * invoked within some flavor of RCU read-side critical section.
*/
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
-#ifdef CONFIG_PROVE_RCU
- rcu_sync_lockdep_assert(rsp);
-#endif
- return !rsp->gp_state; /* GP_IDLE */
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
+ !rcu_read_lock_bh_held() &&
+ !rcu_read_lock_sched_held(),
+ "suspicious rcu_sync_is_idle() usage");
+ return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
}
-extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
-#define __RCU_SYNC_INITIALIZER(name, type) { \
+#define __RCU_SYNC_INITIALIZER(name) { \
.gp_state = 0, \
.gp_count = 0, \
.gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
- .cb_state = 0, \
- .gp_type = type, \
}
-#define __DEFINE_RCU_SYNC(name, type) \
- struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type)
-
-#define DEFINE_RCU_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_SYNC)
-
-#define DEFINE_RCU_SCHED_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
-
-#define DEFINE_RCU_BH_SYNC(name) \
- __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+#define DEFINE_RCU_SYNC(name) \
+ struct rcu_sync name = __RCU_SYNC_INITIALIZER(name)
#endif /* _LINUX_RCU_SYNC_H_ */
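With the flavor consolidation, initialization loses its type argument and the idle check may sit under any flavor of RCU reader. A hedged before/after sketch; all names are illustrative:

static struct rcu_sync example_rss;

rcu_sync_init(&example_rss);	/* was: rcu_sync_init(&example_rss, RCU_SCHED_SYNC) */

rcu_read_lock();
if (rcu_sync_is_idle(&example_rss))
	example_fast_path();	/* readers skip the slow path */
else
	example_slow_path();
rcu_read_unlock();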
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b25d20822e75..8f7167478c1d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -365,16 +365,15 @@ static inline void rcu_preempt_sleep_check(void) { }
* other macros that it invokes.
*/
#define rcu_assign_pointer(p, v) \
-({ \
+do { \
uintptr_t _r_a_p__v = (uintptr_t)(v); \
- rcu_check_sparse(p, __rcu); \
+ rcu_check_sparse(p, __rcu); \
\
if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
else \
smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
- _r_a_p__v; \
-})
+} while (0)
/**
* rcu_swap_protected() - swap an RCU and a regular pointer
@@ -586,7 +585,7 @@ static inline void rcu_preempt_sleep_check(void) { }
* read-side critical sections may be preempted and they may also block, but
* only when acquiring spinlocks that are subject to priority inheritance.
*/
-static inline void rcu_read_lock(void)
+static __always_inline void rcu_read_lock(void)
{
__rcu_read_lock();
__acquire(RCU);
@@ -803,7 +802,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
- * @rcu_head: the name of the struct rcu_head within the type of @ptr.
+ * @rhf: the name of the struct rcu_head within the type of @ptr.
*
* Many rcu callback functions just call kfree() on the base structure.
* These functions are trivial, but their size adds up, and furthermore
@@ -826,9 +825,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.
*/
-#define kfree_rcu(ptr, rcu_head) \
- __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
-
+#define kfree_rcu(ptr, rhf) \
+do { \
+ typeof (ptr) ___p = (ptr); \
+ \
+ if (___p) \
+ __kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
+} while (0)
/*
* Place this after a lock-acquisition primitive to guarantee that
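Two behavioral changes fall out of the hunks above: rcu_assign_pointer() is now a statement rather than an expression, and kfree_rcu() ignores NULL pointers, so error paths need no separate check. A hedged sketch; struct example, gp and example_lock are illustrative:

struct example {
	struct rcu_head rcu;
	int payload;
};

static struct example __rcu *gp;

struct example *old = rcu_dereference_protected(gp,
					lockdep_is_held(&example_lock));

rcu_assign_pointer(gp, NULL);	/* no longer usable as an expression */
kfree_rcu(old, rcu);		/* safe even when old is NULL */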
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index d3dea823af8e..38e1369e8bd0 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -22,6 +22,7 @@ struct module;
struct clk;
struct device;
struct i2c_client;
+struct i3c_device;
struct irq_domain;
struct slim_device;
struct spi_device;
@@ -621,6 +622,10 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus,
const struct regmap_config *config,
struct lock_class_key *lock_key,
const char *lock_name);
+struct regmap *__devm_regmap_init_i3c(struct i3c_device *i3c,
+ const struct regmap_config *config,
+ struct lock_class_key *lock_key,
+ const char *lock_name);
/*
* Wrapper for regmap_init macros to include a unique lockdep key and name
* for each call. No-op if CONFIG_LOCKDEP is not set.
@@ -979,6 +984,21 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
#define devm_regmap_init_slimbus(slimbus, config) \
__regmap_lockdep_wrapper(__devm_regmap_init_slimbus, #config, \
slimbus, config)
+
+/**
+ * devm_regmap_init_i3c() - Initialise managed register map
+ *
+ * @i3c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap. The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_i3c(i3c, config) \
+ __regmap_lockdep_wrapper(__devm_regmap_init_i3c, #config, \
+ i3c, config)
+
int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk);
void regmap_mmio_detach_clk(struct regmap *map);
void regmap_exit(struct regmap *map);
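I3C device drivers gain a managed regmap constructor mirroring the I2C/SPI ones. A hedged probe sketch; the register-map geometry is illustrative:

static const struct regmap_config example_config = {
	.reg_bits = 8,	/* illustrative geometry */
	.val_bits = 8,
};

static int example_probe(struct i3c_device *i3cdev)
{
	struct regmap *map;

	map = devm_regmap_init_i3c(i3cdev, &example_config);
	if (IS_ERR(map))
		return PTR_ERR(map);

	return 0;
}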
diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h
new file mode 100644
index 000000000000..0212d6255e4e
--- /dev/null
+++ b/include/linux/regulator/coupler.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * coupler.h -- SoC Regulator support, coupler API.
+ *
+ * Regulator Coupler Interface.
+ */
+
+#ifndef __LINUX_REGULATOR_COUPLER_H_
+#define __LINUX_REGULATOR_COUPLER_H_
+
+#include <linux/kernel.h>
+#include <linux/suspend.h>
+
+struct regulator_coupler;
+struct regulator_dev;
+
+/**
+ * struct regulator_coupler - customized regulator coupler
+ *
+ * A regulator coupler allows customizing the coupling algorithm.
+ *
+ * @list: couplers list entry
+ * @attach_regulator: Callback invoked on creation of a coupled regulator;
+ * couples are unresolved at this point. The callee should
+ * check that it can handle the regulator and return 0 on
+ * success, -errno on failure, or 1 if the given regulator
+ * is not suitable for this coupler (e.g. when a system has
+ * multiple couplers). This callback is mandatory.
+ * @detach_regulator: Callback invoked on destruction of a coupled regulator.
+ * This callback is optional and could be NULL.
+ * @balance_voltage: Callback invoked when voltage of a coupled regulator is
+ * changing. Called with all of the coupled rdevs held
+ * under the "consumer lock". The callee should perform
+ * voltage balancing, changing the voltage of the coupled
+ * regulators as needed. It is up to the coupler to verify
+ * a voltage before changing it in hardware, i.e. to check
+ * the consumers' min/max constraints. This callback is
+ * optional and could be NULL, in which case a generic
+ * voltage balancer will be used.
+ */
+struct regulator_coupler {
+ struct list_head list;
+
+ int (*attach_regulator)(struct regulator_coupler *coupler,
+ struct regulator_dev *rdev);
+ int (*detach_regulator)(struct regulator_coupler *coupler,
+ struct regulator_dev *rdev);
+ int (*balance_voltage)(struct regulator_coupler *coupler,
+ struct regulator_dev *rdev,
+ suspend_state_t state);
+};
+
+#ifdef CONFIG_REGULATOR
+int regulator_coupler_register(struct regulator_coupler *coupler);
+const char *rdev_get_name(struct regulator_dev *rdev);
+int regulator_check_consumers(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV,
+ suspend_state_t state);
+int regulator_check_voltage(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV);
+int regulator_get_voltage_rdev(struct regulator_dev *rdev);
+int regulator_set_voltage_rdev(struct regulator_dev *rdev,
+ int min_uV, int max_uV,
+ suspend_state_t state);
+#else
+static inline int regulator_coupler_register(struct regulator_coupler *coupler)
+{
+ return 0;
+}
+static inline const char *rdev_get_name(struct regulator_dev *rdev)
+{
+ return NULL;
+}
+static inline int regulator_check_consumers(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV,
+ suspend_state_t state)
+{
+ return -EINVAL;
+}
+static inline int regulator_check_voltage(struct regulator_dev *rdev,
+ int *min_uV, int *max_uV)
+{
+ return -EINVAL;
+}
+static inline int regulator_get_voltage_rdev(struct regulator_dev *rdev)
+{
+ return -EINVAL;
+}
+static inline int regulator_set_voltage_rdev(struct regulator_dev *rdev,
+ int min_uV, int max_uV,
+ suspend_state_t state)
+{
+ return -EINVAL;
+}
+#endif
+
+#endif
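Per the kerneldoc above, only attach_regulator is mandatory, and returning 1 hands unrelated regulators to the next coupler. A hedged sketch of a minimal SoC coupler; the supply name is illustrative:

static int example_attach(struct regulator_coupler *coupler,
			  struct regulator_dev *rdev)
{
	if (strcmp(rdev_get_name(rdev), "example-core"))
		return 1;	/* not ours: let another coupler claim it */
	return 0;
}

static struct regulator_coupler example_coupler = {
	.attach_regulator = example_attach,
};

static int __init example_coupler_init(void)
{
	return regulator_coupler_register(&example_coupler);
}
arch_initcall(example_coupler_init);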
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d45ab52c91c9..9a911bb5fb61 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -12,8 +12,6 @@
#ifndef __LINUX_REGULATOR_DRIVER_H_
#define __LINUX_REGULATOR_DRIVER_H_
-#define MAX_COUPLED 2
-
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/regulator/consumer.h>
@@ -283,6 +281,11 @@ enum regulator_type {
* @vsel_range_mask: Mask for register bitfield used for range selector
* @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
* @vsel_mask: Mask for register bitfield used for selector
+ * @vsel_step: Specify the resolution of selector stepping when setting
+ * voltage. If 0, no stepping is done (the requested selector
+ * is set directly); if >0, the regulator API ramps the
+ * voltage up/down gradually, each time increasing/decreasing
+ * the selector by the specified step value.
* @csel_reg: Register for current limit selector using regmap set_current_limit
* @csel_mask: Mask for register bitfield used for current limit selector
* @apply_reg: Register for initiate voltage change on the output when
@@ -357,6 +360,7 @@ struct regulator_desc {
unsigned int vsel_range_mask;
unsigned int vsel_reg;
unsigned int vsel_mask;
+ unsigned int vsel_step;
unsigned int csel_reg;
unsigned int csel_mask;
unsigned int apply_reg;
@@ -423,7 +427,8 @@ struct regulator_config {
* incremented.
*/
struct coupling_desc {
- struct regulator_dev *coupled_rdevs[MAX_COUPLED];
+ struct regulator_dev **coupled_rdevs;
+ struct regulator_coupler *coupler;
int n_resolved;
int n_coupled;
};
@@ -549,4 +554,5 @@ void regulator_unlock(struct regulator_dev *rdev);
*/
int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc,
unsigned int selector);
+
#endif
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 5539efa76d26..a84cc8879c3e 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -153,7 +153,7 @@ struct regulation_constraints {
int system_load;
/* used for coupled regulators */
- int max_spread;
+ u32 *max_spread;
/* used for changing voltage in steps */
int max_uV_step;
diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h
index ebd99d2e62ad..8712c091abf0 100644
--- a/include/linux/regulator/max8952.h
+++ b/include/linux/regulator/max8952.h
@@ -105,9 +105,6 @@ enum {
#define MAX8952_NUM_DVS_MODE 4
struct max8952_platform_data {
- int gpio_vid0;
- int gpio_vid1;
-
u32 default_mode;
u32 dvs_mode[MAX8952_NUM_DVS_MODE]; /* MAX8952_DVS_MODEx_XXXXmV */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 2ea18a3def04..e401358c4e7e 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -34,12 +34,13 @@
*/
struct rw_semaphore {
atomic_long_t count;
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
- * Write owner. Used as a speculative check to see
- * if the owner is running on the cpu.
+ * Write owner or one of the read owners, as well as flags regarding
+ * the current state of the rwsem. Can be used as a speculative
+ * check to see if the write owner is running on the cpu.
*/
- struct task_struct *owner;
+ atomic_long_t owner;
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
struct optimistic_spin_queue osq; /* spinner MCS lock */
#endif
raw_spinlock_t wait_lock;
@@ -50,10 +51,10 @@ struct rw_semaphore {
};
/*
- * Setting bit 1 of the owner field but not bit 0 will indicate
+ * Setting all bits of the owner field except bit 0 will indicate
* that the rwsem is writer-owned with an unknown owner.
*/
-#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L)
+#define RWSEM_OWNER_UNKNOWN (-2L)
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
@@ -73,13 +74,14 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
#endif
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL
+#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
#else
#define __RWSEM_OPT_INIT(lockname)
#endif
#define __RWSEM_INITIALIZER(name) \
{ __RWSEM_INIT_COUNT(name), \
+ .owner = ATOMIC_LONG_INIT(0), \
.wait_list = LIST_HEAD_INIT((name).wait_list), \
.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
__RWSEM_OPT_INIT(name) \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..8dc1811487f5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -35,6 +35,7 @@ struct audit_context;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
+struct capture_control;
struct cfs_rq;
struct fs_struct;
struct futex_pi_state;
@@ -47,8 +48,9 @@ struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
-struct capture_control;
struct robust_list_head;
+struct root_domain;
+struct rq;
struct sched_attr;
struct sched_param;
struct seq_file;
@@ -281,6 +283,18 @@ struct vtime {
u64 gtime;
};
+/*
+ * Utilization clamp constraints.
+ * @UCLAMP_MIN: Minimum utilization
+ * @UCLAMP_MAX: Maximum utilization
+ * @UCLAMP_CNT: Utilization clamp constraints count
+ */
+enum uclamp_id {
+ UCLAMP_MIN = 0,
+ UCLAMP_MAX,
+ UCLAMP_CNT
+};
+
struct sched_info {
#ifdef CONFIG_SCHED_INFO
/* Cumulative counters: */
@@ -312,6 +326,10 @@ struct sched_info {
# define SCHED_FIXEDPOINT_SHIFT 10
# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
+/* Increase resolution of cpu_capacity calculations */
+# define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
+# define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
+
struct load_weight {
unsigned long weight;
u32 inv_weight;
@@ -560,12 +578,47 @@ struct sched_dl_entity {
struct hrtimer inactive_timer;
};
+#ifdef CONFIG_UCLAMP_TASK
+/* Number of utilization clamp buckets (shorter alias) */
+#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT
+
+/*
+ * Utilization clamp for a scheduling entity
+ * @value: clamp value "assigned" to a se
+ * @bucket_id: bucket index corresponding to the "assigned" value
+ * @active: the se is currently refcounted in a rq's bucket
+ * @user_defined: the requested clamp value comes from user-space
+ *
+ * The bucket_id is the index of the clamp bucket matching the clamp value
+ * which is pre-computed and stored to avoid expensive integer divisions from
+ * the fast path.
+ *
+ * The active bit is set whenever a task has an "effective" value assigned,
+ * which can be different from the clamp value "requested" from user-space.
+ * This makes it possible to know that a task is refcounted in the rq's
+ * bucket corresponding to the "effective" bucket_id.
+ *
+ * The user_defined bit is set whenever a task has a task-specific clamp
+ * value requested from userspace, i.e. the system defaults apply to this task
+ * only as a restriction. This makes it possible to relax default clamps when
+ * a less restrictive task-specific value has been requested, thus allowing a
+ * "nice"-like semantic. For example, a task running with a 20%
+ * default boost can still drop its own boosting to 0%.
+ */
+struct uclamp_se {
+ unsigned int value : bits_per(SCHED_CAPACITY_SCALE);
+ unsigned int bucket_id : bits_per(UCLAMP_BUCKETS);
+ unsigned int active : 1;
+ unsigned int user_defined : 1;
+};
+#endif /* CONFIG_UCLAMP_TASK */
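
The pre-computed bucket_id described above boils down to one integer division at clamp-assignment time; a sketch of the mapping (mirroring the comment, not lifted from the scheduler sources):

    static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
    {
            /* Each bucket spans SCHED_CAPACITY_SCALE / UCLAMP_BUCKETS units. */
            return min_t(unsigned int,
                         clamp_value / (SCHED_CAPACITY_SCALE / UCLAMP_BUCKETS),
                         UCLAMP_BUCKETS - 1);
    }
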
+
union rcu_special {
struct {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
- u8 pad; /* No garbage from compiler! */
+ u8 deferred_qs;
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@@ -640,6 +693,13 @@ struct task_struct {
#endif
struct sched_dl_entity dl;
+#ifdef CONFIG_UCLAMP_TASK
+ /* Clamp values requested for a scheduling entity */
+ struct uclamp_se uclamp_req[UCLAMP_CNT];
+ /* Effective clamp values used for a scheduling entity */
+ struct uclamp_se uclamp[UCLAMP_CNT];
+#endif
+
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* List of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
@@ -651,7 +711,8 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
- cpumask_t cpus_allowed;
+ const cpumask_t *cpus_ptr;
+ cpumask_t cpus_mask;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -831,6 +892,11 @@ struct task_struct {
/* Effective (overridable) subjective task credentials (COW): */
const struct cred __rcu *cred;
+#ifdef CONFIG_KEYS
+ /* Cached requested key. */
+ struct key *cached_requested_key;
+#endif
+
/*
* executable name, excluding path.
*
@@ -1399,7 +1465,7 @@ extern struct pid *cad_pid;
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
-#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
+#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
@@ -1518,10 +1584,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
}
#endif
-#ifndef cpu_relax_yield
-#define cpu_relax_yield() cpu_relax()
-#endif
-
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
@@ -1919,4 +1981,16 @@ static inline void rseq_syscall(struct pt_regs *regs)
#endif
+const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
+char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
+int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
+
+const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
+const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
+const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
+
+int sched_trace_rq_cpu(struct rq *rq);
+
+const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
+
#endif
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index b36f4cf38111..1abe91ff6e4a 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -7,14 +7,6 @@
*/
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void cpu_load_update_nohz_start(void);
-extern void cpu_load_update_nohz_stop(void);
-#else
-static inline void cpu_load_update_nohz_start(void) { }
-static inline void cpu_load_update_nohz_stop(void) { }
-#endif
-
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_balance_enter_idle(int cpu);
extern int get_nohz_timer_target(void);
#else
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 38a0f0785323..532458698bde 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -307,16 +307,19 @@ static inline void kernel_signal_stop(void)
# define ___ARCH_SI_IA64(_a1, _a2, _a3)
#endif
-int force_sig_fault(int sig, int code, void __user *addr
+int force_sig_fault_to_task(int sig, int code, void __user *addr
___ARCH_SI_TRAPNO(int trapno)
___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
, struct task_struct *t);
+int force_sig_fault(int sig, int code, void __user *addr
+ ___ARCH_SI_TRAPNO(int trapno)
+ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr));
int send_sig_fault(int sig, int code, void __user *addr
___ARCH_SI_TRAPNO(int trapno)
___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
, struct task_struct *t);
-int force_sig_mceerr(int code, void __user *, short, struct task_struct *);
+int force_sig_mceerr(int code, void __user *, short);
int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper);
@@ -325,17 +328,17 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
int force_sig_ptrace_errno_trap(int errno, void __user *addr);
extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
-extern void force_sigsegv(int sig, struct task_struct *p);
-extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *);
+extern void force_sigsegv(int sig);
+extern int force_sig_info(struct kernel_siginfo *);
extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp);
extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid);
-extern int kill_pid_info_as_cred(int, struct kernel_siginfo *, struct pid *,
+extern int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr, struct pid *,
const struct cred *);
extern int kill_pgrp(struct pid *pid, int sig, int priv);
extern int kill_pid(struct pid *pid, int sig, int priv);
extern __must_check bool do_notify_parent(struct task_struct *, int);
extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
-extern void force_sig(int, struct task_struct *);
+extern void force_sig(int);
extern int send_sig(int, struct task_struct *, int);
extern int zap_other_threads(struct task_struct *p);
extern struct sigqueue *sigqueue_alloc(void);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 99ce6d728df7..d4f6215ee03f 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -56,6 +56,11 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
+#ifdef CONFIG_UCLAMP_TASK
+extern unsigned int sysctl_sched_uclamp_util_min;
+extern unsigned int sysctl_sched_uclamp_util_max;
+#endif
+
#ifdef CONFIG_CFS_BANDWIDTH
extern unsigned int sysctl_sched_cfs_bandwidth_slice;
#endif
@@ -75,6 +80,12 @@ extern int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
+#ifdef CONFIG_UCLAMP_TASK
+extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+#endif
+
extern int sysctl_numa_balancing(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index cfc0a89a7159..7863bb62d2ab 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -7,12 +7,6 @@
#include <linux/sched/idle.h>
/*
- * Increase resolution of cpu_capacity calculations
- */
-#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
-#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
-
-/*
* sched-domains (multiprocessor balancing) declarations:
*/
#ifdef CONFIG_SMP
@@ -84,11 +78,6 @@ struct sched_domain {
unsigned int busy_factor; /* less balancing by factor if busy */
unsigned int imbalance_pct; /* No balance until over watermark */
unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */
- unsigned int busy_idx;
- unsigned int idle_idx;
- unsigned int newidle_idx;
- unsigned int wake_idx;
- unsigned int forkexec_idx;
int nohz_idle; /* NOHZ IDLE status */
int flags; /* See SD_* */
@@ -201,14 +190,6 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
# define SD_INIT_NAME(type)
#endif
-#ifndef arch_scale_cpu_capacity
-static __always_inline
-unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
-{
- return SCHED_CAPACITY_SCALE;
-}
-#endif
-
#else /* CONFIG_SMP */
struct sched_domain_attr;
@@ -224,16 +205,16 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
return true;
}
+#endif /* !CONFIG_SMP */
+
#ifndef arch_scale_cpu_capacity
static __always_inline
-unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu)
+unsigned long arch_scale_cpu_capacity(int cpu)
{
return SCHED_CAPACITY_SCALE;
}
#endif
-#endif /* !CONFIG_SMP */
-
static inline int task_node(const struct task_struct *p)
{
return cpu_to_node(task_cpu(p));
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 468d2565a9fe..917d88edb7b9 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -7,8 +7,6 @@
#include <linux/refcount.h>
#include <linux/ratelimit.h>
-struct key;
-
/*
* Some day this will be a full-fledged user tracking system..
*/
@@ -30,18 +28,6 @@ struct user_struct {
unsigned long unix_inflight; /* How many files in flight in unix sockets */
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
-#ifdef CONFIG_KEYS
- /*
- * These pointers can only change from NULL to a non-NULL value once.
- * Writes are protected by key_user_keyring_mutex.
- * Unlocked readers should use READ_ONCE() unless they know that
- * install_user_keyrings() has been called successfully (which sets
- * these members to non-NULL values, preventing further modifications).
- */
- struct key *uid_keyring; /* UID specific keyring */
- struct key *session_keyring; /* UID's default session keyring */
-#endif
-
/* Hash table maintenance information */
struct hlist_node uidhash_node;
kuid_t uid;
diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index ad826d2a4557..26a2013ac39c 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -51,6 +51,11 @@ static inline void wake_q_init(struct wake_q_head *head)
head->lastp = &head->first;
}
+static inline bool wake_q_empty(struct wake_q_head *head)
+{
+ return head->first == WAKE_Q_TAIL;
+}
+
extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);
diff --git a/include/linux/security.h b/include/linux/security.h
index 659071c2e57c..5f7441abbf42 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -189,9 +189,9 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
#ifdef CONFIG_SECURITY
-int call_lsm_notifier(enum lsm_event event, void *data);
-int register_lsm_notifier(struct notifier_block *nb);
-int unregister_lsm_notifier(struct notifier_block *nb);
+int call_blocking_lsm_notifier(enum lsm_event event, void *data);
+int register_blocking_lsm_notifier(struct notifier_block *nb);
+int unregister_blocking_lsm_notifier(struct notifier_block *nb);
/* prototypes */
extern int security_init(void);
@@ -394,17 +394,17 @@ int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
#else /* CONFIG_SECURITY */
-static inline int call_lsm_notifier(enum lsm_event event, void *data)
+static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
{
return 0;
}
-static inline int register_lsm_notifier(struct notifier_block *nb)
+static inline int register_blocking_lsm_notifier(struct notifier_block *nb)
{
return 0;
}
-static inline int unregister_lsm_notifier(struct notifier_block *nb)
+static inline int unregister_blocking_lsm_notifier(struct notifier_block *nb)
{
return 0;
}
diff --git a/include/linux/siox.h b/include/linux/siox.h
index a860cb8c1f9d..da7225bf1877 100644
--- a/include/linux/siox.h
+++ b/include/linux/siox.h
@@ -72,3 +72,13 @@ static inline void siox_driver_unregister(struct siox_driver *sdriver)
{
return driver_unregister(&sdriver->driver);
}
+
+/*
+ * module_siox_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit. This eliminates a lot of
+ * boilerplate. Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_siox_driver(__siox_driver) \
+ module_driver(__siox_driver, siox_driver_register, \
+ siox_driver_unregister)
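
Hypothetical usage, replacing an explicit module_init()/module_exit() pair in a SIOX device driver (the probe/remove callbacks are assumed to exist elsewhere):

    static struct siox_driver my_siox_driver = {
            .probe  = my_probe,
            .remove = my_remove,
            .driver = {
                    .name = "my-siox-device",
            },
    };
    module_siox_driver(my_siox_driver);
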
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a56f08ff3097..6fc856c9eda5 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -35,7 +35,7 @@ int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
/*
* Call a function on all processors
*/
-int on_each_cpu(smp_call_func_t func, void *info, int wait);
+void on_each_cpu(smp_call_func_t func, void *info, int wait);
/*
* Call a function on processors specified by mask, which might include
@@ -101,7 +101,7 @@ extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
*/
-int smp_call_function(smp_call_func_t func, void *info, int wait);
+void smp_call_function(smp_call_func_t func, void *info, int wait);
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait);
@@ -144,9 +144,8 @@ static inline void smp_send_stop(void) { }
* These macros fold the SMP functionality into a single CPU system
*/
#define raw_smp_processor_id() 0
-static inline int up_smp_call_function(smp_call_func_t func, void *info)
+static inline void up_smp_call_function(smp_call_func_t func, void *info)
{
- return 0;
}
#define smp_call_function(func, info, wait) \
(up_smp_call_function(func, info))
@@ -181,29 +180,46 @@ static inline int get_boot_cpu_id(void)
#endif /* !SMP */
-/*
- * smp_processor_id(): get the current CPU ID.
+/**
+ * raw_processor_id() - get the current (unstable) CPU id
+ *
+ * For when you know what you are doing and need an unstable
+ * CPU id.
+ */
+
+/**
+ * smp_processor_id() - get the current (stable) CPU id
+ *
+ * This is the normal accessor to the CPU id and should be used
+ * whenever possible.
*
- * if DEBUG_PREEMPT is enabled then we check whether it is
- * used in a preemption-safe way. (smp_processor_id() is safe
- * if it's used in a preemption-off critical section, or in
- * a thread that is bound to the current CPU.)
+ * The CPU id is stable when:
*
- * NOTE: raw_smp_processor_id() is for internal use only
- * (smp_processor_id() is the preferred variant), but in rare
- * instances it might also be used to turn off false positives
- * (i.e. smp_processor_id() use that the debugging code reports but
- * which use for some reason is legal). Don't use this to hack around
- * the warning message, as your code might not work under PREEMPT.
+ * - IRQs are disabled;
+ * - preemption is disabled;
+ * - the task is CPU affine.
+ *
+ * With CONFIG_DEBUG_PREEMPT, we verify these assumptions and WARN
+ * when smp_processor_id() is used while the CPU id is not stable.
+ */
+
+/*
+ * Allow the architecture to differentiate between a stable and unstable read.
+ * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
+ * regular asm read for the stable.
*/
+#ifndef __smp_processor_id
+#define __smp_processor_id(x) raw_smp_processor_id(x)
+#endif
+
#ifdef CONFIG_DEBUG_PREEMPT
extern unsigned int debug_smp_processor_id(void);
# define smp_processor_id() debug_smp_processor_id()
#else
-# define smp_processor_id() raw_smp_processor_id()
+# define smp_processor_id() __smp_processor_id()
#endif
-#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
+#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
#define put_cpu() preempt_enable()
/*
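
A sketch of the stability rules above: a get_cpu()/put_cpu() pair pins the task and keeps the id stable, while a raw read is only a hint (the function name is illustrative):

    static void example_touch_this_cpu(void)
    {
            int cpu;

            cpu = get_cpu();        /* preemption disabled: id is stable */
            /* ... safely use per-CPU state of 'cpu' here ... */
            put_cpu();              /* preemption re-enabled */

            /*
             * By contrast, this read is unstable: the task may migrate
             * to another CPU immediately after it.
             */
            cpu = raw_smp_processor_id();
            (void)cpu;
    }
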
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 7f7c8c050f63..9cfcc8a756ae 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -120,9 +120,17 @@ struct srcu_struct {
*
* See include/linux/percpu-defs.h for the rules on per-CPU variables.
*/
-#define __DEFINE_SRCU(name, is_static) \
- static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
- is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data)
+#ifdef MODULE
+# define __DEFINE_SRCU(name, is_static) \
+ is_static struct srcu_struct name; \
+ struct srcu_struct * const __srcu_struct_##name \
+ __section("___srcu_struct_ptrs") = &name
+#else
+# define __DEFINE_SRCU(name, is_static) \
+ static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
+ is_static struct srcu_struct name = \
+ __SRCU_STRUCT_INIT(name, name##_srcu_data)
+#endif
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
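
Both variants of the macro are consumed identically by readers and updaters; a minimal, hedged usage sketch (data and names assumed):

    DEFINE_STATIC_SRCU(my_srcu);

    static void my_reader(void)
    {
            int idx;

            idx = srcu_read_lock(&my_srcu);
            /* ... dereference SRCU-protected pointers here ... */
            srcu_read_unlock(&my_srcu, idx);
    }

    static void my_updater(void)
    {
            /* ... unpublish the old data ... */
            synchronize_srcu(&my_srcu);     /* wait out pre-existing readers */
            /* ... now it is safe to free the old data ... */
    }
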
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 6d3635c86dbe..f9a0c6189852 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -36,6 +36,7 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu);
+void stop_machine_yield(const struct cpumask *cpumask);
#else /* CONFIG_SMP */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2bcef4c70183..bc4bbbb9ed9a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -264,7 +264,7 @@ static inline void addr_limit_user_check(void)
if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS),
"Invalid address limit on user-mode return"))
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
#ifdef TIF_FSCHECK
clear_thread_flag(TIF_FSCHECK);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index a8ab0f143ac4..b27e2ffa96c1 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -113,6 +113,34 @@ static inline ktime_t ktime_get_coarse_clocktai(void)
return ktime_get_coarse_with_offset(TK_OFFS_TAI);
}
+static inline ktime_t ktime_get_coarse(void)
+{
+ struct timespec64 ts;
+
+ ktime_get_coarse_ts64(&ts);
+ return timespec64_to_ktime(ts);
+}
+
+static inline u64 ktime_get_coarse_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse());
+}
+
+static inline u64 ktime_get_coarse_real_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_real());
+}
+
+static inline u64 ktime_get_coarse_boottime_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_boottime());
+}
+
+static inline u64 ktime_get_coarse_clocktai_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_clocktai());
+}
+
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
*/
@@ -131,12 +159,12 @@ static inline u64 ktime_get_real_ns(void)
return ktime_to_ns(ktime_get_real());
}
-static inline u64 ktime_get_boot_ns(void)
+static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
-static inline u64 ktime_get_tai_ns(void)
+static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 7b066fd38248..282e4f2a532a 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -36,19 +36,30 @@ struct timer_list {
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/*
- * A deferrable timer will work normally when the system is busy, but
- * will not cause a CPU to come out of idle just to service it; instead,
- * the timer will be serviced when the CPU eventually wakes up with a
- * subsequent non-deferrable timer.
+/**
+ * @TIMER_DEFERRABLE: A deferrable timer will work normally when the
+ * system is busy, but will not cause a CPU to come out of idle just
+ * to service it; instead, the timer will be serviced when the CPU
+ * eventually wakes up with a subsequent non-deferrable timer.
*
- * An irqsafe timer is executed with IRQ disabled and it's safe to wait for
- * the completion of the running instance from IRQ handlers, for example,
- * by calling del_timer_sync().
+ * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and
+ * it's safe to wait for the completion of the running instance from
+ * IRQ handlers, for example, by calling del_timer_sync().
*
* Note: The irq disabled callback execution is a special case for
* workqueue locking issues. It's not meant for executing random crap
* with interrupts disabled. Abuse is monitored!
+ *
+ * @TIMER_PINNED: A pinned timer will not be affected by any timer
+ * placement heuristics (like, NOHZ) and will always expire on the CPU
+ * on which the timer was enqueued.
+ *
+ * Note: Because enqueuing of timers can migrate the timer from one
+ * CPU to another, pinned timers are not guaranteed to stay on the
+ * initially selected CPU. They move to the CPU on which the enqueue
+ * function is invoked via mod_timer() or add_timer(). If the timer
+ * should be placed on a particular CPU, then add_timer_on() has to be
+ * used.
*/
#define TIMER_CPUMASK 0x0003FFFF
#define TIMER_MIGRATING 0x00040000
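
Per the note above, a timer that must fire on a specific CPU combines TIMER_PINNED with add_timer_on(); an illustrative sketch (names assumed):

    static struct timer_list my_timer;

    static void my_timer_fn(struct timer_list *t)
    {
            /* Runs on the CPU the timer was queued on. */
    }

    static void my_arm_on_cpu(int cpu)
    {
            timer_setup(&my_timer, my_timer_fn, TIMER_PINNED);
            my_timer.expires = jiffies + HZ;
            add_timer_on(&my_timer, cpu);   /* queue on that particular CPU */
    }
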
diff --git a/include/linux/topology.h b/include/linux/topology.h
index cb0775e1ee4b..47a3e3c08036 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -184,6 +184,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_physical_package_id
#define topology_physical_package_id(cpu) ((void)(cpu), -1)
#endif
+#ifndef topology_die_id
+#define topology_die_id(cpu) ((void)(cpu), -1)
+#endif
#ifndef topology_core_id
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
@@ -193,6 +196,9 @@ static inline int cpu_to_mem(int cpu)
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu) cpumask_of(cpu)
#endif
+#ifndef topology_die_cpumask
+#define topology_die_cpumask(cpu) cpumask_of(cpu)
+#endif
#ifdef CONFIG_SCHED_SMT
static inline const struct cpumask *cpu_smt_mask(int cpu)
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 23d80db426d7..a620118385bb 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void));
/* Task stuttering, which forces load/no-load transitions. */
bool stutter_wait(const char *title);
-int torture_stutter_init(int s);
+int torture_stutter_init(int s, int sgap);
/* Initialization and cleanup. */
bool torture_init_begin(char *ttype, int v);
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 81519f163211..63238c84dc0b 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -112,4 +112,156 @@ struct tcg_pcr_event2_head {
struct tpm_digest digests[];
} __packed;
+struct tcg_algorithm_size {
+ u16 algorithm_id;
+ u16 algorithm_size;
+};
+
+struct tcg_algorithm_info {
+ u8 signature[16];
+ u32 platform_class;
+ u8 spec_version_minor;
+ u8 spec_version_major;
+ u8 spec_errata;
+ u8 uintn_size;
+ u32 number_of_algorithms;
+ struct tcg_algorithm_size digest_sizes[];
+};
+
+#ifndef TPM_MEMREMAP
+#define TPM_MEMREMAP(start, size) NULL
+#endif
+
+#ifndef TPM_MEMUNMAP
+#define TPM_MEMUNMAP(start, size) do {} while (0)
+#endif
+
+/**
+ * __calc_tpm2_event_size - calculate the size of a TPM2 event log entry
+ * @event: Pointer to the event whose size should be calculated
+ * @event_header: Pointer to the initial event containing the digest lengths
+ * @do_mapping: Whether or not the event needs to be mapped
+ *
+ * The TPM2 event log format can contain multiple digests corresponding to
+ * separate PCR banks, and also contains a variable length of the data that
+ * was measured. This requires knowledge of how long each digest type is,
+ * and this information is contained within the first event in the log.
+ *
+ * We calculate the length by examining the number of events, and then looking
+ * at each event in turn to determine how much space is used for events in
+ * total. Once we've done this we know the offset of the data length field,
+ * and can calculate the total size of the event.
+ *
+ * Return: size of the event on success, 0 on failure
+ */
+
+static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ struct tcg_pcr_event *event_header,
+ bool do_mapping)
+{
+ struct tcg_efi_specid_event_head *efispecid;
+ struct tcg_event_field *event_field;
+ void *mapping = NULL;
+ int mapping_size;
+ void *marker;
+ void *marker_start;
+ u32 halg_size;
+ size_t size;
+ u16 halg;
+ int i;
+ int j;
+
+ marker = event;
+ marker_start = marker;
+ marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type)
+ + sizeof(event->count);
+
+ /* Map the event header */
+ if (do_mapping) {
+ mapping_size = marker - marker_start;
+ mapping = TPM_MEMREMAP((unsigned long)marker_start,
+ mapping_size);
+ if (!mapping) {
+ size = 0;
+ goto out;
+ }
+ } else {
+ mapping = marker_start;
+ }
+
+ event = (struct tcg_pcr_event2_head *)mapping;
+
+ efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
+
+ /* Check if event is malformed. */
+ if (event->count > efispecid->num_algs) {
+ size = 0;
+ goto out;
+ }
+
+ for (i = 0; i < event->count; i++) {
+ halg_size = sizeof(event->digests[i].alg_id);
+
+ /* Map the digest's algorithm identifier */
+ if (do_mapping) {
+ TPM_MEMUNMAP(mapping, mapping_size);
+ mapping_size = halg_size;
+ mapping = TPM_MEMREMAP((unsigned long)marker,
+ mapping_size);
+ if (!mapping) {
+ size = 0;
+ goto out;
+ }
+ } else {
+ mapping = marker;
+ }
+
+ memcpy(&halg, mapping, halg_size);
+ marker = marker + halg_size;
+
+ for (j = 0; j < efispecid->num_algs; j++) {
+ if (halg == efispecid->digest_sizes[j].alg_id) {
+ marker +=
+ efispecid->digest_sizes[j].digest_size;
+ break;
+ }
+ }
+ /* Algorithm without known length. Such event is unparseable. */
+ if (j == efispecid->num_algs) {
+ size = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * Map the event size - we don't read from the event itself, so
+ * we don't need to map it
+ */
+ if (do_mapping) {
+ TPM_MEMUNMAP(mapping, mapping_size);
+ mapping_size += sizeof(event_field->event_size);
+ mapping = TPM_MEMREMAP((unsigned long)marker,
+ mapping_size);
+ if (!mapping) {
+ size = 0;
+ goto out;
+ }
+ } else {
+ mapping = marker;
+ }
+
+ event_field = (struct tcg_event_field *)mapping;
+
+ marker = marker + sizeof(event_field->event_size)
+ + event_field->event_size;
+ size = marker - marker_start;
+
+ if ((event->event_type == 0) && (event_field->event_size == 0))
+ size = 0;
+out:
+ if (do_mapping)
+ TPM_MEMUNMAP(mapping, mapping_size);
+ return size;
+}
+
#endif
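
A hedged sketch of how a parser might walk an already-mapped in-kernel log with the helper above (do_mapping is false; bounds handling simplified, and the function name is illustrative):

    static int my_count_tpm2_events(void *log, size_t log_size)
    {
            struct tcg_pcr_event *event_header = log;
            void *marker = event_header->event + event_header->event_size;
            int count = 0;

            while (marker < log + log_size) {
                    int size = __calc_tpm2_event_size(marker, event_header,
                                                      false);

                    if (size <= 0)
                            break;  /* malformed or terminating event */
                    marker += size;
                    count++;
            }
            return count;
    }
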
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 09d678433fc0..8446573cc682 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -184,6 +184,13 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
if (unlikely(current->task_works))
task_work_run();
+#ifdef CONFIG_KEYS_REQUEST_CACHE
+ if (unlikely(current->cached_requested_key)) {
+ key_put(current->cached_requested_key);
+ current->cached_requested_key = NULL;
+ }
+#endif
+
mem_cgroup_handle_over_high();
blkcg_maybe_throttle_current();
}
diff --git a/include/linux/types.h b/include/linux/types.h
index 231114ae38f4..05030f608be3 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -174,7 +174,7 @@ typedef struct {
#ifdef CONFIG_64BIT
typedef struct {
- long counter;
+ s64 counter;
} atomic64_t;
#endif
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index d6b74b91096b..fb9f4f799554 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -64,10 +64,20 @@ struct user_namespace {
struct ns_common ns;
unsigned long flags;
+#ifdef CONFIG_KEYS
+ /* List of joinable keyrings in this namespace. Modification of
+ * these pointers is controlled by keyring_sem. Once
+ * user_keyring_register is set, it won't be changed, so it can be
+ * accessed directly with READ_ONCE().
+ */
+ struct list_head keyring_name_list;
+ struct key *user_keyring_register;
+ struct rw_semaphore keyring_sem;
+#endif
+
/* Register of per-UID persistent keyrings for this namespace */
#ifdef CONFIG_PERSISTENT_KEYRINGS
struct key *persistent_keyring_register;
- struct rw_semaphore persistent_keyring_register_sem;
#endif
struct work_struct work;
#ifdef CONFIG_SYSCTL
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d59525fca4d3..b7c585b5ec1c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -435,10 +435,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
extern void destroy_workqueue(struct workqueue_struct *wq);
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask);
-void free_workqueue_attrs(struct workqueue_attrs *attrs);
-int apply_workqueue_attrs(struct workqueue_struct *wq,
- const struct workqueue_attrs *attrs);
int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8fb5be3ca0ca..1fce25b1d87f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2007,7 +2007,7 @@ enum cfg80211_signal_type {
* received by the device (not just by the host, in case it was
* buffered on the device) and be accurate to about 10ms.
* If the frame isn't buffered, just passing the return value of
- * ktime_get_boot_ns() is likely appropriate.
+ * ktime_get_boottime_ns() is likely appropriate.
* @parent_tsf: the time at the start of reception of the first octet of the
* timestamp field of the frame. The time is the TSF of the BSS specified
* by %parent_bssid.
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12689ddfc24c..a56bf7fc7c2b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -71,6 +71,9 @@ struct net {
*/
struct llist_node cleanup_list; /* namespaces on death row */
+#ifdef CONFIG_KEYS
+ struct key_tag *key_domain; /* Key domain of operation tag */
+#endif
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
spinlock_t nsid_lock;
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c8c7c7efb487..420e80e56e55 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -594,6 +594,37 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
TP_printk("cpu=%d", __entry->cpu)
);
+
+/*
+ * The following tracepoints are not exported in tracefs and provide hooking
+ * mechanisms only for testing and debugging purposes.
+ *
+ * Postfixed with _tp to make them easily identifiable in the code.
+ */
+DECLARE_TRACE(pelt_cfs_tp,
+ TP_PROTO(struct cfs_rq *cfs_rq),
+ TP_ARGS(cfs_rq));
+
+DECLARE_TRACE(pelt_rt_tp,
+ TP_PROTO(struct rq *rq),
+ TP_ARGS(rq));
+
+DECLARE_TRACE(pelt_dl_tp,
+ TP_PROTO(struct rq *rq),
+ TP_ARGS(rq));
+
+DECLARE_TRACE(pelt_irq_tp,
+ TP_PROTO(struct rq *rq),
+ TP_ARGS(rq));
+
+DECLARE_TRACE(pelt_se_tp,
+ TP_PROTO(struct sched_entity *se),
+ TP_ARGS(se));
+
+DECLARE_TRACE(sched_overutilized_tp,
+ TP_PROTO(struct root_domain *rd, bool overutilized),
+ TP_ARGS(rd, overutilized));
+
#endif /* _TRACE_SCHED_H */
/* This part must be outside protection */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index a1280af20336..c89c6495983d 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -281,6 +281,7 @@
#define AUDIT_OBJ_GID 110
#define AUDIT_FIELD_COMPARE 111
#define AUDIT_EXE 112
+#define AUDIT_SADDR_FAM 113
#define AUDIT_ARG0 200
#define AUDIT_ARG1 (AUDIT_ARG0+1)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a8b823c30b43..489e118b69d2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -785,7 +785,7 @@ union bpf_attr {
* based on a user-provided identifier for all traffic coming from
* the tasks belonging to the related cgroup. See also the related
* kernel documentation, available from the Linux sources in file
- * *Documentation/cgroup-v1/net_cls.txt*.
+ * *Documentation/cgroup-v1/net_cls.rst*.
*
* The Linux kernel has two versions for cgroups: there are
* cgroups v1 and cgroups v2. Both are available to users, who can
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index f45ee0f69c0c..1f7a4e737214 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -15,6 +15,69 @@
#include <linux/types.h>
+/*
+ * Keyring permission grant definitions
+ */
+enum key_ace_subject_type {
+ KEY_ACE_SUBJ_STANDARD = 0, /* subject is one of key_ace_standard_subject */
+ nr__key_ace_subject_type
+};
+
+enum key_ace_standard_subject {
+ KEY_ACE_EVERYONE = 0, /* Everyone, including owner and group */
+ KEY_ACE_GROUP = 1, /* The key's group */
+ KEY_ACE_OWNER = 2, /* The owner of the key */
+ KEY_ACE_POSSESSOR = 3, /* Any process that possesses the key */
+ nr__key_ace_standard_subject
+};
+
+#define KEY_ACE_VIEW 0x00000001 /* Can describe the key */
+#define KEY_ACE_READ 0x00000002 /* Can read the key content */
+#define KEY_ACE_WRITE 0x00000004 /* Can update/modify the key content */
+#define KEY_ACE_SEARCH 0x00000008 /* Can find the key by search */
+#define KEY_ACE_LINK 0x00000010 /* Can make a link to the key */
+#define KEY_ACE_SET_SECURITY 0x00000020 /* Can set owner, group, ACL */
+#define KEY_ACE_INVAL 0x00000040 /* Can invalidate the key */
+#define KEY_ACE_REVOKE 0x00000080 /* Can revoke the key */
+#define KEY_ACE_JOIN 0x00000100 /* Can join keyring */
+#define KEY_ACE_CLEAR 0x00000200 /* Can clear keyring */
+#define KEY_ACE__PERMS 0xffffffff
+
+/*
+ * Old-style permissions mask, deprecated in favour of ACL.
+ */
+#define KEY_POS_VIEW 0x01000000 /* possessor can view a key's attributes */
+#define KEY_POS_READ 0x02000000 /* possessor can read key payload / view keyring */
+#define KEY_POS_WRITE 0x04000000 /* possessor can update key payload / add link to keyring */
+#define KEY_POS_SEARCH 0x08000000 /* possessor can find a key in search / search a keyring */
+#define KEY_POS_LINK 0x10000000 /* possessor can create a link to a key/keyring */
+#define KEY_POS_SETATTR 0x20000000 /* possessor can set key attributes */
+#define KEY_POS_ALL 0x3f000000
+
+#define KEY_USR_VIEW 0x00010000 /* user permissions... */
+#define KEY_USR_READ 0x00020000
+#define KEY_USR_WRITE 0x00040000
+#define KEY_USR_SEARCH 0x00080000
+#define KEY_USR_LINK 0x00100000
+#define KEY_USR_SETATTR 0x00200000
+#define KEY_USR_ALL 0x003f0000
+
+#define KEY_GRP_VIEW 0x00000100 /* group permissions... */
+#define KEY_GRP_READ 0x00000200
+#define KEY_GRP_WRITE 0x00000400
+#define KEY_GRP_SEARCH 0x00000800
+#define KEY_GRP_LINK 0x00001000
+#define KEY_GRP_SETATTR 0x00002000
+#define KEY_GRP_ALL 0x00003f00
+
+#define KEY_OTH_VIEW 0x00000001 /* third party permissions... */
+#define KEY_OTH_READ 0x00000002
+#define KEY_OTH_WRITE 0x00000004
+#define KEY_OTH_SEARCH 0x00000008
+#define KEY_OTH_LINK 0x00000010
+#define KEY_OTH_SETATTR 0x00000020
+#define KEY_OTH_ALL 0x0000003f
+
/* special process keyring shortcut IDs */
#define KEY_SPEC_THREAD_KEYRING -1 /* - key ID for thread-specific keyring */
#define KEY_SPEC_PROCESS_KEYRING -2 /* - key ID for process-specific keyring */
@@ -67,6 +130,9 @@
#define KEYCTL_PKEY_SIGN 27 /* Create a public key signature */
#define KEYCTL_PKEY_VERIFY 28 /* Verify a public key signature */
#define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */
+#define KEYCTL_MOVE 30 /* Move keys between keyrings */
+#define KEYCTL_CAPABILITIES 31 /* Find capabilities of keyrings subsystem */
+#define KEYCTL_GRANT_PERMISSION 32 /* Grant a permit to a key */
/* keyctl structures */
struct keyctl_dh_params {
@@ -112,4 +178,22 @@ struct keyctl_pkey_params {
__u32 __spare[7];
};
+#define KEYCTL_MOVE_EXCL 0x00000001 /* Do not displace from the to-keyring */
+
+/*
+ * Capabilities flags. The capabilities list is an array of 8-bit integers;
+ * each integer can carry up to 8 flags.
+ */
+#define KEYCTL_CAPS0_CAPABILITIES 0x01 /* KEYCTL_CAPABILITIES supported */
+#define KEYCTL_CAPS0_PERSISTENT_KEYRINGS 0x02 /* Persistent keyrings enabled */
+#define KEYCTL_CAPS0_DIFFIE_HELLMAN 0x04 /* Diffie-Hellman computation enabled */
+#define KEYCTL_CAPS0_PUBLIC_KEY 0x08 /* Public key ops enabled */
+#define KEYCTL_CAPS0_BIG_KEY 0x10 /* big_key-type enabled */
+#define KEYCTL_CAPS0_INVALIDATE 0x20 /* KEYCTL_INVALIDATE supported */
+#define KEYCTL_CAPS0_RESTRICT_KEYRING 0x40 /* KEYCTL_RESTRICT_KEYRING supported */
+#define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */
+#define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */
+#define KEYCTL_CAPS1_NS_KEY_TAG 0x02 /* Key indexing can include a namespace tag */
+#define KEYCTL_CAPS1_ACL_ALTERABLE 0x04 /* Keys have internal ACL that can be altered */
+
#endif /* _LINUX_KEYCTL_H */
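
From userspace, the new capabilities call might be probed roughly as follows (raw syscall sketch, no libkeyutils wrapper assumed):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/keyctl.h>

    int main(void)
    {
            unsigned char caps[2] = { 0, 0 };
            long n = syscall(__NR_keyctl, KEYCTL_CAPABILITIES,
                             caps, sizeof(caps));

            if (n < 0) {
                    perror("keyctl(KEYCTL_CAPABILITIES)");
                    return 1;
            }
            if (caps[0] & KEYCTL_CAPS0_MOVE)
                    printf("KEYCTL_MOVE is supported\n");
            return 0;
    }
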
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index ed4ee170bee2..617bb59aa8ba 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -51,9 +51,21 @@
#define SCHED_FLAG_RESET_ON_FORK 0x01
#define SCHED_FLAG_RECLAIM 0x02
#define SCHED_FLAG_DL_OVERRUN 0x04
+#define SCHED_FLAG_KEEP_POLICY 0x08
+#define SCHED_FLAG_KEEP_PARAMS 0x10
+#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
+#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
+
+#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
+ SCHED_FLAG_KEEP_PARAMS)
+
+#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
+ SCHED_FLAG_UTIL_CLAMP_MAX)
#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
SCHED_FLAG_RECLAIM | \
- SCHED_FLAG_DL_OVERRUN)
+ SCHED_FLAG_DL_OVERRUN | \
+ SCHED_FLAG_KEEP_ALL | \
+ SCHED_FLAG_UTIL_CLAMP)
#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index 10fbb8031930..c852153ddb0d 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -9,6 +9,7 @@ struct sched_param {
};
#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */
+#define SCHED_ATTR_SIZE_VER1 56 /* add: util_{min,max} */
/*
* Extended scheduling parameters data structure.
@@ -21,8 +22,33 @@ struct sched_param {
* the tasks may be useful for a wide variety of application fields, e.g.,
* multimedia, streaming, automation and control, and many others.
*
- * This variant (sched_attr) is meant at describing a so-called
- * sporadic time-constrained task. In such model a task is specified by:
+ * This variant (sched_attr) allows defining additional attributes to
+ * improve the scheduler's knowledge about task requirements.
+ *
+ * Scheduling Class Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes specifies the
+ * scheduling policy and relative POSIX attributes:
+ *
+ * @size size of the structure, for fwd/bwd compat.
+ *
+ * @sched_policy task's scheduling policy
+ * @sched_nice task's nice value (SCHED_NORMAL/BATCH)
+ * @sched_priority task's static priority (SCHED_FIFO/RR)
+ *
+ * Certain more advanced scheduling features can be controlled by a
+ * predefined set of flags via the attribute:
+ *
+ * @sched_flags for customizing the scheduler behaviour
+ *
+ * Sporadic Time-Constrained Task Attributes
+ * =========================================
+ *
+ * A subset of sched_attr attributes allows one to describe a so-called
+ * sporadic time-constrained task.
+ *
+ * In such a model a task is specified by:
* - the activation period or minimum instance inter-arrival time;
* - the maximum (or average, depending on the actual scheduling
* discipline) computation time of all instances, a.k.a. runtime;
@@ -34,14 +60,8 @@ struct sched_param {
* than the runtime and must be completed by time instant t equal to
* the instance activation time + the deadline.
*
- * This is reflected by the actual fields of the sched_attr structure:
+ * This is reflected by the following fields of the sched_attr structure:
*
- * @size size of the structure, for fwd/bwd compat.
- *
- * @sched_policy task's scheduling policy
- * @sched_flags for customizing the scheduler behaviour
- * @sched_nice task's nice value (SCHED_NORMAL/BATCH)
- * @sched_priority task's static priority (SCHED_FIFO/RR)
* @sched_deadline representative of the task's deadline
* @sched_runtime representative of the task's runtime
* @sched_period representative of the task's period
@@ -53,6 +73,29 @@ struct sched_param {
* As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
* only user of this new interface. More information about the algorithm
* available in the scheduling class file or in Documentation/.
+ *
+ * Task Utilization Attributes
+ * ===========================
+ *
+ * A subset of sched_attr attributes allows specifying the utilization
+ * expected for a task. These attributes inform the scheduler about
+ * the utilization boundaries within which it should schedule the task. These
+ * boundaries are valuable hints to support scheduler decisions on both task
+ * placement and frequency selection.
+ *
+ * @sched_util_min represents the minimum utilization
+ * @sched_util_max represents the maximum utilization
+ *
+ * Utilization is a value in the range [0..SCHED_CAPACITY_SCALE]. It
+ * represents the percentage of CPU time used by a task when running at the
+ * maximum frequency on the highest capacity CPU of the system. For example, a
+ * 20% utilization task is a task running for 2ms every 10ms at maximum
+ * frequency.
+ *
+ * A task with a min utilization value bigger than 0 is more likely scheduled
+ * on a CPU with a capacity big enough to fit the specified value.
+ * A task with a max utilization value smaller than 1024 is more likely
+ * scheduled on a CPU with no more capacity than the specified value.
*/
struct sched_attr {
__u32 size;
@@ -70,6 +113,11 @@ struct sched_attr {
__u64 sched_runtime;
__u64 sched_deadline;
__u64 sched_period;
+
+ /* Utilization hints */
+ __u32 sched_util_min;
+ __u32 sched_util_max;
+
};
#endif /* _UAPI_LINUX_SCHED_TYPES_H */
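
A hedged userspace sketch of requesting the clamps documented above; sched_setattr() has no glibc wrapper, so a raw syscall is used, and 205 approximates the 20% example (1024 * 0.2):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/sched.h>
    #include <linux/sched/types.h>

    static int my_set_util_clamps(unsigned int util_min, unsigned int util_max)
    {
            struct sched_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);       /* >= SCHED_ATTR_SIZE_VER1 */
            attr.sched_flags = SCHED_FLAG_KEEP_ALL | SCHED_FLAG_UTIL_CLAMP;
            attr.sched_util_min = util_min; /* e.g. 205 for ~20% */
            attr.sched_util_max = util_max;

            return syscall(__NR_sched_setattr, 0 /* current task */, &attr, 0);
    }
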
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
new file mode 100644
index 000000000000..2e302c0f41f7
--- /dev/null
+++ b/include/vdso/datapage.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bits.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+#define VDSO_BASES (CLOCK_TAI + 1)
+#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
+ BIT(CLOCK_MONOTONIC) | \
+ BIT(CLOCK_BOOTTIME) | \
+ BIT(CLOCK_TAI))
+#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
+ BIT(CLOCK_MONOTONIC_COARSE))
+#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
+
+#define CS_HRES_COARSE 0
+#define CS_RAW 1
+#define CS_BASES (CS_RAW + 1)
+
+/**
+ * struct vdso_timestamp - basetime per clock_id
+ * @sec: seconds
+ * @nsec: nanoseconds
+ *
+ * There is one vdso_timestamp object in vvar for each vDSO-accelerated
+ * clock_id. For high-resolution clocks, this encodes the time
+ * corresponding to vdso_data.cycle_last. For coarse clocks this encodes
+ * the actual time.
+ *
+ * Note that for highres clocks nsec is left-shifted by
+ * vdso_data.cs[x].shift.
+ */
+struct vdso_timestamp {
+ u64 sec;
+ u64 nsec;
+};
+
+/**
+ * struct vdso_data - vdso datapage representation
+ * @seq: timebase sequence counter
+ * @clock_mode: clock mode
+ * @cycle_last: timebase at clocksource init
+ * @mask: clocksource mask
+ * @mult: clocksource multiplier
+ * @shift: clocksource shift
+ * @basetime[clock_id]: basetime per clock_id
+ * @tz_minuteswest: minutes west of Greenwich
+ * @tz_dsttime: type of DST correction
+ * @hrtimer_res: hrtimer resolution
+ * @__unused: unused
+ *
+ * vdso_data will be accessed by 64 bit and compat code at the same time
+ * so we should be careful before modifying this structure.
+ */
+struct vdso_data {
+ u32 seq;
+
+ s32 clock_mode;
+ u64 cycle_last;
+ u64 mask;
+ u32 mult;
+ u32 shift;
+
+ struct vdso_timestamp basetime[VDSO_BASES];
+
+ s32 tz_minuteswest;
+ s32 tz_dsttime;
+ u32 hrtimer_res;
+ u32 __unused;
+};
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * must not be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation, and this is what we need.
+ */
+extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_DATAPAGE_H */
diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h
new file mode 100644
index 000000000000..01641dbb68ef
--- /dev/null
+++ b/include/vdso/helpers.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_HELPERS_H
+#define __VDSO_HELPERS_H
+
+#ifndef __ASSEMBLY__
+
+#include <vdso/datapage.h>
+
+static __always_inline u32 vdso_read_begin(const struct vdso_data *vd)
+{
+ u32 seq;
+
+ while ((seq = READ_ONCE(vd->seq)) & 1)
+ cpu_relax();
+
+ smp_rmb();
+ return seq;
+}
+
+static __always_inline u32 vdso_read_retry(const struct vdso_data *vd,
+ u32 start)
+{
+ u32 seq;
+
+ smp_rmb();
+ seq = READ_ONCE(vd->seq);
+ return seq != start;
+}
+
+static __always_inline void vdso_write_begin(struct vdso_data *vd)
+{
+ /*
+ * WRITE_ONCE() is required; otherwise the compiler can legally tear
+ * updates to vd[x].seq and the value seen by the reader could be
+ * inconsistent.
+ */
+ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
+ WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
+ smp_wmb();
+}
+
+static __always_inline void vdso_write_end(struct vdso_data *vd)
+{
+ smp_wmb();
+ /*
+ * WRITE_ONCE() is required; otherwise the compiler can legally tear
+ * updates to vd[x].seq and the value seen by the reader could be
+ * inconsistent.
+ */
+ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
+ WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_HELPERS_H */
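
On the reader side the two helpers compose into the classic seqcount retry loop; a hedged sketch of a high-resolution fast path (the conversion is simplified, and the caller-provided cycle count plus NSEC_PER_SEC are assumed):

    static __always_inline u64 my_read_hres_ns(const struct vdso_data *vd,
                                               clockid_t clk, u64 cycles)
    {
            const struct vdso_timestamp *ts = &vd->basetime[clk];
            u64 ns;
            u32 seq;

            do {
                    seq = vdso_read_begin(vd);
                    ns = ts->nsec;
                    ns += ((cycles - vd->cycle_last) & vd->mask) * vd->mult;
                    ns >>= vd->shift;
                    ns += ts->sec * NSEC_PER_SEC;
            } while (vdso_read_retry(vd, seq));

            return ns;
    }
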
diff --git a/include/vdso/vsyscall.h b/include/vdso/vsyscall.h
new file mode 100644
index 000000000000..2c6134e0c23d
--- /dev/null
+++ b/include/vdso/vsyscall.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_VSYSCALL_H
+#define __VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_VSYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 0e2344389501..bf96faf3fe43 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -677,6 +677,59 @@ config HAVE_UNSTABLE_SCHED_CLOCK
config GENERIC_SCHED_CLOCK
bool
+menu "Scheduler features"
+
+config UCLAMP_TASK
+ bool "Enable utilization clamping for RT/FAIR tasks"
+ depends on CPU_FREQ_GOV_SCHEDUTIL
+ help
+ This feature enables the scheduler to track the clamped utilization
+ of each CPU based on RUNNABLE tasks scheduled on that CPU.
+
+ With this option, the user can specify the min and max CPU
+ utilization allowed for RUNNABLE tasks. The max utilization defines
+ the maximum frequency a task should use, while the min utilization
+ defines the minimum frequency it should use.
+
+ Both min and max utilization clamp values are hints to the scheduler,
+ aiming at improving its frequency selection policy, but they do not
+ enforce or grant any specific bandwidth for tasks.
+
+ If in doubt, say N.
+
+config UCLAMP_BUCKETS_COUNT
+ int "Number of supported utilization clamp buckets"
+ range 5 20
+ default 5
+ depends on UCLAMP_TASK
+ help
+ Defines the number of clamp buckets to use. The range of each bucket
+ will be SCHED_CAPACITY_SCALE/UCLAMP_BUCKETS_COUNT. The higher the
+ number of clamp buckets the finer their granularity and the higher
+ the precision of clamping aggregation and tracking at run-time.
+
+ For example, with the minimum configuration value we will have 5
+ clamp buckets tracking 20% utilization each. A 25% boosted task will
+ be refcounted in the [20..39]% bucket and will set the bucket clamp
+ effective value to 25%.
+ If a second 30% boosted task is co-scheduled on the same CPU,
+ that task will be refcounted in the same bucket as the first task and
+ it will boost the bucket clamp effective value to 30%.
+ The clamp effective value of a bucket is reset to its nominal value
+ (20% in the example above) when there are no more tasks refcounted in
+ that bucket.
+
+ An additional boost/capping margin can be added to some tasks. In the
+ example above the 25% task will be boosted to 30% until it exits the
+ CPU. If that is not acceptable on certain systems,
+ it's always possible to reduce the margin by increasing the number of
+ clamp buckets to trade off used memory for run-time tracking
+ precision.
+
+ If in doubt, use the default value.
+
+endmenu
+
#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:
@@ -797,7 +850,7 @@ config BLK_CGROUP
CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
CONFIG_BLK_DEV_THROTTLING=y.
- See Documentation/cgroup-v1/blkio-controller.txt for more information.
+ See Documentation/cgroup-v1/blkio-controller.rst for more information.
config DEBUG_BLK_CGROUP
bool "IO controller debugging"
diff --git a/init/init_task.c b/init/init_task.c
index c70ef656d0f4..7ab773b9b3cd 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -72,7 +72,8 @@ struct task_struct init_task
.static_prio = MAX_PRIO - 20,
.normal_prio = MAX_PRIO - 20,
.policy = SCHED_NORMAL,
- .cpus_allowed = CPU_MASK_ALL,
+ .cpus_ptr = &init_task.cpus_mask,
+ .cpus_mask = CPU_MASK_ALL,
.nr_cpus_allowed= NR_CPUS,
.mm = NULL,
.active_mm = &init_mm,
@@ -166,6 +167,8 @@ struct task_struct init_task
.softirqs_enabled = 1,
#endif
#ifdef CONFIG_LOCKDEP
+ .lockdep_depth = 0, /* no locks held yet */
+ .curr_chain_key = INITIAL_CHAIN_KEY,
.lockdep_recursion = 0,
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/kernel/audit.c b/kernel/audit.c
index 486c968214d9..da8dc0db5bd3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2261,6 +2261,33 @@ out:
}
/**
+ * audit_signal_info - record signal info for shutting down audit subsystem
+ * @sig: signal value
+ * @t: task being signaled
+ *
+ * If the audit subsystem is being terminated, record the task (pid)
+ * and uid that is doing that.
+ */
+int audit_signal_info(int sig, struct task_struct *t)
+{
+ kuid_t uid = current_uid(), auid;
+
+ if (auditd_test_task(t) &&
+ (sig == SIGTERM || sig == SIGHUP ||
+ sig == SIGUSR1 || sig == SIGUSR2)) {
+ audit_sig_pid = task_tgid_nr(current);
+ auid = audit_get_loginuid(current);
+ if (uid_valid(auid))
+ audit_sig_uid = auid;
+ else
+ audit_sig_uid = uid;
+ security_task_getsecid(current, &audit_sig_sid);
+ }
+
+ return audit_signal_info_syscall(t);
+}
+
+/**
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
diff --git a/kernel/audit.h b/kernel/audit.h
index 6c076d4982da..6fb7160412d4 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -286,7 +286,7 @@ extern const char *audit_tree_path(struct audit_tree *tree);
extern void audit_put_tree(struct audit_tree *tree);
extern void audit_kill_trees(struct audit_context *context);
-extern int audit_signal_info(int sig, struct task_struct *t);
+extern int audit_signal_info_syscall(struct task_struct *t);
extern void audit_filter_inodes(struct task_struct *tsk,
struct audit_context *ctx);
extern struct list_head *audit_killed_trees(void);
@@ -317,7 +317,11 @@ extern struct list_head *audit_killed_trees(void);
#define audit_tree_path(rule) "" /* never called */
#define audit_kill_trees(context) BUG()
-#define audit_signal_info(s, t) AUDIT_DISABLED
+static inline int audit_signal_info_syscall(struct task_struct *t)
+{
+ return 0;
+}
+
#define audit_filter_inodes(t, c) AUDIT_DISABLED
#endif /* CONFIG_AUDITSYSCALL */
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9f8e190e3bea..b0126e9c0743 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -322,7 +322,7 @@ static u32 audit_to_op(u32 op)
/* check if an audit field is valid */
static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
{
- switch(f->type) {
+ switch (f->type) {
case AUDIT_MSGTYPE:
if (entry->rule.listnr != AUDIT_FILTER_EXCLUDE &&
entry->rule.listnr != AUDIT_FILTER_USER)
@@ -334,7 +334,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
break;
}
- switch(entry->rule.listnr) {
+ switch (entry->rule.listnr) {
case AUDIT_FILTER_FS:
switch(f->type) {
case AUDIT_FSTYPE:
@@ -345,9 +345,16 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
}
}
- switch(f->type) {
- default:
- return -EINVAL;
+ /* Check for valid field type and op */
+ switch (f->type) {
+ case AUDIT_ARG0:
+ case AUDIT_ARG1:
+ case AUDIT_ARG2:
+ case AUDIT_ARG3:
+ case AUDIT_PERS: /* <uapi/linux/personality.h> */
+ case AUDIT_DEVMINOR:
+ /* all ops are valid */
+ break;
case AUDIT_UID:
case AUDIT_EUID:
case AUDIT_SUID:
@@ -360,46 +367,53 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
case AUDIT_FSGID:
case AUDIT_OBJ_GID:
case AUDIT_PID:
- case AUDIT_PERS:
case AUDIT_MSGTYPE:
case AUDIT_PPID:
case AUDIT_DEVMAJOR:
- case AUDIT_DEVMINOR:
case AUDIT_EXIT:
case AUDIT_SUCCESS:
case AUDIT_INODE:
case AUDIT_SESSIONID:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
+ case AUDIT_SADDR_FAM:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
return -EINVAL;
break;
- case AUDIT_ARG0:
- case AUDIT_ARG1:
- case AUDIT_ARG2:
- case AUDIT_ARG3:
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
- case AUDIT_SUBJ_SEN:
- case AUDIT_SUBJ_CLR:
case AUDIT_OBJ_USER:
case AUDIT_OBJ_ROLE:
case AUDIT_OBJ_TYPE:
- case AUDIT_OBJ_LEV_LOW:
- case AUDIT_OBJ_LEV_HIGH:
case AUDIT_WATCH:
case AUDIT_DIR:
case AUDIT_FILTERKEY:
- break;
case AUDIT_LOGINUID_SET:
- if ((f->val != 0) && (f->val != 1))
- return -EINVAL;
- /* FALL THROUGH */
case AUDIT_ARCH:
case AUDIT_FSTYPE:
+ case AUDIT_PERM:
+ case AUDIT_FILETYPE:
+ case AUDIT_FIELD_COMPARE:
+ case AUDIT_EXE:
+ /* only equal and not equal valid ops */
if (f->op != Audit_not_equal && f->op != Audit_equal)
return -EINVAL;
break;
+ default:
+ /* field not recognized */
+ return -EINVAL;
+ }
+
+ /* Check for select valid field values */
+ switch (f->type) {
+ case AUDIT_LOGINUID_SET:
+ if ((f->val != 0) && (f->val != 1))
+ return -EINVAL;
+ break;
case AUDIT_PERM:
if (f->val & ~15)
return -EINVAL;
@@ -412,11 +426,14 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
if (f->val > AUDIT_MAX_FIELD_COMPARE)
return -EINVAL;
break;
- case AUDIT_EXE:
- if (f->op != Audit_not_equal && f->op != Audit_equal)
+ case AUDIT_SADDR_FAM:
+ if (f->val >= AF_MAX)
return -EINVAL;
break;
+ default:
+ break;
}
+
return 0;
}
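The reshuffle splits validation into two passes: the first switch checks the field type and its allowed ops, the second checks selected field values. A hypothetical rule fragment illustrating the second pass (a sketch for illustration, not kernel code):

        /*
         * Passes the op check (Audit_equal is allowed for AUDIT_PERM) but
         * fails the value check, since 16 has a bit outside the valid 0-15
         * permission mask: audit_field_valid() returns -EINVAL.
         */
        struct audit_field f = {
                .type = AUDIT_PERM,
                .op   = Audit_equal,
                .val  = 16,             /* f->val & ~15 != 0 */
        };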
@@ -1190,7 +1207,6 @@ int audit_comparator(u32 left, u32 op, u32 right)
case Audit_bittest:
return ((left & right) == right);
default:
- BUG();
return 0;
}
}
@@ -1213,7 +1229,6 @@ int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
case Audit_bitmask:
case Audit_bittest:
default:
- BUG();
return 0;
}
}
@@ -1236,7 +1251,6 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
case Audit_bitmask:
case Audit_bittest:
default:
- BUG();
return 0;
}
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 95ae27edd417..4effe01ebbe2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -601,12 +601,20 @@ static int audit_filter_rules(struct task_struct *tsk,
}
break;
case AUDIT_WATCH:
- if (name)
- result = audit_watch_compare(rule->watch, name->ino, name->dev);
+ if (name) {
+ result = audit_watch_compare(rule->watch,
+ name->ino,
+ name->dev);
+ if (f->op == Audit_not_equal)
+ result = !result;
+ }
break;
case AUDIT_DIR:
- if (ctx)
+ if (ctx) {
result = match_tree_refs(ctx, rule->tree);
+ if (f->op == Audit_not_equal)
+ result = !result;
+ }
break;
case AUDIT_LOGINUID:
result = audit_uid_comparator(audit_get_loginuid(tsk),
@@ -615,6 +623,11 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_LOGINUID_SET:
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
break;
+ case AUDIT_SADDR_FAM:
+ if (ctx->sockaddr)
+ result = audit_comparator(ctx->sockaddr->ss_family,
+ f->op, f->val);
+ break;
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
@@ -684,9 +697,13 @@ static int audit_filter_rules(struct task_struct *tsk,
break;
case AUDIT_PERM:
result = audit_match_perm(ctx, f->val);
+ if (f->op == Audit_not_equal)
+ result = !result;
break;
case AUDIT_FILETYPE:
result = audit_match_filetype(ctx, f->val);
+ if (f->op == Audit_not_equal)
+ result = !result;
break;
case AUDIT_FIELD_COMPARE:
result = audit_field_compare(tsk, cred, f, ctx, name);
@@ -2360,30 +2377,17 @@ void __audit_ptrace(struct task_struct *t)
}
/**
- * audit_signal_info - record signal info for shutting down audit subsystem
- * @sig: signal value
+ * audit_signal_info_syscall - record signal info for syscalls
* @t: task being signaled
*
 * If the audit subsystem is being terminated, record the pid and
 * uid of the task doing so.
*/
-int audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info_syscall(struct task_struct *t)
{
struct audit_aux_data_pids *axp;
struct audit_context *ctx = audit_context();
- kuid_t uid = current_uid(), auid, t_uid = task_uid(t);
-
- if (auditd_test_task(t) &&
- (sig == SIGTERM || sig == SIGHUP ||
- sig == SIGUSR1 || sig == SIGUSR2)) {
- audit_sig_pid = task_tgid_nr(current);
- auid = audit_get_loginuid(current);
- if (uid_valid(auid))
- audit_sig_uid = auid;
- else
- audit_sig_uid = uid;
- security_task_getsecid(current, &audit_sig_sid);
- }
+ kuid_t t_uid = task_uid(t);
if (!audit_signals || audit_dummy_context())
return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 42d17f730780..5b30f8baaf02 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1668,7 +1668,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (err < 0)
goto free_prog;
- prog->aux->load_time = ktime_get_boot_ns();
+ prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
if (err)
goto free_prog;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index bf9dbffd46b1..aaba2a41562a 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
#define cgroup_assert_mutex_or_rcu_locked() \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@@ -5666,7 +5666,6 @@ int __init cgroup_init(void)
int ssid;
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
- BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
@@ -6241,6 +6240,48 @@ struct cgroup *cgroup_get_from_fd(int fd)
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
+static u64 power_of_ten(int power)
+{
+ u64 v = 1;
+ while (power--)
+ v *= 10;
+ return v;
+}
+
+/**
+ * cgroup_parse_float - parse a floating number
+ * @input: input string
+ * @dec_shift: number of decimal digits to shift
+ * @v: output
+ *
+ * Parse a decimal floating point number in @input and store the result in
+ * @v with decimal point right shifted @dec_shift times. For example, if
+ * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12346
+ * (excess fractional digits are rounded to the closest value).
+ * Returns 0 on success, -errno otherwise.
+ *
+ * There's nothing cgroup specific about this function except that
+ * cgroup is currently its only user.
+ */
+int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
+{
+ s64 whole, frac = 0;
+ int fstart = 0, fend = 0, flen;
+
+ if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
+ return -EINVAL;
+ if (frac < 0)
+ return -EINVAL;
+
+ flen = fend > fstart ? fend - fstart : 0;
+ if (flen < dec_shift)
+ frac *= power_of_ten(dec_shift - flen);
+ else
+ frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
+
+ *v = whole * power_of_ten(dec_shift) + frac;
+ return 0;
+}
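A minimal usage sketch (hypothetical caller asking for two decimal digits):

        s64 v;

        /* whole = 38, frac = "5" is padded to two digits (50), so
         * v becomes 38 * 100 + 50 = 3850
         */
        if (!cgroup_parse_float("38.5", 2, &v))
                pr_info("parsed: %lld\n", v);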
+
/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
@@ -6403,4 +6444,5 @@ static int __init cgroup_sysfs_init(void)
return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
+
#endif /* CONFIG_SYSFS */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 515525ff1cfd..b3b02b9c4405 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -729,7 +729,7 @@ static inline int nr_cpusets(void)
* load balancing domains (sched domains) as specified by that partial
* partition.
*
- * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroup-v1/cpusets.rst
* for a background explanation of this.
*
* Does not return errors, on the theory that the callers of this
@@ -2829,7 +2829,7 @@ static void cpuset_fork(struct task_struct *task)
if (task_css_is_root(task, cpuset_cgrp_id))
return;
- set_cpus_allowed_ptr(task, &current->cpus_allowed);
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
task->mems_allowed = current->mems_allowed;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ef1c565edc5d..e84c0873559e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -522,7 +522,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
/*
* SMT soft disabling on X86 requires to bring the CPU out of the
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
- * CPU marked itself as booted_once in cpu_notify_starting() so the
+ * CPU marked itself as booted_once in notify_cpu_starting() so the
* cpu_smt_allowed() check will now return false if this is not the
* primary sibling.
*/
@@ -1221,6 +1221,13 @@ int freeze_secondary_cpus(int primary)
for_each_online_cpu(cpu) {
if (cpu == primary)
continue;
+
+ if (pm_wakeup_pending()) {
+ pr_info("Wakeup pending. Abort CPU freeze\n");
+ error = -EBUSY;
+ break;
+ }
+
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
diff --git a/kernel/cred.c b/kernel/cred.c
index c73a87a4df13..f9a0ce66c9c3 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -170,6 +170,11 @@ void exit_creds(struct task_struct *tsk)
validate_creds(cred);
alter_cred_subscribers(cred, -1);
put_cred(cred);
+
+#ifdef CONFIG_KEYS_REQUEST_CACHE
+ key_put(current->cached_requested_key);
+ current->cached_requested_key = NULL;
+#endif
}
/**
@@ -323,6 +328,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
struct cred *new;
int ret;
+#ifdef CONFIG_KEYS_REQUEST_CACHE
+ p->cached_requested_key = NULL;
+#endif
+
if (
#ifdef CONFIG_KEYS
!p->cred->thread_keyring &&
@@ -460,9 +469,9 @@ int commit_creds(struct cred *new)
/* alter the thread keyring */
if (!uid_eq(new->fsuid, old->fsuid))
- key_fsuid_changed(task);
+ key_fsuid_changed(new);
if (!gid_eq(new->fsgid, old->fsgid))
- key_fsgid_changed(task);
+ key_fsgid_changed(new);
/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85929ce13be..29e5f7880a4b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10693,11 +10693,11 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
break;
case CLOCK_BOOTTIME:
- event->clock = &ktime_get_boot_ns;
+ event->clock = &ktime_get_boottime_ns;
break;
case CLOCK_TAI:
- event->clock = &ktime_get_tai_ns;
+ event->clock = &ktime_get_clocktai_ns;
break;
default:
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..84fa00497c49 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -46,7 +46,7 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
-static struct percpu_rw_semaphore dup_mmap_sem;
+DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
@@ -2112,7 +2112,7 @@ static void handle_trampoline(struct pt_regs *regs)
sigill:
uprobe_warn(current, "handle uretprobe, sending SIGILL.");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
@@ -2228,7 +2228,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");
- force_sig(SIGILL, current);
+ force_sig(SIGILL);
}
}
@@ -2302,7 +2302,5 @@ void __init uprobes_init(void)
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
- BUG_ON(percpu_init_rwsem(&dup_mmap_sem));
-
BUG_ON(register_die_notifier(&uprobe_exception_nb));
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 61667909ce83..847dd147b068 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -898,6 +898,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_STACKPROTECTOR
tsk->stack_canary = get_random_canary();
#endif
+ if (orig->cpus_ptr == &orig->cpus_mask)
+ tsk->cpus_ptr = &tsk->cpus_mask;
/*
* One for us, one for whoever does the "release_task()" (usually
@@ -1952,9 +1954,6 @@ static __latent_entropy struct task_struct *copy_process(
p->pagefault_disabled = 0;
#ifdef CONFIG_LOCKDEP
- p->lockdep_depth = 0; /* no locks held yet */
- p->curr_chain_key = 0;
- p->lockdep_recursion = 0;
lockdep_init_task(p);
#endif
@@ -2036,6 +2035,7 @@ static __latent_entropy struct task_struct *copy_process(
O_RDWR | O_CLOEXEC);
if (IS_ERR(pidfile)) {
put_unused_fd(pidfd);
+ retval = PTR_ERR(pidfile);
goto bad_fork_free_pid;
}
get_pid(pid); /* held by pidfile now */
@@ -2116,7 +2116,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boot_ns();
+ p->real_start_time = ktime_get_boottime_ns();
/*
 * Make it visible to the rest of the system, but don't wake it up yet.
diff --git a/kernel/futex.c b/kernel/futex.c
index 4b5b468c58b6..6d50728ef2e7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -471,6 +471,37 @@ enum futex_access {
};
/**
+ * futex_setup_timer - set up the sleeping hrtimer.
+ * @time: ptr to the given timeout value
+ * @timeout: the hrtimer_sleeper structure to be set up
+ * @flags: futex flags
+ * @range_ns: optional range in ns
+ *
+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
+ * value given
+ */
+static inline struct hrtimer_sleeper *
+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
+ int flags, u64 range_ns)
+{
+ if (!time)
+ return NULL;
+
+ hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
+ CLOCK_REALTIME : CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper(timeout, current);
+
+ /*
+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
+ * effectively the same as calling hrtimer_set_expires().
+ */
+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
+
+ return timeout;
+}
+
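A sketch of the call pattern the helper enables, mirroring the converted call sites below (the teardown is the callers' existing hrtimer_cancel()/destroy_hrtimer_on_stack() pairing):

        struct hrtimer_sleeper timeout, *to;

        to = futex_setup_timer(abs_time, &timeout, flags,
                               current->timer_slack_ns);

        /* ... queue and wait; to is NULL when no timeout was given ... */

        if (to) {
                hrtimer_cancel(&to->timer);
                destroy_hrtimer_on_stack(&to->timer);
        }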
+/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
@@ -2679,7 +2710,7 @@ out:
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct restart_block *restart;
struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
@@ -2689,17 +2720,8 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
return -EINVAL;
q.bitset = bitset;
- if (abs_time) {
- to = &timeout;
-
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
- }
-
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
retry:
/*
* Prepare to wait on uaddr. On success, holds hb lock and increments
@@ -2779,7 +2801,7 @@ static long futex_wait_restart(struct restart_block *restart)
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@@ -2792,13 +2814,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
if (refill_pi_state_cache())
return -ENOMEM;
- if (time) {
- to = &timeout;
- hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires(&to->timer, *time);
- }
+ to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
retry:
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -3195,7 +3211,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 val, ktime_t *abs_time, u32 bitset,
u32 __user *uaddr2)
{
- struct hrtimer_sleeper timeout, *to = NULL;
+ struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@@ -3212,15 +3228,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (!bitset)
return -EINVAL;
- if (abs_time) {
- to = &timeout;
- hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
- CLOCK_REALTIME : CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS);
- hrtimer_init_sleeper(to, current);
- hrtimer_set_expires_range_ns(&to->timer, *abs_time,
- current->timer_slack_ns);
- }
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
/*
* The waiter is allocated on our stack, manipulated by the requeue
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index ff6e352e3a6c..b4f53717d143 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -2,6 +2,9 @@
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
obj-$(CONFIG_IRQ_TIMINGS) += timings.o
+ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y)
+ CFLAGS_timings.o += -DDEBUG
+endif
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f18cd5aa33e8..4352b08ae48d 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -94,8 +94,7 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
return nodes;
}
-static int __irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec,
+static int __irq_build_affinity_masks(unsigned int startvec,
unsigned int numvecs,
unsigned int firstvec,
cpumask_var_t *node_to_cpumask,
@@ -171,8 +170,7 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
* 1) spread present CPU on these vectors
* 2) spread other possible CPUs on these vectors
*/
-static int irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec, unsigned int numvecs,
+static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
unsigned int firstvec,
struct irq_affinity_desc *masks)
{
@@ -197,7 +195,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
build_node_to_cpumask(node_to_cpumask);
/* Spread on present CPUs starting from affd->pre_vectors */
- nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_present = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
cpu_present_mask, nmsk, masks);
@@ -212,7 +210,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
else
curvec = firstvec + nr_present;
cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
- nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_others = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
npresmsk, nmsk, masks);
put_online_cpus();
@@ -295,7 +293,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
unsigned int this_vecs = affd->set_size[i];
int ret;
- ret = irq_build_affinity_masks(affd, curvec, this_vecs,
+ ret = irq_build_affinity_masks(curvec, this_vecs,
curvec, masks);
if (ret) {
kfree(masks);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16cbf6beb276..ae60cae24e9a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val)
mask |= 1 << i;
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
nr_of_irqs++;
}
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 29d6c7d070b4..b76703b2c0af 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
}
irq_state_clr_started(desc);
}
+}
+
+
+void irq_shutdown_and_deactivate(struct irq_desc *desc)
+{
+ irq_shutdown(desc);
/*
* This must be called even if the interrupt was never started up,
* because the activation can happen before the interrupt is
@@ -748,6 +754,8 @@ void handle_fasteoi_nmi(struct irq_desc *desc)
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
/*
* NMIs cannot be shared, there is only one action.
@@ -962,6 +970,8 @@ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc)
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
trace_irq_handler_exit(irq, action, res);
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 5b1072e394b2..6c7ca2e983a5 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 70c3053bc1f6..3924fbe829d4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
@@ -96,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { }
extern void irq_mark_irq(unsigned int irq);
#endif
+extern int __irq_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool *state);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
@@ -354,6 +359,16 @@ static inline int irq_timing_decode(u64 value, u64 *timestamp)
return value & U16_MAX;
}
+static __always_inline void irq_timings_push(u64 ts, int irq)
+{
+ struct irq_timings *timings = this_cpu_ptr(&irq_timings);
+
+ timings->values[timings->count & IRQ_TIMINGS_MASK] =
+ irq_timing_encode(ts, irq);
+
+ timings->count++;
+}
+
/*
* The function record_irq_time is only called in one place in the
* interrupts handler. We want this function always inline so the code
@@ -367,15 +382,8 @@ static __always_inline void record_irq_time(struct irq_desc *desc)
if (!static_branch_likely(&irq_timing_enabled))
return;
- if (desc->istate & IRQS_TIMINGS) {
- struct irq_timings *timings = this_cpu_ptr(&irq_timings);
-
- timings->values[timings->count & IRQ_TIMINGS_MASK] =
- irq_timing_encode(local_clock(),
- irq_desc_get_irq(desc));
-
- timings->count++;
- }
+ if (desc->istate & IRQS_TIMINGS)
+ irq_timings_push(local_clock(), irq_desc_get_irq(desc));
}
#else
static inline void irq_remove_timings(struct irq_desc *desc) {}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index c52b737ab8e3..9484e88dabc2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
* @hwirq: The HW irq number to convert to a logical one
* @regs: Register file coming from the low-level handling code
*
+ * This function must be called from an NMI context.
+ *
* Returns: 0 on success, or -EINVAL if conversion has failed
*/
int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
@@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq;
int ret = 0;
- nmi_enter();
+ /*
+ * NMI context needs to be set up earlier in order to deal with tracing.
+ */
+ WARN_ON(!in_nmi());
irq = irq_find_mapping(domain, hwirq);
@@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
else
ret = -EINVAL;
- nmi_exit();
set_irq_regs(old_regs);
return ret;
}
@@ -946,6 +950,11 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
+static bool irq_is_nmi(struct irq_desc *desc)
+{
+ return desc->istate & IRQS_NMI;
+}
+
/**
* kstat_irqs - Get the statistics for an interrupt
* @irq: The interrupt number
@@ -963,7 +972,8 @@ unsigned int kstat_irqs(unsigned int irq)
if (!desc || !desc->kstat_irqs)
return 0;
if (!irq_settings_is_per_cpu_devid(desc) &&
- !irq_settings_is_per_cpu(desc))
+ !irq_settings_is_per_cpu(desc) &&
+ !irq_is_nmi(desc))
return desc->tot_count;
for_each_possible_cpu(cpu)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index a453e229f99c..3078d0e48bba 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
* @ops: domain callbacks
* @host_data: Controller private data pointer
*
- * Allocates and initialize and irq_domain structure.
+ * Allocates and initializes an irq_domain structure.
* Returns pointer to IRQ domain, or NULL on failure.
*/
struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
@@ -139,7 +139,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
- if (WARN_ON(!domain))
+ if (!domain)
return NULL;
if (fwnode && is_fwnode_irqchip(fwnode)) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 78f3ddeb7fe4..e8f7f179bf77 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
@@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg)
early_param("threadirqs", setup_forced_irqthreads);
#endif
-static void __synchronize_hardirq(struct irq_desc *desc)
+static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
{
+ struct irq_data *irqd = irq_desc_get_irq_data(desc);
bool inprogress;
do {
@@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc)
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
inprogress = irqd_irq_inprogress(&desc->irq_data);
+
+ /*
+ * If requested and supported, check at the chip whether it
+ * is in flight at the hardware level, i.e. already pending
+ * in a CPU and waiting for service and acknowledgement.
+ */
+ if (!inprogress && sync_chip) {
+ /*
+ * Ignore the return code. inprogress is only updated
+ * when the chip supports it.
+ */
+ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+ &inprogress);
+ }
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
@@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc)
* Returns: false if a threaded handler is active.
*
* This function may be called - with care - from IRQ context.
+ *
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level that has not been serviced yet, as such a check
+ * might deadlock when called with interrupts disabled while the
+ * target CPU of the interrupt is the current CPU.
*/
bool synchronize_hardirq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, false);
return !atomic_read(&desc->threads_active);
}
@@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq);
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock.
*
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated with @irq.
+ *
+ * It optionally makes sure (when the irq chip supports that method)
+ * that the interrupt is not pending in any CPU and waiting for
+ * service.
*/
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, true);
/*
* We made sure that no hardirq handler is
* running. Now verify that no threaded handlers are
@@ -1699,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
+ /* Only shutdown. Deactivate after synchronize_hardirq() */
irq_shutdown(desc);
}
@@ -1727,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
unregister_handler_proc(irq, action);
- /* Make sure it's not being used on another CPU: */
- synchronize_hardirq(irq);
+ /*
+ * Make sure it's not being used on another CPU and if the chip
+ * supports it also make sure that there is no (not yet serviced)
+ * interrupt in flight at the hardware level.
+ */
+ __synchronize_hardirq(desc, true);
#ifdef CONFIG_DEBUG_SHIRQ
/*
@@ -1768,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* require it to deallocate resources over the slow bus.
*/
chip_bus_lock(desc);
+ /*
+ * There is no interrupt in flight anymore. Deactivate it
+ * completely.
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_domain_deactivate_irq(&desc->irq_data);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
irq_release_resources(desc);
chip_bus_sync_unlock(desc);
irq_remove_timings(desc);
@@ -1855,7 +1894,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
}
irq_settings_clr_disable_unlazy(desc);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
irq_release_resources(desc);
@@ -2578,6 +2617,28 @@ out:
irq_put_desc_unlock(desc, flags);
}
+int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
+ bool *state)
+{
+ struct irq_chip *chip;
+ int err = -EINVAL;
+
+ do {
+ chip = irq_data_get_irq_chip(data);
+ if (chip->irq_get_irqchip_state)
+ break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ data = data->parent_data;
+#else
+ data = NULL;
+#endif
+ } while (data);
+
+ if (data)
+ err = chip->irq_get_irqchip_state(data, which, state);
+ return err;
+}
+
/**
* irq_get_irqchip_state - returns the irqchip state of a interrupt.
* @irq: Interrupt line that is forwarded to a VM
@@ -2596,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
{
struct irq_desc *desc;
struct irq_data *data;
- struct irq_chip *chip;
unsigned long flags;
int err = -EINVAL;
@@ -2606,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
data = irq_desc_get_irq_data(desc);
- do {
- chip = irq_data_get_irq_chip(data);
- if (chip->irq_get_irqchip_state)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
-
- if (data)
- err = chip->irq_get_irqchip_state(data, which, state);
+ err = __irq_get_irqchip_state(data, which, state);
irq_put_desc_busunlock(desc, flags);
return err;
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 90c735da15d0..e960d7ce7bcc 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define pr_fmt(fmt) "irq_timings: " fmt
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/static_key.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
#include <linux/irq.h>
@@ -261,12 +263,29 @@ void irq_timings_disable(void)
#define EMA_ALPHA_VAL 64
#define EMA_ALPHA_SHIFT 7
-#define PREDICTION_PERIOD_MIN 2
+#define PREDICTION_PERIOD_MIN 3
#define PREDICTION_PERIOD_MAX 5
#define PREDICTION_FACTOR 4
#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
+/*
+ * Number of elements in the circular buffer: if the buffer was
+ * flushed before, the number of elements could be smaller than
+ * IRQ_TIMINGS_SIZE, so the count is used; otherwise the array size is
+ * used, as we wrapped. The index begins at zero when we did not
+ * wrap. This could be done more elegantly with a proper circular
+ * array structure type, but at the cost of extra computation in the
+ * interrupt handler hot path. We choose efficiency.
+ */
+#define for_each_irqts(i, irqts) \
+ for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
+ 0 : irqts->count & IRQ_TIMINGS_MASK, \
+ irqts->count = min(IRQ_TIMINGS_SIZE, \
+ irqts->count); \
+ irqts->count > 0; irqts->count--, \
+ i = (i + 1) & IRQ_TIMINGS_MASK)
+
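A self-contained sketch of the ring walk the macro implements, assuming SIZE is a power of two (as IRQ_TIMINGS_SIZE is); like the macro, it consumes the buffer by zeroing the count:

        #define SIZE    32
        #define MASK    (SIZE - 1)

        static void browse(const u64 *values, unsigned int *count)
        {
                /* oldest surviving slot: 0 if we never wrapped */
                unsigned int i = *count < SIZE ? 0 : *count & MASK;
                unsigned int n = *count < SIZE ? *count : SIZE;

                for (; n; n--, i = (i + 1) & MASK)
                        consume(values[i]);     /* hypothetical consumer */

                *count = 0;
        }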
struct irqt_stat {
u64 last_ts;
u64 ema_time[PREDICTION_BUFFER_SIZE];
@@ -297,7 +316,16 @@ static u64 irq_timings_ema_new(u64 value, u64 ema_old)
static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
{
- int i;
+ int period;
+
+ /*
+ * Move the beginning pointer to the end minus the max period x 3.
+ * We are at the point where we can begin searching for the pattern.
+ */
+ buffer = &buffer[len - (period_max * 3)];
+
+ /* Adjust the length to the maximum allowed period x 3 */
+ len = period_max * 3;
/*
	 * The buffer contains the sequence of intervals, in an ilog2
@@ -306,21 +334,45 @@ static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
* period beginning at the end of the buffer. We do that for
* each suffix.
*/
- for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
+ for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
- int *begin = &buffer[len - (i * 3)];
- int *ptr = begin;
+ /*
+ * The first comparison always succeeds because the
+ * suffix is deduced from the first n-period bytes of
+ * the buffer and we compare the initial suffix with
+ * itself, so we can skip the first iteration.
+ */
+ int idx = period;
+ size_t size = period;
/*
		 * We check whether the sequence with period 'i' repeats
* itself. If it is truncated at the end, as it
* repeats we can use the period to find out the next
- * element.
+ * element with the modulo.
*/
- while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
- ptr += i;
- if (ptr >= &buffer[len])
- return begin[((i * 3) % i)];
+ while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
+
+ /*
+ * Advance the index one period at a time
+ */
+ idx += size;
+
+ /*
+ * If this condition is reached, all previous
+ * memcmp calls were successful, so the period is
+ * found.
+ */
+ if (idx == len)
+ return buffer[len % period];
+
+ /*
+ * If the remaining elements to compare are
+ * smaller than the period, readjust the size
+ * of the comparison for the last iteration.
+ */
+ if (len - idx < period)
+ size = len - idx;
}
}
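A self-contained userspace sketch of the suffix-period search above (simplified: no window repositioning, and it assumes len >= 2 * pmax):

        #include <stdio.h>
        #include <string.h>

        static int next_event_index(const int *buf, int len, int pmax, int pmin)
        {
                for (int period = pmax; period >= pmin; period--) {
                        int idx = period, size = period;

                        while (!memcmp(buf, &buf[idx], size * sizeof(int))) {
                                idx += size;
                                if (idx == len)
                                        return buf[len % period];
                                if (len - idx < period)
                                        size = len - idx;
                        }
                }

                return -1;      /* no periodic pattern found */
        }

        int main(void)
        {
                int buf[] = { 2, 5, 8, 2, 5, 8, 2, 5 };  /* period 3, truncated */

                /* prints 8, i.e. buf[8 % 3]: the next element of the pattern */
                printf("%d\n", next_event_index(buf, 8, 3, 2));
                return 0;
        }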
@@ -380,11 +432,43 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
return irqs->last_ts + irqs->ema_time[index];
}
+static __always_inline int irq_timings_interval_index(u64 interval)
+{
+ /*
+ * The PREDICTION_FACTOR increases the interval size for the
+ * array of exponential averages.
+ */
+ u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
+
+ return likely(interval_us) ? ilog2(interval_us) : 0;
+}
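Worked example with one of the self-test intervals below: for interval = 1386000 ns, (1386000 >> 10) / PREDICTION_FACTOR = 1353 / 4 = 338, and ilog2(338) = 8 (256 <= 338 < 512), so every ~1.386 ms interval is folded into ema_time[8].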
+
+static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
+ u64 interval)
+{
+ int index;
+
+ /*
+ * Get the index in the ema table for this interrupt.
+ */
+ index = irq_timings_interval_index(interval);
+
+ /*
+ * Store the index as an element of the pattern in another
+ * circular array.
+ */
+ irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
+
+ irqs->ema_time[index] = irq_timings_ema_new(interval,
+ irqs->ema_time[index]);
+
+ irqs->count++;
+}
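With EMA_ALPHA_VAL = 64 and EMA_ALPHA_SHIFT = 7, old and new samples are weighted equally (64/128 = 0.5). A sketch of the update, assuming the usual exponential-moving-average form (irq_timings_ema_new() itself is not shown in this hunk):

        /* ema_new = ema_old + alpha * (value - ema_old), alpha = 64/128 */
        static u64 ema_update(u64 value, u64 ema_old)
        {
                if (!ema_old)           /* first sample seeds the average */
                        return value;

                return ema_old + (((s64)(value - ema_old) * 64) >> 7);
        }

        /* e.g. ema_old = 1384000, value = 1386000 -> 1385000 */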
+
static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
{
u64 old_ts = irqs->last_ts;
u64 interval;
- int index;
/*
* The timestamps are absolute time values, we need to compute
@@ -415,24 +499,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
return;
}
- /*
- * Get the index in the ema table for this interrupt. The
- * PREDICTION_FACTOR increase the interval size for the array
- * of exponential average.
- */
- index = likely(interval) ?
- ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
-
- /*
- * Store the index as an element of the pattern in another
- * circular array.
- */
- irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
-
- irqs->ema_time[index] = irq_timings_ema_new(interval,
- irqs->ema_time[index]);
-
- irqs->count++;
+ __irq_timings_store(irq, irqs, interval);
}
/**
@@ -493,11 +560,7 @@ u64 irq_timings_next_event(u64 now)
* model while decrementing the counter because we consume the
* data from our circular buffer.
*/
-
- i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
-
- for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+ for_each_irqts(i, irqts) {
irq = irq_timing_decode(irqts->values[i], &ts);
s = idr_find(&irqt_stats, irq);
if (s)
@@ -564,3 +627,325 @@ int irq_timings_alloc(int irq)
return 0;
}
+
+#ifdef CONFIG_TEST_IRQ_TIMINGS
+struct timings_intervals {
+ u64 *intervals;
+ size_t count;
+};
+
+/*
+ * Intervals are given in nanosecond base
+ */
+static u64 intervals0[] __initdata = {
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000,
+};
+
+static u64 intervals1[] __initdata = {
+ 223947000, 1240000, 1384000, 1386000, 1386000,
+ 217416000, 1236000, 1384000, 1386000, 1387000,
+ 214719000, 1241000, 1386000, 1387000, 1384000,
+ 213696000, 1234000, 1384000, 1386000, 1388000,
+ 219904000, 1240000, 1385000, 1389000, 1385000,
+ 212240000, 1240000, 1386000, 1386000, 1386000,
+ 214415000, 1236000, 1384000, 1386000, 1387000,
+ 214276000, 1234000,
+};
+
+static u64 intervals2[] __initdata = {
+ 4000, 3000, 5000, 100000,
+ 3000, 3000, 5000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 5000, 3000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 4000, 5000, 112000,
+ 4000,
+};
+
+static u64 intervals3[] __initdata = {
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+};
+
+static u64 intervals4[] __initdata = {
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000,
+};
+
+static struct timings_intervals tis[] __initdata = {
+ { intervals0, ARRAY_SIZE(intervals0) },
+ { intervals1, ARRAY_SIZE(intervals1) },
+ { intervals2, ARRAY_SIZE(intervals2) },
+ { intervals3, ARRAY_SIZE(intervals3) },
+ { intervals4, ARRAY_SIZE(intervals4) },
+};
+
+static int __init irq_timings_test_next_index(struct timings_intervals *ti)
+{
+ int _buffer[IRQ_TIMINGS_SIZE];
+ int buffer[IRQ_TIMINGS_SIZE];
+ int index, start, i, count, period_max;
+
+ count = ti->count - 1;
+
+ period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
+ PREDICTION_PERIOD_MAX : count / 3;
+
+ /*
+ * Inject all values except the last one, which will be used
+ * to compare with the next index result.
+ */
+ pr_debug("index suite: ");
+
+ for (i = 0; i < count; i++) {
+ index = irq_timings_interval_index(ti->intervals[i]);
+ _buffer[i & IRQ_TIMINGS_MASK] = index;
+ pr_cont("%d ", index);
+ }
+
+ start = count < IRQ_TIMINGS_SIZE ? 0 :
+ count & IRQ_TIMINGS_MASK;
+
+ count = min_t(int, count, IRQ_TIMINGS_SIZE);
+
+ for (i = 0; i < count; i++) {
+ int index = (start + i) & IRQ_TIMINGS_MASK;
+ buffer[i] = _buffer[index];
+ }
+
+ index = irq_timings_next_event_index(buffer, count, period_max);
+ i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
+
+ if (index != i) {
+ pr_err("Expected (%d) and computed (%d) next indexes differ\n",
+ i, index);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init irq_timings_next_index_selftest(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
+
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
+ i, tis[i].count);
+
+ ret = irq_timings_test_next_index(&tis[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_test_irqs(struct timings_intervals *ti)
+{
+ struct irqt_stat __percpu *s;
+ struct irqt_stat *irqs;
+ int i, index, ret, irq = 0xACE5;
+
+ ret = irq_timings_alloc(irq);
+ if (ret) {
+ pr_err("Failed to allocate irq timings\n");
+ return ret;
+ }
+
+ s = idr_find(&irqt_stats, irq);
+ if (!s) {
+ ret = -EIDRM;
+ goto out;
+ }
+
+ irqs = this_cpu_ptr(s);
+
+ for (i = 0; i < ti->count; i++) {
+
+ index = irq_timings_interval_index(ti->intervals[i]);
+ pr_debug("%d: interval=%llu ema_index=%d\n",
+ i, ti->intervals[i], index);
+
+ __irq_timings_store(irq, irqs, ti->intervals[i]);
+ if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
+ pr_err("Failed to store in the circular buffer\n");
+ goto out;
+ }
+ }
+
+ if (irqs->count != ti->count) {
+ pr_err("Count differs\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ irq_timings_free(irq);
+
+ return ret;
+}
+
+static int __init irq_timings_irqs_selftest(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
+ i, tis[i].count);
+ ret = irq_timings_test_irqs(&tis[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_test_irqts(struct irq_timings *irqts,
+ unsigned count)
+{
+ int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
+ int i, irq, oirq = 0xBEEF;
+ u64 ots = 0xDEAD, ts;
+
+ /*
+ * Fill the circular buffer by using the dedicated function.
+ */
+ for (i = 0; i < count; i++) {
+ pr_debug("%d: index=%d, ts=%llX irq=%X\n",
+ i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
+
+ irq_timings_push(ots + i, oirq + i);
+ }
+
+ /*
+ * Compute the expected values of the first elements, depending on
+ * whether the index wrapped around.
+ */
+ ots += start;
+ oirq += start;
+
+ /*
+ * Test that the circular buffer count is correct.
+ */
+ pr_debug("---> Checking timings array count (%d) is right\n", count);
+ if (WARN_ON(irqts->count != count))
+ return -EINVAL;
+
+ /*
+ * Test the macro that browses all the irqts.
+ */
+ pr_debug("---> Checking the for_each_irqts() macro\n");
+ for_each_irqts(i, irqts) {
+
+ irq = irq_timing_decode(irqts->values[i], &ts);
+
+ pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
+ i, ts, ots, irq, oirq);
+
+ if (WARN_ON(ts != ots || irq != oirq))
+ return -EINVAL;
+
+ ots++; oirq++;
+ }
+
+ /*
+ * The circular buffer should have been flushed when browsed
+ * with for_each_irqts().
+ */
+ pr_debug("---> Checking timings array is empty after browsing it\n");
+ if (WARN_ON(irqts->count))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init irq_timings_irqts_selftest(void)
+{
+ struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+ int i, ret;
+
+ /*
+ * Test the circular buffer with different numbers of
+ * elements. The purpose is to test at the limits (empty, half
+ * full, full, wrapped with the cursor at the boundaries,
+ * wrapped several times, etc.).
+ */
+ int count[] = { 0,
+ IRQ_TIMINGS_SIZE >> 1,
+ IRQ_TIMINGS_SIZE,
+ IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
+ 2 * IRQ_TIMINGS_SIZE,
+ (2 * IRQ_TIMINGS_SIZE) + 3,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(count); i++) {
+
+ pr_info("---> Checking the timings with %d/%d values\n",
+ count[i], IRQ_TIMINGS_SIZE);
+
+ ret = irq_timings_test_irqts(irqts, count[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_selftest(void)
+{
+ int ret;
+
+ pr_info("------------------- selftest start -----------------\n");
+
+ /*
+ * At this point, we don't expect any subsystem but us to use
+ * the irq timings, so it should not be enabled.
+ */
+ if (static_branch_unlikely(&irq_timing_enabled)) {
+ pr_warn("irq timings already initialized, skipping selftest\n");
+ return 0;
+ }
+
+ ret = irq_timings_irqts_selftest();
+ if (ret)
+ goto out;
+
+ ret = irq_timings_irqs_selftest();
+ if (ret)
+ goto out;
+
+ ret = irq_timings_next_index_selftest();
+out:
+ pr_info("---------- selftest end with %s -----------\n",
+ ret ? "failure" : "success");
+
+ return ret;
+}
+early_initcall(irq_timings_selftest);
+#endif
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 0bfa10f4410c..df3008419a1d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -37,12 +37,26 @@ static int jump_label_cmp(const void *a, const void *b)
const struct jump_entry *jea = a;
const struct jump_entry *jeb = b;
+ /*
+ * Entries are sorted by key.
+ */
if (jump_entry_key(jea) < jump_entry_key(jeb))
return -1;
if (jump_entry_key(jea) > jump_entry_key(jeb))
return 1;
+ /*
+ * In the batching mode, entries should also be sorted by their code
+ * within the already key-sorted list of entries, enabling a bsearch
+ * of the vector.
+ */
+ if (jump_entry_code(jea) < jump_entry_code(jeb))
+ return -1;
+
+ if (jump_entry_code(jea) > jump_entry_code(jeb))
+ return 1;
+
return 0;
}
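Sorting by code within each key is what enables a binary search of the entry vector in the batching mode. A userspace sketch of the same two-level ordering (libc qsort()/bsearch() for illustration; the kernel's batch apply path is arch-specific):

        #include <stdlib.h>

        struct jentry { unsigned long key, code; };

        static int jentry_cmp(const void *a, const void *b)
        {
                const struct jentry *ea = a, *eb = b;

                if (ea->key != eb->key)
                        return ea->key < eb->key ? -1 : 1;
                if (ea->code != eb->code)
                        return ea->code < eb->code ? -1 : 1;
                return 0;
        }

        /*
         * qsort(vec, n, sizeof(*vec), jentry_cmp);
         * struct jentry needle = { .key = k, .code = addr };
         * struct jentry *e = bsearch(&needle, vec, n, sizeof(*vec), jentry_cmp);
         */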
@@ -384,25 +398,55 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
return enabled ^ branch;
}
+static bool jump_label_can_update(struct jump_entry *entry, bool init)
+{
+ /*
+ * Cannot update code that was in an init text area.
+ */
+ if (!init && jump_entry_is_init(entry))
+ return false;
+
+ if (!kernel_text_address(jump_entry_code(entry))) {
+ WARN_ONCE(1, "can't patch jump_label at %pS", (void *)jump_entry_code(entry));
+ return false;
+ }
+
+ return true;
+}
+
+#ifndef HAVE_JUMP_LABEL_BATCH
static void __jump_label_update(struct static_key *key,
struct jump_entry *entry,
struct jump_entry *stop,
bool init)
{
for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
- /*
- * An entry->code of 0 indicates an entry which has been
- * disabled because it was in an init text area.
- */
- if (init || !jump_entry_is_init(entry)) {
- if (kernel_text_address(jump_entry_code(entry)))
- arch_jump_label_transform(entry, jump_label_type(entry));
- else
- WARN_ONCE(1, "can't patch jump_label at %pS",
- (void *)jump_entry_code(entry));
+ if (jump_label_can_update(entry, init))
+ arch_jump_label_transform(entry, jump_label_type(entry));
+ }
+}
+#else
+static void __jump_label_update(struct static_key *key,
+ struct jump_entry *entry,
+ struct jump_entry *stop,
+ bool init)
+{
+ for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
+
+ if (!jump_label_can_update(entry, init))
+ continue;
+
+ if (!arch_jump_label_transform_queue(entry, jump_label_type(entry))) {
+ /*
+ * Queue is full: Apply the current queue and try again.
+ */
+ arch_jump_label_transform_apply();
+ BUG_ON(!arch_jump_label_transform_queue(entry, jump_label_type(entry)));
}
}
+ arch_jump_label_transform_apply();
}
+#endif
void __init jump_label_init(void)
{
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index ef7b951a8087..b8cc032d5620 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -196,9 +196,6 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
return ret;
image->kernel_buf_len = size;
- /* IMA needs to pass the measurement list to the next kernel. */
- ima_add_kexec_buffer(image);
-
/* Call arch image probe handlers */
ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
image->kernel_buf_len);
@@ -239,8 +236,14 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
ret = -EINVAL;
goto out;
}
+
+ ima_kexec_cmdline(image->cmdline_buf,
+ image->cmdline_buf_len - 1);
}
+ /* IMA needs to pass the measurement list to the next kernel. */
+ ima_add_kexec_buffer(image);
+
/* Call arch image load handlers */
ldata = arch_kexec_kernel_image_load(image);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 6fe2f333aecb..45452facff3b 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
+obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
index 46b71af8eef2..8c7e7d25f09c 100644
--- a/kernel/locking/lock_events.h
+++ b/kernel/locking/lock_events.h
@@ -31,50 +31,13 @@ enum lock_events {
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/*
- * The purpose of the lock event counting subsystem is to provide a low
- * overhead way to record the number of specific locking events by using
- * percpu counters. It is the percpu sum that matters, not specifically
- * how many of them happens in each cpu.
- *
- * It is possible that the same percpu counter may be modified in both
- * the process and interrupt contexts. For architectures that perform
- * percpu operation with multiple instructions, it is possible to lose
- * count if a process context percpu update is interrupted in the middle
- * and the same counter is updated in the interrupt context. Therefore,
- * the generated percpu sum may not be precise. The error, if any, should
- * be small and insignificant.
- *
- * For those architectures that do multi-instruction percpu operation,
- * preemption in the middle and moving the task to another cpu may cause
- * a larger error in the count. Again, this will be few and far between.
- * Given the imprecise nature of the count and the possibility of resetting
- * the count and doing the measurement again, this is not really a big
- * problem.
- *
- * To get a better picture of what is happening under the hood, it is
- * suggested that a few measurements should be taken with the counts
- * reset in between to stamp out outliner because of these possible
- * error conditions.
- *
- * To minimize overhead, we use __this_cpu_*() in all cases except when
- * CONFIG_DEBUG_PREEMPT is defined. In this particular case, this_cpu_*()
- * will be used to avoid the appearance of unwanted BUG messages.
- */
-#ifdef CONFIG_DEBUG_PREEMPT
-#define lockevent_percpu_inc(x) this_cpu_inc(x)
-#define lockevent_percpu_add(x, v) this_cpu_add(x, v)
-#else
-#define lockevent_percpu_inc(x) __this_cpu_inc(x)
-#define lockevent_percpu_add(x, v) __this_cpu_add(x, v)
-#endif
-
-/*
- * Increment the PV qspinlock statistical counters
+ * Increment the statistical counters. Use raw_cpu_inc() because of its
+ * lower overhead; we don't care if we lose the occasional update.
*/
static inline void __lockevent_inc(enum lock_events event, bool cond)
{
if (cond)
- lockevent_percpu_inc(lockevents[event]);
+ raw_cpu_inc(lockevents[event]);
}
#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
@@ -82,7 +45,7 @@ static inline void __lockevent_inc(enum lock_events event, bool cond)
static inline void __lockevent_add(enum lock_events event, int inc)
{
- lockevent_percpu_add(lockevents[event], inc);
+ raw_cpu_add(lockevents[event], inc);
}
#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index ad7668cfc9da..239039d0ce21 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -56,12 +56,16 @@ LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
-LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
-LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
+LOCK_EVENT(rwsem_opt_rlock) /* # of opt-acquired read locks */
+LOCK_EVENT(rwsem_opt_wlock) /* # of opt-acquired write locks */
+LOCK_EVENT(rwsem_opt_fail) /* # of failed optspins */
+LOCK_EVENT(rwsem_opt_nospin) /* # of disabled optspins */
+LOCK_EVENT(rwsem_opt_norspin) /* # of disabled reader-only optspins */
+LOCK_EVENT(rwsem_opt_rlock2) /* # of opt-acquired 2ndary read locks */
LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
-LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */
+LOCK_EVENT(rwsem_rlock_handoff) /* # of read lock handoffs */
LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
-LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */
+LOCK_EVENT(rwsem_wlock_handoff) /* # of write lock handoffs */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c47788fa85f9..341f52117f88 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -151,17 +151,28 @@ unsigned long nr_lock_classes;
static
#endif
struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+static DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
static inline struct lock_class *hlock_class(struct held_lock *hlock)
{
- if (!hlock->class_idx) {
+ unsigned int class_idx = hlock->class_idx;
+
+ /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfield */
+ barrier();
+
+ if (!test_bit(class_idx, lock_classes_in_use)) {
/*
* Someone passed in garbage, we give up.
*/
DEBUG_LOCKS_WARN_ON(1);
return NULL;
}
- return lock_classes + hlock->class_idx - 1;
+
+ /*
+ * At this point, if the passed hlock->class_idx is still garbage,
+ * we just have to live with it.
+ */
+ return lock_classes + class_idx;
}
#ifdef CONFIG_LOCK_STAT
@@ -359,6 +370,13 @@ static inline u64 iterate_chain_key(u64 key, u32 idx)
return k0 | (u64)k1 << 32;
}
+void lockdep_init_task(struct task_struct *task)
+{
+ task->lockdep_depth = 0; /* no locks held yet */
+ task->curr_chain_key = INITIAL_CHAIN_KEY;
+ task->lockdep_recursion = 0;
+}
+
void lockdep_off(void)
{
current->lockdep_recursion++;
@@ -419,13 +437,6 @@ static int verbose(struct lock_class *class)
return 0;
}
-/*
- * Stack-trace: tightly packed array of stack backtrace
- * addresses. Protected by the graph_lock.
- */
-unsigned long nr_stack_trace_entries;
-static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
-
static void print_lockdep_off(const char *bug_msg)
{
printk(KERN_DEBUG "%s\n", bug_msg);
@@ -435,6 +446,15 @@ static void print_lockdep_off(const char *bug_msg)
#endif
}
+unsigned long nr_stack_trace_entries;
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the graph_lock.
+ */
+static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+
static int save_trace(struct lock_trace *trace)
{
unsigned long *entries = stack_trace + nr_stack_trace_entries;
@@ -457,6 +477,7 @@ static int save_trace(struct lock_trace *trace)
return 1;
}
+#endif
unsigned int nr_hardirq_chains;
unsigned int nr_softirq_chains;
@@ -470,6 +491,7 @@ unsigned int max_lockdep_depth;
DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
#endif
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* Locking printouts:
*/
@@ -487,6 +509,7 @@ static const char *usage_str[] =
#undef LOCKDEP_STATE
[LOCK_USED] = "INITIAL USE",
};
+#endif
const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
{
@@ -500,15 +523,26 @@ static inline unsigned long lock_flag(enum lock_usage_bit bit)
static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
{
+ /*
+ * The usage character defaults to '.' (i.e., irqs disabled and not in
+ * irq context), which is the safest usage category.
+ */
char c = '.';
- if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
+ /*
+ * The order of the following usage checks matters, which will
+ * result in the outcome character as follows:
+ *
+ * - '+': irq is enabled and not in irq context
+ * - '-': in irq context and irq is disabled
+ * - '?': in irq context and irq is enabled
+ */
+ if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) {
c = '+';
- if (class->usage_mask & lock_flag(bit)) {
- c = '-';
- if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
+ if (class->usage_mask & lock_flag(bit))
c = '?';
- }
+ } else if (class->usage_mask & lock_flag(bit))
+ c = '-';
return c;
}
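
The rewritten get_usage_char() makes the precedence between the two usage bits explicit. The same decision table, reduced to two booleans, can be exercised in isolation:

#include <stdio.h>

/*
 * Same decision table as the patched get_usage_char(), over two
 * booleans: was the lock ever taken with this irq enabled, and was it
 * ever taken inside that irq context?
 */
static char usage_char(int enabled_irq, int used_in_irq)
{
	char c = '.';			/* safest: neither ever observed */

	if (enabled_irq) {
		c = '+';		/* irq enabled, not in irq context */
		if (used_in_irq)
			c = '?';	/* both: enabled and used in irq */
	} else if (used_in_irq)
		c = '-';		/* in irq context, irq disabled */

	return c;
}

int main(void)
{
	printf("%c %c %c %c\n", usage_char(0, 0), usage_char(1, 0),
	       usage_char(0, 1), usage_char(1, 1));
	return 0;
}

Running it prints ". + - ?", one character per combination, matching the comment block in the hunk.
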
@@ -572,19 +606,22 @@ static void print_lock(struct held_lock *hlock)
/*
* We can be called locklessly through debug_show_all_locks() so be
* extra careful: the hlock might have been released and cleared.
+ *
+ * If this indeed happens, let's pretend it does not hurt to continue
+ * to print the lock unless the hlock class_idx does not point to a
+ * registered class. The rationale here is: since we don't attempt
+ * to distinguish whether we are in this situation, if it just
+ * happened we can't count on class_idx to tell either.
*/
- unsigned int class_idx = hlock->class_idx;
-
- /* Don't re-read hlock->class_idx, can't use READ_ONCE() on bitfields: */
- barrier();
+ struct lock_class *lock = hlock_class(hlock);
- if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) {
+ if (!lock) {
printk(KERN_CONT "<RELEASED>\n");
return;
}
printk(KERN_CONT "%p", hlock->instance);
- print_lock_name(lock_classes + class_idx - 1);
+ print_lock_name(lock);
printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
}
@@ -732,7 +769,8 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
* Huh! same key, different name? Did someone trample
* on some memory? We're most confused.
*/
- WARN_ON_ONCE(class->name != lock->name);
+ WARN_ON_ONCE(class->name != lock->name &&
+ lock->key != &__lockdep_no_validate__);
return class;
}
}
@@ -838,11 +876,11 @@ static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
static bool check_lock_chain_key(struct lock_chain *chain)
{
#ifdef CONFIG_PROVE_LOCKING
- u64 chain_key = 0;
+ u64 chain_key = INITIAL_CHAIN_KEY;
int i;
for (i = chain->base; i < chain->base + chain->depth; i++)
- chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+ chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
/*
* The 'unsigned long long' casts avoid a compiler warning when
* building tools/lib/lockdep.
@@ -1117,6 +1155,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
return NULL;
}
nr_lock_classes++;
+ __set_bit(class - lock_classes, lock_classes_in_use);
debug_atomic_inc(nr_unused_locks);
class->key = key;
class->name = lock->name;
@@ -1228,13 +1267,17 @@ static int add_lock_to_list(struct lock_class *this,
#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1)
/*
- * The circular_queue and helpers is used to implement the
- * breadth-first search(BFS)algorithem, by which we can build
- * the shortest path from the next lock to be acquired to the
- * previous held lock if there is a circular between them.
+ * The circular_queue and helpers are used to implement the graph
+ * breadth-first search (BFS) algorithm, by which we can determine
+ * whether there is a path from one lock to another. In deadlock checks,
+ * a path from the next lock to be acquired to a previously held lock
+ * indicates that adding the <prev> -> <next> lock dependency will
+ * produce a cycle in the graph. Breadth-first search instead of
+ * depth-first search is used in order to find the shortest (circular)
+ * path.
*/
struct circular_queue {
- unsigned long element[MAX_CIRCULAR_QUEUE_SIZE];
+ struct lock_list *element[MAX_CIRCULAR_QUEUE_SIZE];
unsigned int front, rear;
};
@@ -1260,7 +1303,7 @@ static inline int __cq_full(struct circular_queue *cq)
return ((cq->rear + 1) & CQ_MASK) == cq->front;
}
-static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
+static inline int __cq_enqueue(struct circular_queue *cq, struct lock_list *elem)
{
if (__cq_full(cq))
return -1;
@@ -1270,14 +1313,21 @@ static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem)
return 0;
}
-static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem)
+/*
+ * Dequeue an element from the circular_queue, return a lock_list if
+ * the queue is not empty, or NULL otherwise.
+ */
+static inline struct lock_list *__cq_dequeue(struct circular_queue *cq)
{
+ struct lock_list *lock;
+
if (__cq_empty(cq))
- return -1;
+ return NULL;
- *elem = cq->element[cq->front];
+ lock = cq->element[cq->front];
cq->front = (cq->front + 1) & CQ_MASK;
- return 0;
+
+ return lock;
}
static inline unsigned int __cq_get_elem_count(struct circular_queue *cq)
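
The queue that backs the BFS is a fixed-size ring buffer whose capacity is a power of two, so wrap-around is a mask rather than a modulo. A self-contained sketch with a reduced CQ_SIZE (the kernel's MAX_CIRCULAR_QUEUE_SIZE is larger); note the usual ring-buffer convention that one slot is sacrificed to tell full from empty:

#include <stdio.h>

#define CQ_SIZE	16			/* must stay a power of two */
#define CQ_MASK	(CQ_SIZE - 1)

struct item { int v; };			/* stand-in for struct lock_list */

struct circular_queue {
	struct item *element[CQ_SIZE];
	unsigned int front, rear;
};

static void cq_init(struct circular_queue *cq)
{
	cq->front = cq->rear = 0;
}

static int cq_empty(struct circular_queue *cq)
{
	return cq->front == cq->rear;
}

static int cq_full(struct circular_queue *cq)
{
	/* one slot is sacrificed so that full != empty */
	return ((cq->rear + 1) & CQ_MASK) == cq->front;
}

static int cq_enqueue(struct circular_queue *cq, struct item *elem)
{
	if (cq_full(cq))
		return -1;
	cq->element[cq->rear] = elem;
	cq->rear = (cq->rear + 1) & CQ_MASK;
	return 0;
}

/* Mirrors the patched __cq_dequeue(): NULL now means "queue empty". */
static struct item *cq_dequeue(struct circular_queue *cq)
{
	struct item *elem;

	if (cq_empty(cq))
		return NULL;
	elem = cq->element[cq->front];
	cq->front = (cq->front + 1) & CQ_MASK;
	return elem;
}

int main(void)
{
	struct circular_queue cq;
	struct item a = { 42 };
	struct item *p;

	cq_init(&cq);
	cq_enqueue(&cq, &a);
	while ((p = cq_dequeue(&cq)))	/* same loop shape as the new __bfs() */
		printf("dequeued %d\n", p->v);
	printf("empty: %d\n", cq_empty(&cq));
	return 0;
}
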
@@ -1322,13 +1372,32 @@ static inline int get_lock_depth(struct lock_list *child)
return depth;
}
+/*
+ * Return the forward or backward dependency list.
+ *
+ * @lock: the lock_list to get its class's dependency list
+ * @offset: the offset to struct lock_class to determine whether it is
+ * locks_after or locks_before
+ */
+static inline struct list_head *get_dep_list(struct lock_list *lock, int offset)
+{
+ void *lock_class = lock->class;
+
+ return lock_class + offset;
+}
+
+/*
+ * Forward- or backward-dependency search, used for both circular dependency
+ * checking and hardirq-unsafe/softirq-unsafe checking.
+ */
static int __bfs(struct lock_list *source_entry,
void *data,
int (*match)(struct lock_list *entry, void *data),
struct lock_list **target_entry,
- int forward)
+ int offset)
{
struct lock_list *entry;
+ struct lock_list *lock;
struct list_head *head;
struct circular_queue *cq = &lock_cq;
int ret = 1;
@@ -1339,31 +1408,21 @@ static int __bfs(struct lock_list *source_entry,
goto exit;
}
- if (forward)
- head = &source_entry->class->locks_after;
- else
- head = &source_entry->class->locks_before;
-
+ head = get_dep_list(source_entry, offset);
if (list_empty(head))
goto exit;
__cq_init(cq);
- __cq_enqueue(cq, (unsigned long)source_entry);
+ __cq_enqueue(cq, source_entry);
- while (!__cq_empty(cq)) {
- struct lock_list *lock;
-
- __cq_dequeue(cq, (unsigned long *)&lock);
+ while ((lock = __cq_dequeue(cq))) {
if (!lock->class) {
ret = -2;
goto exit;
}
- if (forward)
- head = &lock->class->locks_after;
- else
- head = &lock->class->locks_before;
+ head = get_dep_list(lock, offset);
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -1377,7 +1436,7 @@ static int __bfs(struct lock_list *source_entry,
goto exit;
}
- if (__cq_enqueue(cq, (unsigned long)entry)) {
+ if (__cq_enqueue(cq, entry)) {
ret = -1;
goto exit;
}
@@ -1396,7 +1455,8 @@ static inline int __bfs_forwards(struct lock_list *src_entry,
int (*match)(struct lock_list *entry, void *data),
struct lock_list **target_entry)
{
- return __bfs(src_entry, data, match, target_entry, 1);
+ return __bfs(src_entry, data, match, target_entry,
+ offsetof(struct lock_class, locks_after));
}
@@ -1405,16 +1465,11 @@ static inline int __bfs_backwards(struct lock_list *src_entry,
int (*match)(struct lock_list *entry, void *data),
struct lock_list **target_entry)
{
- return __bfs(src_entry, data, match, target_entry, 0);
+ return __bfs(src_entry, data, match, target_entry,
+ offsetof(struct lock_class, locks_before));
}
-/*
- * Recursive, forwards-direction lock-dependency checking, used for
- * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
- * checking.
- */
-
static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
{
unsigned long *entries = stack_trace + trace->offset;
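
Replacing the forward/backward flag with a byte offset into struct lock_class lets a single __bfs() body walk either dependency list without branching on direction; the offset is computed once with offsetof() by the two wrappers above. The trick in isolation, on a hypothetical two-list structure:

#include <stdio.h>
#include <stddef.h>

/* Hypothetical two-list node, standing in for struct lock_class. */
struct node {
	const char *locks_after;	/* forward dependencies */
	const char *locks_before;	/* backward dependencies */
};

/*
 * Same trick as get_dep_list(): treat the object as raw bytes and add
 * a member offset, so one function body serves both search directions.
 */
static const char **get_dep_list(struct node *n, size_t offset)
{
	void *p = n;

	return (const char **)((char *)p + offset);
}

int main(void)
{
	struct node n = { "forward list", "backward list" };

	/* What __bfs_forwards()/__bfs_backwards() pass down: */
	printf("%s\n", *get_dep_list(&n, offsetof(struct node, locks_after)));
	printf("%s\n", *get_dep_list(&n, offsetof(struct node, locks_before)));
	return 0;
}
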
@@ -1426,16 +1481,15 @@ static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
* Print a dependency chain entry (this is only done when a deadlock
* has been detected):
*/
-static noinline int
+static noinline void
print_circular_bug_entry(struct lock_list *target, int depth)
{
if (debug_locks_silent)
- return 0;
+ return;
printk("\n-> #%u", depth);
print_lock_name(target->class);
printk(KERN_CONT ":\n");
print_lock_trace(&target->trace, 6);
- return 0;
}
static void
@@ -1492,7 +1546,7 @@ print_circular_lock_scenario(struct held_lock *src,
* When a circular dependency is detected, print the
* header first:
*/
-static noinline int
+static noinline void
print_circular_bug_header(struct lock_list *entry, unsigned int depth,
struct held_lock *check_src,
struct held_lock *check_tgt)
@@ -1500,7 +1554,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
struct task_struct *curr = current;
if (debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("======================================================\n");
@@ -1518,8 +1572,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
print_circular_bug_entry(entry, depth);
-
- return 0;
}
static inline int class_equal(struct lock_list *entry, void *data)
@@ -1527,10 +1579,10 @@ static inline int class_equal(struct lock_list *entry, void *data)
return entry->class == data;
}
-static noinline int print_circular_bug(struct lock_list *this,
- struct lock_list *target,
- struct held_lock *check_src,
- struct held_lock *check_tgt)
+static noinline void print_circular_bug(struct lock_list *this,
+ struct lock_list *target,
+ struct held_lock *check_src,
+ struct held_lock *check_tgt)
{
struct task_struct *curr = current;
struct lock_list *parent;
@@ -1538,10 +1590,10 @@ static noinline int print_circular_bug(struct lock_list *this,
int depth;
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
- return 0;
+ return;
if (!save_trace(&this->trace))
- return 0;
+ return;
depth = get_lock_depth(target);
@@ -1563,21 +1615,17 @@ static noinline int print_circular_bug(struct lock_list *this,
printk("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
-static noinline int print_bfs_bug(int ret)
+static noinline void print_bfs_bug(int ret)
{
if (!debug_locks_off_graph_unlock())
- return 0;
+ return;
/*
* Breadth-first-search failed, graph got corrupted?
*/
WARN(1, "lockdep bfs error:%d\n", ret);
-
- return 0;
}
static int noop_count(struct lock_list *entry, void *data)
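
A pattern that recurs throughout this patch is converting the print_*() diagnostics from int to void. The old code folded "report the bug" and "return failure" into a single return statement; the new code makes the printing a side effect and lets each caller state its own result. The shape of the change, on invented function names:

#include <stdio.h>

/* Before: the printer's dummy return value doubles as the error code. */
static int print_problem_old(void)
{
	puts("old: report printed");
	return 0;
}

static int check_old(int bad)
{
	if (bad)
		return print_problem_old();	/* 0 means "failed" */
	return 1;
}

/* After: printing is a pure side effect; the caller owns its result. */
static void print_problem_new(void)
{
	puts("new: report printed");
}

static int check_new(int bad)
{
	if (bad) {
		print_problem_new();
		return 0;
	}
	return 1;
}

int main(void)
{
	printf("old check: %d, new check: %d\n", check_old(1), check_new(1));
	return 0;
}
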
@@ -1640,36 +1688,95 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
}
/*
- * Prove that the dependency graph starting at <entry> can not
- * lead to <target>. Print an error and return 0 if it does.
+ * Check whether the dependency graph starting at <src> can lead to
+ * <target>. Print an error and return 0 if it does.
*/
static noinline int
-check_noncircular(struct lock_list *root, struct lock_class *target,
- struct lock_list **target_entry)
+check_path(struct lock_class *target, struct lock_list *src_entry,
+ struct lock_list **target_entry)
{
- int result;
+ int ret;
+
+ ret = __bfs_forwards(src_entry, (void *)target, class_equal,
+ target_entry);
+
+ if (unlikely(ret < 0))
+ print_bfs_bug(ret);
+
+ return ret;
+}
+
+/*
+ * Prove that the dependency graph starting at <src> can not
+ * lead to <target>. If it can, there is a circle when adding
+ * <target> -> <src> dependency.
+ *
+ * Print an error and return 0 if it does.
+ */
+static noinline int
+check_noncircular(struct held_lock *src, struct held_lock *target,
+ struct lock_trace *trace)
+{
+ int ret;
+ struct lock_list *uninitialized_var(target_entry);
+ struct lock_list src_entry = {
+ .class = hlock_class(src),
+ .parent = NULL,
+ };
debug_atomic_inc(nr_cyclic_checks);
- result = __bfs_forwards(root, target, class_equal, target_entry);
+ ret = check_path(hlock_class(target), &src_entry, &target_entry);
- return result;
+ if (unlikely(!ret)) {
+ if (!trace->nr_entries) {
+ /*
+ * If save_trace fails here, the printing might
+ * trigger a WARN but because of the !nr_entries it
+ * should not do bad things.
+ */
+ save_trace(trace);
+ }
+
+ print_circular_bug(&src_entry, target_entry, src, target);
+ }
+
+ return ret;
}
+#ifdef CONFIG_LOCKDEP_SMALL
+/*
+ * Check whether the dependency graph starting at <src> can lead to
+ * <target>. If it can, the <src> -> <target> dependency is already
+ * in the graph.
+ *
+ * Print an error and return 2 if it does or 1 if it does not.
+ */
static noinline int
-check_redundant(struct lock_list *root, struct lock_class *target,
- struct lock_list **target_entry)
+check_redundant(struct held_lock *src, struct held_lock *target)
{
- int result;
+ int ret;
+ struct lock_list *uninitialized_var(target_entry);
+ struct lock_list src_entry = {
+ .class = hlock_class(src),
+ .parent = NULL,
+ };
debug_atomic_inc(nr_redundant_checks);
- result = __bfs_forwards(root, target, class_equal, target_entry);
+ ret = check_path(hlock_class(target), &src_entry, &target_entry);
- return result;
+ if (!ret) {
+ debug_atomic_inc(nr_redundant);
+ ret = 2;
+ } else if (ret < 0)
+ ret = 0;
+
+ return ret;
}
+#endif
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+#ifdef CONFIG_TRACE_IRQFLAGS
static inline int usage_accumulate(struct lock_list *entry, void *mask)
{
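
check_path() reduces both callers to one question: is <target> reachable from <src> in the dependency graph? check_noncircular() asks it from <next> looking for <prev> (a hit means the new dependency closes a cycle), while check_redundant() asks the opposite direction (a hit means the dependency already exists). A minimal BFS reachability sketch over an adjacency matrix, purely illustrative:

#include <stdio.h>

#define MAX_NODES 8

static int adj[MAX_NODES][MAX_NODES];	/* adj[a][b] != 0: dependency a -> b */

/* BFS reachability: is @target reachable from @src? */
static int reachable(int src, int target)
{
	int queue[MAX_NODES], visited[MAX_NODES] = { 0 };
	int front = 0, rear = 0, node, next;

	visited[src] = 1;
	queue[rear++] = src;

	while (front < rear) {
		node = queue[front++];
		if (node == target)
			return 1;
		for (next = 0; next < MAX_NODES; next++) {
			if (adj[node][next] && !visited[next]) {
				visited[next] = 1;
				queue[rear++] = next;
			}
		}
	}
	return 0;
}

int main(void)
{
	enum { A, B };

	adj[A][B] = 1;		/* existing dependency: A is taken before B */

	/*
	 * Proposing <prev> = B, <next> = A: as in check_noncircular(),
	 * search from <next> for <prev>; a hit means adding B -> A
	 * would close a cycle.
	 */
	printf("adding B -> A deadlocks: %d\n", reachable(A, B));
	printf("adding A -> B (again) deadlocks: %d\n", reachable(B, A));
	return 0;
}
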
@@ -1766,7 +1873,7 @@ static void print_lock_class_header(struct lock_class *class, int depth)
*/
static void __used
print_shortest_lock_dependencies(struct lock_list *leaf,
- struct lock_list *root)
+ struct lock_list *root)
{
struct lock_list *entry = leaf;
int depth;
@@ -1788,8 +1895,6 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
entry = get_lock_parent(entry);
depth--;
} while (entry && (depth >= 0));
-
- return;
}
static void
@@ -1848,7 +1953,7 @@ print_irq_lock_scenario(struct lock_list *safe_entry,
printk("\n *** DEADLOCK ***\n\n");
}
-static int
+static void
print_bad_irq_dependency(struct task_struct *curr,
struct lock_list *prev_root,
struct lock_list *next_root,
@@ -1861,7 +1966,7 @@ print_bad_irq_dependency(struct task_struct *curr,
const char *irqclass)
{
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("=====================================================\n");
@@ -1907,19 +2012,17 @@ print_bad_irq_dependency(struct task_struct *curr,
pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
if (!save_trace(&prev_root->trace))
- return 0;
+ return;
print_shortest_lock_dependencies(backwards_entry, prev_root);
pr_warn("\nthe dependencies between the lock to be acquired");
pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
if (!save_trace(&next_root->trace))
- return 0;
+ return;
print_shortest_lock_dependencies(forwards_entry, next_root);
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
static const char *state_names[] = {
@@ -2066,8 +2169,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
this.class = hlock_class(prev);
ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
- if (ret < 0)
- return print_bfs_bug(ret);
+ if (ret < 0) {
+ print_bfs_bug(ret);
+ return 0;
+ }
usage_mask &= LOCKF_USED_IN_IRQ_ALL;
if (!usage_mask)
@@ -2083,8 +2188,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
that.class = hlock_class(next);
ret = find_usage_forwards(&that, forward_mask, &target_entry1);
- if (ret < 0)
- return print_bfs_bug(ret);
+ if (ret < 0) {
+ print_bfs_bug(ret);
+ return 0;
+ }
if (ret == 1)
return ret;
@@ -2096,8 +2203,10 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
backward_mask = original_mask(target_entry1->class->usage_mask);
ret = find_usage_backwards(&this, backward_mask, &target_entry);
- if (ret < 0)
- return print_bfs_bug(ret);
+ if (ret < 0) {
+ print_bfs_bug(ret);
+ return 0;
+ }
if (DEBUG_LOCKS_WARN_ON(ret == 1))
return 1;
@@ -2111,11 +2220,13 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
if (DEBUG_LOCKS_WARN_ON(ret == -1))
return 1;
- return print_bad_irq_dependency(curr, &this, &that,
- target_entry, target_entry1,
- prev, next,
- backward_bit, forward_bit,
- state_name(backward_bit));
+ print_bad_irq_dependency(curr, &this, &that,
+ target_entry, target_entry1,
+ prev, next,
+ backward_bit, forward_bit,
+ state_name(backward_bit));
+
+ return 0;
}
static void inc_chains(void)
@@ -2143,11 +2254,10 @@ static inline void inc_chains(void)
nr_process_chains++;
}
-#endif
+#endif /* CONFIG_TRACE_IRQFLAGS */
static void
-print_deadlock_scenario(struct held_lock *nxt,
- struct held_lock *prv)
+print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv)
{
struct lock_class *next = hlock_class(nxt);
struct lock_class *prev = hlock_class(prv);
@@ -2165,12 +2275,12 @@ print_deadlock_scenario(struct held_lock *nxt,
printk(" May be due to missing lock nesting notation\n\n");
}
-static int
+static void
print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next)
{
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("============================================\n");
@@ -2189,8 +2299,6 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
/*
@@ -2202,8 +2310,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
* Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
*/
static int
-check_deadlock(struct task_struct *curr, struct held_lock *next,
- struct lockdep_map *next_instance, int read)
+check_deadlock(struct task_struct *curr, struct held_lock *next)
{
struct held_lock *prev;
struct held_lock *nest = NULL;
@@ -2222,7 +2329,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
* Allow read-after-read recursion of the same
* lock class (i.e. read_lock(lock)+read_lock(lock)):
*/
- if ((read == 2) && prev->read)
+ if ((next->read == 2) && prev->read)
return 2;
/*
@@ -2232,14 +2339,15 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
if (nest)
return 2;
- return print_deadlock_bug(curr, prev, next);
+ print_deadlock_bug(curr, prev, next);
+ return 0;
}
return 1;
}
/*
* There was a chain-cache miss, and we are about to add a new dependency
- * to a previous lock. We recursively validate the following rules:
+ * to a previous lock. We validate the following rules:
*
* - would the adding of the <prev> -> <next> dependency create a
* circular dependency in the graph? [== circular deadlock]
@@ -2263,9 +2371,7 @@ static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, int distance, struct lock_trace *trace)
{
- struct lock_list *uninitialized_var(target_entry);
struct lock_list *entry;
- struct lock_list this;
int ret;
if (!hlock_class(prev)->key || !hlock_class(next)->key) {
@@ -2289,28 +2395,16 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
/*
* Prove that the new <prev> -> <next> dependency would not
* create a circular dependency in the graph. (We do this by
- * forward-recursing into the graph starting at <next>, and
- * checking whether we can reach <prev>.)
+ * a breadth-first search into the graph starting at <next>,
+ * and checking whether we can reach <prev>.)
*
- * We are using global variables to control the recursion, to
- * keep the stackframe size of the recursive functions low:
+ * The search is limited by the size of the circular queue (i.e.,
+ * MAX_CIRCULAR_QUEUE_SIZE), which holds the current breadth of nodes
+ * in the graph whose neighbours are still to be checked.
*/
- this.class = hlock_class(next);
- this.parent = NULL;
- ret = check_noncircular(&this, hlock_class(prev), &target_entry);
- if (unlikely(!ret)) {
- if (!trace->nr_entries) {
- /*
- * If save_trace fails here, the printing might
- * trigger a WARN but because of the !nr_entries it
- * should not do bad things.
- */
- save_trace(trace);
- }
- return print_circular_bug(&this, target_entry, next, prev);
- }
- else if (unlikely(ret < 0))
- return print_bfs_bug(ret);
+ ret = check_noncircular(next, prev, trace);
+ if (unlikely(ret <= 0))
+ return 0;
if (!check_irq_usage(curr, prev, next))
return 0;
@@ -2341,19 +2435,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
}
}
+#ifdef CONFIG_LOCKDEP_SMALL
/*
* Is the <prev> -> <next> link redundant?
*/
- this.class = hlock_class(prev);
- this.parent = NULL;
- ret = check_redundant(&this, hlock_class(next), &target_entry);
- if (!ret) {
- debug_atomic_inc(nr_redundant);
- return 2;
- }
- if (ret < 0)
- return print_bfs_bug(ret);
-
+ ret = check_redundant(prev, next);
+ if (ret != 1)
+ return ret;
+#endif
if (!trace->nr_entries && !save_trace(trace))
return 0;
@@ -2505,12 +2594,13 @@ static void
print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next)
{
struct held_lock *hlock;
- u64 chain_key = 0;
+ u64 chain_key = INITIAL_CHAIN_KEY;
int depth = curr->lockdep_depth;
- int i;
+ int i = get_first_held_lock(curr, hlock_next);
- printk("depth: %u\n", depth + 1);
- for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) {
+ printk("depth: %u (irq_context %u)\n", depth - i + 1,
+ hlock_next->irq_context);
+ for (; i < depth; i++) {
hlock = curr->held_locks + i;
chain_key = print_chain_key_iteration(hlock->class_idx, chain_key);
@@ -2524,13 +2614,13 @@ print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_ne
static void print_chain_keys_chain(struct lock_chain *chain)
{
int i;
- u64 chain_key = 0;
+ u64 chain_key = INITIAL_CHAIN_KEY;
int class_id;
printk("depth: %u\n", chain->depth);
for (i = 0; i < chain->depth; i++) {
class_id = chain_hlocks[chain->base + i];
- chain_key = print_chain_key_iteration(class_id + 1, chain_key);
+ chain_key = print_chain_key_iteration(class_id, chain_key);
print_lock_name(lock_classes + class_id);
printk("\n");
@@ -2581,7 +2671,7 @@ static int check_no_collision(struct task_struct *curr,
}
for (j = 0; j < chain->depth - 1; j++, i++) {
- id = curr->held_locks[i].class_idx - 1;
+ id = curr->held_locks[i].class_idx;
if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) {
print_collision(curr, hlock, chain);
@@ -2664,7 +2754,7 @@ static inline int add_chain_cache(struct task_struct *curr,
if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = nr_chain_hlocks;
for (j = 0; j < chain->depth - 1; j++, i++) {
- int lock_id = curr->held_locks[i].class_idx - 1;
+ int lock_id = curr->held_locks[i].class_idx;
chain_hlocks[chain->base + j] = lock_id;
}
chain_hlocks[chain->base + j] = class - lock_classes;
@@ -2754,8 +2844,9 @@ cache_hit:
return 1;
}
-static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
- struct held_lock *hlock, int chain_head, u64 chain_key)
+static int validate_chain(struct task_struct *curr,
+ struct held_lock *hlock,
+ int chain_head, u64 chain_key)
{
/*
* Trylock needs to maintain the stack of held locks, but it
@@ -2776,12 +2867,18 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
* - is softirq-safe, if this lock is hardirq-unsafe
*
* And check whether the new lock's dependency graph
- * could lead back to the previous lock.
+ * could lead back to the previous lock:
*
- * any of these scenarios could lead to a deadlock. If
- * All validations
+ * - within the current held-lock stack
+ * - across our accumulated lock dependency records
+ *
+ * any of these scenarios could lead to a deadlock.
*/
- int ret = check_deadlock(curr, hlock, lock, hlock->read);
+ /*
+ * The simple case: does the current task hold the same lock
+ * already?
+ */
+ int ret = check_deadlock(curr, hlock);
if (!ret)
return 0;
@@ -2812,16 +2909,12 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
}
#else
static inline int validate_chain(struct task_struct *curr,
- struct lockdep_map *lock, struct held_lock *hlock,
- int chain_head, u64 chain_key)
+ struct held_lock *hlock,
+ int chain_head, u64 chain_key)
{
return 1;
}
-
-static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
-{
-}
-#endif
+#endif /* CONFIG_PROVE_LOCKING */
/*
* We are building curr_chain_key incrementally, so double-check
@@ -2832,7 +2925,7 @@ static void check_chain_key(struct task_struct *curr)
#ifdef CONFIG_DEBUG_LOCKDEP
struct held_lock *hlock, *prev_hlock = NULL;
unsigned int i;
- u64 chain_key = 0;
+ u64 chain_key = INITIAL_CHAIN_KEY;
for (i = 0; i < curr->lockdep_depth; i++) {
hlock = curr->held_locks + i;
@@ -2848,15 +2941,17 @@ static void check_chain_key(struct task_struct *curr)
(unsigned long long)hlock->prev_chain_key);
return;
}
+
/*
- * Whoops ran out of static storage again?
+ * hlock->class_idx can't go beyond MAX_LOCKDEP_KEYS, but is
+ * it a registered lock class index?
*/
- if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(!test_bit(hlock->class_idx, lock_classes_in_use)))
return;
if (prev_hlock && (prev_hlock->irq_context !=
hlock->irq_context))
- chain_key = 0;
+ chain_key = INITIAL_CHAIN_KEY;
chain_key = iterate_chain_key(chain_key, hlock->class_idx);
prev_hlock = hlock;
}
@@ -2874,14 +2969,11 @@ static void check_chain_key(struct task_struct *curr)
#endif
}
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
-
-
-static void
-print_usage_bug_scenario(struct held_lock *lock)
+static void print_usage_bug_scenario(struct held_lock *lock)
{
struct lock_class *class = hlock_class(lock);
@@ -2898,12 +2990,12 @@ print_usage_bug_scenario(struct held_lock *lock)
printk("\n *** DEADLOCK ***\n\n");
}
-static int
+static void
print_usage_bug(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
{
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("================================\n");
@@ -2933,8 +3025,6 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
/*
@@ -2944,8 +3034,10 @@ static inline int
valid_state(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
{
- if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
- return print_usage_bug(curr, this, bad_bit, new_bit);
+ if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) {
+ print_usage_bug(curr, this, bad_bit, new_bit);
+ return 0;
+ }
return 1;
}
@@ -2953,7 +3045,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
/*
* print irq inversion bug:
*/
-static int
+static void
print_irq_inversion_bug(struct task_struct *curr,
struct lock_list *root, struct lock_list *other,
struct held_lock *this, int forwards,
@@ -2964,7 +3056,7 @@ print_irq_inversion_bug(struct task_struct *curr,
int depth;
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("========================================================\n");
@@ -3005,13 +3097,11 @@ print_irq_inversion_bug(struct task_struct *curr,
pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
if (!save_trace(&root->trace))
- return 0;
+ return;
print_shortest_lock_dependencies(other, root);
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
/*
@@ -3029,13 +3119,16 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
- if (ret < 0)
- return print_bfs_bug(ret);
+ if (ret < 0) {
+ print_bfs_bug(ret);
+ return 0;
+ }
if (ret == 1)
return ret;
- return print_irq_inversion_bug(curr, &root, target_entry,
- this, 1, irqclass);
+ print_irq_inversion_bug(curr, &root, target_entry,
+ this, 1, irqclass);
+ return 0;
}
/*
@@ -3053,13 +3146,16 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
- if (ret < 0)
- return print_bfs_bug(ret);
+ if (ret < 0) {
+ print_bfs_bug(ret);
+ return 0;
+ }
if (ret == 1)
return ret;
- return print_irq_inversion_bug(curr, &root, target_entry,
- this, 0, irqclass);
+ print_irq_inversion_bug(curr, &root, target_entry,
+ this, 0, irqclass);
+ return 0;
}
void print_irqtrace_events(struct task_struct *curr)
@@ -3142,7 +3238,7 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this,
* Validate that the lock dependencies don't have conflicting usage
* states.
*/
- if ((!read || !dir || STRICT_READ_CHECKS) &&
+ if ((!read || STRICT_READ_CHECKS) &&
!usage(curr, this, excl_bit, state_name(new_bit & ~LOCK_USAGE_READ_MASK)))
return 0;
@@ -3367,8 +3463,12 @@ void trace_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off);
}
-static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
+static int
+mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
{
+ if (!check)
+ goto lock_used;
+
/*
* If non-trylock use in a hardirq or softirq context, then
* mark the lock as used in these contexts:
@@ -3412,6 +3512,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
}
}
+lock_used:
+ /* mark it as used: */
+ if (!mark_lock(curr, hlock, LOCK_USED))
+ return 0;
+
return 1;
}
@@ -3443,35 +3548,6 @@ static int separate_irq_context(struct task_struct *curr,
return 0;
}
-#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
-
-static inline
-int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
- enum lock_usage_bit new_bit)
-{
- WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */
- return 1;
-}
-
-static inline int mark_irqflags(struct task_struct *curr,
- struct held_lock *hlock)
-{
- return 1;
-}
-
-static inline unsigned int task_irq_context(struct task_struct *task)
-{
- return 0;
-}
-
-static inline int separate_irq_context(struct task_struct *curr,
- struct held_lock *hlock)
-{
- return 0;
-}
-
-#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
-
/*
* Mark a lock with a usage bit, and validate the state transition:
*/
@@ -3480,6 +3556,11 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
{
unsigned int new_mask = 1 << new_bit, ret = 1;
+ if (new_bit >= LOCK_USAGE_STATES) {
+ DEBUG_LOCKS_WARN_ON(1);
+ return 0;
+ }
+
/*
* If already set then do not dirty the cacheline,
* nor do any checks:
@@ -3503,25 +3584,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
switch (new_bit) {
-#define LOCKDEP_STATE(__STATE) \
- case LOCK_USED_IN_##__STATE: \
- case LOCK_USED_IN_##__STATE##_READ: \
- case LOCK_ENABLED_##__STATE: \
- case LOCK_ENABLED_##__STATE##_READ:
-#include "lockdep_states.h"
-#undef LOCKDEP_STATE
- ret = mark_lock_irq(curr, this, new_bit);
- if (!ret)
- return 0;
- break;
case LOCK_USED:
debug_atomic_dec(nr_unused_locks);
break;
default:
- if (!debug_locks_off_graph_unlock())
+ ret = mark_lock_irq(curr, this, new_bit);
+ if (!ret)
return 0;
- WARN_ON(1);
- return 0;
}
graph_unlock();
@@ -3539,6 +3608,27 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return ret;
}
+#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+
+static inline int
+mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
+{
+ return 1;
+}
+
+static inline unsigned int task_irq_context(struct task_struct *task)
+{
+ return 0;
+}
+
+static inline int separate_irq_context(struct task_struct *curr,
+ struct held_lock *hlock)
+{
+ return 0;
+}
+
+#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+
/*
* Initialize a lock instance's lock-class mapping info:
*/
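
With this reshuffle, everything irq-related sits behind a single CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING block, and the #else branch supplies inline no-op stubs so call sites compile unchanged either way. The general pattern, sketched with an invented config macro:

#include <stdio.h>

#define CONFIG_DEMO_CHECKS 1	/* hypothetical Kconfig switch */

#if CONFIG_DEMO_CHECKS
/* Feature on: validate, then fall through to the common tail. */
static int mark_usage(int check)
{
	if (!check)
		goto lock_used;

	puts("validating irq usage");	/* stand-in for the real checks */

lock_used:
	puts("marking lock used");	/* the LOCK_USED step */
	return 1;
}
#else
/* Feature off: an inline no-op stub keeps every call site unchanged. */
static inline int mark_usage(int check)
{
	(void)check;
	return 1;
}
#endif

int main(void)
{
	return !mark_usage(0);
}
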
@@ -3602,15 +3692,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
struct lock_class_key __lockdep_no_validate__;
EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
-static int
+static void
print_lock_nested_lock_not_held(struct task_struct *curr,
struct held_lock *hlock,
unsigned long ip)
{
if (!debug_locks_off())
- return 0;
+ return;
if (debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("==================================\n");
@@ -3632,8 +3722,6 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
static int __lock_is_held(const struct lockdep_map *lock, int read);
@@ -3698,24 +3786,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
return 0;
- class_idx = class - lock_classes + 1;
+ class_idx = class - lock_classes;
if (depth) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
- if (hlock->references) {
- /*
- * Check: unsigned int references:12, overflow.
- */
- if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
- return 0;
+ if (!references)
+ references++;
+ if (!hlock->references)
hlock->references++;
- } else {
- hlock->references = 2;
- }
- return 1;
+ hlock->references += references;
+
+ /* Overflow */
+ if (DEBUG_LOCKS_WARN_ON(hlock->references < references))
+ return 0;
+
+ return 2;
}
}
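
For repeat acquisitions of the same class under a nest_lock, __lock_acquire() now merges reference counts on the existing held_lock and reports the merge with a new return code 2, which reacquire_held_locks() below uses to keep the depth accounting straight. The overflow test relies on unsigned wrap-around: if the sum ends up smaller than the amount added, the addition overflowed. In isolation:

#include <stdio.h>

/*
 * Merge @extra references into @refs; detect unsigned wrap-around the
 * way the patched __lock_acquire() does: after an unsigned addition,
 * a result smaller than one addend means the addition overflowed.
 */
static int merge_references(unsigned int *refs, unsigned int extra)
{
	*refs += extra;
	if (*refs < extra)
		return 0;	/* overflow: report failure */
	return 2;		/* the patch's "merged" result code */
}

int main(void)
{
	unsigned int refs = 4000000000u;
	int ret;

	ret = merge_references(&refs, 10);
	printf("small merge -> %d (refs=%u)\n", ret, refs);

	ret = merge_references(&refs, 4000000000u);
	printf("huge merge  -> %d (refs=%u)\n", ret, refs);
	return 0;
}
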
@@ -3742,11 +3830,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
#endif
hlock->pin_count = pin_count;
- if (check && !mark_irqflags(curr, hlock))
- return 0;
-
- /* mark it as used: */
- if (!mark_lock(curr, hlock, LOCK_USED))
+ /* Initialize the lock usage bit */
+ if (!mark_usage(curr, hlock, check))
return 0;
/*
@@ -3760,9 +3845,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* the hash, not class->key.
*/
/*
- * Whoops, we did it again.. ran straight out of our static allocation.
+ * Whoops, we did it again.. class_idx is invalid.
*/
- if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(!test_bit(class_idx, lock_classes_in_use)))
return 0;
chain_key = curr->curr_chain_key;
@@ -3770,27 +3855,29 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
/*
* How can we have a chain hash when we ain't got no keys?!
*/
- if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
+ if (DEBUG_LOCKS_WARN_ON(chain_key != INITIAL_CHAIN_KEY))
return 0;
chain_head = 1;
}
hlock->prev_chain_key = chain_key;
if (separate_irq_context(curr, hlock)) {
- chain_key = 0;
+ chain_key = INITIAL_CHAIN_KEY;
chain_head = 1;
}
chain_key = iterate_chain_key(chain_key, class_idx);
- if (nest_lock && !__lock_is_held(nest_lock, -1))
- return print_lock_nested_lock_not_held(curr, hlock, ip);
+ if (nest_lock && !__lock_is_held(nest_lock, -1)) {
+ print_lock_nested_lock_not_held(curr, hlock, ip);
+ return 0;
+ }
if (!debug_locks_silent) {
WARN_ON_ONCE(depth && !hlock_class(hlock - 1)->key);
WARN_ON_ONCE(!hlock_class(hlock)->key);
}
- if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
+ if (!validate_chain(curr, hlock, chain_head, chain_key))
return 0;
curr->curr_chain_key = chain_key;
@@ -3819,14 +3906,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
return 1;
}
-static int
-print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
- unsigned long ip)
+static void print_unlock_imbalance_bug(struct task_struct *curr,
+ struct lockdep_map *lock,
+ unsigned long ip)
{
if (!debug_locks_off())
- return 0;
+ return;
if (debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("=====================================\n");
@@ -3844,8 +3931,6 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
static int match_held_lock(const struct held_lock *hlock,
@@ -3877,7 +3962,7 @@ static int match_held_lock(const struct held_lock *hlock,
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
return 0;
- if (hlock->class_idx == class - lock_classes + 1)
+ if (hlock->class_idx == class - lock_classes)
return 1;
}
@@ -3921,22 +4006,33 @@ out:
}
static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
- int idx)
+ int idx, unsigned int *merged)
{
struct held_lock *hlock;
+ int first_idx = idx;
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
- if (!__lock_acquire(hlock->instance,
+ switch (__lock_acquire(hlock->instance,
hlock_class(hlock)->subclass,
hlock->trylock,
hlock->read, hlock->check,
hlock->hardirqs_off,
hlock->nest_lock, hlock->acquire_ip,
- hlock->references, hlock->pin_count))
+ hlock->references, hlock->pin_count)) {
+ case 0:
return 1;
+ case 1:
+ break;
+ case 2:
+ *merged += (idx == first_idx);
+ break;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
}
return 0;
}
@@ -3947,9 +4043,9 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
unsigned long ip)
{
struct task_struct *curr = current;
+ unsigned int depth, merged = 0;
struct held_lock *hlock;
struct lock_class *class;
- unsigned int depth;
int i;
if (unlikely(!debug_locks))
@@ -3964,24 +4060,26 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
return 0;
hlock = find_held_lock(curr, lock, depth, &i);
- if (!hlock)
- return print_unlock_imbalance_bug(curr, lock, ip);
+ if (!hlock) {
+ print_unlock_imbalance_bug(curr, lock, ip);
+ return 0;
+ }
lockdep_init_map(lock, name, key, 0);
class = register_lock_class(lock, subclass, 0);
- hlock->class_idx = class - lock_classes + 1;
+ hlock->class_idx = class - lock_classes;
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
- if (reacquire_held_locks(curr, depth, i))
+ if (reacquire_held_locks(curr, depth, i, &merged))
return 0;
/*
* I took it apart and put it back together again, except now I have
* these 'spare' parts.. where shall I put them.
*/
- if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
+ if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged))
return 0;
return 1;
}
@@ -3989,8 +4087,8 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
+ unsigned int depth, merged = 0;
struct held_lock *hlock;
- unsigned int depth;
int i;
if (unlikely(!debug_locks))
@@ -4005,8 +4103,10 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
return 0;
hlock = find_held_lock(curr, lock, depth, &i);
- if (!hlock)
- return print_unlock_imbalance_bug(curr, lock, ip);
+ if (!hlock) {
+ print_unlock_imbalance_bug(curr, lock, ip);
+ return 0;
+ }
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
@@ -4015,7 +4115,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
hlock->read = 1;
hlock->acquire_ip = ip;
- if (reacquire_held_locks(curr, depth, i))
+ if (reacquire_held_locks(curr, depth, i, &merged))
+ return 0;
+
+ /* Merging can't happen with unchanged classes.. */
+ if (DEBUG_LOCKS_WARN_ON(merged))
return 0;
/*
@@ -4024,6 +4128,7 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
*/
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
return 0;
+
return 1;
}
@@ -4035,11 +4140,11 @@ static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
* @nested is an hysterical artifact, needs a tree wide cleanup.
*/
static int
-__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+__lock_release(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
+ unsigned int depth, merged = 1;
struct held_lock *hlock;
- unsigned int depth;
int i;
if (unlikely(!debug_locks))
@@ -4050,16 +4155,20 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
* So we're all set to release this lock.. wait what lock? We don't
* own any locks, you've been drinking again?
*/
- if (DEBUG_LOCKS_WARN_ON(depth <= 0))
- return print_unlock_imbalance_bug(curr, lock, ip);
+ if (depth <= 0) {
+ print_unlock_imbalance_bug(curr, lock, ip);
+ return 0;
+ }
/*
* Check whether the lock exists in the current stack
* of held locks:
*/
hlock = find_held_lock(curr, lock, depth, &i);
- if (!hlock)
- return print_unlock_imbalance_bug(curr, lock, ip);
+ if (!hlock) {
+ print_unlock_imbalance_bug(curr, lock, ip);
+ return 0;
+ }
if (hlock->instance == lock)
lock_release_holdtime(hlock);
@@ -4094,14 +4203,15 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
if (i == depth-1)
return 1;
- if (reacquire_held_locks(curr, depth, i + 1))
+ if (reacquire_held_locks(curr, depth, i + 1, &merged))
return 0;
/*
* We had N bottles of beer on the wall, we drank one, but now
* there's not N-1 bottles of beer left on the wall...
+ * Pouring two of the bottles together is acceptable.
*/
- DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1);
+ DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - merged);
/*
* Since reacquire_held_locks() would have called check_chain_key()
@@ -4319,7 +4429,7 @@ void lock_release(struct lockdep_map *lock, int nested,
check_flags(flags);
current->lockdep_recursion = 1;
trace_lock_release(lock, ip);
- if (__lock_release(lock, nested, ip))
+ if (__lock_release(lock, ip))
check_chain_key(current);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
@@ -4402,14 +4512,14 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
EXPORT_SYMBOL_GPL(lock_unpin_lock);
#ifdef CONFIG_LOCK_STAT
-static int
-print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
- unsigned long ip)
+static void print_lock_contention_bug(struct task_struct *curr,
+ struct lockdep_map *lock,
+ unsigned long ip)
{
if (!debug_locks_off())
- return 0;
+ return;
if (debug_locks_silent)
- return 0;
+ return;
pr_warn("\n");
pr_warn("=================================\n");
@@ -4427,8 +4537,6 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
pr_warn("\nstack backtrace:\n");
dump_stack();
-
- return 0;
}
static void
@@ -4573,9 +4681,7 @@ void lockdep_reset(void)
int i;
raw_local_irq_save(flags);
- current->curr_chain_key = 0;
- current->lockdep_depth = 0;
- current->lockdep_recursion = 0;
+ lockdep_init_task(current);
memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
nr_hardirq_chains = 0;
nr_softirq_chains = 0;
@@ -4615,9 +4721,9 @@ static void remove_class_from_lock_chain(struct pending_free *pf,
return;
recalc:
- chain_key = 0;
+ chain_key = INITIAL_CHAIN_KEY;
for (i = chain->base; i < chain->base + chain->depth; i++)
- chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+ chain_key = iterate_chain_key(chain_key, chain_hlocks[i]);
if (chain->depth && chain->chain_key == chain_key)
return;
/* Overwrite the chain key for concurrent RCU readers. */
@@ -4691,6 +4797,7 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
WRITE_ONCE(class->key, NULL);
WRITE_ONCE(class->name, NULL);
nr_lock_classes--;
+ __clear_bit(class - lock_classes, lock_classes_in_use);
} else {
WARN_ONCE(true, "%s() failed for class %s\n", __func__,
class->name);
@@ -5036,6 +5143,7 @@ void __init lockdep_init(void)
printk(" memory used by lock dependency info: %zu kB\n",
(sizeof(lock_classes) +
+ sizeof(lock_classes_in_use) +
sizeof(classhash_table) +
sizeof(list_entries) +
sizeof(list_entries_in_use) +
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 150ec3f0c5b5..cc83568d5012 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -131,7 +131,6 @@ extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
-extern unsigned int max_recursion_depth;
extern unsigned int max_bfs_queue_depth;
@@ -160,25 +159,22 @@ lockdep_count_backward_deps(struct lock_class *class)
* and we want to avoid too much cache bouncing.
*/
struct lockdep_stats {
- int chain_lookup_hits;
- int chain_lookup_misses;
- int hardirqs_on_events;
- int hardirqs_off_events;
- int redundant_hardirqs_on;
- int redundant_hardirqs_off;
- int softirqs_on_events;
- int softirqs_off_events;
- int redundant_softirqs_on;
- int redundant_softirqs_off;
- int nr_unused_locks;
- int nr_redundant_checks;
- int nr_redundant;
- int nr_cyclic_checks;
- int nr_cyclic_check_recursions;
- int nr_find_usage_forwards_checks;
- int nr_find_usage_forwards_recursions;
- int nr_find_usage_backwards_checks;
- int nr_find_usage_backwards_recursions;
+ unsigned long chain_lookup_hits;
+ unsigned int chain_lookup_misses;
+ unsigned long hardirqs_on_events;
+ unsigned long hardirqs_off_events;
+ unsigned long redundant_hardirqs_on;
+ unsigned long redundant_hardirqs_off;
+ unsigned long softirqs_on_events;
+ unsigned long softirqs_off_events;
+ unsigned long redundant_softirqs_on;
+ unsigned long redundant_softirqs_off;
+ int nr_unused_locks;
+ unsigned int nr_redundant_checks;
+ unsigned int nr_redundant;
+ unsigned int nr_cyclic_checks;
+ unsigned int nr_find_usage_forwards_checks;
+ unsigned int nr_find_usage_backwards_checks;
/*
* Per lock class locking operation stat counts
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 80a463d31a8d..c513031cd7e3 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -975,7 +975,7 @@ static int __init lock_torture_init(void)
goto unwind;
}
if (stutter > 0) {
- firsterr = torture_stutter_init(stutter);
+ firsterr = torture_stutter_init(stutter, stutter);
if (firsterr)
goto unwind;
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index b6a9cc62099a..364d38a0c444 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
- rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+ rcu_sync_init(&sem->rss);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
sem->readers_block = 0;
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
deleted file mode 100644
index 0b1f77957240..000000000000
--- a/kernel/locking/rwsem-xadd.c
+++ /dev/null
@@ -1,745 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* rwsem.c: R/W semaphores: contention handling functions
- *
- * Written by David Howells (dhowells@redhat.com).
- * Derived from arch/i386/kernel/semaphore.c
- *
- * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
- * and Michel Lespinasse <walken@google.com>
- *
- * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
- * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
- */
-#include <linux/rwsem.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/rt.h>
-#include <linux/sched/wake_q.h>
-#include <linux/sched/debug.h>
-#include <linux/osq_lock.h>
-
-#include "rwsem.h"
-
-/*
- * Guide to the rw_semaphore's count field for common values.
- * (32-bit case illustrated, similar for 64-bit)
- *
- * 0x0000000X (1) X readers active or attempting lock, no writer waiting
- * X = #active_readers + #readers attempting to lock
- * (X*ACTIVE_BIAS)
- *
- * 0x00000000 rwsem is unlocked, and no one is waiting for the lock or
- * attempting to read lock or write lock.
- *
- * 0xffff000X (1) X readers active or attempting lock, with waiters for lock
- * X = #active readers + # readers attempting lock
- * (X*ACTIVE_BIAS + WAITING_BIAS)
- * (2) 1 writer attempting lock, no waiters for lock
- * X-1 = #active readers + #readers attempting lock
- * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
- * (3) 1 writer active, no waiters for lock
- * X-1 = #active readers + #readers attempting lock
- * ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
- *
- * 0xffff0001 (1) 1 reader active or attempting lock, waiters for lock
- * (WAITING_BIAS + ACTIVE_BIAS)
- * (2) 1 writer active or attempting lock, no waiters for lock
- * (ACTIVE_WRITE_BIAS)
- *
- * 0xffff0000 (1) There are writers or readers queued but none active
- * or in the process of attempting lock.
- * (WAITING_BIAS)
- * Note: writer can attempt to steal lock for this count by adding
- * ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
- *
- * 0xfffe0001 (1) 1 writer active, or attempting lock. Waiters on queue.
- * (ACTIVE_WRITE_BIAS + WAITING_BIAS)
- *
- * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
- * the count becomes more than 0 for successful lock acquisition,
- * i.e. the case where there are only readers or nobody has lock.
- * (1st and 2nd case above).
- *
- * Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
- * checking the count becomes ACTIVE_WRITE_BIAS for successful lock
- * acquisition (i.e. nobody else has lock or attempts lock). If
- * unsuccessful, in rwsem_down_write_failed, we'll check to see if there
- * are only waiters but none active (5th case above), and attempt to
- * steal the lock.
- *
- */
-
-/*
- * Initialize an rwsem:
- */
-void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held semaphore:
- */
- debug_check_no_locks_freed((void *)sem, sizeof(*sem));
- lockdep_init_map(&sem->dep_map, name, key, 0);
-#endif
- atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
- raw_spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
- sem->owner = NULL;
- osq_lock_init(&sem->osq);
-#endif
-}
-
-EXPORT_SYMBOL(__init_rwsem);
-
-enum rwsem_waiter_type {
- RWSEM_WAITING_FOR_WRITE,
- RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
- struct list_head list;
- struct task_struct *task;
- enum rwsem_waiter_type type;
-};
-
-enum rwsem_wake_type {
- RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
- RWSEM_WAKE_READERS, /* Wake readers only */
- RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
-};
-
-/*
- * handle the lock release when processes blocked on it that can now run
- * - if we come here from up_xxxx(), then:
- * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
- * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- * - there must be someone on the queue
- * - the wait_lock must be held by the caller
- * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
- * to actually wakeup the blocked task(s) and drop the reference count,
- * preferably when the wait_lock is released
- * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only marked woken if downgrading is false
- */
-static void __rwsem_mark_wake(struct rw_semaphore *sem,
- enum rwsem_wake_type wake_type,
- struct wake_q_head *wake_q)
-{
- struct rwsem_waiter *waiter, *tmp;
- long oldcount, woken = 0, adjustment = 0;
- struct list_head wlist;
-
- /*
- * Take a peek at the queue head waiter such that we can determine
- * the wakeup(s) to perform.
- */
- waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
-
- if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
- if (wake_type == RWSEM_WAKE_ANY) {
- /*
- * Mark writer at the front of the queue for wakeup.
- * Until the task is actually later awoken later by
- * the caller, other writers are able to steal it.
- * Readers, on the other hand, will block as they
- * will notice the queued writer.
- */
- wake_q_add(wake_q, waiter->task);
- lockevent_inc(rwsem_wake_writer);
- }
-
- return;
- }
-
- /*
- * Writers might steal the lock before we grant it to the next reader.
- * We prefer to do the first reader grant before counting readers
- * so we can bail out early if a writer stole the lock.
- */
- if (wake_type != RWSEM_WAKE_READ_OWNED) {
- adjustment = RWSEM_ACTIVE_READ_BIAS;
- try_reader_grant:
- oldcount = atomic_long_fetch_add(adjustment, &sem->count);
- if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
- /*
- * If the count is still less than RWSEM_WAITING_BIAS
- * after removing the adjustment, it is assumed that
- * a writer has stolen the lock. We have to undo our
- * reader grant.
- */
- if (atomic_long_add_return(-adjustment, &sem->count) <
- RWSEM_WAITING_BIAS)
- return;
-
- /* Last active locker left. Retry waking readers. */
- goto try_reader_grant;
- }
- /*
- * Set it to reader-owned to give spinners an early
- * indication that readers now have the lock.
- */
- __rwsem_set_reader_owned(sem, waiter->task);
- }
-
- /*
- * Grant an infinite number of read locks to the readers at the front
- * of the queue. We know that woken will be at least 1 as we accounted
- * for above. Note we increment the 'active part' of the count by the
- * number of readers before waking any processes up.
- *
- * We have to do wakeup in 2 passes to prevent the possibility that
- * the reader count may be decremented before it is incremented. It
- * is because the to-be-woken waiter may not have slept yet. So it
- * may see waiter->task got cleared, finish its critical section and
- * do an unlock before the reader count increment.
- *
- * 1) Collect the read-waiters in a separate list, count them and
- * fully increment the reader count in rwsem.
- * 2) For each waiters in the new list, clear waiter->task and
- * put them into wake_q to be woken up later.
- */
- list_for_each_entry(waiter, &sem->wait_list, list) {
- if (waiter->type == RWSEM_WAITING_FOR_WRITE)
- break;
-
- woken++;
- }
- list_cut_before(&wlist, &sem->wait_list, &waiter->list);
-
- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
- lockevent_cond_inc(rwsem_wake_reader, woken);
- if (list_empty(&sem->wait_list)) {
- /* hit end of list above */
- adjustment -= RWSEM_WAITING_BIAS;
- }
-
- if (adjustment)
- atomic_long_add(adjustment, &sem->count);
-
- /* 2nd pass */
- list_for_each_entry_safe(waiter, tmp, &wlist, list) {
- struct task_struct *tsk;
-
- tsk = waiter->task;
- get_task_struct(tsk);
-
- /*
- * Ensure calling get_task_struct() before setting the reader
- * waiter to nil such that rwsem_down_read_failed() cannot
- * race with do_exit() by always holding a reference count
- * to the task to wakeup.
- */
- smp_store_release(&waiter->task, NULL);
- /*
- * Ensure issuing the wakeup (either by us or someone else)
- * after setting the reader waiter to nil.
- */
- wake_q_add_safe(wake_q, tsk);
- }
-}
-
-/*
- * This function must be called with the sem->wait_lock held to prevent
- * race conditions between checking the rwsem wait list and setting the
- * sem->count accordingly.
- */
-static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
-{
- /*
- * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
- */
- if (count != RWSEM_WAITING_BIAS)
- return false;
-
- /*
- * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
- * are other tasks on the wait list, we need to add on WAITING_BIAS.
- */
- count = list_is_singular(&sem->wait_list) ?
- RWSEM_ACTIVE_WRITE_BIAS :
- RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
-
- if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
- == RWSEM_WAITING_BIAS) {
- rwsem_set_owner(sem);
- return true;
- }
-
- return false;
-}
-
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-/*
- * Try to acquire write lock before the writer has been put on wait queue.
- */
-static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
-{
- long count = atomic_long_read(&sem->count);
-
- while (!count || count == RWSEM_WAITING_BIAS) {
- if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
- count + RWSEM_ACTIVE_WRITE_BIAS)) {
- rwsem_set_owner(sem);
- lockevent_inc(rwsem_opt_wlock);
- return true;
- }
- }
- return false;
-}
-
-static inline bool owner_on_cpu(struct task_struct *owner)
-{
- /*
- * As lock holder preemption issue, we both skip spinning if
- * task is not on cpu or its cpu is preempted
- */
- return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
-}
-
-static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
-{
- struct task_struct *owner;
- bool ret = true;
-
- BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN));
-
- if (need_resched())
- return false;
-
- rcu_read_lock();
- owner = READ_ONCE(sem->owner);
- if (owner) {
- ret = is_rwsem_owner_spinnable(owner) &&
- owner_on_cpu(owner);
- }
- rcu_read_unlock();
- return ret;
-}
-
-/*
- * Return true only if we can still spin on the owner field of the rwsem.
- */
-static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
-{
- struct task_struct *owner = READ_ONCE(sem->owner);
-
- if (!is_rwsem_owner_spinnable(owner))
- return false;
-
- rcu_read_lock();
- while (owner && (READ_ONCE(sem->owner) == owner)) {
- /*
- * Ensure we emit the owner->on_cpu, dereference _after_
- * checking sem->owner still matches owner, if that fails,
- * owner might point to free()d memory, if it still matches,
- * the rcu_read_lock() ensures the memory stays valid.
- */
- barrier();
-
- /*
- * abort spinning when need_resched or owner is not running or
- * owner's cpu is preempted.
- */
- if (need_resched() || !owner_on_cpu(owner)) {
- rcu_read_unlock();
- return false;
- }
-
- cpu_relax();
- }
- rcu_read_unlock();
-
- /*
- * If there is a new owner or the owner is not set, we continue
- * spinning.
- */
- return is_rwsem_owner_spinnable(READ_ONCE(sem->owner));
-}
-
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
-{
- bool taken = false;
-
- preempt_disable();
-
- /* sem->wait_lock should not be held when doing optimistic spinning */
- if (!rwsem_can_spin_on_owner(sem))
- goto done;
-
- if (!osq_lock(&sem->osq))
- goto done;
-
- /*
- * Optimistically spin on the owner field and attempt to acquire the
- * lock whenever the owner changes. Spinning will be stopped when:
- * 1) the owning writer isn't running; or
- * 2) readers own the lock as we can't determine if they are
- * actively running or not.
- */
- while (rwsem_spin_on_owner(sem)) {
- /*
- * Try to acquire the lock
- */
- if (rwsem_try_write_lock_unqueued(sem)) {
- taken = true;
- break;
- }
-
- /*
- * When there's no owner, we might have preempted between the
- * owner acquiring the lock and setting the owner field. If
- * we're an RT task that will live-lock because we won't let
- * the owner complete.
- */
- if (!sem->owner && (need_resched() || rt_task(current)))
- break;
-
- /*
- * The cpu_relax() call is a compiler barrier which forces
- * everything in this loop to be re-loaded. We don't need
- * memory barriers as we'll eventually observe the right
- * values at the cost of a few extra spins.
- */
- cpu_relax();
- }
- osq_unlock(&sem->osq);
-done:
- preempt_enable();
- lockevent_cond_inc(rwsem_opt_fail, !taken);
- return taken;
-}
-
-/*
- * Return true if the rwsem has active spinner
- */
-static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
-{
- return osq_is_locked(&sem->osq);
-}
-
-#else
-static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
-{
- return false;
-}
-
-static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
-{
- return false;
-}
-#endif
-
-/*
- * Wait for the read lock to be granted
- */
-static inline struct rw_semaphore __sched *
-__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
-{
- long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
- struct rwsem_waiter waiter;
- DEFINE_WAKE_Q(wake_q);
-
- waiter.task = current;
- waiter.type = RWSEM_WAITING_FOR_READ;
-
- raw_spin_lock_irq(&sem->wait_lock);
- if (list_empty(&sem->wait_list)) {
- /*
- * In case the wait queue is empty and the lock isn't owned
- * by a writer, this reader can exit the slowpath and return
- * immediately as its RWSEM_ACTIVE_READ_BIAS has already
- * been set in the count.
- */
- if (atomic_long_read(&sem->count) >= 0) {
- raw_spin_unlock_irq(&sem->wait_lock);
- rwsem_set_reader_owned(sem);
- lockevent_inc(rwsem_rlock_fast);
- return sem;
- }
- adjustment += RWSEM_WAITING_BIAS;
- }
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we're now waiting on the lock, but no longer actively locking */
- count = atomic_long_add_return(adjustment, &sem->count);
-
- /*
- * If there are no active locks, wake the front queued process(es).
- *
- * If there are no writers and we are first in the queue,
- * wake our own waiter to join the existing active readers !
- */
- if (count == RWSEM_WAITING_BIAS ||
- (count > RWSEM_WAITING_BIAS &&
- adjustment != -RWSEM_ACTIVE_READ_BIAS))
- __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
- raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
-
- /* wait to be given the lock */
- while (true) {
- set_current_state(state);
- if (!waiter.task)
- break;
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- break;
- }
- schedule();
- lockevent_inc(rwsem_sleep_reader);
- }
-
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock);
- return sem;
-out_nolock:
- list_del(&waiter.list);
- if (list_empty(&sem->wait_list))
- atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
- raw_spin_unlock_irq(&sem->wait_lock);
- __set_current_state(TASK_RUNNING);
- lockevent_inc(rwsem_rlock_fail);
- return ERR_PTR(-EINTR);
-}
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed(struct rw_semaphore *sem)
-{
- return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed);
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed_killable(struct rw_semaphore *sem)
-{
- return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed_killable);
-
-/*
- * Wait until we successfully acquire the write lock
- */
-static inline struct rw_semaphore *
-__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
-{
- long count;
- bool waiting = true; /* any queued threads before us */
- struct rwsem_waiter waiter;
- struct rw_semaphore *ret = sem;
- DEFINE_WAKE_Q(wake_q);
-
- /* undo write bias from down_write operation, stop active locking */
- count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
-
- /* do optimistic spinning and steal lock if possible */
- if (rwsem_optimistic_spin(sem))
- return sem;
-
- /*
- * Optimistic spinning failed, proceed to the slowpath
- * and block until we can acquire the sem.
- */
- waiter.task = current;
- waiter.type = RWSEM_WAITING_FOR_WRITE;
-
- raw_spin_lock_irq(&sem->wait_lock);
-
- /* account for this before adding a new element to the list */
- if (list_empty(&sem->wait_list))
- waiting = false;
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we're now waiting on the lock, but no longer actively locking */
- if (waiting) {
- count = atomic_long_read(&sem->count);
-
- /*
- * If there were already threads queued before us and there are
- * no active writers, the lock must be read owned; so we try to
- * wake any read locks that were queued ahead of us.
- */
- if (count > RWSEM_WAITING_BIAS) {
- __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
- /*
- * The wakeup is normally called _after_ the wait_lock
- * is released, but given that we are proactively waking
- * readers we can deal with the wake_q overhead as it is
- * similar to releasing and taking the wait_lock again
- * for attempting rwsem_try_write_lock().
- */
- wake_up_q(&wake_q);
-
- /*
- * Reinitialize wake_q after use.
- */
- wake_q_init(&wake_q);
- }
-
- } else
- count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
-
- /* wait until we successfully acquire the lock */
- set_current_state(state);
- while (true) {
- if (rwsem_try_write_lock(count, sem))
- break;
- raw_spin_unlock_irq(&sem->wait_lock);
-
- /* Block until there are no active lockers. */
- do {
- if (signal_pending_state(state, current))
- goto out_nolock;
-
- schedule();
- lockevent_inc(rwsem_sleep_writer);
- set_current_state(state);
- } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
-
- raw_spin_lock_irq(&sem->wait_lock);
- }
- __set_current_state(TASK_RUNNING);
- list_del(&waiter.list);
- raw_spin_unlock_irq(&sem->wait_lock);
- lockevent_inc(rwsem_wlock);
-
- return ret;
-
-out_nolock:
- __set_current_state(TASK_RUNNING);
- raw_spin_lock_irq(&sem->wait_lock);
- list_del(&waiter.list);
- if (list_empty(&sem->wait_list))
- atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
- else
- __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
- raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
- lockevent_inc(rwsem_wlock_fail);
-
- return ERR_PTR(-EINTR);
-}
-
-__visible struct rw_semaphore * __sched
-rwsem_down_write_failed(struct rw_semaphore *sem)
-{
- return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(rwsem_down_write_failed);
-
-__visible struct rw_semaphore * __sched
-rwsem_down_write_failed_killable(struct rw_semaphore *sem)
-{
- return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(rwsem_down_write_failed_killable);
-
-/*
- * handle waking up a waiter on the semaphore
- * - up_read/up_write has decremented the active part of count if we come here
- */
-__visible
-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
-{
- unsigned long flags;
- DEFINE_WAKE_Q(wake_q);
-
- /*
- * __rwsem_down_write_failed_common(sem)
- * rwsem_optimistic_spin(sem)
- * osq_unlock(sem->osq)
- * ...
- * atomic_long_add_return(&sem->count)
- *
- * - VS -
- *
- * __up_write()
- * if (atomic_long_sub_return_release(&sem->count) < 0)
- * rwsem_wake(sem)
- * osq_is_locked(&sem->osq)
- *
- * And __up_write() must observe !osq_is_locked() when it observes the
- * atomic_long_add_return() in order to not miss a wakeup.
- *
- * This boils down to:
- *
- * [S.rel] X = 1 [RmW] r0 = (Y += 0)
- * MB RMB
- * [RmW] Y += 1 [L] r1 = X
- *
- * exists (r0=1 /\ r1=0)
- */
- smp_rmb();
-
- /*
- * If a spinner is present, it is not necessary to do the wakeup.
- * Try to do wakeup only if the trylock succeeds to minimize
- * spinlock contention which may introduce too much delay in the
- * unlock operation.
- *
- * spinning writer up_write/up_read caller
- * --------------- -----------------------
- * [S] osq_unlock() [L] osq
- * MB RMB
- * [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
- *
- * Here, it is important to make sure that there won't be a missed
- * wakeup while the rwsem is free and the only spinning writer goes
- * to sleep without taking the rwsem. Even when the spinning writer
- * is just going to break out of the waiting loop, it will still do
- * a trylock in rwsem_down_write_failed() before sleeping. IOW, if
- * rwsem_has_spinner() is true, it will guarantee at least one
- * trylock attempt on the rwsem later on.
- */
- if (rwsem_has_spinner(sem)) {
- /*
- * The smp_rmb() here is to make sure that the spinner
- * state is consulted before reading the wait_lock.
- */
- smp_rmb();
- if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
- return sem;
- goto locked;
- }
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-locked:
-
- if (!list_empty(&sem->wait_list))
- __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- wake_up_q(&wake_q);
-
- return sem;
-}
-EXPORT_SYMBOL(rwsem_wake);
-
-/*
- * downgrade a write lock into a read lock
- * - caller incremented waiting part of count and discovered it still negative
- * - just wake up any readers at the front of the queue
- */
-__visible
-struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
-{
- unsigned long flags;
- DEFINE_WAKE_Q(wake_q);
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (!list_empty(&sem->wait_list))
- __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- wake_up_q(&wake_q);
-
- return sem;
-}
-EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index ccbf18f560ff..37524a47f002 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -3,17 +3,1438 @@
*
* Written by David Howells (dhowells@redhat.com).
* Derived from asm-i386/semaphore.h
+ *
+ * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
+ * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
+ *
+ * Rwsem count bit fields re-definition and rwsem rearchitecture by
+ * Waiman Long <longman@redhat.com> and
+ * Peter Zijlstra <peterz@infradead.org>.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/rt.h>
+#include <linux/sched/task.h>
#include <linux/sched/debug.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include "rwsem.h"
+#include "lock_events.h"
+
+/*
+ * The least significant 3 bits of the owner value have the following
+ * meanings when set.
+ * - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
+ * - Bit 1: RWSEM_RD_NONSPINNABLE - Readers cannot spin on this lock.
+ * - Bit 2: RWSEM_WR_NONSPINNABLE - Writers cannot spin on this lock.
+ *
+ * When the rwsem is either owned by an anonymous writer, or it is
+ * reader-owned, but a spinning writer has timed out, both nonspinnable
+ * bits will be set to disable optimistic spinning by readers and writers.
+ * In the latter case, the last unlocking reader should then check the
+ * writer nonspinnable bit and clear it only to give writers preference
+ * to acquire the lock via optimistic spinning, but not readers. Similar
+ * action is also done in the reader slowpath.
+ *
+ * When a writer acquires a rwsem, it puts its task_struct pointer
+ * into the owner field. It is cleared after an unlock.
+ *
+ * When a reader acquires a rwsem, it will also put its task_struct
+ * pointer into the owner field with the RWSEM_READER_OWNED bit set.
+ * On unlock, the owner field will largely be left untouched. So
+ * for a free or reader-owned rwsem, the owner value may contain
+ * information about the last reader that acquired the rwsem.
+ *
+ * That information may be helpful in debugging cases where the system
+ * seems to hang on a reader-owned rwsem, especially if only one reader
+ * is involved. Ideally we would like to track all the readers that own
+ * a rwsem, but the overhead is simply too big.
+ *
+ * Reader optimistic spinning is helpful when the reader critical section
+ * is short and there aren't that many readers around. It makes readers
+ * relatively preferred over writers. When a writer times out spinning
+ * on a reader-owned lock and sets the nonspinnable bits, there are two main
+ * reasons for that.
+ *
+ * 1) The reader critical section is long, perhaps the task sleeps after
+ * acquiring the read lock.
+ * 2) There are just too many readers contending the lock causing it to
+ * take a while to service all of them.
+ *
+ * In the former case, a long reader critical section will impede the progress
+ * of writers which is usually more important for system performance. In
+ * the latter case, reader optimistic spinning tends to make the reader
+ * groups that contain readers that acquire the lock together smaller
+ * leading to more of them. That may hurt performance in some cases. In
+ * other words, the setting of nonspinnable bits indicates that reader
+ * optimistic spinning may not be helpful for those workloads that cause
+ * it.
+ *
+ * Therefore, any writers that had observed the setting of the writer
+ * nonspinnable bit for a given rwsem after they fail to acquire the lock
+ * via optimistic spinning will set the reader nonspinnable bit once they
+ * acquire the write lock. Similarly, readers that observe the setting
+ * of reader nonspinnable bit at slowpath entry will set the reader
+ * nonspinnable bits when they acquire the read lock via the wakeup path.
+ *
+ * Once the reader nonspinnable bit is on, it will only be reset when
+ * a writer is able to acquire the rwsem in the fast path or somehow a
+ * reader or writer in the slowpath doesn't observe the nonspinnable bit.
+ *
+ * This is to discourage reader optimistic spinning on that particular
+ * rwsem and make writers more preferred. This adaptive disabling of reader
+ * optimistic spinning will alleviate the negative side effect of this
+ * feature.
+ */
+#define RWSEM_READER_OWNED (1UL << 0)
+#define RWSEM_RD_NONSPINNABLE (1UL << 1)
+#define RWSEM_WR_NONSPINNABLE (1UL << 2)
+#define RWSEM_NONSPINNABLE (RWSEM_RD_NONSPINNABLE | RWSEM_WR_NONSPINNABLE)
+#define RWSEM_OWNER_FLAGS_MASK (RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
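(Editorial aside, not part of the patch: the owner word packs a task_struct
pointer together with the flag bits above, so it can be unpacked with plain
mask operations. A minimal sketch using only the masks defined here, which is
essentially what the rwsem_owner_flags() helper added further below does:

	/* Sketch only: decode an owner word into its parts. */
	unsigned long owner = atomic_long_read(&sem->owner);
	unsigned long flags = owner & RWSEM_OWNER_FLAGS_MASK;	/* low 3 bits */
	struct task_struct *tsk =
		(struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);

	if (flags & RWSEM_READER_OWNED)
		pr_debug("last reader may have been %p\n", tsk);
)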
+
+#ifdef CONFIG_DEBUG_RWSEMS
+# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
+ if (!debug_locks_silent && \
+ WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
+ #c, atomic_long_read(&(sem)->count), \
+ atomic_long_read(&(sem)->owner), (long)current, \
+ list_empty(&(sem)->wait_list) ? "" : "not ")) \
+ debug_locks_off(); \
+ } while (0)
+#else
+# define DEBUG_RWSEMS_WARN_ON(c, sem)
+#endif
+
+/*
+ * On 64-bit architectures, the bit definitions of the count are:
+ *
+ * Bit 0 - writer locked bit
+ * Bit 1 - waiters present bit
+ * Bit 2 - lock handoff bit
+ * Bits 3-7 - reserved
+ * Bits 8-62 - 55-bit reader count
+ * Bit 63 - read fail bit
+ *
+ * On 32-bit architectures, the bit definitions of the count are:
+ *
+ * Bit 0 - writer locked bit
+ * Bit 1 - waiters present bit
+ * Bit 2 - lock handoff bit
+ * Bits 3-7 - reserved
+ * Bits 8-30 - 23-bit reader count
+ * Bit 31 - read fail bit
+ *
+ * It is not likely that the most significant bit (read fail bit) will ever
+ * be set. This guard bit is still checked anyway in the down_read() fastpath
+ * just in case we need to use up more of the reader bits for other purpose
+ * in the future.
+ *
+ * atomic_long_fetch_add() is used to obtain reader lock, whereas
+ * atomic_long_cmpxchg() will be used to obtain writer lock.
+ *
+ * There are three places where the lock handoff bit may be set or cleared.
+ * 1) rwsem_mark_wake() for readers.
+ * 2) rwsem_try_write_lock() for writers.
+ * 3) Error path of rwsem_down_write_slowpath().
+ *
+ * For all the above cases, wait_lock will be held. A writer must also
+ * be the first one in the wait_list to be eligible for setting the handoff
+ * bit. So concurrent setting/clearing of handoff bit is not possible.
+ */
+#define RWSEM_WRITER_LOCKED (1UL << 0)
+#define RWSEM_FLAG_WAITERS (1UL << 1)
+#define RWSEM_FLAG_HANDOFF (1UL << 2)
+#define RWSEM_FLAG_READFAIL (1UL << (BITS_PER_LONG - 1))
+
+#define RWSEM_READER_SHIFT 8
+#define RWSEM_READER_BIAS (1UL << RWSEM_READER_SHIFT)
+#define RWSEM_READER_MASK (~(RWSEM_READER_BIAS - 1))
+#define RWSEM_WRITER_MASK RWSEM_WRITER_LOCKED
+#define RWSEM_LOCK_MASK (RWSEM_WRITER_MASK|RWSEM_READER_MASK)
+#define RWSEM_READ_FAILED_MASK (RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
+ RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
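(Editorial aside, not part of the patch: to make the layout concrete, a count
word can be unpacked with the masks above. On 64-bit, for example, a value of
0x306 means three readers hold the lock and both the waiters and handoff bits
are set. A minimal sketch:

	/* Sketch only: decode a count word using the masks defined above. */
	long count = atomic_long_read(&sem->count);
	long readers = (count & RWSEM_READER_MASK) >> RWSEM_READER_SHIFT;
	bool wlocked = count & RWSEM_WRITER_LOCKED;	/* bit 0 */
	bool waiters = count & RWSEM_FLAG_WAITERS;	/* bit 1 */
	bool handoff = count & RWSEM_FLAG_HANDOFF;	/* bit 2 */
)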
+
+/*
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
+ * store tearing can't happen as optimistic spinners may read and use
+ * the owner value concurrently without the lock. Reads from owner, however,
+ * may not need READ_ONCE() as long as the pointer value is only used
+ * for comparison and isn't being dereferenced.
+ */
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+ atomic_long_set(&sem->owner, (long)current);
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+ atomic_long_set(&sem->owner, 0);
+}
+
+/*
+ * Test the flags in the owner field.
+ */
+static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
+{
+ return atomic_long_read(&sem->owner) & flags;
+}
+
+/*
+ * The task_struct pointer of the last owning reader will be left in
+ * the owner field.
+ *
+ * Note that the owner value just indicates the task has owned the rwsem
+ * previously; it may not be the real owner or one of the real owners
+ * anymore when that field is examined, so take it with a grain of salt.
+ *
+ * The reader non-spinnable bit is preserved.
+ */
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+ struct task_struct *owner)
+{
+ unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
+ (atomic_long_read(&sem->owner) & RWSEM_RD_NONSPINNABLE);
+
+ atomic_long_set(&sem->owner, val);
+}
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+ __rwsem_set_reader_owned(sem, current);
+}
+
+/*
+ * Return true if the rwsem is owned by a reader.
+ */
+static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
+{
+#ifdef CONFIG_DEBUG_RWSEMS
+ /*
+ * Check the count to see if it is write-locked.
+ */
+ long count = atomic_long_read(&sem->count);
+
+ if (count & RWSEM_WRITER_MASK)
+ return false;
+#endif
+ return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
+}
+
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
+ * is a task pointer in the owner field of a reader-owned rwsem, it will be the
+ * real owner or one of the real owners. The only exception is when the
+ * unlock is done by up_read_non_owner().
+ */
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+ unsigned long val = atomic_long_read(&sem->owner);
+
+ while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
+ if (atomic_long_try_cmpxchg(&sem->owner, &val,
+ val & RWSEM_OWNER_FLAGS_MASK))
+ return;
+ }
+}
+#else
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+}
+#endif
+
+/*
+ * Set the RWSEM_NONSPINNABLE bits if the RWSEM_READER_OWNED flag
+ * remains set. Otherwise, the operation will be aborted.
+ */
+static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
+{
+ unsigned long owner = atomic_long_read(&sem->owner);
+
+ do {
+ if (!(owner & RWSEM_READER_OWNED))
+ break;
+ if (owner & RWSEM_NONSPINNABLE)
+ break;
+ } while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
+ owner | RWSEM_NONSPINNABLE));
+}
+
+static inline bool rwsem_read_trylock(struct rw_semaphore *sem)
+{
+ long cnt = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
+ if (WARN_ON_ONCE(cnt < 0))
+ rwsem_set_nonspinnable(sem);
+ return !(cnt & RWSEM_READ_FAILED_MASK);
+}
+
+/*
+ * Return just the real task structure pointer of the owner
+ */
+static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+{
+ return (struct task_struct *)
+ (atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
+/*
+ * Return the real task structure pointer of the owner and the embedded
+ * flags in the owner. pflags must be non-NULL.
+ */
+static inline struct task_struct *
+rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
+{
+ unsigned long owner = atomic_long_read(&sem->owner);
+
+ *pflags = owner & RWSEM_OWNER_FLAGS_MASK;
+ return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
+}
+
+/*
+ * Guide to the rw_semaphore's count field.
+ *
+ * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
+ * by a writer.
+ *
+ * The lock is owned by readers when
+ * (1) the RWSEM_WRITER_LOCKED isn't set in count,
+ * (2) some of the reader bits are set in count, and
+ * (3) the owner field has the RWSEM_READER_OWNED bit set.
+ *
+ * Having some reader bits set is not enough to guarantee a reader-owned
+ * lock as the readers may be in the process of backing out from the count
+ * and a writer has just released the lock. So another writer may steal
+ * the lock immediately after that.
+ */
+
+/*
+ * Initialize an rwsem:
+ */
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held semaphore:
+ */
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+ atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
+ raw_spin_lock_init(&sem->wait_lock);
+ INIT_LIST_HEAD(&sem->wait_list);
+ atomic_long_set(&sem->owner, 0L);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ osq_lock_init(&sem->osq);
+#endif
+}
+EXPORT_SYMBOL(__init_rwsem);
+
+enum rwsem_waiter_type {
+ RWSEM_WAITING_FOR_WRITE,
+ RWSEM_WAITING_FOR_READ
+};
+
+struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ enum rwsem_waiter_type type;
+ unsigned long timeout;
+ unsigned long last_rowner;
+};
+#define rwsem_first_waiter(sem) \
+ list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
+
+enum rwsem_wake_type {
+ RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
+ RWSEM_WAKE_READERS, /* Wake readers only */
+ RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
+};
+
+enum writer_wait_state {
+ WRITER_NOT_FIRST, /* Writer is not first in wait list */
+ WRITER_FIRST, /* Writer is first in wait list */
+ WRITER_HANDOFF /* Writer is first & handoff needed */
+};
+
+/*
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
+ * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
+ * queue before initiating the handoff protocol.
+ */
+#define RWSEM_WAIT_TIMEOUT DIV_ROUND_UP(HZ, 250)
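(Editorial aside, not part of the patch: worked out for common HZ values:

	/*
	 * RWSEM_WAIT_TIMEOUT = DIV_ROUND_UP(HZ, 250):
	 *	HZ = 1000 -> 4 jiffies = 4 ms
	 *	HZ =  250 -> 1 jiffy   = 4 ms
	 *	HZ =  100 -> 1 jiffy   = 10 ms
	 * i.e. at least about 4 ms, but never less than one jiffy.
	 */
)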
+
+/*
+ * Magic number to batch-wakeup waiting readers, even when writers are
+ * also present in the queue. This both limits the amount of work the
+ * waking thread must do and also prevents any potential counter overflow,
+ * however unlikely.
+ */
+#define MAX_READERS_WAKEUP 0x100
+
+/*
+ * handle the lock release when there are processes blocked on it that can now run
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
+ * have been set.
+ * - there must be someone on the queue
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
+ * to actually wakeup the blocked task(s) and drop the reference count,
+ * preferably when the wait_lock is released
+ * - woken process blocks are discarded from the list after having their task zeroed
+ * - writers are only marked woken if downgrading is false
+ */
+static void rwsem_mark_wake(struct rw_semaphore *sem,
+ enum rwsem_wake_type wake_type,
+ struct wake_q_head *wake_q)
+{
+ struct rwsem_waiter *waiter, *tmp;
+ long oldcount, woken = 0, adjustment = 0;
+ struct list_head wlist;
+
+ lockdep_assert_held(&sem->wait_lock);
+
+ /*
+ * Take a peek at the queue head waiter such that we can determine
+ * the wakeup(s) to perform.
+ */
+ waiter = rwsem_first_waiter(sem);
+
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
+ if (wake_type == RWSEM_WAKE_ANY) {
+ /*
+ * Mark writer at the front of the queue for wakeup.
+ * Until the task is actually awoken later by
+ * the caller, other writers are able to steal it.
+ * Readers, on the other hand, will block as they
+ * will notice the queued writer.
+ */
+ wake_q_add(wake_q, waiter->task);
+ lockevent_inc(rwsem_wake_writer);
+ }
+
+ return;
+ }
+
+ /*
+ * No reader wakeup if there are too many of them already.
+ */
+ if (unlikely(atomic_long_read(&sem->count) < 0))
+ return;
+
+ /*
+ * Writers might steal the lock before we grant it to the next reader.
+ * We prefer to do the first reader grant before counting readers
+ * so we can bail out early if a writer stole the lock.
+ */
+ if (wake_type != RWSEM_WAKE_READ_OWNED) {
+ struct task_struct *owner;
+
+ adjustment = RWSEM_READER_BIAS;
+ oldcount = atomic_long_fetch_add(adjustment, &sem->count);
+ if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
+ /*
+ * When we've been waiting "too" long (for writers
+ * to give up the lock), request a HANDOFF to
+ * force the issue.
+ */
+ if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
+ time_after(jiffies, waiter->timeout)) {
+ adjustment -= RWSEM_FLAG_HANDOFF;
+ lockevent_inc(rwsem_rlock_handoff);
+ }
+
+ atomic_long_add(-adjustment, &sem->count);
+ return;
+ }
+ /*
+ * Set it to reader-owned to give spinners an early
+ * indication that readers now have the lock.
+ * The reader nonspinnable bit seen at slowpath entry of
+ * the reader is copied over.
+ */
+ owner = waiter->task;
+ if (waiter->last_rowner & RWSEM_RD_NONSPINNABLE) {
+ owner = (void *)((unsigned long)owner | RWSEM_RD_NONSPINNABLE);
+ lockevent_inc(rwsem_opt_norspin);
+ }
+ __rwsem_set_reader_owned(sem, owner);
+ }
+
+ /*
+ * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
+ * queue. We know that woken will be at least 1 as we accounted
+ * for above. Note we increment the 'active part' of the count by the
+ * number of readers before waking any processes up.
+ *
+ * This is an adaptation of the phase-fair R/W locks where at the
+ * reader phase (first waiter is a reader), all readers are eligible
+ * to acquire the lock at the same time irrespective of their order
+ * in the queue. The writers acquire the lock according to their
+ * order in the queue.
+ *
+ * We have to do wakeup in 2 passes to prevent the possibility that
+ * the reader count may be decremented before it is incremented. This
+ * is because the to-be-woken waiter may not have slept yet. So it
+ * may see waiter->task got cleared, finish its critical section and
+ * do an unlock before the reader count increment.
+ *
+ * 1) Collect the read-waiters in a separate list, count them and
+ * fully increment the reader count in rwsem.
+ * 2) For each waiter in the new list, clear waiter->task and
+ * put them into wake_q to be woken up later.
+ */
+ INIT_LIST_HEAD(&wlist);
+ list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+ if (waiter->type == RWSEM_WAITING_FOR_WRITE)
+ continue;
+
+ woken++;
+ list_move_tail(&waiter->list, &wlist);
+
+ /*
+ * Limit # of readers that can be woken up per wakeup call.
+ */
+ if (woken >= MAX_READERS_WAKEUP)
+ break;
+ }
+
+ adjustment = woken * RWSEM_READER_BIAS - adjustment;
+ lockevent_cond_inc(rwsem_wake_reader, woken);
+ if (list_empty(&sem->wait_list)) {
+ /* hit end of list above */
+ adjustment -= RWSEM_FLAG_WAITERS;
+ }
+
+ /*
+ * When we've woken a reader, we no longer need to force writers
+ * to give up the lock and we can clear HANDOFF.
+ */
+ if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
+ adjustment -= RWSEM_FLAG_HANDOFF;
+
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
+
+ /* 2nd pass */
+ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+ struct task_struct *tsk;
+
+ tsk = waiter->task;
+ get_task_struct(tsk);
+
+ /*
+ * Ensure calling get_task_struct() before setting the reader
+ * waiter to nil such that rwsem_down_read_slowpath() cannot
+ * race with do_exit() by always holding a reference count
+ * to the task to wakeup.
+ */
+ smp_store_release(&waiter->task, NULL);
+ /*
+ * Ensure issuing the wakeup (either by us or someone else)
+ * after setting the reader waiter to nil.
+ */
+ wake_q_add_safe(wake_q, tsk);
+ }
+}
+
+/*
+ * This function must be called with the sem->wait_lock held to prevent
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ *
+ * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
+ * bit is set or the lock is acquired with handoff bit cleared.
+ */
+static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+ enum writer_wait_state wstate)
+{
+ long count, new;
+
+ lockdep_assert_held(&sem->wait_lock);
+
+ count = atomic_long_read(&sem->count);
+ do {
+ bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
+
+ if (has_handoff && wstate == WRITER_NOT_FIRST)
+ return false;
+
+ new = count;
+
+ if (count & RWSEM_LOCK_MASK) {
+ if (has_handoff || (wstate != WRITER_HANDOFF))
+ return false;
+
+ new |= RWSEM_FLAG_HANDOFF;
+ } else {
+ new |= RWSEM_WRITER_LOCKED;
+ new &= ~RWSEM_FLAG_HANDOFF;
+
+ if (list_is_singular(&sem->wait_list))
+ new &= ~RWSEM_FLAG_WAITERS;
+ }
+ } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
+
+ /*
+ * We have either acquired the lock with handoff bit cleared or
+ * set the handoff bit.
+ */
+ if (new & RWSEM_FLAG_HANDOFF)
+ return false;
+
+ rwsem_set_owner(sem);
+ return true;
+}
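(Editorial aside, not part of the patch: the loop above is the standard
try_cmpxchg() retry idiom. On failure, atomic_long_try_cmpxchg_acquire()
refreshes 'count' with the value it actually observed, so each iteration
recomputes 'new' from current state. A generic sketch of the pattern, with
a hypothetical compute():

	long old = atomic_long_read(&v);
	long new;

	do {
		new = compute(old);	/* derive desired value, or bail out */
	} while (!atomic_long_try_cmpxchg_acquire(&v, &old, new));
)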
+
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+/*
+ * Try to acquire read lock before the reader is put on wait queue.
+ * Lock acquisition isn't allowed if the rwsem is locked or a writer handoff
+ * is ongoing.
+ */
+static inline bool rwsem_try_read_lock_unqueued(struct rw_semaphore *sem)
+{
+ long count = atomic_long_read(&sem->count);
+
+ if (count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))
+ return false;
+
+ count = atomic_long_fetch_add_acquire(RWSEM_READER_BIAS, &sem->count);
+ if (!(count & (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+ rwsem_set_reader_owned(sem);
+ lockevent_inc(rwsem_opt_rlock);
+ return true;
+ }
+
+ /* Back out the change */
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
+ return false;
+}
+
+/*
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+ long count = atomic_long_read(&sem->count);
+
+ while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
+ count | RWSEM_WRITER_LOCKED)) {
+ rwsem_set_owner(sem);
+ lockevent_inc(rwsem_opt_wlock);
+ return true;
+ }
+ }
+ return false;
+}
+
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+ /*
+ * Due to lock holder preemption, skip spinning if the
+ * task is not on a CPU or its CPU is preempted.
+ */
+ return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+ unsigned long nonspinnable)
+{
+ struct task_struct *owner;
+ unsigned long flags;
+ bool ret = true;
+
+ BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
+
+ if (need_resched()) {
+ lockevent_inc(rwsem_opt_fail);
+ return false;
+ }
+
+ preempt_disable();
+ rcu_read_lock();
+ owner = rwsem_owner_flags(sem, &flags);
+ if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner)))
+ ret = false;
+ rcu_read_unlock();
+ preempt_enable();
+
+ lockevent_cond_inc(rwsem_opt_fail, !ret);
+ return ret;
+}
+
+/*
+ * The rwsem_spin_on_owner() function returns the following 4 values
+ * depending on the lock owner state.
+ * OWNER_NULL : owner is currently NULL
+ * OWNER_WRITER: when owner changes and is a writer
+ * OWNER_READER: when owner changes and the new owner may be a reader.
+ * OWNER_NONSPINNABLE:
+ * when optimistic spinning has to stop because either the
+ * owner stops running, is unknown, or its timeslice has
+ * been used up.
+ */
+enum owner_state {
+ OWNER_NULL = 1 << 0,
+ OWNER_WRITER = 1 << 1,
+ OWNER_READER = 1 << 2,
+ OWNER_NONSPINNABLE = 1 << 3,
+};
+#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
+
+static inline enum owner_state
+rwsem_owner_state(struct task_struct *owner, unsigned long flags, unsigned long nonspinnable)
+{
+ if (flags & nonspinnable)
+ return OWNER_NONSPINNABLE;
+
+ if (flags & RWSEM_READER_OWNED)
+ return OWNER_READER;
+
+ return owner ? OWNER_WRITER : OWNER_NULL;
+}
+
+static noinline enum owner_state
+rwsem_spin_on_owner(struct rw_semaphore *sem, unsigned long nonspinnable)
+{
+ struct task_struct *new, *owner;
+ unsigned long flags, new_flags;
+ enum owner_state state;
+
+ owner = rwsem_owner_flags(sem, &flags);
+ state = rwsem_owner_state(owner, flags, nonspinnable);
+ if (state != OWNER_WRITER)
+ return state;
+
+ rcu_read_lock();
+ for (;;) {
+ if (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF) {
+ state = OWNER_NONSPINNABLE;
+ break;
+ }
+
+ new = rwsem_owner_flags(sem, &new_flags);
+ if ((new != owner) || (new_flags != flags)) {
+ state = rwsem_owner_state(new, new_flags, nonspinnable);
+ break;
+ }
+
+ /*
+ * Ensure we emit the owner->on_cpu dereference _after_
+ * checking sem->owner still matches owner, if that fails,
+ * owner might point to free()d memory, if it still matches,
+ * the rcu_read_lock() ensures the memory stays valid.
+ */
+ barrier();
+
+ if (need_resched() || !owner_on_cpu(owner)) {
+ state = OWNER_NONSPINNABLE;
+ break;
+ }
+
+ cpu_relax();
+ }
+ rcu_read_unlock();
+
+ return state;
+}
+
+/*
+ * Calculate reader-owned rwsem spinning threshold for writer
+ *
+ * The more readers own the rwsem, the longer it will take for them to
+ * wind down and free the rwsem. So the empirical formula used to
+ * determine the actual spinning time limit here is:
+ *
+ * Spinning threshold = (10 + nr_readers/2)us
+ *
+ * The limit is capped to a maximum of 25us (30 readers). This is just
+ * a heuristic and is subject to change in the future.
+ */
+static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
+{
+ long count = atomic_long_read(&sem->count);
+ int readers = count >> RWSEM_READER_SHIFT;
+ u64 delta;
+
+ if (readers > 30)
+ readers = 30;
+ delta = (20 + readers) * NSEC_PER_USEC / 2;
+
+ return sched_clock() + delta;
+}
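(Editorial aside, not part of the patch: a few worked examples of the
threshold arithmetic, delta = (20 + readers) * NSEC_PER_USEC / 2:

	/*
	 * readers =  4: (20 +  4) / 2 = 12 us
	 * readers = 20: (20 + 20) / 2 = 20 us
	 * readers >= 30 (capped):       25 us
	 */
)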
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+{
+ bool taken = false;
+ int prev_owner_state = OWNER_NULL;
+ int loop = 0;
+ u64 rspin_threshold = 0;
+ unsigned long nonspinnable = wlock ? RWSEM_WR_NONSPINNABLE
+ : RWSEM_RD_NONSPINNABLE;
+
+ preempt_disable();
+
+ /* sem->wait_lock should not be held when doing optimistic spinning */
+ if (!osq_lock(&sem->osq))
+ goto done;
+
+ /*
+ * Optimistically spin on the owner field and attempt to acquire the
+ * lock whenever the owner changes. Spinning will be stopped when:
+ * 1) the owning writer isn't running; or
+ * 2) readers own the lock and spinning time has exceeded limit.
+ */
+ for (;;) {
+ enum owner_state owner_state;
+
+ owner_state = rwsem_spin_on_owner(sem, nonspinnable);
+ if (!(owner_state & OWNER_SPINNABLE))
+ break;
+
+ /*
+ * Try to acquire the lock
+ */
+ taken = wlock ? rwsem_try_write_lock_unqueued(sem)
+ : rwsem_try_read_lock_unqueued(sem);
+
+ if (taken)
+ break;
+
+ /*
+ * Time-based reader-owned rwsem optimistic spinning
+ */
+ if (wlock && (owner_state == OWNER_READER)) {
+ /*
+ * Re-initialize rspin_threshold every time the
+ * owner state changes from non-reader to reader.
+ * This allows a writer to steal the lock in between
+ * 2 reader phases and have the threshold reset at
+ * the beginning of the 2nd reader phase.
+ */
+ if (prev_owner_state != OWNER_READER) {
+ if (rwsem_test_oflags(sem, nonspinnable))
+ break;
+ rspin_threshold = rwsem_rspin_threshold(sem);
+ loop = 0;
+ }
+
+ /*
+ * Check time threshold once every 16 iterations to
+ * avoid calling sched_clock() too frequently so
+ * as to reduce the average latency between the times
+ * when the lock becomes free and when the spinner
+ * is ready to do a trylock.
+ */
+ else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
+ rwsem_set_nonspinnable(sem);
+ lockevent_inc(rwsem_opt_nospin);
+ break;
+ }
+ }
+
+ /*
+ * An RT task cannot do optimistic spinning if it cannot
+ * be sure the lock holder is running or live-lock may
+ * happen if the current task and the lock holder happen
+ * to run in the same CPU. However, aborting optimistic
+ * spinning when a NULL owner is detected may miss some
+ * opportunities where spinning can continue without causing
+ * problems.
+ *
+ * There are 2 possible cases where an RT task may be able
+ * to continue spinning.
+ *
+ * 1) The lock owner is in the process of releasing the
+ * lock, sem->owner is cleared but the lock has not
+ * been released yet.
+ * 2) The lock was free and owner cleared, but another
+ * task just comes in and acquires the lock before
+ * we try to get it. The new owner may be a spinnable
+ * writer.
+ *
+ * To take advantage of the two scenarios listed above, the RT
+ * task is made to retry one more time to see if it can
+ * acquire the lock or continue spinning on the new owning
+ * writer. Of course, if the time lag is long enough or the
+ * new owner is not a writer or spinnable, the RT task will
+ * quit spinning.
+ *
+ * If the owner is a writer, the need_resched() check is
+ * done inside rwsem_spin_on_owner(). If the owner is not
+ * a writer, need_resched() check needs to be done here.
+ */
+ if (owner_state != OWNER_WRITER) {
+ if (need_resched())
+ break;
+ if (rt_task(current) &&
+ (prev_owner_state != OWNER_WRITER))
+ break;
+ }
+ prev_owner_state = owner_state;
+
+ /*
+ * The cpu_relax() call is a compiler barrier which forces
+ * everything in this loop to be re-loaded. We don't need
+ * memory barriers as we'll eventually observe the right
+ * values at the cost of a few extra spins.
+ */
+ cpu_relax();
+ }
+ osq_unlock(&sem->osq);
+done:
+ preempt_enable();
+ lockevent_cond_inc(rwsem_opt_fail, !taken);
+ return taken;
+}
+
+/*
+ * Clear the owner's RWSEM_WR_NONSPINNABLE bit if it is set. This should
+ * only be called when the reader count reaches 0.
+ *
+ * This gives writers a better chance to acquire the rwsem before
+ * readers when the rwsem was being held by readers for a relatively long
+ * period of time. A race can happen in which an optimistic spinner has
+ * just stolen the rwsem and set the owner, but just clearing the
+ * RWSEM_WR_NONSPINNABLE bit will do no harm anyway.
+ */
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem)
+{
+ if (rwsem_test_oflags(sem, RWSEM_WR_NONSPINNABLE))
+ atomic_long_andnot(RWSEM_WR_NONSPINNABLE, &sem->owner);
+}
+
+/*
+ * This function is called when the reader fails to acquire the lock via
+ * optimistic spinning. In this case we will still attempt a trylock if
+ * comparing the current rwsem state with the state at slowpath entry
+ * indicates that the reader is still in a valid reader phase.
+ * This happens when the following conditions are true:
+ *
+ * 1) The lock is currently reader owned, and
+ * 2) The lock was previously not reader-owned or the last read owner changed.
+ *
+ * In the former case, we have transitioned from a writer phase to a
+ * reader-phase while spinning. In the latter case, it means the reader
+ * phase hasn't ended when we entered the optimistic spinning loop. In
+ * both cases, the reader is eligible to acquire the lock. This is the
+ * secondary path where a read lock is acquired optimistically.
+ *
+ * The reader non-spinnable bit wasn't set at the time of entry or we
+ * would not be here at all.
+ */
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
+ unsigned long last_rowner)
+{
+ unsigned long owner = atomic_long_read(&sem->owner);
+
+ if (!(owner & RWSEM_READER_OWNED))
+ return false;
+
+ if (((owner ^ last_rowner) & ~RWSEM_OWNER_FLAGS_MASK) &&
+ rwsem_try_read_lock_unqueued(sem)) {
+ lockevent_inc(rwsem_opt_rlock2);
+ lockevent_add(rwsem_opt_fail, -1);
+ return true;
+ }
+ return false;
+}
+#else
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
+ unsigned long nonspinnable)
+{
+ return false;
+}
+
+static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem, bool wlock)
+{
+ return false;
+}
+
+static inline void clear_wr_nonspinnable(struct rw_semaphore *sem) { }
+
+static inline bool rwsem_reader_phase_trylock(struct rw_semaphore *sem,
+ unsigned long last_rowner)
+{
+ return false;
+}
+#endif
+
+/*
+ * Wait for the read lock to be granted
+ */
+static struct rw_semaphore __sched *
+rwsem_down_read_slowpath(struct rw_semaphore *sem, int state)
+{
+ long count, adjustment = -RWSEM_READER_BIAS;
+ struct rwsem_waiter waiter;
+ DEFINE_WAKE_Q(wake_q);
+ bool wake = false;
+
+ /*
+ * Save the current read-owner of rwsem, if available, and the
+ * reader nonspinnable bit.
+ */
+ waiter.last_rowner = atomic_long_read(&sem->owner);
+ if (!(waiter.last_rowner & RWSEM_READER_OWNED))
+ waiter.last_rowner &= RWSEM_RD_NONSPINNABLE;
+
+ if (!rwsem_can_spin_on_owner(sem, RWSEM_RD_NONSPINNABLE))
+ goto queue;
+
+ /*
+ * Undo read bias from down_read() and do optimistic spinning.
+ */
+ atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
+ adjustment = 0;
+ if (rwsem_optimistic_spin(sem, false)) {
+ /*
+ * Wake up other readers in the wait list if the front
+ * waiter is a reader.
+ */
+ if ((atomic_long_read(&sem->count) & RWSEM_FLAG_WAITERS)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (!list_empty(&sem->wait_list))
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
+ &wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+ }
+ return sem;
+ } else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
+ return sem;
+ }
+
+queue:
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_READ;
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (list_empty(&sem->wait_list)) {
+ /*
+ * In case the wait queue is empty and the lock isn't owned
+ * by a writer or has the handoff bit set, this reader can
+ * exit the slowpath and return immediately as its
+ * RWSEM_READER_BIAS has already been set in the count.
+ */
+ if (adjustment && !(atomic_long_read(&sem->count) &
+ (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+ raw_spin_unlock_irq(&sem->wait_lock);
+ rwsem_set_reader_owned(sem);
+ lockevent_inc(rwsem_rlock_fast);
+ return sem;
+ }
+ adjustment += RWSEM_FLAG_WAITERS;
+ }
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we're now waiting on the lock, but no longer actively locking */
+ if (adjustment)
+ count = atomic_long_add_return(adjustment, &sem->count);
+ else
+ count = atomic_long_read(&sem->count);
+
+ /*
+ * If there are no active locks, wake the front queued process(es).
+ *
+ * If there are no writers and we are first in the queue,
+ * wake our own waiter to join the existing active readers!
+ */
+ if (!(count & RWSEM_LOCK_MASK)) {
+ clear_wr_nonspinnable(sem);
+ wake = true;
+ }
+ if (wake || (!(count & RWSEM_WRITER_MASK) &&
+ (adjustment & RWSEM_FLAG_WAITERS)))
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+
+ /* wait to be given the lock */
+ while (true) {
+ set_current_state(state);
+ if (!waiter.task)
+ break;
+ if (signal_pending_state(state, current)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter.task)
+ goto out_nolock;
+ raw_spin_unlock_irq(&sem->wait_lock);
+ break;
+ }
+ schedule();
+ lockevent_inc(rwsem_sleep_reader);
+ }
+
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock);
+ return sem;
+out_nolock:
+ list_del(&waiter.list);
+ if (list_empty(&sem->wait_list)) {
+ atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
+ &sem->count);
+ }
+ raw_spin_unlock_irq(&sem->wait_lock);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock_fail);
+ return ERR_PTR(-EINTR);
+}
+
+/*
+ * This function is called by a write lock owner. So the owner value
+ * won't get changed by others.
+ */
+static inline void rwsem_disable_reader_optspin(struct rw_semaphore *sem,
+ bool disable)
+{
+ if (unlikely(disable)) {
+ atomic_long_or(RWSEM_RD_NONSPINNABLE, &sem->owner);
+ lockevent_inc(rwsem_opt_norspin);
+ }
+}
+
+/*
+ * Wait until we successfully acquire the write lock
+ */
+static struct rw_semaphore *
+rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+{
+ long count;
+ bool disable_rspin;
+ enum writer_wait_state wstate;
+ struct rwsem_waiter waiter;
+ struct rw_semaphore *ret = sem;
+ DEFINE_WAKE_Q(wake_q);
+
+ /* do optimistic spinning and steal lock if possible */
+ if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
+ rwsem_optimistic_spin(sem, true))
+ return sem;
+
+ /*
+ * Disable reader optimistic spinning for this rwsem after
+ * acquiring the write lock when the setting of the nonspinnable
+ * bits is observed.
+ */
+ disable_rspin = atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE;
+
+ /*
+ * Optimistic spinning failed, proceed to the slowpath
+ * and block until we can acquire the sem.
+ */
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_WRITE;
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+
+ /* account for this before adding a new element to the list */
+ wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we're now waiting on the lock */
+ if (wstate == WRITER_NOT_FIRST) {
+ count = atomic_long_read(&sem->count);
+
+ /*
+ * If there were already threads queued before us and:
+ * 1) there are no active locks, wake the front
+ * queued process(es) as the handoff bit might be set.
+ * 2) there are no active writers and some readers, the lock
+ * must be read-owned; so we try to wake any read lock
+ * waiters that were queued ahead of us.
+ */
+ if (count & RWSEM_WRITER_MASK)
+ goto wait;
+
+ rwsem_mark_wake(sem, (count & RWSEM_READER_MASK)
+ ? RWSEM_WAKE_READERS
+ : RWSEM_WAKE_ANY, &wake_q);
+
+ if (!wake_q_empty(&wake_q)) {
+ /*
+ * We want to minimize wait_lock hold time especially
+ * when a large number of readers are to be woken up.
+ */
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+ wake_q_init(&wake_q); /* Used again, reinit */
+ raw_spin_lock_irq(&sem->wait_lock);
+ }
+ } else {
+ atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
+ }
+
+wait:
+ /* wait until we successfully acquire the lock */
+ set_current_state(state);
+ while (true) {
+ if (rwsem_try_write_lock(sem, wstate))
+ break;
+
+ raw_spin_unlock_irq(&sem->wait_lock);
+
+ /* Block until there are no active lockers. */
+ for (;;) {
+ if (signal_pending_state(state, current))
+ goto out_nolock;
+
+ schedule();
+ lockevent_inc(rwsem_sleep_writer);
+ set_current_state(state);
+ /*
+ * If the HANDOFF bit is set, unconditionally do
+ * a trylock.
+ */
+ if (wstate == WRITER_HANDOFF)
+ break;
+
+ if ((wstate == WRITER_NOT_FIRST) &&
+ (rwsem_first_waiter(sem) == &waiter))
+ wstate = WRITER_FIRST;
+
+ count = atomic_long_read(&sem->count);
+ if (!(count & RWSEM_LOCK_MASK))
+ break;
+
+ /*
+ * The setting of the handoff bit is deferred
+ * until rwsem_try_write_lock() is called.
+ */
+ if ((wstate == WRITER_FIRST) && (rt_task(current) ||
+ time_after(jiffies, waiter.timeout))) {
+ wstate = WRITER_HANDOFF;
+ lockevent_inc(rwsem_wlock_handoff);
+ break;
+ }
+ }
+
+ raw_spin_lock_irq(&sem->wait_lock);
+ }
+ __set_current_state(TASK_RUNNING);
+ list_del(&waiter.list);
+ rwsem_disable_reader_optspin(sem, disable_rspin);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ lockevent_inc(rwsem_wlock);
+
+ return ret;
+
+out_nolock:
+ __set_current_state(TASK_RUNNING);
+ raw_spin_lock_irq(&sem->wait_lock);
+ list_del(&waiter.list);
+
+ if (unlikely(wstate == WRITER_HANDOFF))
+ atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
+
+ if (list_empty(&sem->wait_list))
+ atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
+ else
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+ lockevent_inc(rwsem_wlock_fail);
+
+ return ERR_PTR(-EINTR);
+}
+
+/*
+ * handle waking up a waiter on the semaphore
+ * - up_read/up_write has decremented the active part of count if we come here
+ */
+static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem, long count)
+{
+ unsigned long flags;
+ DEFINE_WAKE_Q(wake_q);
+
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (!list_empty(&sem->wait_list))
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ wake_up_q(&wake_q);
+
+ return sem;
+}
+
+/*
+ * downgrade a write lock into a read lock
+ * - caller incremented waiting part of count and discovered it still negative
+ * - just wake up any readers at the front of the queue
+ */
+static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+ DEFINE_WAKE_Q(wake_q);
+
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (!list_empty(&sem->wait_list))
+ rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ wake_up_q(&wake_q);
+
+ return sem;
+}
+
+/*
+ * lock for reading
+ */
+inline void __down_read(struct rw_semaphore *sem)
+{
+ if (!rwsem_read_trylock(sem)) {
+ rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+ } else {
+ rwsem_set_reader_owned(sem);
+ }
+}
+
+static inline int __down_read_killable(struct rw_semaphore *sem)
+{
+ if (!rwsem_read_trylock(sem)) {
+ if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE)))
+ return -EINTR;
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+ } else {
+ rwsem_set_reader_owned(sem);
+ }
+ return 0;
+}
+
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+ /*
+ * Optimize for the case when the rwsem is not locked at all.
+ */
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+ do {
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+ tmp + RWSEM_READER_BIAS)) {
+ rwsem_set_reader_owned(sem);
+ return 1;
+ }
+ } while (!(tmp & RWSEM_READ_FAILED_MASK));
+ return 0;
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+ RWSEM_WRITER_LOCKED)))
+ rwsem_down_write_slowpath(sem, TASK_UNINTERRUPTIBLE);
+ else
+ rwsem_set_owner(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+ if (unlikely(!atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+ RWSEM_WRITER_LOCKED))) {
+ if (IS_ERR(rwsem_down_write_slowpath(sem, TASK_KILLABLE)))
+ return -EINTR;
+ } else {
+ rwsem_set_owner(sem);
+ }
+ return 0;
+}
+
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+ RWSEM_WRITER_LOCKED)) {
+ rwsem_set_owner(sem);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * unlock after reading
+ */
+inline void __up_read(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+ rwsem_clear_reader_owned(sem);
+ tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
+ DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+ if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
+ RWSEM_FLAG_WAITERS)) {
+ clear_wr_nonspinnable(sem);
+ rwsem_wake(sem, tmp);
+ }
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ /*
+ * sem->owner may differ from current if the ownership is transferred
+ * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
+ */
+ DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
+ !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
+ rwsem_clear_owner(sem);
+ tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ rwsem_wake(sem, tmp);
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ /*
+ * When downgrading from exclusive to shared ownership,
+ * anything inside the write-locked region cannot leak
+ * into the read side. In contrast, anything in the
+ * read-locked region is ok to be re-ordered into the
+ * write side. As such, rely on RELEASE semantics.
+ */
+ DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
+ tmp = atomic_long_fetch_add_release(
+ -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
+ rwsem_set_reader_owned(sem);
+ if (tmp & RWSEM_FLAG_WAITERS)
+ rwsem_downgrade_wake(sem);
+}
/*
* lock for reading
@@ -25,7 +1446,6 @@ void __sched down_read(struct rw_semaphore *sem)
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
-
EXPORT_SYMBOL(down_read);
int __sched down_read_killable(struct rw_semaphore *sem)
@@ -40,7 +1460,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
return 0;
}
-
EXPORT_SYMBOL(down_read_killable);
/*
@@ -54,7 +1473,6 @@ int down_read_trylock(struct rw_semaphore *sem)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
return ret;
}
-
EXPORT_SYMBOL(down_read_trylock);
/*
@@ -64,10 +1482,8 @@ void __sched down_write(struct rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
-
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
-
EXPORT_SYMBOL(down_write);
/*
@@ -78,14 +1494,14 @@ int __sched down_write_killable(struct rw_semaphore *sem)
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
+ __down_write_killable)) {
rwsem_release(&sem->dep_map, 1, _RET_IP_);
return -EINTR;
}
return 0;
}
-
EXPORT_SYMBOL(down_write_killable);
/*
@@ -100,7 +1516,6 @@ int down_write_trylock(struct rw_semaphore *sem)
return ret;
}
-
EXPORT_SYMBOL(down_write_trylock);
/*
@@ -109,10 +1524,8 @@ EXPORT_SYMBOL(down_write_trylock);
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
-
__up_read(sem);
}
-
EXPORT_SYMBOL(up_read);
/*
@@ -121,10 +1534,8 @@ EXPORT_SYMBOL(up_read);
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
-
__up_write(sem);
}
-
EXPORT_SYMBOL(up_write);
/*
@@ -133,10 +1544,8 @@ EXPORT_SYMBOL(up_write);
void downgrade_write(struct rw_semaphore *sem)
{
lock_downgrade(&sem->dep_map, _RET_IP_);
-
__downgrade_write(sem);
}
-
EXPORT_SYMBOL(downgrade_write);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -145,40 +1554,32 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
-
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
}
-
EXPORT_SYMBOL(down_read_nested);
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
might_sleep();
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
-
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
-
EXPORT_SYMBOL(_down_write_nest_lock);
void down_read_non_owner(struct rw_semaphore *sem)
{
might_sleep();
-
__down_read(sem);
__rwsem_set_reader_owned(sem, NULL);
}
-
EXPORT_SYMBOL(down_read_non_owner);
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
-
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
-
EXPORT_SYMBOL(down_write_nested);
int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
@@ -186,23 +1587,21 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
- if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
+ __down_write_killable)) {
rwsem_release(&sem->dep_map, 1, _RET_IP_);
return -EINTR;
}
return 0;
}
-
EXPORT_SYMBOL(down_write_killable_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
- sem);
+ DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
__up_read(sem);
}
-
EXPORT_SYMBOL(up_read_non_owner);
#endif
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 64877f5294e3..2534ce49f648 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,304 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * The least significant 2 bits of the owner value has the following
- * meanings when set.
- * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
- * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
- * i.e. the owner(s) cannot be readily determined. It can be reader
- * owned or the owning writer is indeterminate.
- *
- * When a writer acquires a rwsem, it puts its task_struct pointer
- * into the owner field. It is cleared after an unlock.
- *
- * When a reader acquires a rwsem, it will also puts its task_struct
- * pointer into the owner field with both the RWSEM_READER_OWNED and
- * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
- * largely be left untouched. So for a free or reader-owned rwsem,
- * the owner value may contain information about the last reader that
- * acquires the rwsem. The anonymous bit is set because that particular
- * reader may or may not still own the lock.
- *
- * That information may be helpful in debugging cases where the system
- * seems to hang on a reader owned rwsem especially if only one reader
- * is involved. Ideally we would like to track all the readers that own
- * a rwsem, but the overhead is simply too big.
- */
-#include "lock_events.h"
-#define RWSEM_READER_OWNED (1UL << 0)
-#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
+#ifndef __INTERNAL_RWSEM_H
+#define __INTERNAL_RWSEM_H
+#include <linux/rwsem.h>
-#ifdef CONFIG_DEBUG_RWSEMS
-# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
- if (!debug_locks_silent && \
- WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
- #c, atomic_long_read(&(sem)->count), \
- (long)((sem)->owner), (long)current, \
- list_empty(&(sem)->wait_list) ? "" : "not ")) \
- debug_locks_off(); \
- } while (0)
-#else
-# define DEBUG_RWSEMS_WARN_ON(c, sem)
-#endif
+extern void __down_read(struct rw_semaphore *sem);
+extern void __up_read(struct rw_semaphore *sem);
-/*
- * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
- * Adapted largely from include/asm-i386/rwsem.h
- * by Paul Mackerras <paulus@samba.org>.
- */
-
-/*
- * the semaphore definition
- */
-#ifdef CONFIG_64BIT
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-/*
- * All writes to owner are protected by WRITE_ONCE() to make sure that
- * store tearing can't happen as optimistic spinners may read and use
- * the owner value concurrently without lock. Read from owner, however,
- * may not need READ_ONCE() as long as the pointer value is only used
- * for comparison and isn't being dereferenced.
- */
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
- WRITE_ONCE(sem->owner, current);
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
- WRITE_ONCE(sem->owner, NULL);
-}
-
-/*
- * The task_struct pointer of the last owning reader will be left in
- * the owner field.
- *
- * Note that the owner value just indicates the task has owned the rwsem
- * previously, it may not be the real owner or one of the real owners
- * anymore when that field is examined, so take it with a grain of salt.
- */
-static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
- struct task_struct *owner)
-{
- unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
- | RWSEM_ANONYMOUSLY_OWNED;
-
- WRITE_ONCE(sem->owner, (struct task_struct *)val);
-}
-
-static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
-{
- __rwsem_set_reader_owned(sem, current);
-}
-
-/*
- * Return true if the a rwsem waiter can spin on the rwsem's owner
- * and steal the lock, i.e. the lock is not anonymously owned.
- * N.B. !owner is considered spinnable.
- */
-static inline bool is_rwsem_owner_spinnable(struct task_struct *owner)
-{
- return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED);
-}
-
-/*
- * Return true if rwsem is owned by an anonymous writer or readers.
- */
-static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
-{
- return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
-}
-
-#ifdef CONFIG_DEBUG_RWSEMS
-/*
- * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
- * is a task pointer in owner of a reader-owned rwsem, it will be the
- * real owner or one of the real owners. The only exception is when the
- * unlock is done by up_read_non_owner().
- */
-#define rwsem_clear_reader_owned rwsem_clear_reader_owned
-static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
-{
- unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
- | RWSEM_ANONYMOUSLY_OWNED;
- if (READ_ONCE(sem->owner) == (struct task_struct *)val)
- cmpxchg_relaxed((unsigned long *)&sem->owner, val,
- RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
-}
-#endif
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
- struct task_struct *owner)
-{
-}
-
-static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
-{
-}
-#endif
-
-#ifndef rwsem_clear_reader_owned
-static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
-{
-}
-#endif
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
- rwsem_down_read_failed(sem);
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
- RWSEM_READER_OWNED), sem);
- } else {
- rwsem_set_reader_owned(sem);
- }
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
- RWSEM_READER_OWNED), sem);
- } else {
- rwsem_set_reader_owned(sem);
- }
- return 0;
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- /*
- * Optimize for the case when the rwsem is not locked at all.
- */
- long tmp = RWSEM_UNLOCKED_VALUE;
-
- lockevent_inc(rwsem_rtrylock);
- do {
- if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
- tmp + RWSEM_ACTIVE_READ_BIAS)) {
- rwsem_set_reader_owned(sem);
- return 1;
- }
- } while (tmp >= 0);
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count);
- if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
- rwsem_down_write_failed(sem);
- rwsem_set_owner(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count);
- if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- rwsem_set_owner(sem);
- return 0;
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long tmp;
-
- lockevent_inc(rwsem_wtrylock);
- tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (tmp == RWSEM_UNLOCKED_VALUE) {
- rwsem_set_owner(sem);
- return true;
- }
- return false;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
-
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
- sem);
- rwsem_clear_reader_owned(sem);
- tmp = atomic_long_dec_return_release(&sem->count);
- if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
- rwsem_clear_owner(sem);
- if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count) < 0))
- rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long tmp;
-
- /*
- * When downgrading from exclusive to shared ownership,
- * anything inside the write-locked region cannot leak
- * into the read side. In contrast, anything in the
- * read-locked region is ok to be re-ordered into the
- * write side. As such, rely on RELEASE semantics.
- */
- DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
- tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
- rwsem_set_reader_owned(sem);
- if (tmp < 0)
- rwsem_downgrade_wake(sem);
-}
+#endif /* __INTERNAL_RWSEM_H */
diff --git a/kernel/module.c b/kernel/module.c
index 80c7c09584cf..a2cee14a83f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3083,6 +3083,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->tracepoints_ptrs),
&mod->num_tracepoints);
#endif
+#ifdef CONFIG_TREE_SRCU
+ mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs",
+ sizeof(*mod->srcu_struct_ptrs),
+ &mod->num_srcu_structs);
+#endif
#ifdef CONFIG_BPF_EVENTS
mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
sizeof(*mod->bpf_raw_events),
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index f54bc7cb6c2d..6d726cef241c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -326,7 +326,7 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
}
read_lock(&tasklist_lock);
- force_sig(SIGKILL, pid_ns->child_reaper);
+ send_sig(SIGKILL, pid_ns->child_reaper, 1);
read_unlock(&tasklist_lock);
do_exit(0);
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 7d66ee68aaaf..0a9326f5f421 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -223,7 +223,7 @@ int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
* All CPUs of a domain must have the same micro-architecture
* since they all share the same table.
*/
- cap = arch_scale_cpu_capacity(NULL, cpu);
+ cap = arch_scale_cpu_capacity(cpu);
if (prev_cap && prev_cap != cap) {
pr_err("CPUs of %*pbl must have the same capacity\n",
cpumask_pr_args(span));
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8456b6e2205f..83a531cea2f3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
*/
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
- rcu_read_lock();
- __ptrace_link(child, new_parent, __task_cred(new_parent));
- rcu_read_unlock();
+ __ptrace_link(child, new_parent, current_cred());
}
/**
@@ -118,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child)
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 390aab20115e..5290b01de534 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -446,6 +446,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
enum rcutorture_type {
RCU_FLAVOR,
RCU_TASKS_FLAVOR,
+ RCU_TRIVIAL_FLAVOR,
SRCU_FLAVOR,
INVALID_RCU_FLAVOR
};
@@ -479,6 +480,10 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#endif
#endif
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
+long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
+#endif
+
#ifdef CONFIG_TINY_SRCU
static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index efaa5b3f4d3f..fce4e7e6f502 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -299,6 +299,7 @@ struct rcu_torture_ops {
int irq_capable;
int can_boost;
int extendables;
+ int slow_gps;
const char *name;
};
@@ -667,9 +668,51 @@ static struct rcu_torture_ops tasks_ops = {
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
+ .slow_gps = 1,
.name = "tasks"
};
+/*
+ * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
+ * This implementation does not necessarily work well with CPU hotplug.
+ */
+
+static void synchronize_rcu_trivial(void)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ rcutorture_sched_setaffinity(current->pid, cpumask_of(cpu));
+ WARN_ON_ONCE(raw_smp_processor_id() != cpu);
+ }
+}
+
+static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
+{
+ preempt_disable();
+ return 0;
+}
+
+static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
+{
+ preempt_enable();
+}
+
+static struct rcu_torture_ops trivial_ops = {
+ .ttype = RCU_TRIVIAL_FLAVOR,
+ .init = rcu_sync_torture_init,
+ .readlock = rcu_torture_read_lock_trivial,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock_trivial,
+ .get_gp_seq = rcu_no_completed,
+ .sync = synchronize_rcu_trivial,
+ .exp_sync = synchronize_rcu_trivial,
+ .fqs = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "trivial"
+};
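
This flavor is sound only because a CONFIG_PREEMPT=n reader is just a preempt-disabled region: once synchronize_rcu_trivial() has run on a CPU, any reader that began on that CPU before the visit must have finished. A hedged sketch of the reader-side contract it pairs with (illustrative names, not the rcutorture API):

	static int *trivial_protected;    /* illustrative shared pointer */

	static int trivial_reader(void)
	{
		int v;

		preempt_disable();                  /* "read lock": pins this CPU */
		v = READ_ONCE(*trivial_protected);  /* safe until preempt_enable() */
		preempt_enable();                   /* "read unlock" */
		return v;
	}
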
+
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@@ -1010,10 +1053,17 @@ rcu_torture_writer(void *arg)
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
- if (stutter_wait("rcu_torture_writer"))
+ if (stutter_wait("rcu_torture_writer") &&
+ !READ_ONCE(rcu_fwd_cb_nodelay) &&
+ !cur_ops->slow_gps &&
+ !torture_must_stop())
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
- if (list_empty(&rcu_tortures[i].rtort_free))
- WARN_ON_ONCE(1);
+ if (list_empty(&rcu_tortures[i].rtort_free) &&
+ rcu_access_pointer(rcu_torture_current) !=
+ &rcu_tortures[i]) {
+ rcu_ftrace_dump(DUMP_ALL);
+ WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
+ }
} while (!torture_must_stop());
/* Reset expediting back to unexpedited. */
if (expediting > 0)
@@ -1358,8 +1408,9 @@ rcu_torture_stats_print(void)
}
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
- pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
+ pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
+ rcu_torture_current ? "ver" : "VER",
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
@@ -1661,6 +1712,17 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
}
+// Give the scheduler a chance, even on nohz_full CPUs.
+static void rcu_torture_fwd_prog_cond_resched(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT) && IS_ENABLED(CONFIG_NO_HZ_FULL)) {
+ if (need_resched())
+ schedule();
+ } else {
+ cond_resched();
+ }
+}
+
/*
* Free all callbacks on the rcu_fwd_cb_head list, either because the
* test is over or because we hit an OOM event.
@@ -1674,16 +1736,18 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
for (;;) {
spin_lock_irqsave(&rcu_fwd_lock, flags);
rfcp = rcu_fwd_cb_head;
- if (!rfcp)
+ if (!rfcp) {
+ spin_unlock_irqrestore(&rcu_fwd_lock, flags);
break;
+ }
rcu_fwd_cb_head = rfcp->rfc_next;
if (!rcu_fwd_cb_head)
rcu_fwd_cb_tail = &rcu_fwd_cb_head;
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
kfree(rfcp);
freed++;
+ rcu_torture_fwd_prog_cond_resched();
}
- spin_unlock_irqrestore(&rcu_fwd_lock, flags);
return freed;
}
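
The reshaped loop frees the list one element at a time, dropping the lock and offering a scheduling point on every iteration instead of holding the lock across a potentially enormous free-up; the unlock moves inside the !rfcp branch so the lock is never released twice. A generic hedged sketch of the pattern (struct item and all names are illustrative):

	#include <linux/sched.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct item { struct item *next; };

	static unsigned long drain_list(spinlock_t *lock, struct item **headp)
	{
		struct item *it;
		unsigned long flags, freed = 0;

		for (;;) {
			spin_lock_irqsave(lock, flags);
			it = *headp;
			if (!it) {
				spin_unlock_irqrestore(lock, flags);
				break;
			}
			*headp = it->next;
			spin_unlock_irqrestore(lock, flags);
			kfree(it);
			freed++;
			cond_resched();  /* or a nohz_full-aware variant, as above */
		}
		return freed;
	}
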
@@ -1707,6 +1771,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
}
/* Tight loop containing cond_resched(). */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, true);
+ cur_ops->sync(); /* Later readers see above write. */
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 0);
cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
@@ -1724,7 +1790,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
udelay(10);
cur_ops->readunlock(idx);
if (!fwd_progress_need_resched || need_resched())
- cond_resched();
+ rcu_torture_fwd_prog_cond_resched();
}
(*tested_tries)++;
if (!time_before(jiffies, stopat) &&
@@ -1745,6 +1811,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
WARN_ON(READ_ONCE(fcs.stop) != 2);
destroy_rcu_head_on_stack(&fcs.rh);
}
+ schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
/* Carry out call_rcu() forward-progress testing. */
@@ -1765,6 +1833,8 @@ static void rcu_torture_fwd_prog_cr(void)
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
+ if (!cur_ops->call)
+ return; /* Can't do call_rcu() fwd prog without ->call. */
/* Loop continuously posting RCU callbacks. */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
@@ -1805,7 +1875,7 @@ static void rcu_torture_fwd_prog_cr(void)
rfcp->rfc_gps = 0;
}
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
- cond_resched();
+ rcu_torture_fwd_prog_cond_resched();
}
stoppedat = jiffies;
n_launders_cb_snap = READ_ONCE(n_launders_cb);
@@ -1814,7 +1884,6 @@ static void rcu_torture_fwd_prog_cr(void)
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree();
- WRITE_ONCE(rcu_fwd_cb_nodelay, false);
if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) {
WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
@@ -1825,6 +1894,8 @@ static void rcu_torture_fwd_prog_cr(void)
n_max_gps, n_max_cbs, cver, gps);
rcu_torture_fwd_cb_hist();
}
+ schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
+ WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
@@ -2240,7 +2311,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
- &busted_srcud_ops, &tasks_ops,
+ &busted_srcud_ops, &tasks_ops, &trivial_ops,
};
if (!torture_init_begin(torture_type, verbose))
@@ -2363,7 +2434,10 @@ rcu_torture_init(void)
if (stutter < 0)
stutter = 0;
if (stutter) {
- firsterr = torture_stutter_init(stutter * HZ);
+ int t;
+
+ t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
+ firsterr = torture_stutter_init(stutter * HZ, t);
if (firsterr)
goto unwind;
}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9b761e546de8..cf0e886314f2 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -831,8 +831,8 @@ static void srcu_leak_callback(struct rcu_head *rhp)
* srcu_read_lock(), and srcu_read_unlock() that are all passed the same
* srcu_struct structure.
*/
-void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
- rcu_callback_t func, bool do_norm)
+static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
+ rcu_callback_t func, bool do_norm)
{
unsigned long flags;
int idx;
@@ -1310,3 +1310,68 @@ void __init srcu_init(void)
queue_work(rcu_gp_wq, &ssp->work.work);
}
}
+
+#ifdef CONFIG_MODULES
+
+/* Initialize any global-scope srcu_struct structures used by this module. */
+static int srcu_module_coming(struct module *mod)
+{
+ int i;
+ struct srcu_struct **sspp = mod->srcu_struct_ptrs;
+ int ret;
+
+ for (i = 0; i < mod->num_srcu_structs; i++) {
+ ret = init_srcu_struct(*(sspp++));
+ if (WARN_ON_ONCE(ret))
+ return ret;
+ }
+ return 0;
+}
+
+/* Clean up any global-scope srcu_struct structures used by this module. */
+static void srcu_module_going(struct module *mod)
+{
+ int i;
+ struct srcu_struct **sspp = mod->srcu_struct_ptrs;
+
+ for (i = 0; i < mod->num_srcu_structs; i++)
+ cleanup_srcu_struct(*(sspp++));
+}
+
+/* Handle one module, either coming or going. */
+static int srcu_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ int ret = 0;
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ ret = srcu_module_coming(mod);
+ break;
+ case MODULE_STATE_GOING:
+ srcu_module_going(mod);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static struct notifier_block srcu_module_nb = {
+ .notifier_call = srcu_module_notify,
+ .priority = 0,
+};
+
+static __init int init_srcu_module_notifier(void)
+{
+ int ret;
+
+ ret = register_module_notifier(&srcu_module_nb);
+ if (ret)
+ pr_warn("Failed to register srcu module notifier\n");
+ return ret;
+}
+late_initcall(init_srcu_module_notifier);
+
+#endif /* #ifdef CONFIG_MODULES */
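
With the notifier in place, a module only declares its srcu_struct: DEFINE_STATIC_SRCU() places a pointer into the ___srcu_struct_ptrs section that the kernel/module.c hunk above collects, and init/cleanup then happen automatically at MODULE_STATE_COMING/GOING. A hedged sketch of module-side usage (names illustrative):

	#include <linux/module.h>
	#include <linux/srcu.h>

	DEFINE_STATIC_SRCU(demo_srcu);  /* no init_srcu_struct() call needed */

	static int demo_read(int *p)
	{
		int idx, v;

		idx = srcu_read_lock(&demo_srcu);
		v = READ_ONCE(*p);              /* SRCU read-side critical section */
		srcu_read_unlock(&demo_srcu, idx);
		return v;
	}
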
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index a8304d90573f..d4558ab7a07d 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -10,65 +10,18 @@
#include <linux/rcu_sync.h>
#include <linux/sched.h>
-#ifdef CONFIG_PROVE_RCU
-#define __INIT_HELD(func) .held = func,
-#else
-#define __INIT_HELD(func)
-#endif
-
-static const struct {
- void (*sync)(void);
- void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
- void (*wait)(void);
-#ifdef CONFIG_PROVE_RCU
- int (*held)(void);
-#endif
-} gp_ops[] = {
- [RCU_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_held)
- },
- [RCU_SCHED_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_sched_held)
- },
- [RCU_BH_SYNC] = {
- .sync = synchronize_rcu,
- .call = call_rcu,
- .wait = rcu_barrier,
- __INIT_HELD(rcu_read_lock_bh_held)
- },
-};
-
-enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
-enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
+enum { GP_IDLE = 0, GP_ENTER, GP_PASSED, GP_EXIT, GP_REPLAY };
#define rss_lock gp_wait.lock
-#ifdef CONFIG_PROVE_RCU
-void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
-{
- RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
- "suspicious rcu_sync_is_idle() usage");
-}
-
-EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
-#endif
-
/**
* rcu_sync_init() - Initialize an rcu_sync structure
* @rsp: Pointer to rcu_sync structure to be initialized
- * @type: Flavor of RCU with which to synchronize rcu_sync structure
*/
-void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
+void rcu_sync_init(struct rcu_sync *rsp)
{
memset(rsp, 0, sizeof(*rsp));
init_waitqueue_head(&rsp->gp_wait);
- rsp->gp_type = type;
}
/**
@@ -86,56 +39,26 @@ void rcu_sync_enter_start(struct rcu_sync *rsp)
rsp->gp_state = GP_PASSED;
}
-/**
- * rcu_sync_enter() - Force readers onto slowpath
- * @rsp: Pointer to rcu_sync structure to use for synchronization
- *
- * This function is used by updaters who need readers to make use of
- * a slowpath during the update. After this function returns, all
- * subsequent calls to rcu_sync_is_idle() will return false, which
- * tells readers to stay off their fastpaths. A later call to
- * rcu_sync_exit() re-enables reader slowpaths.
- *
- * When called in isolation, rcu_sync_enter() must wait for a grace
- * period, however, closely spaced calls to rcu_sync_enter() can
- * optimize away the grace-period wait via a state machine implemented
- * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
- */
-void rcu_sync_enter(struct rcu_sync *rsp)
-{
- bool need_wait, need_sync;
- spin_lock_irq(&rsp->rss_lock);
- need_wait = rsp->gp_count++;
- need_sync = rsp->gp_state == GP_IDLE;
- if (need_sync)
- rsp->gp_state = GP_PENDING;
- spin_unlock_irq(&rsp->rss_lock);
+static void rcu_sync_func(struct rcu_head *rhp);
- WARN_ON_ONCE(need_wait && need_sync);
- if (need_sync) {
- gp_ops[rsp->gp_type].sync();
- rsp->gp_state = GP_PASSED;
- wake_up_all(&rsp->gp_wait);
- } else if (need_wait) {
- wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
- } else {
- /*
- * Possible when there's a pending CB from a rcu_sync_exit().
- * Nobody has yet been allowed the 'fast' path and thus we can
- * avoid doing any sync(). The callback will get 'dropped'.
- */
- WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
- }
+static void rcu_sync_call(struct rcu_sync *rsp)
+{
+ call_rcu(&rsp->cb_head, rcu_sync_func);
}
/**
* rcu_sync_func() - Callback function managing reader access to fastpath
* @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
*
- * This function is passed to one of the call_rcu() functions by
+ * This function is passed to call_rcu() by rcu_sync_enter() and
* rcu_sync_exit(), so that it is invoked after a grace period following the
- * that invocation of rcu_sync_exit(). It takes action based on events that
+ * invocation of enter/exit.
+ *
+ * If it is called by rcu_sync_enter(), it signals that all the readers
+ * were switched onto the slow path.
+ *
+ * If it is called by rcu_sync_exit(), it takes action based on events that
* have taken place in the meantime, so that closely spaced rcu_sync_enter()
* and rcu_sync_exit() pairs need not wait for a grace period.
*
@@ -152,35 +75,88 @@ static void rcu_sync_func(struct rcu_head *rhp)
struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
unsigned long flags;
- WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
- WARN_ON_ONCE(rsp->cb_state == CB_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irqsave(&rsp->rss_lock, flags);
if (rsp->gp_count) {
/*
- * A new rcu_sync_begin() has happened; drop the callback.
+ * We're at least a GP after the GP_IDLE->GP_ENTER transition.
*/
- rsp->cb_state = CB_IDLE;
- } else if (rsp->cb_state == CB_REPLAY) {
+ WRITE_ONCE(rsp->gp_state, GP_PASSED);
+ wake_up_locked(&rsp->gp_wait);
+ } else if (rsp->gp_state == GP_REPLAY) {
/*
- * A new rcu_sync_exit() has happened; requeue the callback
- * to catch a later GP.
+ * A new rcu_sync_exit() has happened; requeue the callback to
+ * catch a later GP.
*/
- rsp->cb_state = CB_PENDING;
- gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ rcu_sync_call(rsp);
} else {
/*
- * We're at least a GP after rcu_sync_exit(); eveybody will now
- * have observed the write side critical section. Let 'em rip!.
+ * We're at least a GP after the last rcu_sync_exit(); everybody
+ * will now have observed the write side critical section.
+ * Let 'em rip!
*/
- rsp->cb_state = CB_IDLE;
- rsp->gp_state = GP_IDLE;
+ WRITE_ONCE(rsp->gp_state, GP_IDLE);
}
spin_unlock_irqrestore(&rsp->rss_lock, flags);
}
/**
- * rcu_sync_exit() - Allow readers back onto fast patch after grace period
+ * rcu_sync_enter() - Force readers onto slowpath
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * This function is used by updaters who need readers to make use of
+ * a slowpath during the update. After this function returns, all
+ * subsequent calls to rcu_sync_is_idle() will return false, which
+ * tells readers to stay off their fastpaths. A later call to
+ * rcu_sync_exit() re-enables reader slowpaths.
+ *
+ * When called in isolation, rcu_sync_enter() must wait for a grace
+ * period, however, closely spaced calls to rcu_sync_enter() can
+ * optimize away the grace-period wait via a state machine implemented
+ * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func().
+ */
+void rcu_sync_enter(struct rcu_sync *rsp)
+{
+ int gp_state;
+
+ spin_lock_irq(&rsp->rss_lock);
+ gp_state = rsp->gp_state;
+ if (gp_state == GP_IDLE) {
+ WRITE_ONCE(rsp->gp_state, GP_ENTER);
+ WARN_ON_ONCE(rsp->gp_count);
+ /*
+ * Note that we could simply do rcu_sync_call(rsp) here and
+ * avoid the "if (gp_state == GP_IDLE)" block below.
+ *
+ * However, synchronize_rcu() can be faster if rcu_expedited
+ * or rcu_blocking_is_gp() is true.
+ *
+ * Another reason is that we can't wait for an RCU callback if
+ * we are called at early boot time, but this shouldn't happen.
+ */
+ }
+ rsp->gp_count++;
+ spin_unlock_irq(&rsp->rss_lock);
+
+ if (gp_state == GP_IDLE) {
+ /*
+ * See the comment above; this simply does the "synchronous"
+ * call_rcu(rcu_sync_func) which does GP_ENTER -> GP_PASSED.
+ */
+ synchronize_rcu();
+ rcu_sync_func(&rsp->cb_head);
+ /* Not really needed, wait_event() would see GP_PASSED. */
+ return;
+ }
+
+ wait_event(rsp->gp_wait, READ_ONCE(rsp->gp_state) >= GP_PASSED);
+}
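
Together with rcu_sync_exit() and rcu_sync_func(), gp_state now walks a single five-state machine (GP_IDLE -> GP_ENTER -> GP_PASSED -> GP_EXIT -> GP_REPLAY) that replaces the old gp_state/cb_state pair. A hedged sketch of the updater-side usage this serves (percpu_rwsem is the main in-tree user; names here are illustrative):

	#include <linux/rcu_sync.h>

	static struct rcu_sync demo_rss;  /* rcu_sync_init(&demo_rss) at setup */

	static void demo_update(void)
	{
		rcu_sync_enter(&demo_rss);   /* GP_IDLE -> GP_ENTER -> GP_PASSED;
					      * rcu_sync_is_idle() is now false,
					      * readers take their slowpath */
		/* ... perform the update ... */
		rcu_sync_exit(&demo_rss);    /* GP_PASSED -> GP_EXIT, then back
					      * to GP_IDLE after a grace period */
	}
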
+
+/**
+ * rcu_sync_exit() - Allow readers back onto fast path after grace period
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* This function is used by updaters who have completed, and can therefore
@@ -191,13 +167,16 @@ static void rcu_sync_func(struct rcu_head *rhp)
*/
void rcu_sync_exit(struct rcu_sync *rsp)
{
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
+
spin_lock_irq(&rsp->rss_lock);
if (!--rsp->gp_count) {
- if (rsp->cb_state == CB_IDLE) {
- rsp->cb_state = CB_PENDING;
- gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
- } else if (rsp->cb_state == CB_PENDING) {
- rsp->cb_state = CB_REPLAY;
+ if (rsp->gp_state == GP_PASSED) {
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ rcu_sync_call(rsp);
+ } else if (rsp->gp_state == GP_EXIT) {
+ WRITE_ONCE(rsp->gp_state, GP_REPLAY);
}
}
spin_unlock_irq(&rsp->rss_lock);
@@ -209,18 +188,19 @@ void rcu_sync_exit(struct rcu_sync *rsp)
*/
void rcu_sync_dtor(struct rcu_sync *rsp)
{
- int cb_state;
+ int gp_state;
- WARN_ON_ONCE(rsp->gp_count);
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
+ WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irq(&rsp->rss_lock);
- if (rsp->cb_state == CB_REPLAY)
- rsp->cb_state = CB_PENDING;
- cb_state = rsp->cb_state;
+ if (rsp->gp_state == GP_REPLAY)
+ WRITE_ONCE(rsp->gp_state, GP_EXIT);
+ gp_state = rsp->gp_state;
spin_unlock_irq(&rsp->rss_lock);
- if (cb_state != CB_IDLE) {
- gp_ops[rsp->gp_type].wait();
- WARN_ON_ONCE(rsp->cb_state != CB_IDLE);
+ if (gp_state != GP_IDLE) {
+ rcu_barrier();
+ WARN_ON_ONCE(rsp->gp_state != GP_IDLE);
}
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 980ca3ca643f..a14e5fbbea46 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
#include "tree.h"
#include "rcu.h"
@@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
+/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
+static bool use_softirq = 1;
+module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
@@ -138,7 +147,6 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
-static void invoke_rcu_callbacks(struct rcu_data *rdp);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
@@ -368,19 +376,33 @@ static void __maybe_unused rcu_momentary_dyntick_idle(void)
}
/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
*
- * If the current CPU is idle or running at a first-level (not nested)
+ * If the current CPU is idle and running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
- return __this_cpu_read(rcu_data.dynticks_nesting) <= 0 &&
- __this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 1;
+ /* Called only from within the scheduling-clock interrupt */
+ lockdep_assert_in_irq();
+
+ /* Check for counter underflows */
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
+ "RCU dynticks_nesting counter underflow!");
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
+ "RCU dynticks_nmi_nesting counter underflow/zero!");
+
+ /* Are we at first interrupt nesting level? */
+ if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
+ return false;
+
+ /* Does CPU appear to be idle from an RCU standpoint? */
+ return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
-#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */
+#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
+#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
static long qhimark = DEFAULT_RCU_QHIMARK;
@@ -2113,7 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
/* Reinstate batch limit if we have worked down the excess. */
count = rcu_segcblist_n_cbs(&rdp->cblist);
- if (rdp->blimit == LONG_MAX && count <= qlowmark)
+ if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
rdp->blimit = blimit;
/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
@@ -2253,7 +2275,7 @@ void rcu_force_quiescent_state(void)
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/* Perform RCU core processing work for the current CPU. */
-static __latent_entropy void rcu_core(struct softirq_action *unused)
+static __latent_entropy void rcu_core(void)
{
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2287,37 +2309,126 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
/* If there are callbacks ready, invoke them. */
- if (rcu_segcblist_ready_cbs(&rdp->cblist))
- invoke_rcu_callbacks(rdp);
+ if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
+ likely(READ_ONCE(rcu_scheduler_fully_active)))
+ rcu_do_batch(rdp);
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
}
+static void rcu_core_si(struct softirq_action *h)
+{
+ rcu_core();
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+ /*
+ * If the thread is yielding, only wake it when this
+ * is invoked from idle
+ */
+ if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+ wake_up_process(t);
+}
+
+static void invoke_rcu_core_kthread(void)
+{
+ struct task_struct *t;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+ t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+ if (t != NULL && t != current)
+ rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+ local_irq_restore(flags);
+}
+
/*
- * Schedule RCU callback invocation. If the running implementation of RCU
- * does not support RCU priority boosting, just do a direct call, otherwise
- * wake up the per-CPU kernel kthread. Note that because we are running
- * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
*/
-static void invoke_rcu_callbacks(struct rcu_data *rdp)
+static void invoke_rcu_core(void)
{
- if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
- return;
- if (likely(!rcu_state.boost)) {
- rcu_do_batch(rdp);
+ if (!cpu_online(smp_processor_id()))
return;
+ if (use_softirq)
+ raise_softirq(RCU_SOFTIRQ);
+ else
+ invoke_rcu_core_kthread();
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+ per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+ return __this_cpu_read(rcu_data.rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks. This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+ unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+ char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+ int spincnt;
+
+ for (spincnt = 0; spincnt < 10; spincnt++) {
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+ local_bh_disable();
+ *statusp = RCU_KTHREAD_RUNNING;
+ local_irq_disable();
+ work = *workp;
+ *workp = 0;
+ local_irq_enable();
+ if (work)
+ rcu_core();
+ local_bh_enable();
+ if (*workp == 0) {
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+ *statusp = RCU_KTHREAD_WAITING;
+ return;
+ }
}
- invoke_rcu_callbacks_kthread();
+ *statusp = RCU_KTHREAD_YIELDING;
+ trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+ schedule_timeout_interruptible(2);
+ trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+ *statusp = RCU_KTHREAD_WAITING;
}
-static void invoke_rcu_core(void)
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+ .store = &rcu_data.rcu_cpu_kthread_task,
+ .thread_should_run = rcu_cpu_kthread_should_run,
+ .thread_fn = rcu_cpu_kthread,
+ .thread_comm = "rcuc/%u",
+ .setup = rcu_cpu_kthread_setup,
+ .park = rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
{
- if (cpu_online(smp_processor_id()))
- raise_softirq(RCU_SOFTIRQ);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
+ if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+ return 0;
+ WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
+ "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
+ return 0;
}
+early_initcall(rcu_spawn_core_kthreads);
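
The rcuc kthreads are thus spawned whenever boosting is enabled or softirq processing is disabled. The choice is controlled by the new read-only boot parameter declared above (0444); assuming the usual "rcutree." prefix inferred from the module_param() location, booting with:

	rcutree.use_softirq=0

would move RCU core processing out of RCU_SOFTIRQ and into the per-CPU "rcuc/%u" kthreads.
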
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
@@ -2354,7 +2465,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
} else {
/* Give the grace period a kick. */
- rdp->blimit = LONG_MAX;
+ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
rcu_force_quiescent_state();
@@ -3355,7 +3466,8 @@ void __init rcu_init(void)
rcu_init_one();
if (dump_tree)
rcu_dump_rcu_node_tree();
- open_softirq(RCU_SOFTIRQ, rcu_core);
+ if (use_softirq)
+ open_softirq(RCU_SOFTIRQ, rcu_core_si);
/*
* We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c4..7acaf3a62d39 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -154,13 +154,15 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
- bool deferred_qs; /* This CPU awaiting a deferred QS? */
+ bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
unsigned long ticks_this_gp; /* The number of scheduling-clock */
/* ticks this CPU has handled */
/* during and after the last grace */
/* period it is aware of. */
+ struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
+ bool defer_qs_iw_pending; /* Scheduler attention pending? */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
@@ -407,8 +409,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 9c990df880d1..af7e7b9c86af 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -250,7 +250,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
*/
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{
- WRITE_ONCE(rdp->deferred_qs, false);
+ WRITE_ONCE(rdp->exp_deferred_qs, false);
rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
}
@@ -259,8 +259,7 @@ static bool sync_exp_work_done(unsigned long s)
{
if (rcu_exp_gp_seq_done(s)) {
trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
- /* Ensure test happens before caller kfree(). */
- smp_mb__before_atomic(); /* ^^^ */
+ smp_mb(); /* Ensure test happens before caller kfree(). */
return true;
}
return false;
@@ -384,7 +383,12 @@ retry_ipi:
mask_ofl_test |= mask;
continue;
}
+ if (get_cpu() == cpu) {
+ put_cpu();
+ continue;
+ }
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
+ put_cpu();
if (!ret) {
mask_ofl_ipi &= ~mask;
continue;
@@ -611,7 +615,7 @@ static void rcu_exp_handler(void *unused)
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
} else {
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
set_tsk_need_resched(t);
set_preempt_need_resched();
}
@@ -633,7 +637,7 @@ static void rcu_exp_handler(void *unused)
if (t->rcu_read_lock_nesting > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
t->rcu_read_unlock_special.b.exp_hint = true;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -656,7 +660,7 @@ static void rcu_exp_handler(void *unused)
*
* Otherwise, force a context switch after the CPU enables everything.
*/
- rdp->deferred_qs = true;
+ rdp->exp_deferred_qs = true;
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
rcu_preempt_deferred_qs(t);
@@ -694,6 +698,16 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
#else /* #ifdef CONFIG_PREEMPT_RCU */
+/* Request an expedited quiescent state. */
+static void rcu_exp_need_qs(void)
+{
+ __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
+ /* Store .exp before .rcu_urgent_qs. */
+ smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
+ set_tsk_need_resched(current);
+ set_preempt_need_resched();
+}
+
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
@@ -709,25 +723,38 @@ static void rcu_exp_handler(void *unused)
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
return;
}
- __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
- /* Store .exp before .rcu_urgent_qs. */
- smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
- set_tsk_need_resched(current);
- set_preempt_need_resched();
+ rcu_exp_need_qs();
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
+ unsigned long flags;
+ int my_cpu;
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
rdp = per_cpu_ptr(&rcu_data, cpu);
rnp = rdp->mynode;
- if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+ my_cpu = get_cpu();
+ /* Quiescent state either not needed or already requested, leave. */
+ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+ __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
+ put_cpu();
+ return;
+ }
+ /* Quiescent state needed on current CPU, so set it up locally. */
+ if (my_cpu == cpu) {
+ local_irq_save(flags);
+ rcu_exp_need_qs();
+ local_irq_restore(flags);
+ put_cpu();
return;
+ }
+ /* Quiescent state needed on some other CPU, send IPI. */
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
+ put_cpu();
WARN_ON_ONCE(ret);
}
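
Both here and in the retry_ipi hunk above, get_cpu()/put_cpu() pin the task so the "am I the target?" check cannot be invalidated by migration, and so a CPU never sends an IPI to itself. A generic hedged sketch of the pattern (names illustrative):

	#include <linux/smp.h>

	static void run_on_cpu(int target, smp_call_func_t handler)
	{
		int my_cpu = get_cpu();   /* disables preemption: no migration */

		if (my_cpu == target)
			handler(NULL);    /* already there; skip the self-IPI */
		else
			smp_call_function_single(target, handler, NULL, 0);
		put_cpu();
	}
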
@@ -765,7 +792,6 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
- struct rcu_data *rdp;
struct rcu_exp_work rew;
struct rcu_node *rnp;
unsigned long s;
@@ -802,7 +828,6 @@ void synchronize_rcu_expedited(void)
}
/* Wait for expedited grace period to complete. */
- rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
rnp = rcu_get_root();
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(s));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 1102765f91fd..acb225023ed1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
* Paul E. McKenney <paulmck@linux.ibm.com>
*/
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
#include "../locking/rtmutex_common.h"
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code. Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+ if (!use_softirq)
+ pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
pr_info("\tRCU debug extended QS entry/exit.\n");
rcupdate_announce_bootup_oddness();
@@ -257,10 +237,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
*/
- if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
+ if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
else
- WARN_ON_ONCE(rdp->deferred_qs);
+ WARN_ON_ONCE(rdp->exp_deferred_qs);
}
/*
@@ -357,7 +337,7 @@ void rcu_note_context_switch(bool preempt)
* means that we continue to block the current grace period.
*/
rcu_qs();
- if (rdp->deferred_qs)
+ if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
@@ -471,14 +451,15 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
special = t->rcu_read_unlock_special;
rdp = this_cpu_ptr(&rcu_data);
- if (!special.s && !rdp->deferred_qs) {
+ if (!special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
+ t->rcu_read_unlock_special.b.deferred_qs = false;
if (special.b.need_qs) {
rcu_qs();
t->rcu_read_unlock_special.b.need_qs = false;
- if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
+ if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
@@ -490,7 +471,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
- if (rdp->deferred_qs) {
+ if (rdp->exp_deferred_qs) {
rcu_report_exp_rdp(rdp);
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
@@ -579,7 +560,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
- return (__this_cpu_read(rcu_data.deferred_qs) ||
+ return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
t->rcu_read_lock_nesting <= 0;
}
@@ -607,6 +588,17 @@ static void rcu_preempt_deferred_qs(struct task_struct *t)
}
/*
+ * Minimal handler to give the scheduler a chance to re-evaluate.
+ */
+static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
+{
+ struct rcu_data *rdp;
+
+ rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
+ rdp->defer_qs_iw_pending = false;
+}
+
+/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
@@ -625,16 +617,41 @@ static void rcu_read_unlock_special(struct task_struct *t)
local_irq_save(flags);
irqs_were_disabled = irqs_disabled_flags(flags);
if (preempt_bh_were_disabled || irqs_were_disabled) {
- WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
- /* Need to defer quiescent state until everything is enabled. */
- if (irqs_were_disabled) {
- /* Enabling irqs does not reschedule, so... */
+ bool exp;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ struct rcu_node *rnp = rdp->mynode;
+
+ t->rcu_read_unlock_special.b.exp_hint = false;
+ exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
+ (rdp->grpmask & rnp->expmask) ||
+ tick_nohz_full_cpu(rdp->cpu);
+ // Need to defer quiescent state until everything is enabled.
+ if ((exp || in_irq()) && irqs_were_disabled && use_softirq &&
+ (in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) {
+ // Using softirq, safe to awaken, and we get
+ // no help from enabling irqs, unlike bh/preempt.
raise_softirq_irqoff(RCU_SOFTIRQ);
+ } else if (exp && irqs_were_disabled && !use_softirq &&
+ !t->rcu_read_unlock_special.b.deferred_qs) {
+ // Safe to awaken and we get no help from enabling
+ // irqs, unlike bh/preempt.
+ invoke_rcu_core();
} else {
- /* Enabling BH or preempt does reschedule, so... */
+ // Enabling BH or preempt does reschedule, so...
+ // Also if no expediting or NO_HZ_FULL, slow is OK.
set_tsk_need_resched(current);
set_preempt_need_resched();
+ if (IS_ENABLED(CONFIG_IRQ_WORK) &&
+ !rdp->defer_qs_iw_pending && exp) {
+ // Get scheduler to re-evaluate and call hooks.
+ // If !IRQ_WORK, FQS scan will eventually IPI.
+ init_irq_work(&rdp->defer_qs_iw,
+ rcu_preempt_deferred_qs_handler);
+ rdp->defer_qs_iw_pending = true;
+ irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
+ }
}
+ t->rcu_read_unlock_special.b.deferred_qs = true;
local_irq_restore(flags);
return;
}
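
The queued irq_work is deliberately almost empty: its only job is to interrupt the target CPU so the irq-exit path notices the need-resched flags set just above and invokes the scheduler, which in turn reports the deferred quiescent state. A hedged minimal sketch of the idiom (names illustrative):

	#include <linux/irq_work.h>

	static struct irq_work poke_work;

	static void poke_handler(struct irq_work *iwp)
	{
		/* Intentionally (nearly) empty: the interrupt is the point. */
	}

	static void poke_cpu(int cpu)
	{
		init_irq_work(&poke_work, poke_handler);
		irq_work_queue_on(&poke_work, cpu);  /* forces an IRQ on "cpu" */
	}
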
@@ -760,7 +777,7 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
pr_cont(" %p", lhp);
- if (++i >= 10)
+ if (++i >= ncheck)
break;
}
pr_cont("\n");
@@ -944,18 +961,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
#ifdef CONFIG_RCU_BOOST
+ struct sched_param sp;
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
- /*
- * If the thread is yielding, only wake it when this
- * is invoked from idle
- */
- if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
- wake_up_process(t);
+ sp.sched_priority = kthread_prio;
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
}
+#ifdef CONFIG_RCU_BOOST
+
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@@ -1091,23 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
}
/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
- if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
- current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
- rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
- __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
- }
- local_irq_restore(flags);
-}
-
-/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
*/
@@ -1160,59 +1163,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
return 0;
}
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
- struct sched_param sp;
-
- sp.sched_priority = kthread_prio;
- sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
- per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
- return __this_cpu_read(rcu_data.rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks. This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
- unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
- char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
- int spincnt;
-
- for (spincnt = 0; spincnt < 10; spincnt++) {
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
- local_bh_disable();
- *statusp = RCU_KTHREAD_RUNNING;
- local_irq_disable();
- work = *workp;
- *workp = 0;
- local_irq_enable();
- if (work)
- rcu_do_batch(this_cpu_ptr(&rcu_data));
- local_bh_enable();
- if (*workp == 0) {
- trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
- *statusp = RCU_KTHREAD_WAITING;
- return;
- }
- }
- *statusp = RCU_KTHREAD_YIELDING;
- trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
- schedule_timeout_interruptible(2);
- trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
- *statusp = RCU_KTHREAD_WAITING;
-}
-
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
@@ -1243,27 +1193,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
free_cpumask_var(cm);
}
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
- .store = &rcu_data.rcu_cpu_kthread_task,
- .thread_should_run = rcu_cpu_kthread_should_run,
- .thread_fn = rcu_cpu_kthread,
- .thread_comm = "rcuc/%u",
- .setup = rcu_cpu_kthread_setup,
- .park = rcu_cpu_kthread_park,
-};
-
/*
* Spawn boost kthreads -- called as soon as the scheduler is running.
*/
static void __init rcu_spawn_boost_kthreads(void)
{
struct rcu_node *rnp;
- int cpu;
- for_each_possible_cpu(cpu)
- per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
- if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
- return;
rcu_for_each_leaf_node(rnp)
(void)rcu_spawn_one_boost_kthread(rnp);
}
@@ -1286,11 +1222,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
-static void invoke_rcu_callbacks_kthread(void)
-{
- WARN_ON_ONCE(1);
-}
-
static bool rcu_is_callbacks_kthread(void)
{
return false;
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index f65a73a97323..065183391f75 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -630,7 +630,9 @@ static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
time_before(j, rcu_state.gp_activity + gpssdelay) ||
atomic_xchg(&warned, 1)) {
- raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
+ if (rnp_root != rnp)
+ /* irqs remain disabled. */
+ raw_spin_unlock_rcu_node(rnp_root);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
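/*
 * A note on the hunk above: rnp_root is only locked earlier in this
 * function when it differs from rnp, so unconditionally unlocking it
 * on this early-exit path could unlock rnp twice. The added check
 * keeps the unlock symmetric with the conditional lock.
 */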
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c3bf44ba42e5..61df2bf08563 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -423,6 +423,19 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
do { } while (0)
#endif
+#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
+/* Get rcutorture access to sched_setaffinity(). */
+long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(pid, in_mask);
+ WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
+#endif
+
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 9424ee90589e..27c48eb7de40 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -277,7 +277,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
error:
sig = ksig ? ksig->sig : 0;
- force_sigsegv(sig, t);
+ force_sigsegv(sig);
}
#ifdef CONFIG_DEBUG_RSEQ
@@ -296,7 +296,7 @@ void rseq_syscall(struct pt_regs *regs)
return;
if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
- force_sig(SIGSEGV, t);
+ force_sig(SIGSEGV);
}
#endif
diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c
index 2d4ff5353ded..2067080bb235 100644
--- a/kernel/sched/autogroup.c
+++ b/kernel/sched/autogroup.c
@@ -259,7 +259,6 @@ out:
}
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_SCHED_DEBUG
int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
if (!task_group_is_autogroup(tg))
@@ -267,4 +266,3 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
}
-#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 874c427742a9..fa43ce3962e7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -23,6 +23,17 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
+/*
+ * Export tracepoints that act as bare tracehooks (i.e., have no trace event
+ * associated with them) to allow external modules to probe them.
+ */
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_cfs_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_rt_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_dl_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_se_tp);
+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp);
+
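/*
 * A minimal sketch (not part of this diff) of how an external module
 * could consume the bare tracepoints exported above. The probe
 * argument list is an assumption and must match the DECLARE_TRACE()
 * prototype of pelt_cfs_tp in <trace/events/sched.h>.
 */
#include <linux/module.h>
#include <trace/events/sched.h>

static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
{
	/* Called on every trace_pelt_cfs_tp(); cfs_rq can be inspected here. */
}

static int __init pelt_probe_init(void)
{
	/* register_trace_<name>() is generated by DECLARE_TRACE(). */
	return register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

static void __exit pelt_probe_exit(void)
{
	unregister_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
	tracepoint_synchronize_unregister();
}

module_init(pelt_probe_init);
module_exit(pelt_probe_exit);
MODULE_LICENSE("GPL");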
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
@@ -761,6 +772,401 @@ static void set_load_weight(struct task_struct *p, bool update_load)
}
}
+#ifdef CONFIG_UCLAMP_TASK
+/* Max allowed minimum utilization */
+unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
+
+/* Max allowed maximum utilization */
+unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
+
+/* All clamps are required to be less than or equal to these values */
+static struct uclamp_se uclamp_default[UCLAMP_CNT];
+
+/* Integer rounded range for each bucket */
+#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
+
+#define for_each_clamp_id(clamp_id) \
+ for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
+
+static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
+{
+ return clamp_value / UCLAMP_BUCKET_DELTA;
+}
+
+static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
+{
+ return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
+}
+
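/*
 * Worked example of the bucketing above, assuming the default
 * SCHED_CAPACITY_SCALE of 1024 and UCLAMP_BUCKETS == 5 (the default
 * CONFIG_UCLAMP_BUCKETS_COUNT):
 *
 *	UCLAMP_BUCKET_DELTA = DIV_ROUND_CLOSEST(1024, 5) = 205
 *	uclamp_bucket_id(300)         = 300 / 205 = 1
 *	uclamp_bucket_base_value(300) = 205 * 1   = 205
 *
 * i.e. requested clamp values are quantized into UCLAMP_BUCKETS ranges
 * of roughly 205 utilization units each.
 */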
+static inline unsigned int uclamp_none(int clamp_id)
+{
+ if (clamp_id == UCLAMP_MIN)
+ return 0;
+ return SCHED_CAPACITY_SCALE;
+}
+
+static inline void uclamp_se_set(struct uclamp_se *uc_se,
+ unsigned int value, bool user_defined)
+{
+ uc_se->value = value;
+ uc_se->bucket_id = uclamp_bucket_id(value);
+ uc_se->user_defined = user_defined;
+}
+
+static inline unsigned int
+uclamp_idle_value(struct rq *rq, unsigned int clamp_id,
+ unsigned int clamp_value)
+{
+ /*
+ * Avoid blocked utilization pushing up the frequency when we go
+ * idle (which drops the max-clamp) by retaining the last known
+ * max-clamp.
+ */
+ if (clamp_id == UCLAMP_MAX) {
+ rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
+ return clamp_value;
+ }
+
+ return uclamp_none(UCLAMP_MIN);
+}
+
+static inline void uclamp_idle_reset(struct rq *rq, unsigned int clamp_id,
+ unsigned int clamp_value)
+{
+ /* Reset max-clamp retention only on idle exit */
+ if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
+ return;
+
+ WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
+}
+
+static inline
+unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id,
+ unsigned int clamp_value)
+{
+ struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
+ int bucket_id = UCLAMP_BUCKETS - 1;
+
+ /*
+	 * Since both min and max clamps are max aggregated, find the
+	 * topmost bucket with tasks in it.
+ */
+ for ( ; bucket_id >= 0; bucket_id--) {
+ if (!bucket[bucket_id].tasks)
+ continue;
+ return bucket[bucket_id].value;
+ }
+
+ /* No tasks -- default clamp values */
+ return uclamp_idle_value(rq, clamp_id, clamp_value);
+}
+
+/*
+ * The effective clamp bucket index of a task depends on, by increasing
+ * priority:
+ * - the task specific clamp value, when explicitly requested from userspace
+ * - the system default clamp value, defined by the sysadmin
+ */
+static inline struct uclamp_se
+uclamp_eff_get(struct task_struct *p, unsigned int clamp_id)
+{
+ struct uclamp_se uc_req = p->uclamp_req[clamp_id];
+ struct uclamp_se uc_max = uclamp_default[clamp_id];
+
+ /* System default restrictions always apply */
+ if (unlikely(uc_req.value > uc_max.value))
+ return uc_max;
+
+ return uc_req;
+}
+
+unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id)
+{
+ struct uclamp_se uc_eff;
+
+ /* Task currently refcounted: use back-annotated (effective) value */
+ if (p->uclamp[clamp_id].active)
+ return p->uclamp[clamp_id].value;
+
+ uc_eff = uclamp_eff_get(p, clamp_id);
+
+ return uc_eff.value;
+}
+
+/*
+ * When a task is enqueued on a rq, the clamp bucket currently defined by the
+ * task's uclamp::bucket_id is refcounted on that rq. This also immediately
+ * updates the rq's clamp value if required.
+ *
+ * Tasks can have a task-specific value requested from user-space; each
+ * bucket tracks the maximum value among the tasks refcounted in it.
+ * This "local max aggregation" allows tracking the exact "requested" value
+ * for each bucket when all of its RUNNABLE tasks require the same clamp.
+ */
+static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
+ unsigned int clamp_id)
+{
+ struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
+ struct uclamp_se *uc_se = &p->uclamp[clamp_id];
+ struct uclamp_bucket *bucket;
+
+ lockdep_assert_held(&rq->lock);
+
+ /* Update task effective clamp */
+ p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id);
+
+ bucket = &uc_rq->bucket[uc_se->bucket_id];
+ bucket->tasks++;
+ uc_se->active = true;
+
+ uclamp_idle_reset(rq, clamp_id, uc_se->value);
+
+ /*
+ * Local max aggregation: rq buckets always track the max
+ * "requested" clamp value of its RUNNABLE tasks.
+ */
+ if (bucket->tasks == 1 || uc_se->value > bucket->value)
+ bucket->value = uc_se->value;
+
+ if (uc_se->value > READ_ONCE(uc_rq->value))
+ WRITE_ONCE(uc_rq->value, uc_se->value);
+}
+
+/*
+ * When a task is dequeued from a rq, the clamp bucket refcounted by the task
+ * is released. If this is the last task reference counting the rq's max
+ * active clamp value, then the rq's clamp value is updated.
+ *
+ * Both the refcounted tasks and the rq's cached clamp values are expected
+ * to always be valid. If they are detected not to be, then, as defensive
+ * programming, enforce the expected state and warn.
+ */
+static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
+ unsigned int clamp_id)
+{
+ struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
+ struct uclamp_se *uc_se = &p->uclamp[clamp_id];
+ struct uclamp_bucket *bucket;
+ unsigned int bkt_clamp;
+ unsigned int rq_clamp;
+
+ lockdep_assert_held(&rq->lock);
+
+ bucket = &uc_rq->bucket[uc_se->bucket_id];
+ SCHED_WARN_ON(!bucket->tasks);
+ if (likely(bucket->tasks))
+ bucket->tasks--;
+ uc_se->active = false;
+
+ /*
+	 * Keep "local max aggregation" simple and accept possibly
+	 * overboosting some RUNNABLE tasks in the same bucket.
+ * The rq clamp bucket value is reset to its base value whenever
+ * there are no more RUNNABLE tasks refcounting it.
+ */
+ if (likely(bucket->tasks))
+ return;
+
+ rq_clamp = READ_ONCE(uc_rq->value);
+ /*
+	 * Defensive programming: this should never happen. If it happens,
+	 * e.g. due to a future modification, warn and fix up the expected value.
+ */
+ SCHED_WARN_ON(bucket->value > rq_clamp);
+ if (bucket->value >= rq_clamp) {
+ bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
+ WRITE_ONCE(uc_rq->value, bkt_clamp);
+ }
+}
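/*
 * Example of the inaccuracy accepted by the inc/dec pair above: two
 * RUNNABLE tasks land in the same bucket with requested clamps 200 and
 * 300, so bucket->value is 300. If the 300-clamped task is dequeued
 * first, bucket->tasks is still non-zero and bucket->value stays 300:
 * the remaining task is briefly overboosted, and the bucket value is
 * only refreshed once its task count drops to zero.
 */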
+
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+{
+ unsigned int clamp_id;
+
+ if (unlikely(!p->sched_class->uclamp_enabled))
+ return;
+
+ for_each_clamp_id(clamp_id)
+ uclamp_rq_inc_id(rq, p, clamp_id);
+
+ /* Reset clamp idle holding when there is one RUNNABLE task */
+ if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
+ rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
+}
+
+static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
+{
+ unsigned int clamp_id;
+
+ if (unlikely(!p->sched_class->uclamp_enabled))
+ return;
+
+ for_each_clamp_id(clamp_id)
+ uclamp_rq_dec_id(rq, p, clamp_id);
+}
+
+int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int old_min, old_max;
+ static DEFINE_MUTEX(mutex);
+ int result;
+
+ mutex_lock(&mutex);
+ old_min = sysctl_sched_uclamp_util_min;
+ old_max = sysctl_sched_uclamp_util_max;
+
+ result = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (result)
+ goto undo;
+ if (!write)
+ goto done;
+
+ if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
+ sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
+ result = -EINVAL;
+ goto undo;
+ }
+
+ if (old_min != sysctl_sched_uclamp_util_min) {
+ uclamp_se_set(&uclamp_default[UCLAMP_MIN],
+ sysctl_sched_uclamp_util_min, false);
+ }
+ if (old_max != sysctl_sched_uclamp_util_max) {
+ uclamp_se_set(&uclamp_default[UCLAMP_MAX],
+ sysctl_sched_uclamp_util_max, false);
+ }
+
+ /*
+	 * Updating all the RUNNABLE tasks is expensive; keep it simple and do
+ * just a lazy update at each next enqueue time.
+ */
+ goto done;
+
+undo:
+ sysctl_sched_uclamp_util_min = old_min;
+ sysctl_sched_uclamp_util_max = old_max;
+done:
+ mutex_unlock(&mutex);
+
+ return result;
+}
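/*
 * Sketch of exercising the handler above from user space. The procfs
 * paths are an assumption (the sysctl table entries live outside this
 * hunk); both knobs take 0..SCHED_CAPACITY_SCALE, and min > max is
 * rejected with -EINVAL.
 */
#include <stdio.h>

static int write_sysctl(const char *path, int val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	/* Restrict the system-default clamp range to [0, 512]. */
	write_sysctl("/proc/sys/kernel/sched_util_clamp_max", 512);
	write_sysctl("/proc/sys/kernel/sched_util_clamp_min", 0);
	return 0;
}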
+
+static int uclamp_validate(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;
+ unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN)
+ lower_bound = attr->sched_util_min;
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX)
+ upper_bound = attr->sched_util_max;
+
+ if (lower_bound > upper_bound)
+ return -EINVAL;
+ if (upper_bound > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void __setscheduler_uclamp(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ unsigned int clamp_id;
+
+ /*
+ * On scheduling class change, reset to default clamps for tasks
+ * without a task-specific value.
+ */
+ for_each_clamp_id(clamp_id) {
+ struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
+ unsigned int clamp_value = uclamp_none(clamp_id);
+
+ /* Keep using defined clamps across class changes */
+ if (uc_se->user_defined)
+ continue;
+
+ /* By default, RT tasks always get 100% boost */
+ if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
+ clamp_value = uclamp_none(UCLAMP_MAX);
+
+ uclamp_se_set(uc_se, clamp_value, false);
+ }
+
+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
+ return;
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
+ uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
+ attr->sched_util_min, true);
+ }
+
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
+ uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
+ attr->sched_util_max, true);
+ }
+}
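/*
 * Sketch of the user-space side of the per-task clamps consumed by
 * __setscheduler_uclamp() above. The struct layout and flag values
 * mirror the uapi additions in this series (SCHED_ATTR_SIZE_VER1,
 * SCHED_FLAG_UTIL_CLAMP_{MIN,MAX}); treat the numeric constants as
 * assumptions to be checked against the uapi headers.
 */
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr_v1 {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime, sched_deadline, sched_period;
	uint32_t sched_util_min;	/* requested UCLAMP_MIN */
	uint32_t sched_util_max;	/* requested UCLAMP_MAX */
};

#define SCHED_FLAG_KEEP_POLICY		0x08
#define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
#define SCHED_FLAG_UTIL_CLAMP_MAX	0x40

int main(void)
{
	struct sched_attr_v1 attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);		/* SCHED_ATTR_SIZE_VER1 */
	attr.sched_flags = SCHED_FLAG_KEEP_POLICY |
			   SCHED_FLAG_UTIL_CLAMP_MIN |
			   SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = 128;	/* boost floor */
	attr.sched_util_max = 512;	/* utilization cap */

	return syscall(SYS_sched_setattr, 0 /* self */, &attr, 0);
}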
+
+static void uclamp_fork(struct task_struct *p)
+{
+ unsigned int clamp_id;
+
+ for_each_clamp_id(clamp_id)
+ p->uclamp[clamp_id].active = false;
+
+ if (likely(!p->sched_reset_on_fork))
+ return;
+
+ for_each_clamp_id(clamp_id) {
+ unsigned int clamp_value = uclamp_none(clamp_id);
+
+ /* By default, RT tasks always get 100% boost */
+ if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
+ clamp_value = uclamp_none(UCLAMP_MAX);
+
+ uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
+ }
+}
+
+static void __init init_uclamp(void)
+{
+ struct uclamp_se uc_max = {};
+ unsigned int clamp_id;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq));
+ cpu_rq(cpu)->uclamp_flags = 0;
+ }
+
+ for_each_clamp_id(clamp_id) {
+ uclamp_se_set(&init_task.uclamp_req[clamp_id],
+ uclamp_none(clamp_id), false);
+ }
+
+ /* System defaults allow max clamp values for both indexes */
+ uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
+ for_each_clamp_id(clamp_id)
+ uclamp_default[clamp_id] = uc_max;
+}
+
+#else /* CONFIG_UCLAMP_TASK */
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
+static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
+static inline int uclamp_validate(struct task_struct *p,
+ const struct sched_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+static void __setscheduler_uclamp(struct task_struct *p,
+ const struct sched_attr *attr) { }
+static inline void uclamp_fork(struct task_struct *p) { }
+static inline void init_uclamp(void) { }
+#endif /* CONFIG_UCLAMP_TASK */
+
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (!(flags & ENQUEUE_NOCLOCK))
@@ -771,6 +1177,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
psi_enqueue(p, flags & ENQUEUE_WAKEUP);
}
+ uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
}
@@ -784,6 +1191,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
psi_dequeue(p, flags & DEQUEUE_SLEEP);
}
+ uclamp_rq_dec(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
}
@@ -930,7 +1338,7 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
*/
static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
{
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
return false;
if (is_per_cpu_kthread(p))
@@ -1025,7 +1433,7 @@ static int migration_cpu_stop(void *data)
local_irq_disable();
/*
* We need to explicitly wake pending tasks before running
- * __migrate_task() such that we will not miss enforcing cpus_allowed
+ * __migrate_task() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
sched_ttwu_pending();
@@ -1056,7 +1464,7 @@ static int migration_cpu_stop(void *data)
*/
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
{
- cpumask_copy(&p->cpus_allowed, new_mask);
+ cpumask_copy(&p->cpus_mask, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
}
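/*
 * Context for the cpus_allowed -> cpus_ptr/cpus_mask split used
 * throughout this patch: the affinity mask now lives in
 * task_struct::cpus_mask, and readers go through the cpus_ptr pointer,
 * which normally points at that mask. A rough sketch of the fields
 * involved (names per this series):
 *
 *	struct task_struct {
 *		...
 *		const cpumask_t		*cpus_ptr;  // what readers check
 *		cpumask_t		cpus_mask;  // what writers update
 *		...
 *	};
 *
 * Keeping readers on an indirection allows a later user to point
 * cpus_ptr at a temporary mask without touching the user-visible
 * cpus_mask.
 */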
@@ -1126,7 +1534,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}
- if (cpumask_equal(&p->cpus_allowed, new_mask))
+ if (cpumask_equal(p->cpus_ptr, new_mask))
goto out;
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
@@ -1286,10 +1694,10 @@ static int migrate_swap_stop(void *data)
if (task_cpu(arg->src_task) != arg->src_cpu)
goto unlock;
- if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
goto unlock;
- if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
goto unlock;
__migrate_swap_task(arg->src_task, arg->dst_cpu);
@@ -1331,10 +1739,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu))
goto out;
- if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr))
goto out;
- if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed))
+ if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr))
goto out;
trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu);
@@ -1479,7 +1887,7 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);
/*
- * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock
*
* A few notes on cpu_active vs cpu_online:
*
@@ -1519,14 +1927,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
for_each_cpu(dest_cpu, nodemask) {
if (!cpu_active(dest_cpu))
continue;
- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr))
return dest_cpu;
}
}
for (;;) {
/* Any allowed, online CPU? */
- for_each_cpu(dest_cpu, &p->cpus_allowed) {
+ for_each_cpu(dest_cpu, p->cpus_ptr) {
if (!is_cpu_allowed(p, dest_cpu))
continue;
@@ -1570,7 +1978,7 @@ out:
}
/*
- * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable.
*/
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
@@ -1580,11 +1988,11 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
else
- cpu = cpumask_any(&p->cpus_allowed);
+ cpu = cpumask_any(p->cpus_ptr);
/*
* In order not to call set_task_cpu() on a blocking task we need
- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+ * to rely on ttwu() to place the task on a valid ->cpus_ptr
* CPU.
*
* Since this is common to all placement strategies, this lives here.
@@ -1991,6 +2399,29 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
unsigned long flags;
int cpu, success = 0;
+ if (p == current) {
+ /*
+	 * We're waking current; this means 'p->on_rq' and 'task_cpu(p)
+	 * == smp_processor_id()'. Together this means we can special-case
+	 * the whole 'p->on_rq && ttwu_remote()' case below
+ * without taking any locks.
+ *
+ * In particular:
+ * - we rely on Program-Order guarantees for all the ordering,
+ * - we're serialized against set_special_state() by virtue of
+ * it disabling IRQs (this allows not taking ->pi_lock).
+ */
+ if (!(p->state & state))
+ return false;
+
+ success = 1;
+ cpu = task_cpu(p);
+ trace_sched_waking(p);
+ p->state = TASK_RUNNING;
+ trace_sched_wakeup(p);
+ goto out;
+ }
+
/*
* If we are going to wake up a thread waiting for CONDITION we
* need to ensure that CONDITION=1 done by the caller can not be
@@ -2000,7 +2431,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
raw_spin_lock_irqsave(&p->pi_lock, flags);
smp_mb__after_spinlock();
if (!(p->state & state))
- goto out;
+ goto unlock;
trace_sched_waking(p);
@@ -2030,7 +2461,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*/
smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
- goto stat;
+ goto unlock;
#ifdef CONFIG_SMP
/*
@@ -2090,10 +2521,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
#endif /* CONFIG_SMP */
ttwu_queue(p, cpu, wake_flags);
-stat:
- ttwu_stat(p, cpu, wake_flags);
-out:
+unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+out:
+ if (success)
+ ttwu_stat(p, cpu, wake_flags);
return success;
}
@@ -2300,6 +2732,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/
p->prio = current->normal_prio;
+ uclamp_fork(p);
+
/*
* Revert to default priority/policy on fork if requested.
*/
@@ -2395,7 +2829,7 @@ void wake_up_new_task(struct task_struct *p)
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
- * - cpus_allowed can change in the fork path
+ * - cpus_ptr can change in the fork path
* - any previously selected CPU might disappear through hotplug
*
* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
@@ -3033,7 +3467,6 @@ void scheduler_tick(void)
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
- cpu_load_update_active(rq);
calc_global_load_tick(rq);
psi_task_tick(rq);
@@ -4071,6 +4504,13 @@ static void __setscheduler_params(struct task_struct *p,
static void __setscheduler(struct rq *rq, struct task_struct *p,
const struct sched_attr *attr, bool keep_boost)
{
+ /*
+	 * If params can't change, scheduling class changes aren't allowed
+	 * either.
+ */
+ if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
+ return;
+
__setscheduler_params(p, attr);
/*
@@ -4208,6 +4648,13 @@ recheck:
return retval;
}
+ /* Update task specific "requested" clamps */
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
+ retval = uclamp_validate(p, attr);
+ if (retval)
+ return retval;
+ }
+
/*
* Make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
@@ -4237,6 +4684,8 @@ recheck:
goto change;
if (dl_policy(policy) && dl_param_changed(p, attr))
goto change;
+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
+ goto change;
p->sched_reset_on_fork = reset_on_fork;
task_rq_unlock(rq, p, &rf);
@@ -4267,7 +4716,7 @@ change:
* the entire root_domain to become SCHED_DEADLINE. We
* will also fail if there's no bandwidth available.
*/
- if (!cpumask_subset(span, &p->cpus_allowed) ||
+ if (!cpumask_subset(span, p->cpus_ptr) ||
rq->rd->dl_bw.bw == 0) {
task_rq_unlock(rq, p, &rf);
return -EPERM;
@@ -4317,7 +4766,9 @@ change:
put_prev_task(rq, p);
prev_class = p->sched_class;
+
__setscheduler(rq, p, attr, pi);
+ __setscheduler_uclamp(p, attr);
if (queued) {
/*
@@ -4493,6 +4944,10 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a
if (ret)
return -EFAULT;
+ if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) &&
+ size < SCHED_ATTR_SIZE_VER1)
+ return -EINVAL;
+
/*
* XXX: Do we want to be lenient like existing syscalls; or do we want
* to be strict and return an error on out-of-bounds values?
@@ -4556,14 +5011,21 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
if ((int)attr.sched_policy < 0)
return -EINVAL;
+ if (attr.sched_flags & SCHED_FLAG_KEEP_POLICY)
+ attr.sched_policy = SETPARAM_POLICY;
rcu_read_lock();
retval = -ESRCH;
p = find_process_by_pid(pid);
- if (p != NULL)
- retval = sched_setattr(p, &attr);
+ if (likely(p))
+ get_task_struct(p);
rcu_read_unlock();
+ if (likely(p)) {
+ retval = sched_setattr(p, &attr);
+ put_task_struct(p);
+ }
+
return retval;
}
@@ -4714,6 +5176,11 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
else
attr.sched_nice = task_nice(p);
+#ifdef CONFIG_UCLAMP_TASK
+ attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value;
+ attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value;
+#endif
+
rcu_read_unlock();
retval = sched_read_attr(uattr, &attr, size);
@@ -4866,7 +5333,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
goto out_unlock;
raw_spin_lock_irqsave(&p->pi_lock, flags);
- cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out_unlock:
@@ -5123,7 +5590,7 @@ long __sched io_schedule_timeout(long timeout)
}
EXPORT_SYMBOL(io_schedule_timeout);
-void io_schedule(void)
+void __sched io_schedule(void)
{
int token;
@@ -5443,7 +5910,7 @@ int task_can_attach(struct task_struct *p,
* allowed nodes is unnecessary. Thus, cpusets are not
* applicable for such threads. This prevents checking for
* success of set_cpus_allowed_ptr() on all attached tasks
- * before cpus_allowed may be changed.
+ * before cpus_mask may be changed.
*/
if (p->flags & PF_NO_SETAFFINITY) {
ret = -EINVAL;
@@ -5470,7 +5937,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (curr_cpu == target_cpu)
return 0;
- if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;
/* TODO: This is not properly updating schedstats */
@@ -5608,7 +6075,7 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
put_prev_task(rq, next);
/*
- * Rules for changing task_struct::cpus_allowed are holding
+ * Rules for changing task_struct::cpus_mask are holding
* both pi_lock and rq->lock, such that holding either
* stabilizes the mask.
*
@@ -5902,8 +6369,8 @@ DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
void __init sched_init(void)
{
- int i, j;
unsigned long alloc_size = 0, ptr;
+ int i;
wait_bit_init();
@@ -6005,10 +6472,6 @@ void __init sched_init(void)
#ifdef CONFIG_RT_GROUP_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
-
- for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
- rq->cpu_load[j] = 0;
-
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
@@ -6063,6 +6526,8 @@ void __init sched_init(void)
psi_init();
+ init_uclamp();
+
scheduler_running = 1;
}
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index ec4e4a9aab5f..5cc4012572ec 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -120,14 +120,14 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
const struct sched_dl_entity *dl_se = &p->dl;
if (later_mask &&
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+ cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
return 1;
} else {
int best_cpu = cpudl_maximum(cp);
WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
- if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+ if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
if (later_mask)
cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 962cf343f798..636ca6f88c8e 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -196,14 +196,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
* based on the task model parameters and gives the minimal utilization
* required to meet deadlines.
*/
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
- unsigned long max, enum schedutil_type type)
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ unsigned long max, enum schedutil_type type,
+ struct task_struct *p)
{
unsigned long dl_util, util, irq;
struct rq *rq = cpu_rq(cpu);
- if (type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt))
+ if (!IS_BUILTIN(CONFIG_UCLAMP_TASK) &&
+ type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
return max;
+ }
/*
* Early check to see if IRQ/steal time saturates the CPU, can be
@@ -219,9 +222,16 @@ unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
* CFS tasks and we use the same metric to track the effective
* utilization (PELT windows are synchronized) we can directly add them
* to obtain the CPU's actual utilization.
+ *
+ * CFS and RT utilization can be boosted or capped, depending on
+ * utilization clamp constraints requested by currently RUNNABLE
+ * tasks.
+ * When there are no CFS RUNNABLE tasks, clamps are released and
+ * frequency will be gracefully reduced with the utilization decay.
*/
- util = util_cfs;
- util += cpu_util_rt(rq);
+ util = util_cfs + cpu_util_rt(rq);
+ if (type == FREQUENCY_UTIL)
+ util = uclamp_util_with(rq, util, p);
dl_util = cpu_util_dl(rq);
@@ -276,12 +286,12 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
unsigned long util = cpu_util_cfs(rq);
- unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
+ unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
sg_cpu->max = max;
sg_cpu->bw_dl = cpu_bw_dl(rq);
- return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
+ return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL);
}
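/*
 * Sketch of what uclamp_util_with() above is expected to do (the
 * helper itself is defined in kernel/sched/sched.h in this series):
 * clamp the aggregate utilization into the rq's currently active
 * [UCLAMP_MIN, UCLAMP_MAX] range, optionally widened by task @p's own
 * clamps. Roughly:
 *
 *	min_util = max(rq_uclamp_min, p ? p_uclamp_min : 0);
 *	max_util = max(rq_uclamp_max, p ? p_uclamp_max : 0);
 *	util = clamp(util, min_util, max_util);
 *
 * so RUNNABLE tasks can both boost (raise the floor) and cap (lower
 * the ceiling) the frequency-selection utilization.
 */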
/**
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 9c6480e6d62d..b7abca987d94 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -94,11 +94,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (skip)
continue;
- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+ if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
continue;
if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
/*
* We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 43901fa3f269..8b5bb2ac16e2 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -538,7 +538,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
* If we cannot preempt any rq, fall back to pick any
* online CPU:
*/
- cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+ cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
if (cpu >= nr_cpu_ids) {
/*
* Failed to find any suitable CPU.
@@ -1195,7 +1195,7 @@ static void update_curr_dl(struct rq *rq)
&curr->dl);
} else {
unsigned long scale_freq = arch_scale_freq_capacity(cpu);
- unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+ unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
scaled_delta_exec = cap_scale(delta_exec, scale_freq);
scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
@@ -1824,7 +1824,7 @@ static void set_curr_task_dl(struct rq *rq)
static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_allowed))
+ cpumask_test_cpu(cpu, p->cpus_ptr))
return 1;
return 0;
}
@@ -1974,7 +1974,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
/* Retry if something changed. */
if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq ||
- !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
+ !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
task_running(rq, task) ||
!dl_task(task) ||
!task_on_rq_queued(task))) {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 14c6a8716ba1..f7e4579e746c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -233,49 +233,35 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
*tablep = NULL;
}
-static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX-1;
-
static void
set_table_entry(struct ctl_table *entry,
const char *procname, void *data, int maxlen,
- umode_t mode, proc_handler *proc_handler,
- bool load_idx)
+ umode_t mode, proc_handler *proc_handler)
{
entry->procname = procname;
entry->data = data;
entry->maxlen = maxlen;
entry->mode = mode;
entry->proc_handler = proc_handler;
-
- if (load_idx) {
- entry->extra1 = &min_load_idx;
- entry->extra2 = &max_load_idx;
- }
}
static struct ctl_table *
sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
- struct ctl_table *table = sd_alloc_ctl_entry(14);
+ struct ctl_table *table = sd_alloc_ctl_entry(9);
if (table == NULL)
return NULL;
- set_table_entry(&table[0] , "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
- set_table_entry(&table[1] , "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax, false);
- set_table_entry(&table[2] , "busy_idx", &sd->busy_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
- set_table_entry(&table[3] , "idle_idx", &sd->idle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
- set_table_entry(&table[4] , "newidle_idx", &sd->newidle_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
- set_table_entry(&table[5] , "wake_idx", &sd->wake_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
- set_table_entry(&table[6] , "forkexec_idx", &sd->forkexec_idx, sizeof(int) , 0644, proc_dointvec_minmax, true );
- set_table_entry(&table[7] , "busy_factor", &sd->busy_factor, sizeof(int) , 0644, proc_dointvec_minmax, false);
- set_table_entry(&table[8] , "imbalance_pct", &sd->imbalance_pct, sizeof(int) , 0644, proc_dointvec_minmax, false);
- set_table_entry(&table[9] , "cache_nice_tries", &sd->cache_nice_tries, sizeof(int) , 0644, proc_dointvec_minmax, false);
- set_table_entry(&table[10], "flags", &sd->flags, sizeof(int) , 0644, proc_dointvec_minmax, false);
- set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
- set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false);
- /* &table[13] is terminator */
+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
+ /* &table[8] is terminator */
return table;
}
@@ -653,8 +639,6 @@ do { \
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
P(nr_running);
- SEQ_printf(m, " .%-30s: %lu\n", "load",
- rq->load.weight);
P(nr_switches);
P(nr_load_updates);
P(nr_uninterruptible);
@@ -662,11 +646,6 @@ do { \
SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
PN(clock);
PN(clock_task);
- P(cpu_load[0]);
- P(cpu_load[1]);
- P(cpu_load[2]);
- P(cpu_load[3]);
- P(cpu_load[4]);
#undef P
#undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f35930f5e528..036be95a87e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -275,6 +275,19 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return grp->my_q;
}
+static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
+{
+ if (!path)
+ return;
+
+ if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
+ autogroup_path(cfs_rq->tg, path, len);
+ else if (cfs_rq && cfs_rq->tg->css.cgroup)
+ cgroup_path(cfs_rq->tg->css.cgroup, path, len);
+ else
+ strlcpy(path, "(null)", len);
+}
+
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
@@ -449,6 +462,12 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
return NULL;
}
+static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
+{
+ if (path)
+ strlcpy(path, "(null)", len);
+}
+
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
return true;
@@ -764,7 +783,7 @@ void post_init_entity_util_avg(struct task_struct *p)
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
struct sched_avg *sa = &se->avg;
- long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+ long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
if (cap > 0) {
@@ -1466,9 +1485,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
}
-static unsigned long weighted_cpuload(struct rq *rq);
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
+static unsigned long cpu_runnable_load(struct rq *rq);
/* Cached statistics for all CPUs within a node */
struct numa_stats {
@@ -1489,7 +1506,7 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
for_each_cpu(cpu, cpumask_of_node(nid)) {
struct rq *rq = cpu_rq(cpu);
- ns->load += weighted_cpuload(rq);
+ ns->load += cpu_runnable_load(rq);
ns->compute_capacity += capacity_of(cpu);
}
@@ -1621,7 +1638,7 @@ static void task_numa_compare(struct task_numa_env *env,
* be incurred if the tasks were swapped.
*/
/* Skip this swap candidate if cannot move to the source cpu */
- if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
+ if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
goto unlock;
/*
@@ -1718,7 +1735,7 @@ static void task_numa_find_cpu(struct task_numa_env *env,
for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
/* Skip this CPU if the source task cannot migrate */
- if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;
env->dst_cpu = cpu;
@@ -2686,8 +2703,6 @@ static void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_add(&cfs_rq->load, se->load.weight);
- if (!parent_entity(se))
- update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
#ifdef CONFIG_SMP
if (entity_is_task(se)) {
struct rq *rq = rq_of(cfs_rq);
@@ -2703,8 +2718,6 @@ static void
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_sub(&cfs_rq->load, se->load.weight);
- if (!parent_entity(se))
- update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
#ifdef CONFIG_SMP
if (entity_is_task(se)) {
account_numa_dequeue(rq_of(cfs_rq), task_of(se));
@@ -3334,6 +3347,9 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
update_tg_cfs_util(cfs_rq, se, gcfs_rq);
update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);
+ trace_pelt_cfs_tp(cfs_rq);
+ trace_pelt_se_tp(se);
+
return 1;
}
@@ -3486,6 +3502,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
cfs_rq_util_change(cfs_rq, flags);
+
+ trace_pelt_cfs_tp(cfs_rq);
}
/**
@@ -3505,6 +3523,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
cfs_rq_util_change(cfs_rq, 0);
+
+ trace_pelt_cfs_tp(cfs_rq);
}
/*
@@ -4100,7 +4120,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
* least twice that of our own weight (i.e. dont track it
* when there are only lesser-weight tasks around):
*/
- if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
+ if (schedstat_enabled() &&
+ rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
schedstat_set(se->statistics.slice_max,
max((u64)schedstat_val(se->statistics.slice_max),
se->sum_exec_runtime - se->prev_sum_exec_runtime));
@@ -4734,6 +4755,11 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
if (runtime_refresh_within(cfs_b, min_left))
return;
+	/* don't push forward an existing deferred unthrottle */
+ if (cfs_b->slack_started)
+ return;
+ cfs_b->slack_started = true;
+
hrtimer_start(&cfs_b->slack_timer,
ns_to_ktime(cfs_bandwidth_slack_period),
HRTIMER_MODE_REL);
@@ -4787,6 +4813,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/* confirm we're still not at a refresh boundary */
raw_spin_lock_irqsave(&cfs_b->lock, flags);
+ cfs_b->slack_started = false;
if (cfs_b->distribute_running) {
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
return;
@@ -4950,6 +4977,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cfs_b->slack_timer.function = sched_cfs_slack_timer;
cfs_b->distribute_running = 0;
+ cfs_b->slack_started = false;
}
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -5153,8 +5181,10 @@ static inline bool cpu_overutilized(int cpu)
static inline void update_overutilized_status(struct rq *rq)
{
- if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
+ if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
+ trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
+ }
}
#else
static inline void update_overutilized_status(struct rq *rq) { }
@@ -5325,71 +5355,6 @@ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
#ifdef CONFIG_NO_HZ_COMMON
-/*
- * per rq 'load' arrray crap; XXX kill this.
- */
-
-/*
- * The exact cpuload calculated at every tick would be:
- *
- * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
- *
- * If a CPU misses updates for n ticks (as it was idle) and update gets
- * called on the n+1-th tick when CPU may be busy, then we have:
- *
- * load_n = (1 - 1/2^i)^n * load_0
- * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
- *
- * decay_load_missed() below does efficient calculation of
- *
- * load' = (1 - 1/2^i)^n * load
- *
- * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors.
- * This allows us to precompute the above in said factors, thereby allowing the
- * reduction of an arbitrary n in O(log_2 n) steps. (See also
- * fixed_power_int())
- *
- * The calculation is approximated on a 128 point scale.
- */
-#define DEGRADE_SHIFT 7
-
-static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
- { 0, 0, 0, 0, 0, 0, 0, 0 },
- { 64, 32, 8, 0, 0, 0, 0, 0 },
- { 96, 72, 40, 12, 1, 0, 0, 0 },
- { 112, 98, 75, 43, 15, 1, 0, 0 },
- { 120, 112, 98, 76, 45, 16, 2, 0 }
-};
-
-/*
- * Update cpu_load for any missed ticks, due to tickless idle. The backlog
- * would be when CPU is idle and so we just decay the old load without
- * adding any new load.
- */
-static unsigned long
-decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
-{
- int j = 0;
-
- if (!missed_updates)
- return load;
-
- if (missed_updates >= degrade_zero_ticks[idx])
- return 0;
-
- if (idx == 1)
- return load >> missed_updates;
-
- while (missed_updates) {
- if (missed_updates % 2)
- load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
-
- missed_updates >>= 1;
- j++;
- }
- return load;
-}
static struct {
cpumask_var_t idle_cpus_mask;
@@ -5401,234 +5366,11 @@ static struct {
#endif /* CONFIG_NO_HZ_COMMON */
-/**
- * __cpu_load_update - update the rq->cpu_load[] statistics
- * @this_rq: The rq to update statistics for
- * @this_load: The current load
- * @pending_updates: The number of missed updates
- *
- * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
- *
- * This function computes a decaying average:
- *
- * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
- *
- * Because of NOHZ it might not get called on every tick which gives need for
- * the @pending_updates argument.
- *
- * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
- * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
- * = A * (A * load[i]_n-2 + B) + B
- * = A * (A * (A * load[i]_n-3 + B) + B) + B
- * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
- * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
- * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
- * = (1 - 1/2^i)^n * (load[i]_0 - load) + load
- *
- * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
- * any change in load would have resulted in the tick being turned back on.
- *
- * For regular NOHZ, this reduces to:
- *
- * load[i]_n = (1 - 1/2^i)^n * load[i]_0
- *
- * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
- * term.
- */
-static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates)
-{
- unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
- int i, scale;
-
- this_rq->nr_load_updates++;
-
- /* Update our load: */
- this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
- for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
- unsigned long old_load, new_load;
-
- /* scale is effectively 1 << i now, and >> i divides by scale */
-
- old_load = this_rq->cpu_load[i];
-#ifdef CONFIG_NO_HZ_COMMON
- old_load = decay_load_missed(old_load, pending_updates - 1, i);
- if (tickless_load) {
- old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
- /*
- * old_load can never be a negative value because a
- * decayed tickless_load cannot be greater than the
- * original tickless_load.
- */
- old_load += tickless_load;
- }
-#endif
- new_load = this_load;
- /*
- * Round up the averaging division if load is increasing. This
- * prevents us from getting stuck on 9 if the load is 10, for
- * example.
- */
- if (new_load > old_load)
- new_load += scale - 1;
-
- this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
- }
-}
-
-/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(struct rq *rq)
+static unsigned long cpu_runnable_load(struct rq *rq)
{
return cfs_rq_runnable_load_avg(&rq->cfs);
}
-#ifdef CONFIG_NO_HZ_COMMON
-/*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we need to avoid the delta approach from the regular tick when
- * possible since that would seriously skew the load calculation. This is why we
- * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on
- * jiffies deltas for updates happening while in nohz mode (idle ticks, idle
- * loop exit, nohz_idle_balance, nohz full exit...)
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-static void cpu_load_update_nohz(struct rq *this_rq,
- unsigned long curr_jiffies,
- unsigned long load)
-{
- unsigned long pending_updates;
-
- pending_updates = curr_jiffies - this_rq->last_load_update_tick;
- if (pending_updates) {
- this_rq->last_load_update_tick = curr_jiffies;
- /*
- * In the regular NOHZ case, we were idle, this means load 0.
- * In the NOHZ_FULL case, we were non-idle, we should consider
- * its weighted load.
- */
- cpu_load_update(this_rq, load, pending_updates);
- }
-}
-
-/*
- * Called from nohz_idle_balance() to update the load ratings before doing the
- * idle balance.
- */
-static void cpu_load_update_idle(struct rq *this_rq)
-{
- /*
- * bail if there's load or we're actually up-to-date.
- */
- if (weighted_cpuload(this_rq))
- return;
-
- cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
-}
-
-/*
- * Record CPU load on nohz entry so we know the tickless load to account
- * on nohz exit. cpu_load[0] happens then to be updated more frequently
- * than other cpu_load[idx] but it should be fine as cpu_load readers
- * shouldn't rely into synchronized cpu_load[*] updates.
- */
-void cpu_load_update_nohz_start(void)
-{
- struct rq *this_rq = this_rq();
-
- /*
- * This is all lockless but should be fine. If weighted_cpuload changes
- * concurrently we'll exit nohz. And cpu_load write can race with
- * cpu_load_update_idle() but both updater would be writing the same.
- */
- this_rq->cpu_load[0] = weighted_cpuload(this_rq);
-}
-
-/*
- * Account the tickless load in the end of a nohz frame.
- */
-void cpu_load_update_nohz_stop(void)
-{
- unsigned long curr_jiffies = READ_ONCE(jiffies);
- struct rq *this_rq = this_rq();
- unsigned long load;
- struct rq_flags rf;
-
- if (curr_jiffies == this_rq->last_load_update_tick)
- return;
-
- load = weighted_cpuload(this_rq);
- rq_lock(this_rq, &rf);
- update_rq_clock(this_rq);
- cpu_load_update_nohz(this_rq, curr_jiffies, load);
- rq_unlock(this_rq, &rf);
-}
-#else /* !CONFIG_NO_HZ_COMMON */
-static inline void cpu_load_update_nohz(struct rq *this_rq,
- unsigned long curr_jiffies,
- unsigned long load) { }
-#endif /* CONFIG_NO_HZ_COMMON */
-
-static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
-{
-#ifdef CONFIG_NO_HZ_COMMON
- /* See the mess around cpu_load_update_nohz(). */
- this_rq->last_load_update_tick = READ_ONCE(jiffies);
-#endif
- cpu_load_update(this_rq, load, 1);
-}
-
-/*
- * Called from scheduler_tick()
- */
-void cpu_load_update_active(struct rq *this_rq)
-{
- unsigned long load = weighted_cpuload(this_rq);
-
- if (tick_nohz_tick_stopped())
- cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
- else
- cpu_load_update_periodic(this_rq, load);
-}
-
-/*
- * Return a low guess at the load of a migration-source CPU weighted
- * according to the scheduling class and "nice" value.
- *
- * We want to under-estimate the load of migration sources, to
- * balance conservatively.
- */
-static unsigned long source_load(int cpu, int type)
-{
- struct rq *rq = cpu_rq(cpu);
- unsigned long total = weighted_cpuload(rq);
-
- if (type == 0 || !sched_feat(LB_BIAS))
- return total;
-
- return min(rq->cpu_load[type-1], total);
-}
-
-/*
- * Return a high guess at the load of a migration-target CPU weighted
- * according to the scheduling class and "nice" value.
- */
-static unsigned long target_load(int cpu, int type)
-{
- struct rq *rq = cpu_rq(cpu);
- unsigned long total = weighted_cpuload(rq);
-
- if (type == 0 || !sched_feat(LB_BIAS))
- return total;
-
- return max(rq->cpu_load[type-1], total);
-}
-
static unsigned long capacity_of(int cpu)
{
return cpu_rq(cpu)->cpu_capacity;
@@ -5638,7 +5380,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running);
- unsigned long load_avg = weighted_cpuload(rq);
+ unsigned long load_avg = cpu_runnable_load(rq);
if (nr_running)
return load_avg / nr_running;
@@ -5736,7 +5478,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
s64 this_eff_load, prev_eff_load;
unsigned long task_load;
- this_eff_load = target_load(this_cpu, sd->wake_idx);
+ this_eff_load = cpu_runnable_load(cpu_rq(this_cpu));
if (sync) {
unsigned long current_load = task_h_load(current);
@@ -5754,7 +5496,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
this_eff_load *= 100;
this_eff_load *= capacity_of(prev_cpu);
- prev_eff_load = source_load(prev_cpu, sd->wake_idx);
+ prev_eff_load = cpu_runnable_load(cpu_rq(prev_cpu));
prev_eff_load -= task_load;
if (sched_feat(WA_BIAS))
prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
@@ -5815,14 +5557,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
unsigned long this_runnable_load = ULONG_MAX;
unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
unsigned long most_spare = 0, this_spare = 0;
- int load_idx = sd->forkexec_idx;
int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
(sd->imbalance_pct-100) / 100;
- if (sd_flag & SD_BALANCE_WAKE)
- load_idx = sd->wake_idx;
-
do {
unsigned long load, avg_load, runnable_load;
unsigned long spare_cap, max_spare_cap;
@@ -5831,7 +5569,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
/* Skip over this group if it has no CPUs allowed */
if (!cpumask_intersects(sched_group_span(group),
- &p->cpus_allowed))
+ p->cpus_ptr))
continue;
local_group = cpumask_test_cpu(this_cpu,
@@ -5846,12 +5584,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
max_spare_cap = 0;
for_each_cpu(i, sched_group_span(group)) {
- /* Bias balancing toward CPUs of our domain */
- if (local_group)
- load = source_load(i, load_idx);
- else
- load = target_load(i, load_idx);
-
+ load = cpu_runnable_load(cpu_rq(i));
runnable_load += load;
avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
@@ -5963,7 +5696,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
return cpumask_first(sched_group_span(group));
/* Traverse only the allowed CPUs */
- for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
if (available_idle_cpu(i)) {
struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
@@ -5987,7 +5720,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
shallowest_idle_cpu = i;
}
} else if (shallowest_idle_cpu == -1) {
- load = weighted_cpuload(cpu_rq(i));
+ load = cpu_runnable_load(cpu_rq(i));
if (load < min_load) {
min_load = load;
least_loaded_cpu = i;
@@ -6003,7 +5736,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
{
int new_cpu = cpu;
- if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
+ if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
return prev_cpu;
/*
@@ -6120,7 +5853,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
if (!test_idle_cores(target, false))
return -1;
- cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
for_each_cpu_wrap(core, cpus, target) {
bool idle = true;
@@ -6154,7 +5887,7 @@ static int select_idle_smt(struct task_struct *p, int target)
return -1;
for_each_cpu(cpu, cpu_smt_mask(target)) {
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (available_idle_cpu(cpu))
return cpu;
@@ -6189,6 +5922,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
u64 time, cost;
s64 delta;
int cpu, nr = INT_MAX;
+ int this = smp_processor_id();
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
@@ -6212,18 +5946,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
nr = 4;
}
- time = local_clock();
+ time = cpu_clock(this);
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
if (!--nr)
return -1;
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
if (available_idle_cpu(cpu))
break;
}
- time = local_clock() - time;
+ time = cpu_clock(this) - time;
cost = this_sd->avg_scan_cost;
delta = (s64)(time - cost) / 8;
this_sd->avg_scan_cost += delta;
@@ -6254,7 +5988,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
available_idle_cpu(recent_used_cpu) &&
- cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+ cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
/*
* Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake:
@@ -6498,11 +6232,21 @@ static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
static long
compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
{
- long util, max_util, sum_util, energy = 0;
+ unsigned int max_util, util_cfs, cpu_util, cpu_cap;
+ unsigned long sum_util, energy = 0;
+ struct task_struct *tsk;
int cpu;
for (; pd; pd = pd->next) {
+ struct cpumask *pd_mask = perf_domain_span(pd);
+
+ /*
+		 * The energy model mandates that all the CPUs of a performance
+ * domain have the same capacity.
+ */
+ cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
max_util = sum_util = 0;
+
/*
* The capacity state of CPUs of the current rd can be driven by
* CPUs of another rd if they belong to the same performance
@@ -6513,11 +6257,29 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
* it will not appear in its pd list and will not be accounted
* by compute_energy().
*/
- for_each_cpu_and(cpu, perf_domain_span(pd), cpu_online_mask) {
- util = cpu_util_next(cpu, p, dst_cpu);
- util = schedutil_energy_util(cpu, util);
- max_util = max(util, max_util);
- sum_util += util;
+ for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
+ util_cfs = cpu_util_next(cpu, p, dst_cpu);
+
+ /*
+ * Busy time computation: utilization clamping is not
+ * required since the ratio (sum_util / cpu_capacity)
+ * is already enough to scale the EM reported power
+			 * consumption at the (possibly clamped) cpu_capacity.
+ */
+ sum_util += schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+ ENERGY_UTIL, NULL);
+
+ /*
+ * Performance domain frequency: utilization clamping
+ * must be considered since it affects the selection
+ * of the performance domain frequency.
+			 * NOTE: if RT tasks are runnable, the FREQUENCY_UTIL
+			 * utilization can be the maximum OPP by default.
+ */
+ tsk = cpu == dst_cpu ? p : NULL;
+ cpu_util = schedutil_cpu_util(cpu, util_cfs, cpu_cap,
+ FREQUENCY_UTIL, tsk);
+ max_util = max(max_util, cpu_util);
}
energy += em_pd_energy(pd->em_pd, max_util, sum_util);
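compute_energy() now aggregates two views of the same per-CPU utilization: the ENERGY_UTIL values are summed (busy time scales the energy model's reported power), while the FREQUENCY_UTIL values are max-reduced (the hottest CPU selects the performance domain's OPP). A minimal sketch of that aggregation shape, with the per-CPU lookups abstracted into hypothetical input arrays:

/*
 * Sketch only: energy_util[] and freq_util[] stand in for the two
 * schedutil_cpu_util() calls above; the results feed em_pd_energy().
 */
static unsigned long pd_aggregate(const unsigned long *energy_util,
				  const unsigned long *freq_util,
				  int nr_cpus, unsigned long *max_util)
{
	unsigned long sum_util = 0;
	int i;

	*max_util = 0;
	for (i = 0; i < nr_cpus; i++) {
		sum_util += energy_util[i];	/* scales busy time */
		if (freq_util[i] > *max_util)	/* selects the OPP */
			*max_util = freq_util[i];
	}
	return sum_util;
}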
@@ -6600,7 +6362,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
int max_spare_cap_cpu = -1;
for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
- if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ if (!cpumask_test_cpu(cpu, p->cpus_ptr))
continue;
/* Skip CPUs that will be overutilized. */
@@ -6689,7 +6451,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
}
want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) &&
- cpumask_test_cpu(cpu, &p->cpus_allowed);
+ cpumask_test_cpu(cpu, p->cpus_ptr);
}
rcu_read_lock();
@@ -7445,14 +7207,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/*
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
- * 2) cannot be migrated to this CPU due to cpus_allowed, or
+ * 2) cannot be migrated to this CPU due to cpus_ptr, or
* 3) running (obviously), or
* 4) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
- if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) {
+ if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
int cpu;
schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
@@ -7472,7 +7234,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/* Prevent to re-select dst_cpu via env's CPUs: */
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
- if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+ if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
env->flags |= LBF_DST_PINNED;
env->new_dst_cpu = cpu;
break;
@@ -7558,7 +7320,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
static const unsigned int sched_nr_migrate_break = 32;
/*
- * detach_tasks() -- tries to detach up to imbalance weighted load from
+ * detach_tasks() -- tries to detach up to imbalance runnable load from
* busiest_rq, as part of a balancing operation within domain "sd".
*
* Returns number of detached tasks if successful and 0 otherwise.
@@ -7626,7 +7388,7 @@ static int detach_tasks(struct lb_env *env)
/*
* We only want to steal up to the prescribed amount of
- * weighted load.
+ * runnable load.
*/
if (env->imbalance <= 0)
break;
@@ -7695,6 +7457,7 @@ static void attach_tasks(struct lb_env *env)
rq_unlock(env->dst_rq, &rf);
}
+#ifdef CONFIG_NO_HZ_COMMON
static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
{
if (cfs_rq->avg.load_avg)
@@ -7722,6 +7485,19 @@ static inline bool others_have_blocked(struct rq *rq)
return false;
}
+static inline void update_blocked_load_status(struct rq *rq, bool has_blocked)
+{
+ rq->last_blocked_load_update_tick = jiffies;
+
+ if (!has_blocked)
+ rq->has_blocked_load = 0;
+}
+#else
+static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) { return false; }
+static inline bool others_have_blocked(struct rq *rq) { return false; }
+static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {}
+#endif
+
#ifdef CONFIG_FAIR_GROUP_SCHED
static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -7787,11 +7563,7 @@ static void update_blocked_averages(int cpu)
if (others_have_blocked(rq))
done = false;
-#ifdef CONFIG_NO_HZ_COMMON
- rq->last_blocked_load_update_tick = jiffies;
- if (done)
- rq->has_blocked_load = 0;
-#endif
+ update_blocked_load_status(rq, !done);
rq_unlock_irqrestore(rq, &rf);
}
@@ -7857,11 +7629,7 @@ static inline void update_blocked_averages(int cpu)
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
-#ifdef CONFIG_NO_HZ_COMMON
- rq->last_blocked_load_update_tick = jiffies;
- if (!cfs_rq_has_blocked(cfs_rq) && !others_have_blocked(rq))
- rq->has_blocked_load = 0;
-#endif
+ update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
rq_unlock_irqrestore(rq, &rf);
}
@@ -7879,7 +7647,6 @@ static unsigned long task_h_load(struct task_struct *p)
struct sg_lb_stats {
unsigned long avg_load; /* Avg load across the CPUs of the group */
unsigned long group_load; /* Total load over the CPUs of the group */
- unsigned long sum_weighted_load; /* Weighted load of group's tasks */
unsigned long load_per_task;
unsigned long group_capacity;
unsigned long group_util; /* Total utilization of the group */
@@ -7933,38 +7700,10 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
};
}
-/**
- * get_sd_load_idx - Obtain the load index for a given sched domain.
- * @sd: The sched_domain whose load_idx is to be obtained.
- * @idle: The idle status of the CPU for whose sd load_idx is obtained.
- *
- * Return: The load index.
- */
-static inline int get_sd_load_idx(struct sched_domain *sd,
- enum cpu_idle_type idle)
-{
- int load_idx;
-
- switch (idle) {
- case CPU_NOT_IDLE:
- load_idx = sd->busy_idx;
- break;
-
- case CPU_NEWLY_IDLE:
- load_idx = sd->newidle_idx;
- break;
- default:
- load_idx = sd->idle_idx;
- break;
- }
-
- return load_idx;
-}
-
static unsigned long scale_rt_capacity(struct sched_domain *sd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long max = arch_scale_cpu_capacity(sd, cpu);
+ unsigned long max = arch_scale_cpu_capacity(cpu);
unsigned long used, free;
unsigned long irq;
@@ -7989,7 +7728,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
unsigned long capacity = scale_rt_capacity(sd, cpu);
struct sched_group *sdg = sd->groups;
- cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
+ cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
if (!capacity)
capacity = 1;
@@ -8099,7 +7838,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
/*
* Group imbalance indicates (and tries to solve) the problem where balancing
- * groups is inadequate due to ->cpus_allowed constraints.
+ * groups is inadequate due to ->cpus_ptr constraints.
*
* Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
* cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
@@ -8249,9 +7988,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
struct sg_lb_stats *sgs,
int *sg_status)
{
- int local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
- int load_idx = get_sd_load_idx(env->sd, env->idle);
- unsigned long load;
int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
@@ -8262,13 +7998,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
env->flags |= LBF_NOHZ_AGAIN;
- /* Bias balancing toward CPUs of our domain: */
- if (local_group)
- load = target_load(i, load_idx);
- else
- load = source_load(i, load_idx);
-
- sgs->group_load += load;
+ sgs->group_load += cpu_runnable_load(rq);
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
@@ -8283,7 +8013,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->nr_numa_running += rq->nr_numa_running;
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
- sgs->sum_weighted_load += weighted_cpuload(rq);
/*
* No need to call idle_cpu() if nr_running is not 0
*/
@@ -8302,7 +8031,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
if (sgs->sum_nr_running)
- sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
+ sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
sgs->group_weight = group->group_weight;
@@ -8516,8 +8245,12 @@ next_group:
/* Update over-utilization (tipping point, U >= 0) indicator */
WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
+ trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
} else if (sg_status & SG_OVERUTILIZED) {
- WRITE_ONCE(env->dst_rq->rd->overutilized, SG_OVERUTILIZED);
+ struct root_domain *rd = env->dst_rq->rd;
+
+ WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
+ trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
}
}
@@ -8723,7 +8456,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
* find_busiest_group - Returns the busiest group within the sched_domain
* if there is an imbalance.
*
- * Also calculates the amount of weighted load which should be moved
+ * Also calculates the amount of runnable load which should be moved
* to restore balance.
*
* @env: The load balancing environment.
@@ -8768,7 +8501,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
/*
* If the busiest group is imbalanced the below checks don't
* work because they assume all things are equal, which typically
- * isn't true due to cpus_allowed constraints and the like.
+ * isn't true due to cpus_ptr constraints and the like.
*/
if (busiest->group_type == group_imbalanced)
goto force_balance;
@@ -8842,7 +8575,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
int i;
for_each_cpu_and(i, sched_group_span(group), env->cpus) {
- unsigned long capacity, wl;
+ unsigned long capacity, load;
enum fbq_type rt;
rq = cpu_rq(i);
@@ -8896,30 +8629,30 @@ static struct rq *find_busiest_queue(struct lb_env *env,
rq->nr_running == 1)
continue;
- wl = weighted_cpuload(rq);
+ load = cpu_runnable_load(rq);
/*
- * When comparing with imbalance, use weighted_cpuload()
+ * When comparing with imbalance, use cpu_runnable_load()
* which is not scaled with the CPU capacity.
*/
- if (rq->nr_running == 1 && wl > env->imbalance &&
+ if (rq->nr_running == 1 && load > env->imbalance &&
!check_cpu_capacity(rq, env->sd))
continue;
/*
* For the load comparisons with the other CPU's, consider
- * the weighted_cpuload() scaled with the CPU capacity, so
+ * the cpu_runnable_load() scaled with the CPU capacity, so
* that the load can be moved away from the CPU that is
* potentially running at a lower capacity.
*
- * Thus we're looking for max(wl_i / capacity_i), crosswise
+ * Thus we're looking for max(load_i / capacity_i), crosswise
* multiplication to rid ourselves of the division works out
- * to: wl_i * capacity_j > wl_j * capacity_i; where j is
+ * to: load_i * capacity_j > load_j * capacity_i; where j is
* our previous maximum.
*/
- if (wl * busiest_capacity > busiest_load * capacity) {
- busiest_load = wl;
+ if (load * busiest_capacity > busiest_load * capacity) {
+ busiest_load = load;
busiest_capacity = capacity;
busiest = rq;
}
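The crosswise comparison above avoids an integer division: instead of computing load/capacity per CPU, it keeps the inequality load_i/capacity_i > load_j/capacity_j in the equivalent multiplied form. As a concrete check, a load of 300 on capacity 512 beats a load of 400 on capacity 1024 because 300 * 1024 = 307200 > 400 * 512 = 204800, matching 0.586 > 0.391. A self-contained sketch of the predicate:

/* Sketch: is (load/capacity) strictly greater than (busiest_load/busiest_capacity)? */
static inline bool busier_than(unsigned long load, unsigned long capacity,
			       unsigned long busiest_load,
			       unsigned long busiest_capacity)
{
	return load * busiest_capacity > busiest_load * capacity;
}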
@@ -9210,7 +8943,7 @@ more_balance:
* if the curr task on busiest CPU can't be
* moved to this_cpu:
*/
- if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
raw_spin_unlock_irqrestore(&busiest->lock,
flags);
env.flags |= LBF_ALL_PINNED;
@@ -9879,7 +9612,6 @@ static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- cpu_load_update_idle(rq);
rq_unlock_irqrestore(rq, &rf);
if (flags & NOHZ_BALANCE_KICK)
@@ -10690,6 +10422,10 @@ const struct sched_class fair_sched_class = {
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_change_group = task_change_group_fair,
#endif
+
+#ifdef CONFIG_UCLAMP_TASK
+ .uclamp_enabled = 1,
+#endif
};
#ifdef CONFIG_SCHED_DEBUG
@@ -10737,3 +10473,83 @@ __init void init_sched_fair_class(void)
#endif /* SMP */
}
+
+/*
+ * Helper functions to facilitate extracting info from tracepoints.
+ */
+
+const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_SMP
+ return cfs_rq ? &cfs_rq->avg : NULL;
+#else
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg);
+
+char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len)
+{
+ if (!cfs_rq) {
+ if (str)
+ strlcpy(str, "(null)", len);
+ else
+ return NULL;
+ }
+
+ cfs_rq_tg_path(cfs_rq, str, len);
+ return str;
+}
+EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path);
+
+int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq ? cpu_of(rq_of(cfs_rq)) : -1;
+}
+EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu);
+
+const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+ return rq ? &rq->avg_rt : NULL;
+#else
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt);
+
+const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+ return rq ? &rq->avg_dl : NULL;
+#else
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl);
+
+const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ)
+ return rq ? &rq->avg_irq : NULL;
+#else
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq);
+
+int sched_trace_rq_cpu(struct rq *rq)
+{
+ return rq ? cpu_of(rq) : -1;
+}
+EXPORT_SYMBOL_GPL(sched_trace_rq_cpu);
+
+const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
+{
+#ifdef CONFIG_SMP
+ return rd ? rd->span : NULL;
+#else
+ return NULL;
+#endif
+}
+EXPORT_SYMBOL_GPL(sched_trace_rd_span);
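These exported helpers exist so that modules attaching to the new bare PELT tracepoints can decode the otherwise opaque cfs_rq/rq/root_domain pointers without reaching into scheduler internals. A sketch of a consumer module, assuming the register_trace_*() interface that DECLARE_TRACE() generates for pelt_cfs_tp:

#include <linux/module.h>
#include <trace/events/sched.h>

static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
{
	const struct sched_avg *avg = sched_trace_cfs_rq_avg(cfs_rq);
	int cpu = sched_trace_cfs_rq_cpu(cfs_rq);

	if (avg)
		pr_info("cpu%d cfs util_avg=%lu\n", cpu, avg->util_avg);
}

static int __init pelt_probe_init(void)
{
	return register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

static void __exit pelt_probe_exit(void)
{
	unregister_trace_pelt_cfs_tp(probe_pelt_cfs, NULL);
}

module_init(pelt_probe_init);
module_exit(pelt_probe_exit);
MODULE_LICENSE("GPL");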
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 858589b83377..2410db5e9a35 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -39,7 +39,6 @@ SCHED_FEAT(WAKEUP_PREEMPTION, true)
SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, false)
-SCHED_FEAT(LB_BIAS, false)
/*
* Decrement CPU capacity based on time not spent running tasks
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index befce29bd882..a96db50d40e0 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -28,6 +28,8 @@
#include "sched.h"
#include "pelt.h"
+#include <trace/events/sched.h>
+
/*
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
@@ -265,6 +267,7 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
{
if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
+ trace_pelt_se_tp(se);
return 1;
}
@@ -278,6 +281,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se
___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
cfs_se_util_change(&se->avg);
+ trace_pelt_se_tp(se);
return 1;
}
@@ -292,6 +296,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
cfs_rq->curr != NULL)) {
___update_load_avg(&cfs_rq->avg, 1, 1);
+ trace_pelt_cfs_tp(cfs_rq);
return 1;
}
@@ -317,6 +322,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
running)) {
___update_load_avg(&rq->avg_rt, 1, 1);
+ trace_pelt_rt_tp(rq);
return 1;
}
@@ -340,6 +346,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
running)) {
___update_load_avg(&rq->avg_dl, 1, 1);
+ trace_pelt_dl_tp(rq);
return 1;
}
@@ -366,7 +373,7 @@ int update_irq_load_avg(struct rq *rq, u64 running)
* reflect the real amount of computation
*/
running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
- running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+ running = cap_scale(running, arch_scale_cpu_capacity(cpu_of(rq)));
/*
* We know the time that has been used by interrupt since last update
@@ -388,8 +395,10 @@ int update_irq_load_avg(struct rq *rq, u64 running)
1,
1);
- if (ret)
+ if (ret) {
___update_load_avg(&rq->avg_irq, 1, 1);
+ trace_pelt_irq_tp(rq);
+ }
return ret;
}
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 7489d5f56960..afff644da065 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -79,7 +79,7 @@ static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
* Scale the elapsed time to reflect the real amount of
* computation
*/
- delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+ delta = cap_scale(delta, arch_scale_cpu_capacity(cpu_of(rq)));
delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));
rq->clock_pelt += delta;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 1e6b909dca36..a532558a5176 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1614,7 +1614,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
- cpumask_test_cpu(cpu, &p->cpus_allowed))
+ cpumask_test_cpu(cpu, p->cpus_ptr))
return 1;
return 0;
@@ -1751,7 +1751,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
* Also make sure that it wasn't scheduled on its rq.
*/
if (unlikely(task_rq(task) != rq ||
- !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) ||
+ !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
task_running(rq, task) ||
!rt_task(task) ||
!task_on_rq_queued(task))) {
@@ -2400,6 +2400,10 @@ const struct sched_class rt_sched_class = {
.switched_to = switched_to_rt,
.update_curr = update_curr_rt,
+
+#ifdef CONFIG_UCLAMP_TASK
+ .uclamp_enabled = 1,
+#endif
};
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h
index a26473674fb7..c529706bed11 100644
--- a/kernel/sched/sched-pelt.h
+++ b/kernel/sched/sched-pelt.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Generated by Documentation/scheduler/sched-pelt; do not modify. */
-static const u32 runnable_avg_yN_inv[] = {
+static const u32 runnable_avg_yN_inv[] __maybe_unused = {
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b52ed1ada0be..802b1f3405f2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -96,12 +96,6 @@ extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);
-#ifdef CONFIG_SMP
-extern void cpu_load_update_active(struct rq *this_rq);
-#else
-static inline void cpu_load_update_active(struct rq *this_rq) { }
-#endif
-
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -344,8 +338,10 @@ struct cfs_bandwidth {
u64 runtime_expires;
int expires_seq;
- short idle;
- short period_active;
+ u8 idle;
+ u8 period_active;
+ u8 distribute_running;
+ u8 slack_started;
struct hrtimer period_timer;
struct hrtimer slack_timer;
struct list_head throttled_cfs_rq;
@@ -354,8 +350,6 @@ struct cfs_bandwidth {
int nr_periods;
int nr_throttled;
u64 throttled_time;
-
- bool distribute_running;
#endif
};
@@ -797,6 +791,48 @@ extern void rto_push_irq_work_func(struct irq_work *work);
#endif
#endif /* CONFIG_SMP */
+#ifdef CONFIG_UCLAMP_TASK
+/*
+ * struct uclamp_bucket - Utilization clamp bucket
+ * @value: utilization clamp value for tasks on this clamp bucket
+ * @tasks: number of RUNNABLE tasks on this clamp bucket
+ *
+ * Keep track of how many tasks are RUNNABLE for a given utilization
+ * clamp value.
+ */
+struct uclamp_bucket {
+ unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
+ unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
+};
+
+/*
+ * struct uclamp_rq - rq's utilization clamp
+ * @value: currently active clamp values for a rq
+ * @bucket: utilization clamp buckets affecting a rq
+ *
+ * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
+ * A clamp value affects a rq when there is at least one task RUNNABLE
+ * (or actually running) with that value.
+ *
+ * There are up to UCLAMP_CNT possible different clamp values, currently there
+ * are only two: minimum utilization and maximum utilization.
+ *
+ * All utilization clamping values are MAX aggregated, since:
+ * - for util_min: we want to run the CPU at least at the max of the minimum
+ * utilization required by its currently RUNNABLE tasks.
+ * - for util_max: we want to allow the CPU to run up to the max of the
+ * maximum utilization allowed by its currently RUNNABLE tasks.
+ *
+ * Since on each system we expect only a limited number of different
+ * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
+ * the metrics required to compute all the per-rq utilization clamp values.
+ */
+struct uclamp_rq {
+ unsigned int value;
+ struct uclamp_bucket bucket[UCLAMP_BUCKETS];
+};
+#endif /* CONFIG_UCLAMP_TASK */
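struct uclamp_bucket squeezes both fields into one word. A worked example of the packing, assuming the usual SCHED_CAPACITY_SCALE of 1024 on a 64-bit build:

/*
 * bits_per(1024) = ilog2(1024) + 1 = 11
 *
 *   value : 11 bits           -> holds clamp values 0..1024
 *   tasks : 64 - 11 = 53 bits -> RUNNABLE-task refcount
 *
 * Both bit-fields share a single unsigned long, so reading or updating
 * a bucket touches exactly one word.
 */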
+
/*
* This is the main, per-CPU runqueue data structure.
*
@@ -818,8 +854,6 @@ struct rq {
unsigned int nr_preferred_running;
unsigned int numa_migrate_on;
#endif
- #define CPU_LOAD_IDX_MAX 5
- unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
unsigned long last_load_update_tick;
@@ -830,11 +864,16 @@ struct rq {
atomic_t nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */
- /* capture load from *all* tasks on this CPU: */
- struct load_weight load;
unsigned long nr_load_updates;
u64 nr_switches;
+#ifdef CONFIG_UCLAMP_TASK
+ /* Utilization clamp values based on CPU's RUNNABLE tasks */
+ struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
+ unsigned int uclamp_flags;
+#define UCLAMP_FLAG_IDLE 0x01
+#endif
+
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
@@ -1649,6 +1688,10 @@ extern const u32 sched_prio_to_wmult[40];
struct sched_class {
const struct sched_class *next;
+#ifdef CONFIG_UCLAMP_TASK
+ int uclamp_enabled;
+#endif
+
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
@@ -2222,6 +2265,48 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
+#ifdef CONFIG_UCLAMP_TASK
+unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id);
+
+static __always_inline
+unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
+ struct task_struct *p)
+{
+ unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+ unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+
+ if (p) {
+ min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
+ max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
+ }
+
+ /*
+ * Since CPU's {min,max}_util clamps are MAX aggregated considering
+ * RUNNABLE tasks with _different_ clamps, we can end up with an
+ * inversion. Fix it now when the clamps are applied.
+ */
+ if (unlikely(min_util >= max_util))
+ return min_util;
+
+ return clamp(util, min_util, max_util);
+}
+
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+ return uclamp_util_with(rq, util, NULL);
+}
+#else /* CONFIG_UCLAMP_TASK */
+static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
+ struct task_struct *p)
+{
+ return util;
+}
+static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+{
+ return util;
+}
+#endif /* CONFIG_UCLAMP_TASK */
+
#ifdef arch_scale_freq_capacity
# ifndef arch_scale_freq_invariant
# define arch_scale_freq_invariant() true
@@ -2237,7 +2322,6 @@ static inline unsigned long capacity_orig_of(int cpu)
}
#endif
-#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
/**
* enum schedutil_type - CPU utilization type
* @FREQUENCY_UTIL: Utilization used to select frequency
@@ -2253,15 +2337,11 @@ enum schedutil_type {
ENERGY_UTIL,
};
-unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
- unsigned long max, enum schedutil_type type);
-
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
-{
- unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
- return schedutil_freq_util(cpu, cfs, max, ENERGY_UTIL);
-}
+unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ unsigned long max, enum schedutil_type type,
+ struct task_struct *p);
static inline unsigned long cpu_bw_dl(struct rq *rq)
{
@@ -2290,11 +2370,13 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
return READ_ONCE(rq->avg_rt.util_avg);
}
#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
-static inline unsigned long schedutil_energy_util(int cpu, unsigned long cfs)
+static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
+ unsigned long max, enum schedutil_type type,
+ struct task_struct *p)
{
- return cfs;
+ return 0;
}
-#endif
+#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f53f89df837d..f751ce0b783e 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1344,11 +1344,6 @@ sd_init(struct sched_domain_topology_level *tl,
.imbalance_pct = 125,
.cache_nice_tries = 0,
- .busy_idx = 0,
- .idle_idx = 0,
- .newidle_idx = 0,
- .wake_idx = 0,
- .forkexec_idx = 0,
.flags = 1*SD_LOAD_BALANCE
| 1*SD_BALANCE_NEWIDLE
@@ -1400,13 +1395,10 @@ sd_init(struct sched_domain_topology_level *tl,
} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
sd->imbalance_pct = 117;
sd->cache_nice_tries = 1;
- sd->busy_idx = 2;
#ifdef CONFIG_NUMA
} else if (sd->flags & SD_NUMA) {
sd->cache_nice_tries = 2;
- sd->busy_idx = 3;
- sd->idle_idx = 2;
sd->flags &= ~SD_PREFER_SIBLING;
sd->flags |= SD_SERIALIZE;
@@ -1419,8 +1411,6 @@ sd_init(struct sched_domain_topology_level *tl,
#endif
} else {
sd->cache_nice_tries = 1;
- sd->busy_idx = 2;
- sd->idle_idx = 1;
}
/*
@@ -1884,10 +1874,10 @@ static struct sched_domain_topology_level
unsigned long cap;
/* Is there any asymmetry? */
- cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
+ cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
for_each_cpu(i, cpu_map) {
- if (arch_scale_cpu_capacity(NULL, i) != cap) {
+ if (arch_scale_cpu_capacity(i) != cap) {
asym = true;
break;
}
@@ -1902,7 +1892,7 @@ static struct sched_domain_topology_level
* to everyone.
*/
for_each_cpu(i, cpu_map) {
- unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
+ unsigned long max_capacity = arch_scale_cpu_capacity(i);
int tl_id = 0;
for_each_sd_topology(tl) {
@@ -1912,7 +1902,7 @@ static struct sched_domain_topology_level
for_each_cpu_and(j, tl->mask(i), cpu_map) {
unsigned long capacity;
- capacity = arch_scale_cpu_capacity(NULL, j);
+ capacity = arch_scale_cpu_capacity(j);
if (capacity <= max_capacity)
continue;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index fa0f9adfb752..c1e566a114ca 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -118,16 +118,12 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
bookmark.func = NULL;
INIT_LIST_HEAD(&bookmark.entry);
- spin_lock_irqsave(&wq_head->lock, flags);
- nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
- spin_unlock_irqrestore(&wq_head->lock, flags);
-
- while (bookmark.flags & WQ_FLAG_BOOKMARK) {
+ do {
spin_lock_irqsave(&wq_head->lock, flags);
nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
wake_flags, key, &bookmark);
spin_unlock_irqrestore(&wq_head->lock, flags);
- }
+ } while (bookmark.flags & WQ_FLAG_BOOKMARK);
}
/**
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 811b4a86cdf6..dba52a7db5e8 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -609,7 +609,7 @@ static void seccomp_send_sigsys(int syscall, int reason)
{
struct kernel_siginfo info;
seccomp_init_siginfo(&info, syscall, reason);
- force_sig_info(SIGSYS, &info, current);
+ force_sig_info(&info);
}
#endif /* CONFIG_SECCOMP_FILTER */
diff --git a/kernel/signal.c b/kernel/signal.c
index edf8915ddd54..91cb8ca41954 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -45,6 +45,7 @@
#include <linux/posix-timers.h>
#include <linux/livepatch.h>
#include <linux/cgroup.h>
+#include <linux/audit.h>
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
@@ -54,7 +55,6 @@
#include <asm/unistd.h>
#include <asm/siginfo.h>
#include <asm/cacheflush.h>
-#include "audit.h" /* audit_signal_info() */
/*
* SLAB caches for signal bits.
@@ -1057,29 +1057,8 @@ static inline bool legacy_queue(struct sigpending *signals, int sig)
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
-#ifdef CONFIG_USER_NS
-static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
-{
- if (current_user_ns() == task_cred_xxx(t, user_ns))
- return;
-
- if (SI_FROMKERNEL(info))
- return;
-
- rcu_read_lock();
- info->si_uid = from_kuid_munged(task_cred_xxx(t, user_ns),
- make_kuid(current_user_ns(), info->si_uid));
- rcu_read_unlock();
-}
-#else
-static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
-{
- return;
-}
-#endif
-
static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
- enum pid_type type, int from_ancestor_ns)
+ enum pid_type type, bool force)
{
struct sigpending *pending;
struct sigqueue *q;
@@ -1089,8 +1068,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
assert_spin_locked(&t->sighand->siglock);
result = TRACE_SIGNAL_IGNORED;
- if (!prepare_signal(sig, t,
- from_ancestor_ns || (info == SEND_SIG_PRIV)))
+ if (!prepare_signal(sig, t, force))
goto ret;
pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
@@ -1135,7 +1113,11 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
q->info.si_code = SI_USER;
q->info.si_pid = task_tgid_nr_ns(current,
task_active_pid_ns(t));
- q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ rcu_read_lock();
+ q->info.si_uid =
+ from_kuid_munged(task_cred_xxx(t, user_ns),
+ current_uid());
+ rcu_read_unlock();
break;
case (unsigned long) SEND_SIG_PRIV:
clear_siginfo(&q->info);
@@ -1147,30 +1129,24 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
break;
default:
copy_siginfo(&q->info, info);
- if (from_ancestor_ns)
- q->info.si_pid = 0;
break;
}
-
- userns_fixup_signal_uid(&q->info, t);
-
- } else if (!is_si_special(info)) {
- if (sig >= SIGRTMIN && info->si_code != SI_USER) {
- /*
- * Queue overflow, abort. We may abort if the
- * signal was rt and sent by user using something
- * other than kill().
- */
- result = TRACE_SIGNAL_OVERFLOW_FAIL;
- ret = -EAGAIN;
- goto ret;
- } else {
- /*
- * This is a silent loss of information. We still
- * send the signal, but the *info bits are lost.
- */
- result = TRACE_SIGNAL_LOSE_INFO;
- }
+ } else if (!is_si_special(info) &&
+ sig >= SIGRTMIN && info->si_code != SI_USER) {
+ /*
+ * Queue overflow, abort. We may abort if the
+ * signal was rt and sent by user using something
+ * other than kill().
+ */
+ result = TRACE_SIGNAL_OVERFLOW_FAIL;
+ ret = -EAGAIN;
+ goto ret;
+ } else {
+ /*
+ * This is a silent loss of information. We still
+ * send the signal, but the *info bits are lost.
+ */
+ result = TRACE_SIGNAL_LOSE_INFO;
}
out_set:
@@ -1197,17 +1173,62 @@ ret:
return ret;
}
+static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
+{
+ bool ret = false;
+ switch (siginfo_layout(info->si_signo, info->si_code)) {
+ case SIL_KILL:
+ case SIL_CHLD:
+ case SIL_RT:
+ ret = true;
+ break;
+ case SIL_TIMER:
+ case SIL_POLL:
+ case SIL_FAULT:
+ case SIL_FAULT_MCEERR:
+ case SIL_FAULT_BNDERR:
+ case SIL_FAULT_PKUERR:
+ case SIL_SYS:
+ ret = false;
+ break;
+ }
+ return ret;
+}
+
static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
enum pid_type type)
{
- int from_ancestor_ns = 0;
+ /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */
+ bool force = false;
-#ifdef CONFIG_PID_NS
- from_ancestor_ns = si_fromuser(info) &&
- !task_pid_nr_ns(current, task_active_pid_ns(t));
-#endif
+ if (info == SEND_SIG_NOINFO) {
+ /* Force if sent from an ancestor pid namespace */
+ force = !task_pid_nr_ns(current, task_active_pid_ns(t));
+ } else if (info == SEND_SIG_PRIV) {
+ /* Don't ignore kernel generated signals */
+ force = true;
+ } else if (has_si_pid_and_uid(info)) {
+		/* SIGKILL and SIGSTOP are special or have ids */
+ struct user_namespace *t_user_ns;
+
+ rcu_read_lock();
+ t_user_ns = task_cred_xxx(t, user_ns);
+ if (current_user_ns() != t_user_ns) {
+ kuid_t uid = make_kuid(current_user_ns(), info->si_uid);
+ info->si_uid = from_kuid_munged(t_user_ns, uid);
+ }
+ rcu_read_unlock();
- return __send_signal(sig, info, t, type, from_ancestor_ns);
+ /* A kernel generated signal? */
+ force = (info->si_code == SI_KERNEL);
+
+ /* From an ancestor pid namespace? */
+ if (!task_pid_nr_ns(current, task_active_pid_ns(t))) {
+ info->si_pid = 0;
+ force = true;
+ }
+ }
+ return __send_signal(sig, info, t, type, force);
}
static void print_fatal_signal(int signr)
@@ -1274,12 +1295,13 @@ int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p
* We don't want to have recursive SIGSEGV's etc, for example,
* that is why we also clear SIGNAL_UNKILLABLE.
*/
-int
-force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t)
+static int
+force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t)
{
unsigned long int flags;
int ret, blocked, ignored;
struct k_sigaction *action;
+ int sig = info->si_signo;
spin_lock_irqsave(&t->sighand->siglock, flags);
action = &t->sighand->action[sig-1];
@@ -1304,6 +1326,11 @@ force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t)
return ret;
}
+int force_sig_info(struct kernel_siginfo *info)
+{
+ return force_sig_info_to_task(info, current);
+}
+
/*
* Nuke all other threads in the group.
*/
@@ -1440,13 +1467,44 @@ static inline bool kill_as_cred_perm(const struct cred *cred,
uid_eq(cred->uid, pcred->uid);
}
-/* like kill_pid_info(), but doesn't use uid/euid of "current" */
-int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid,
- const struct cred *cred)
+/*
+ * The usb asyncio usage of siginfo is wrong. The glibc support
+ * for asyncio which uses SI_ASYNCIO assumes the layout is SIL_RT.
+ * AKA after the generic fields:
+ * kernel_pid_t si_pid;
+ * kernel_uid32_t si_uid;
+ * sigval_t si_value;
+ *
+ * Unfortunately when usb generates SI_ASYNCIO it assumes the layout
+ * after the generic fields is:
+ * void __user *si_addr;
+ *
+ * This is a practical problem when there is a 64bit big endian kernel
+ * and a 32bit userspace. As the 32bit address will be encoded in the
+ * low 32bits of the pointer, those low 32bits will be stored at a
+ * higher address than they appear in a 32bit pointer. So userspace
+ * will not see the address it was expecting for its completions.
+ *
+ * There is nothing in the encoding that can allow
+ * copy_siginfo_to_user32 to detect this confusion of formats, so
+ * handle this by requiring the caller of kill_pid_usb_asyncio to
+ * notice when this situation takes place and to store the 32bit
+ * pointer in sival_int, instead of sival_ptr, of the sigval_t addr
+ * parameter.
+ */
+int kill_pid_usb_asyncio(int sig, int errno, sigval_t addr,
+ struct pid *pid, const struct cred *cred)
{
- int ret = -EINVAL;
+ struct kernel_siginfo info;
struct task_struct *p;
unsigned long flags;
+ int ret = -EINVAL;
+
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = errno;
+ info.si_code = SI_ASYNCIO;
+ *((sigval_t *)&info.si_pid) = addr;
if (!valid_signal(sig))
return ret;
@@ -1457,17 +1515,17 @@ int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid,
ret = -ESRCH;
goto out_unlock;
}
- if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) {
+ if (!kill_as_cred_perm(cred, p)) {
ret = -EPERM;
goto out_unlock;
}
- ret = security_task_kill(p, info, sig, cred);
+ ret = security_task_kill(p, &info, sig, cred);
if (ret)
goto out_unlock;
if (sig) {
if (lock_task_sighand(p, &flags)) {
- ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0);
+ ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false);
unlock_task_sighand(p, &flags);
} else
ret = -ESRCH;
@@ -1476,7 +1534,7 @@ out_unlock:
rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL_GPL(kill_pid_info_as_cred);
+EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio);
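Per the layout discussion above, a caller signalling a 32-bit process from a 64-bit kernel has to route the completion pointer through sival_int so its low 32 bits land where a 32-bit sigval_t expects them. A sketch of the caller convention (whether the target expects a 32-bit layout is shown here as a hypothetical flag):

static int signal_completion(int signr, void __user *context,
			     struct pid *pid, const struct cred *cred,
			     bool compat_target)
{
	sigval_t addr;

	if (compat_target)
		addr.sival_int = (int)(unsigned long)context;
	else
		addr.sival_ptr = context;

	return kill_pid_usb_asyncio(signr, 0, addr, pid, cred);
}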
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
@@ -1552,9 +1610,17 @@ send_sig(int sig, struct task_struct *p, int priv)
}
EXPORT_SYMBOL(send_sig);
-void force_sig(int sig, struct task_struct *p)
+void force_sig(int sig)
{
- force_sig_info(sig, SEND_SIG_PRIV, p);
+ struct kernel_siginfo info;
+
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 0;
+ info.si_uid = 0;
+ force_sig_info(&info);
}
EXPORT_SYMBOL(force_sig);
@@ -1564,18 +1630,20 @@ EXPORT_SYMBOL(force_sig);
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
-void force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig)
{
+ struct task_struct *p = current;
+
if (sig == SIGSEGV) {
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
- force_sig(SIGSEGV, p);
+ force_sig(SIGSEGV);
}
-int force_sig_fault(int sig, int code, void __user *addr
+int force_sig_fault_to_task(int sig, int code, void __user *addr
___ARCH_SI_TRAPNO(int trapno)
___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
, struct task_struct *t)
@@ -1595,7 +1663,16 @@ int force_sig_fault(int sig, int code, void __user *addr
info.si_flags = flags;
info.si_isr = isr;
#endif
- return force_sig_info(info.si_signo, &info, t);
+ return force_sig_info_to_task(&info, t);
+}
+
+int force_sig_fault(int sig, int code, void __user *addr
+ ___ARCH_SI_TRAPNO(int trapno)
+ ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr))
+{
+ return force_sig_fault_to_task(sig, code, addr
+ ___ARCH_SI_TRAPNO(trapno)
+ ___ARCH_SI_IA64(imm, flags, isr), current);
}
int send_sig_fault(int sig, int code, void __user *addr
@@ -1621,7 +1698,7 @@ int send_sig_fault(int sig, int code, void __user *addr
return send_sig_info(info.si_signo, &info, t);
}
-int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
+int force_sig_mceerr(int code, void __user *addr, short lsb)
{
struct kernel_siginfo info;
@@ -1632,7 +1709,7 @@ int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct
info.si_code = code;
info.si_addr = addr;
info.si_addr_lsb = lsb;
- return force_sig_info(info.si_signo, &info, t);
+ return force_sig_info(&info);
}
int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
@@ -1661,7 +1738,7 @@ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
info.si_addr = addr;
info.si_lower = lower;
info.si_upper = upper;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
#ifdef SEGV_PKUERR
@@ -1675,7 +1752,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
info.si_code = SEGV_PKUERR;
info.si_addr = addr;
info.si_pkey = pkey;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
#endif
@@ -1691,7 +1768,7 @@ int force_sig_ptrace_errno_trap(int errno, void __user *addr)
info.si_errno = errno;
info.si_code = TRAP_HWBKPT;
info.si_addr = addr;
- return force_sig_info(info.si_signo, &info, current);
+ return force_sig_info(&info);
}
int kill_pgrp(struct pid *pid, int sig, int priv)
@@ -2676,7 +2753,7 @@ static void signal_delivered(struct ksignal *ksig, int stepping)
void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
{
if (failed)
- force_sigsegv(ksig->sig, current);
+ force_sigsegv(ksig->sig);
else
signal_delivered(ksig, stepping);
}
@@ -4477,6 +4554,28 @@ static inline void siginfo_buildtime_checks(void)
CHECK_OFFSET(si_syscall);
CHECK_OFFSET(si_arch);
#undef CHECK_OFFSET
+
+ /* usb asyncio */
+ BUILD_BUG_ON(offsetof(struct siginfo, si_pid) !=
+ offsetof(struct siginfo, si_addr));
+ if (sizeof(int) == sizeof(void __user *)) {
+ BUILD_BUG_ON(sizeof_field(struct siginfo, si_pid) !=
+ sizeof(void __user *));
+ } else {
+ BUILD_BUG_ON((sizeof_field(struct siginfo, si_pid) +
+ sizeof_field(struct siginfo, si_uid)) !=
+ sizeof(void __user *));
+ BUILD_BUG_ON(offsetofend(struct siginfo, si_pid) !=
+ offsetof(struct siginfo, si_uid));
+ }
+#ifdef CONFIG_COMPAT
+ BUILD_BUG_ON(offsetof(struct compat_siginfo, si_pid) !=
+ offsetof(struct compat_siginfo, si_addr));
+ BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
+ sizeof(compat_uptr_t));
+ BUILD_BUG_ON(sizeof_field(struct compat_siginfo, si_pid) !=
+ sizeof_field(struct siginfo, si_pid));
+#endif
}
void __init signals_init(void)
diff --git a/kernel/smp.c b/kernel/smp.c
index d155374632eb..616d4d114847 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -34,7 +34,7 @@ struct call_function_data {
cpumask_var_t cpumask_ipi;
};
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
+static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
@@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many);
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function(smp_call_func_t func, void *info, int wait)
+void smp_call_function(smp_call_func_t func, void *info, int wait)
{
preempt_disable();
smp_call_function_many(cpu_online_mask, func, info, wait);
preempt_enable();
-
- return 0;
}
EXPORT_SYMBOL(smp_call_function);
@@ -594,18 +592,16 @@ void __init smp_init(void)
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
-int on_each_cpu(void (*func) (void *info), void *info, int wait)
+void on_each_cpu(void (*func) (void *info), void *info, int wait)
{
unsigned long flags;
- int ret = 0;
preempt_disable();
- ret = smp_call_function(func, info, wait);
+ smp_call_function(func, info, wait);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
preempt_enable();
- return ret;
}
EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a6b81c6b6bff..0427a86743a4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -649,7 +649,7 @@ static int takeover_tasklets(unsigned int cpu)
/* Find end, append list for that CPU. */
if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
- this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
+ __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
per_cpu(tasklet_vec, cpu).head = NULL;
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2b5a6754646f..b4f83f7bdf86 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -177,12 +177,18 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
+void __weak stop_machine_yield(const struct cpumask *cpumask)
+{
+ cpu_relax();
+}
+
/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
struct multi_stop_data *msdata = data;
enum multi_stop_state curstate = MULTI_STOP_NONE;
int cpu = smp_processor_id(), err = 0;
+ const struct cpumask *cpumask;
unsigned long flags;
bool is_active;
@@ -192,15 +198,18 @@ static int multi_cpu_stop(void *data)
*/
local_save_flags(flags);
- if (!msdata->active_cpus)
- is_active = cpu == cpumask_first(cpu_online_mask);
- else
- is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+ if (!msdata->active_cpus) {
+ cpumask = cpu_online_mask;
+ is_active = cpu == cpumask_first(cpumask);
+ } else {
+ cpumask = msdata->active_cpus;
+ is_active = cpumask_test_cpu(cpu, cpumask);
+ }
/* Simple state machine */
do {
/* Chill out and ensure we re-read multi_stop_state. */
- cpu_relax_yield();
+ stop_machine_yield(cpumask);
if (msdata->state != curstate) {
curstate = msdata->state;
switch (curstate) {
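stop_machine_yield() is a new __weak hook: the generic default above just cpu_relax()es, but an architecture can override it, for instance to donate the physical CPU to another member of the stop crew while spinning under a hypervisor. A sketch of such an override, with hypervisor_yield_to() as a hypothetical arch primitive:

void stop_machine_yield(const struct cpumask *cpumask)
{
	int target = cpumask_any_but(cpumask, smp_processor_id());

	if (target < nr_cpu_ids)
		hypervisor_yield_to(target);	/* hypothetical arch primitive */
	else
		cpu_relax();
}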
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1beca96fb625..1c1ad1e14f21 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -452,6 +452,22 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rr_handler,
},
+#ifdef CONFIG_UCLAMP_TASK
+ {
+ .procname = "sched_util_clamp_min",
+ .data = &sysctl_sched_uclamp_util_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+ {
+ .procname = "sched_util_clamp_max",
+ .data = &sysctl_sched_uclamp_util_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sysctl_sched_uclamp_handler,
+ },
+#endif
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f1e46f338a9c..1867044800bb 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -16,5 +16,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
+obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0519a8805aab..57518efc3810 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -233,7 +233,6 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
/**
* alarmtimer_suspend - Suspend time callback
* @dev: unused
- * @state: unused
*
* When we are going into suspend, we look through the bases
* to see which is the soonest timer to expire. We then
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3bcc19ceb073..fff5f64981c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -105,12 +105,12 @@ static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
-static void inline clocksource_watchdog_lock(unsigned long *flags)
+static inline void clocksource_watchdog_lock(unsigned long *flags)
{
spin_lock_irqsave(&watchdog_lock, *flags);
}
-static void inline clocksource_watchdog_unlock(unsigned long *flags)
+static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff23c1f9..5ee77f1a8a92 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -30,7 +30,6 @@
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
-#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
@@ -1115,9 +1114,10 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
* @timer: hrtimer to stop
*
* Returns:
- * 0 when the timer was not active
- * 1 when the timer was active
- * -1 when the timer is currently executing the callback function and
+ *
+ * * 0 when the timer was not active
+ * * 1 when the timer was active
+ * * -1 when the timer is currently executing the callback function and
* cannot be stopped
*/
int hrtimer_try_to_cancel(struct hrtimer *timer)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8de4f789dc1b..65eb796610dc 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -43,6 +43,7 @@ static u64 tick_length_base;
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
+#define MAX_TAI_OFFSET 100000
/*
* phase-lock loop variables
@@ -691,7 +692,8 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant >= 0)
+ if (txc->modes & ADJ_TAI &&
+ txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 29176635991f..d7f2d91acdac 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -980,23 +980,16 @@ retry_delete:
*/
static void itimer_delete(struct k_itimer *timer)
{
- unsigned long flags;
-
retry_delete:
- spin_lock_irqsave(&timer->it_lock, flags);
+ spin_lock_irq(&timer->it_lock);
if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
goto retry_delete;
}
list_del(&timer->list);
- /*
- * This keeps any tasks waiting on the spin lock from thinking
- * they got something (see the lock code above).
- */
- timer->it_signal = NULL;
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
release_posix_timer(timer, IT_ID_SET);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f4ee1a3428ae..be9707f68024 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -782,7 +782,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
*/
if (!ts->tick_stopped) {
calc_load_nohz_start();
- cpu_load_update_nohz_start();
quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -829,7 +828,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- cpu_load_update_nohz_stop();
/*
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 7f7d6914ddd5..5c54ca632d08 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -251,6 +251,10 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
if (tv) {
if (compat_get_timeval(&user_tv, tv))
return -EFAULT;
+
+ if (!timeval_valid(&user_tv))
+ return -EINVAL;
+
new_ts.tv_sec = user_tv.tv_sec;
new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b726bab4bd..d911c8470149 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -819,7 +819,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
} while (read_seqcount_retry(&tk_core.seq, seq));
- return base + nsecs;
+ return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 98ba50dcb1b2..acb326f5f50a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -282,23 +282,6 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
SEQ_printf(m, "\n");
}
-static int timer_list_show(struct seq_file *m, void *v)
-{
- struct timer_list_iter *iter = v;
-
- if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, iter->now);
- else if (!iter->second_pass)
- print_cpu(m, iter->cpu, iter->now);
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
- else if (iter->cpu == -1 && iter->second_pass)
- timer_list_show_tickdevices_header(m);
- else
- print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
-#endif
- return 0;
-}
-
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
@@ -317,6 +300,24 @@ void sysrq_timer_list_show(void)
return;
}
+#ifdef CONFIG_PROC_FS
+static int timer_list_show(struct seq_file *m, void *v)
+{
+ struct timer_list_iter *iter = v;
+
+ if (iter->cpu == -1 && !iter->second_pass)
+ timer_list_header(m, iter->now);
+ else if (!iter->second_pass)
+ print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ else if (iter->cpu == -1 && iter->second_pass)
+ timer_list_show_tickdevices_header(m);
+ else
+ print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+ return 0;
+}
+
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
@@ -376,3 +377,4 @@ static int __init init_timer_list_procfs(void)
return 0;
}
__initcall(init_timer_list_procfs);
+#endif
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
new file mode 100644
index 000000000000..a80893180826
--- /dev/null
+++ b/kernel/time/vsyscall.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 ARM Ltd.
+ *
+ * Generic implementation of update_vsyscall and update_vsyscall_tz.
+ *
+ * Based on the x86 specific implementation.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
+
+static inline void update_vdso_data(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+ vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
+ vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
+ vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
+ vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+ vdata[CS_RAW].mask = tk->tkr_raw.mask;
+ vdata[CS_RAW].mult = tk->tkr_raw.mult;
+ vdata[CS_RAW].shift = tk->tkr_raw.shift;
+
+ /* CLOCK_REALTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /* CLOCK_MONOTONIC */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
+ /* CLOCK_BOOTTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
+ ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_TAI */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
+ vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /*
+ * Read without the seqlock held by clock_getres().
+ * Note: No need to have a second copy.
+ */
+ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ if (__arch_update_vdso_data()) {
+ /*
+ * Some architectures might want to skip the update of the
+ * data page.
+ */
+ return;
+ }
+
+ /* copy vsyscall data */
+ vdso_write_begin(vdata);
+
+ vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
+ vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
+
+ /* CLOCK_REALTIME_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+
+ /* CLOCK_MONOTONIC_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec = nsec + tk->wall_to_monotonic.tv_nsec;
+ while (nsec >= NSEC_PER_SEC) {
+ nsec = nsec - NSEC_PER_SEC;
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ if (__arch_use_vsyscall(vdata))
+ update_vdso_data(vdata, tk);
+
+ __arch_update_vsyscall(vdata, tk);
+
+ vdso_write_end(vdata);
+
+ __arch_sync_vdso_data(vdata);
+}
+
+void update_vsyscall_tz(void)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+
+ if (__arch_use_vsyscall(vdata)) {
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
+ }
+
+ __arch_sync_vdso_data(vdata);
+}
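The fields written by update_vdso_data() are only half the contract; the userspace read side reverses them. A sketch of the high-resolution reader implied by this layout, following the generic vDSO pattern (read_cycles() is a stand-in for the architecture's counter read, and vdso_read_begin()/vdso_read_retry() pair with the vdso_write_begin()/vdso_write_end() calls above):

static int do_hres(const struct vdso_data *vd, clockid_t clk,
		   struct __kernel_timespec *ts)
{
	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
	u64 cycles, sec, ns;
	u32 seq;

	do {
		seq = vdso_read_begin(vd);
		cycles = read_cycles();				/* hypothetical */
		ns = vdso_ts->nsec;
		ns += ((cycles - vd->cycle_last) & vd->mask) * vd->mult;
		ns >>= vd->shift;	/* nsec above is kept left-shifted */
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;
	return 0;
}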
diff --git a/kernel/torture.c b/kernel/torture.c
index 17b2be9bde12..a8d9bdfba7c3 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -570,6 +570,7 @@ static void torture_shutdown_cleanup(void)
static struct task_struct *stutter_task;
static int stutter_pause_test;
static int stutter;
+static int stutter_gap;
/*
* Block until the stutter interval ends. This must be called periodically
@@ -578,10 +579,12 @@ static int stutter;
bool stutter_wait(const char *title)
{
int spt;
+ bool ret = false;
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
+ ret = true;
if (spt == 1) {
schedule_timeout_interruptible(1);
} else if (spt == 2) {
@@ -592,7 +595,7 @@ bool stutter_wait(const char *title)
}
torture_shutdown_absorb(title);
}
- return !!spt;
+ return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
@@ -602,17 +605,24 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
+ int wtime;
+
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
- WRITE_ONCE(stutter_pause_test, 1);
- schedule_timeout_interruptible(stutter - 1);
+ wtime = stutter;
+ if (stutter > HZ + 1) {
+ WRITE_ONCE(stutter_pause_test, 1);
+ wtime = stutter - HZ - 1;
+ schedule_timeout_interruptible(wtime);
+ wtime = HZ + 1;
+ }
WRITE_ONCE(stutter_pause_test, 2);
- schedule_timeout_interruptible(1);
+ schedule_timeout_interruptible(wtime);
}
WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
- schedule_timeout_interruptible(stutter);
+ schedule_timeout_interruptible(stutter_gap);
torture_shutdown_absorb("torture_stutter");
} while (!torture_must_stop());
torture_kthread_stopping("torture_stutter");
@@ -622,9 +632,10 @@ static int torture_stutter(void *arg)
/*
* Initialize and kick off the torture_stutter kthread.
*/
-int torture_stutter_init(const int s)
+int torture_stutter_init(const int s, const int sgap)
{
stutter = s;
+ stutter_gap = sgap;
return torture_create_kthread(torture_stutter, NULL, stutter_task);
}
EXPORT_SYMBOL_GPL(torture_stutter_init);
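With the extra parameter, callers now choose the busy interval and the quiet gap independently instead of reusing the stutter period for both. A hedged example of an updated call site (variable names are illustrative, not taken from this patch):

	/* Hypothetical caller of the new two-argument initializer. */
	static int __init example_torture_init(void)
	{
		int stutter = 5;	/* seconds of stuttering, e.g. a module param */
		int stutter_gap = 10;	/* seconds between stutter intervals */

		/* torture_stutter_init() takes both intervals in jiffies. */
		return torture_stutter_init(stutter * HZ, stutter_gap * HZ);
	}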
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..576c41644e77 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,7 +34,6 @@
#include <linux/hash.h>
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
-#include <linux/memory.h>
#include <trace/events/sched.h>
@@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command)
{
int ret;
- mutex_lock(&text_mutex);
-
ret = ftrace_arch_code_modify_prepare();
FTRACE_WARN_ON(ret);
if (ret)
- goto out_unlock;
+ return;
/*
* By default we use stop_machine() to modify the code.
@@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command)
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
-
-out_unlock:
- mutex_unlock(&text_mutex);
}
static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod)
struct ftrace_page *pg;
mutex_lock(&ftrace_lock);
- mutex_lock(&text_mutex);
if (ftrace_disabled)
goto out_unlock;
@@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod)
ftrace_arch_code_modify_post_process();
out_unlock:
- mutex_unlock(&text_mutex);
mutex_unlock(&ftrace_lock);
process_cached_mods(mod->name);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 83e08b78dbee..c3aabb576fe5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
break;
}
#endif
- if (!tr->allocated_snapshot) {
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+ &tr->trace_buffer, iter->cpu_file);
+ else
ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
local_irq_disable();
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
@@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file,
return count;
}
+static int tracing_err_log_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ return 0;
+}
+
static const struct file_operations tracing_err_log_fops = {
.open = tracing_err_log_open,
.write = tracing_err_log_write,
.read = seq_read,
.llseek = seq_lseek,
- .release = tracing_release_generic_tr,
+ .release = tracing_err_log_release,
};
static int tracing_buffers_open(struct inode *inode, struct file *filp)
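The dedicated release callback is needed because tracing_release_generic_tr() only drops the trace_array reference; a seq_file based reader must tear down its seq state as well. A sketch of the balance this assumes on the open side (tracing_err_log_open() itself is not shown in this patch, so the body below is an assumption about the usual pairing, and example_seq_ops is hypothetical):

	/* Hedged sketch: what the matching open side is assumed to do. */
	static int example_open(struct inode *inode, struct file *file)
	{
		struct trace_array *tr = inode->i_private;
		int ret = 0;

		if (trace_array_get(tr) < 0)	/* paired with trace_array_put() */
			return -ENODEV;

		if (file->f_mode & FMODE_READ)	/* paired with seq_release() */
			ret = seq_open(file, &example_seq_ops);
		if (ret)
			trace_array_put(tr);
		return ret;
	}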
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 1e6db9cbe4dc..fa95139445b2 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -277,7 +277,7 @@ static void move_to_next_cpu(void)
* of this thread, then stop migrating for the duration
* of the current test.
*/
- if (!cpumask_equal(current_mask, &current->cpus_allowed))
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
goto disable;
get_online_cpus();
diff --git a/kernel/up.c b/kernel/up.c
index 483c9962c999..862b460ab97a 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -35,14 +35,13 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
}
EXPORT_SYMBOL(smp_call_function_single_async);
-int on_each_cpu(smp_call_func_t func, void *info, int wait)
+void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
- return 0;
}
EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/user.c b/kernel/user.c
index 78b17e36e705..5235d7f49982 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -63,9 +63,9 @@ struct user_namespace init_user_ns = {
.ns.ops = &userns_operations,
#endif
.flags = USERNS_INIT_FLAGS,
-#ifdef CONFIG_PERSISTENT_KEYRINGS
- .persistent_keyring_register_sem =
- __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
+#ifdef CONFIG_KEYS
+ .keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
+ .keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);
@@ -141,8 +141,6 @@ static void free_user(struct user_struct *up, unsigned long flags)
{
uid_hash_remove(up);
spin_unlock_irqrestore(&uidhash_lock, flags);
- key_put(up->uid_keyring);
- key_put(up->session_keyring);
kmem_cache_free(uid_cachep, up);
}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0eff45ce7703..8eadadc478f9 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -128,8 +128,9 @@ int create_user_ns(struct cred *new)
ns->flags = parent_ns->flags;
mutex_unlock(&userns_state_mutex);
-#ifdef CONFIG_PERSISTENT_KEYRINGS
- init_rwsem(&ns->persistent_keyring_register_sem);
+#ifdef CONFIG_KEYS
+ INIT_LIST_HEAD(&ns->keyring_name_list);
+ init_rwsem(&ns->keyring_sem);
#endif
ret = -ENOMEM;
if (!setup_userns_sysctls(ns))
@@ -191,9 +192,7 @@ static void free_user_ns(struct work_struct *work)
kfree(ns->projid_map.reverse);
}
retire_userns_sysctls(ns);
-#ifdef CONFIG_PERSISTENT_KEYRINGS
- key_put(ns->persistent_keyring_register);
-#endif
+ key_free_user_ns(ns);
ns_free_inum(&ns->ns);
kmem_cache_free(user_ns_cachep, ns);
dec_user_namespaces(ucounts);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 95aea04ff722..601d61150b65 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3329,7 +3329,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context);
*
* Undo alloc_workqueue_attrs().
*/
-void free_workqueue_attrs(struct workqueue_attrs *attrs)
+static void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
if (attrs) {
free_cpumask_var(attrs->cpumask);
@@ -3339,21 +3339,20 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
/**
* alloc_workqueue_attrs - allocate a workqueue_attrs
- * @gfp_mask: allocation mask to use
*
* Allocate a new workqueue_attrs, initialize with default settings and
* return it.
*
* Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
+static struct workqueue_attrs *alloc_workqueue_attrs(void)
{
struct workqueue_attrs *attrs;
- attrs = kzalloc(sizeof(*attrs), gfp_mask);
+ attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
if (!attrs)
goto fail;
- if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
+ if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
goto fail;
cpumask_copy(attrs->cpumask, cpu_possible_mask);
@@ -3431,7 +3430,7 @@ static int init_worker_pool(struct worker_pool *pool)
pool->refcnt = 1;
/* shouldn't fail above this point */
- pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ pool->attrs = alloc_workqueue_attrs();
if (!pool->attrs)
return -ENOMEM;
return 0;
@@ -3896,8 +3895,8 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
- new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
- tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ new_attrs = alloc_workqueue_attrs();
+ tmp_attrs = alloc_workqueue_attrs();
if (!ctx || !new_attrs || !tmp_attrs)
goto out_free;
@@ -4033,7 +4032,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
*
* Return: 0 on success and -errno on failure.
*/
-int apply_workqueue_attrs(struct workqueue_struct *wq,
+static int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs)
{
int ret;
@@ -4044,7 +4043,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
return ret;
}
-EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
/**
* wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
@@ -4242,7 +4240,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
return NULL;
if (flags & WQ_UNBOUND) {
- wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ wq->unbound_attrs = alloc_workqueue_attrs();
if (!wq->unbound_attrs)
goto err_free_wq;
}
@@ -5395,7 +5393,7 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
lockdep_assert_held(&wq_pool_mutex);
- attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ attrs = alloc_workqueue_attrs();
if (!attrs)
return NULL;
@@ -5817,7 +5815,7 @@ static void __init wq_numa_init(void)
return;
}
- wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
+ wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
BUG_ON(!wq_update_unbound_numa_attrs_buf);
/*
@@ -5892,7 +5890,7 @@ int __init workqueue_init_early(void)
for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
struct workqueue_attrs *attrs;
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
attrs->nice = std_nice[i];
unbound_std_wq_attrs[i] = attrs;
@@ -5901,7 +5899,7 @@ int __init workqueue_init_early(void)
* guaranteed by max_active which is enforced by pwqs.
* Turn off NUMA so that dfl_pwq is used for all nodes.
*/
- BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ BUG_ON(!(attrs = alloc_workqueue_attrs()));
attrs->nice = std_nice[i];
attrs->no_numa = true;
ordered_wq_attrs[i] = attrs;
diff --git a/lib/Kconfig b/lib/Kconfig
index 90623a0e1942..8c8eefc5e54c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -576,6 +576,11 @@ config OID_REGISTRY
config UCS2_STRING
tristate
+#
+# generic vdso
+#
+source "lib/vdso/Kconfig"
+
source "lib/fonts/Kconfig"
config SG_SPLIT
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cbdfae379896..06d9c9d70385 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1095,7 +1095,7 @@ config PROVE_LOCKING
select DEBUG_SPINLOCK
select DEBUG_MUTEXES
select DEBUG_RT_MUTEXES if RT_MUTEXES
- select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
+ select DEBUG_RWSEMS
select DEBUG_WW_MUTEX_SLOWPATH
select DEBUG_LOCK_ALLOC
select TRACE_IRQFLAGS
@@ -1199,10 +1199,10 @@ config DEBUG_WW_MUTEX_SLOWPATH
config DEBUG_RWSEMS
bool "RW Semaphore debugging: basic checks"
- depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
+ depends on DEBUG_KERNEL
help
- This debugging feature allows mismatched rw semaphore locks and unlocks
- to be detected and reported.
+ This debugging feature allows mismatched rw semaphore locks
+ and unlocks to be detected and reported.
config DEBUG_LOCK_ALLOC
bool "Lock debugging: detect incorrect freeing of live locks"
@@ -1754,6 +1754,18 @@ config RBTREE_TEST
A benchmark measuring the performance of the rbtree library.
Also includes rbtree invariant checks.
+config REED_SOLOMON_TEST
+ tristate "Reed-Solomon library test"
+ depends on DEBUG_KERNEL || m
+ select REED_SOLOMON
+ select REED_SOLOMON_ENC16
+ select REED_SOLOMON_DEC16
+ help
+ This option enables the self-test function of rslib at boot,
+ or at module load time.
+
+ If unsure, say N.
+
config INTERVAL_TREE_TEST
tristate "Interval tree test"
depends on DEBUG_KERNEL
@@ -1858,6 +1870,14 @@ config TEST_PARMAN
If unsure, say N.
+config TEST_IRQ_TIMINGS
+ bool "IRQ timings selftest"
+ depends on IRQ_TIMINGS
+ help
+ Enable this option to test the irq timings code on boot.
+
+ If unsure, say N.
+
config TEST_LKM
tristate "Test module loading with 'hello world' module"
depends on m
diff --git a/lib/Makefile b/lib/Makefile
index fb7697031a79..d3daedf93c5a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -102,7 +102,7 @@ endif
obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o
CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any)
-obj-y += math/
+obj-y += math/ crypto/
obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 7e6905751522..e98c85a99787 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -42,11 +42,11 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
}
-long long atomic64_read(const atomic64_t *v)
+s64 atomic64_read(const atomic64_t *v)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -55,7 +55,7 @@ long long atomic64_read(const atomic64_t *v)
}
EXPORT_SYMBOL(atomic64_read);
-void atomic64_set(atomic64_t *v, long long i)
+void atomic64_set(atomic64_t *v, s64 i)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
@@ -67,7 +67,7 @@ void atomic64_set(atomic64_t *v, long long i)
EXPORT_SYMBOL(atomic64_set);
#define ATOMIC64_OP(op, c_op) \
-void atomic64_##op(long long a, atomic64_t *v) \
+void atomic64_##op(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
@@ -79,11 +79,11 @@ void atomic64_##op(long long a, atomic64_t *v) \
EXPORT_SYMBOL(atomic64_##op);
#define ATOMIC64_OP_RETURN(op, c_op) \
-long long atomic64_##op##_return(long long a, atomic64_t *v) \
+s64 atomic64_##op##_return(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
- long long val; \
+ s64 val; \
\
raw_spin_lock_irqsave(lock, flags); \
val = (v->counter c_op a); \
@@ -93,11 +93,11 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
EXPORT_SYMBOL(atomic64_##op##_return);
#define ATOMIC64_FETCH_OP(op, c_op) \
-long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \
{ \
unsigned long flags; \
raw_spinlock_t *lock = lock_addr(v); \
- long long val; \
+ s64 val; \
\
raw_spin_lock_irqsave(lock, flags); \
val = v->counter; \
@@ -130,11 +130,11 @@ ATOMIC64_OPS(xor, ^=)
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
-long long atomic64_dec_if_positive(atomic64_t *v)
+s64 atomic64_dec_if_positive(atomic64_t *v)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter - 1;
@@ -145,11 +145,11 @@ long long atomic64_dec_if_positive(atomic64_t *v)
}
EXPORT_SYMBOL(atomic64_dec_if_positive);
-long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -160,11 +160,11 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
}
EXPORT_SYMBOL(atomic64_cmpxchg);
-long long atomic64_xchg(atomic64_t *v, long long new)
+s64 atomic64_xchg(atomic64_t *v, s64 new)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
@@ -174,11 +174,11 @@ long long atomic64_xchg(atomic64_t *v, long long new)
}
EXPORT_SYMBOL(atomic64_xchg);
-long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
+s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned long flags;
raw_spinlock_t *lock = lock_addr(v);
- long long val;
+ s64 val;
raw_spin_lock_irqsave(lock, flags);
val = v->counter;
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
new file mode 100644
index 000000000000..88195c34932d
--- /dev/null
+++ b/lib/crypto/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
+libarc4-y := arc4.o
diff --git a/lib/crypto/arc4.c b/lib/crypto/arc4.c
new file mode 100644
index 000000000000..c2020f19c652
--- /dev/null
+++ b/lib/crypto/arc4.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API
+ *
+ * ARC4 Cipher Algorithm
+ *
+ * Jon Oberheide <jon@oberheide.org>
+ */
+
+#include <crypto/arc4.h>
+#include <linux/module.h>
+
+int arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
+{
+ int i, j = 0, k = 0;
+
+ ctx->x = 1;
+ ctx->y = 0;
+
+ for (i = 0; i < 256; i++)
+ ctx->S[i] = i;
+
+ for (i = 0; i < 256; i++) {
+ u32 a = ctx->S[i];
+
+ j = (j + in_key[k] + a) & 0xff;
+ ctx->S[i] = ctx->S[j];
+ ctx->S[j] = a;
+ if (++k >= key_len)
+ k = 0;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(arc4_setkey);
+
+void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
+{
+ u32 *const S = ctx->S;
+ u32 x, y, a, b;
+ u32 ty, ta, tb;
+
+ if (len == 0)
+ return;
+
+ x = ctx->x;
+ y = ctx->y;
+
+ a = S[x];
+ y = (y + a) & 0xff;
+ b = S[y];
+
+ do {
+ S[y] = a;
+ a = (a + b) & 0xff;
+ S[x] = b;
+ x = (x + 1) & 0xff;
+ ta = S[x];
+ ty = (y + ta) & 0xff;
+ tb = S[ty];
+ *out++ = *in++ ^ S[a];
+ if (--len == 0)
+ break;
+ y = ty;
+ a = ta;
+ b = tb;
+ } while (true);
+
+ ctx->x = x;
+ ctx->y = y;
+}
+EXPORT_SYMBOL(arc4_crypt);
+
+MODULE_LICENSE("GPL");
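The new library exposes the bare cipher with no crypto API wrapping, so a caller drives it directly. A minimal usage sketch, assuming a flat input buffer (ARC4 is symmetric, so the same call encrypts and decrypts):

	#include <crypto/arc4.h>
	#include <linux/string.h>

	/* Hedged usage sketch for the lib/crypto ARC4 interface above. */
	static void arc4_example(const u8 *key, unsigned int keylen,
				 const u8 *in, u8 *out, unsigned int len)
	{
		struct arc4_ctx ctx;

		arc4_setkey(&ctx, key, keylen);	/* build the S-box permutation */
		arc4_crypt(&ctx, out, in, len);	/* XOR keystream over the input */

		memzero_explicit(&ctx, sizeof(ctx));	/* scrub key material */
	}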
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 55437fd5128b..61261195f5b6 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -25,16 +25,37 @@
#define ODEBUG_POOL_SIZE 1024
#define ODEBUG_POOL_MIN_LEVEL 256
+#define ODEBUG_POOL_PERCPU_SIZE 64
+#define ODEBUG_BATCH_SIZE 16
#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT
#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT)
#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1))
+/*
+ * We limit the freeing of debug objects via workqueue at a maximum
+ * frequency of 10Hz and about 1024 objects for each freeing operation.
+ * So it is freeing at most 10k debug objects per second.
+ */
+#define ODEBUG_FREE_WORK_MAX 1024
+#define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10)
+
struct debug_bucket {
struct hlist_head list;
raw_spinlock_t lock;
};
+/*
+ * Debug object percpu free list
+ * Access is protected by disabling irq
+ */
+struct debug_percpu_free {
+ struct hlist_head free_objs;
+ int obj_free;
+};
+
+static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool);
+
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
@@ -44,13 +65,20 @@ static DEFINE_RAW_SPINLOCK(pool_lock);
static HLIST_HEAD(obj_pool);
static HLIST_HEAD(obj_to_free);
+/*
+ * Because of the presence of percpu free pools, obj_pool_free will
+ * under-count those in the percpu free pools. Similarly, obj_pool_used
+ * will over-count those in the percpu free pools. Adjustments will be
+ * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used
+ * can be off.
+ */
static int obj_pool_min_free = ODEBUG_POOL_SIZE;
static int obj_pool_free = ODEBUG_POOL_SIZE;
static int obj_pool_used;
static int obj_pool_max_used;
+static bool obj_freeing;
/* The number of objs on the global free list */
static int obj_nr_tofree;
-static struct kmem_cache *obj_cache;
static int debug_objects_maxchain __read_mostly;
static int __maybe_unused debug_objects_maxchecked __read_mostly;
@@ -63,6 +91,7 @@ static int debug_objects_pool_size __read_mostly
static int debug_objects_pool_min_level __read_mostly
= ODEBUG_POOL_MIN_LEVEL;
static struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __read_mostly;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -71,7 +100,7 @@ static int debug_objects_allocated;
static int debug_objects_freed;
static void free_obj_work(struct work_struct *work);
-static DECLARE_WORK(debug_obj_work, free_obj_work);
+static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work);
static int __init enable_object_debug(char *str)
{
@@ -100,7 +129,7 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
static void fill_pool(void)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- struct debug_obj *new, *obj;
+ struct debug_obj *obj;
unsigned long flags;
if (likely(obj_pool_free >= debug_objects_pool_min_level))
@@ -116,7 +145,7 @@ static void fill_pool(void)
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- if (obj_nr_tofree) {
+ while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
hlist_del(&obj->node);
obj_nr_tofree--;
@@ -130,15 +159,23 @@ static void fill_pool(void)
return;
while (obj_pool_free < debug_objects_pool_min_level) {
+ struct debug_obj *new[ODEBUG_BATCH_SIZE];
+ int cnt;
- new = kmem_cache_zalloc(obj_cache, gfp);
- if (!new)
+ for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
+ new[cnt] = kmem_cache_zalloc(obj_cache, gfp);
+ if (!new[cnt])
+ break;
+ }
+ if (!cnt)
return;
raw_spin_lock_irqsave(&pool_lock, flags);
- hlist_add_head(&new->node, &obj_pool);
- debug_objects_allocated++;
- obj_pool_free++;
+ while (cnt) {
+ hlist_add_head(&new[--cnt]->node, &obj_pool);
+ debug_objects_allocated++;
+ obj_pool_free++;
+ }
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
}
@@ -163,36 +200,81 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
}
/*
+ * Allocate a new object from the hlist
+ */
+static struct debug_obj *__alloc_object(struct hlist_head *list)
+{
+ struct debug_obj *obj = NULL;
+
+ if (list->first) {
+ obj = hlist_entry(list->first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ }
+
+ return obj;
+}
+
+/*
* Allocate a new object. If the pool is empty, switch off the debugger.
* Must be called with interrupts disabled.
*/
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
{
- struct debug_obj *obj = NULL;
+ struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
+ struct debug_obj *obj;
- raw_spin_lock(&pool_lock);
- if (obj_pool.first) {
- obj = hlist_entry(obj_pool.first, typeof(*obj), node);
+ if (likely(obj_cache)) {
+ obj = __alloc_object(&percpu_pool->free_objs);
+ if (obj) {
+ percpu_pool->obj_free--;
+ goto init_obj;
+ }
+ }
- obj->object = addr;
- obj->descr = descr;
- obj->state = ODEBUG_STATE_NONE;
- obj->astate = 0;
- hlist_del(&obj->node);
+ raw_spin_lock(&pool_lock);
+ obj = __alloc_object(&obj_pool);
+ if (obj) {
+ obj_pool_used++;
+ obj_pool_free--;
- hlist_add_head(&obj->node, &b->list);
+ /*
+ * Looking ahead, allocate one batch of debug objects and
+ * put them into the percpu free pool.
+ */
+ if (likely(obj_cache)) {
+ int i;
+
+ for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
+ struct debug_obj *obj2;
+
+ obj2 = __alloc_object(&obj_pool);
+ if (!obj2)
+ break;
+ hlist_add_head(&obj2->node,
+ &percpu_pool->free_objs);
+ percpu_pool->obj_free++;
+ obj_pool_used++;
+ obj_pool_free--;
+ }
+ }
- obj_pool_used++;
if (obj_pool_used > obj_pool_max_used)
obj_pool_max_used = obj_pool_used;
- obj_pool_free--;
if (obj_pool_free < obj_pool_min_free)
obj_pool_min_free = obj_pool_free;
}
raw_spin_unlock(&pool_lock);
+init_obj:
+ if (obj) {
+ obj->object = addr;
+ obj->descr = descr;
+ obj->state = ODEBUG_STATE_NONE;
+ obj->astate = 0;
+ hlist_add_head(&obj->node, &b->list);
+ }
return obj;
}
@@ -209,13 +291,19 @@ static void free_obj_work(struct work_struct *work)
unsigned long flags;
HLIST_HEAD(tofree);
+ WRITE_ONCE(obj_freeing, false);
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
+ if (obj_pool_free >= debug_objects_pool_size)
+ goto free_objs;
+
/*
* The objs on the pool list might be allocated before the work is
* run, so recheck if the pool list is full or not, if not fill pool
- * list from the global free list
+ * list from the global free list. As it is likely that a workload
+ * may be gearing up to use more and more objects, don't free any
+ * of them until the next round.
*/
while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
@@ -224,7 +312,10 @@ static void free_obj_work(struct work_struct *work)
obj_pool_free++;
obj_nr_tofree--;
}
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ return;
+free_objs:
/*
* Pool list is already full and there are still objs on the free
* list. Move remaining free objs to a temporary list to free the
@@ -243,24 +334,86 @@ static void free_obj_work(struct work_struct *work)
}
}
-static bool __free_object(struct debug_obj *obj)
+static void __free_object(struct debug_obj *obj)
{
+ struct debug_obj *objs[ODEBUG_BATCH_SIZE];
+ struct debug_percpu_free *percpu_pool;
+ int lookahead_count = 0;
unsigned long flags;
bool work;
- raw_spin_lock_irqsave(&pool_lock, flags);
- work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
+ local_irq_save(flags);
+ if (!obj_cache)
+ goto free_to_obj_pool;
+
+ /*
+ * Try to free it into the percpu pool first.
+ */
+ percpu_pool = this_cpu_ptr(&percpu_obj_pool);
+ if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
+ hlist_add_head(&obj->node, &percpu_pool->free_objs);
+ percpu_pool->obj_free++;
+ local_irq_restore(flags);
+ return;
+ }
+
+ /*
+ * As the percpu pool is full, look ahead and pull out a batch
+ * of objects from the percpu pool and free them as well.
+ */
+ for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
+ objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
+ if (!objs[lookahead_count])
+ break;
+ percpu_pool->obj_free--;
+ }
+
+free_to_obj_pool:
+ raw_spin_lock(&pool_lock);
+ work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
+ (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
if (work) {
obj_nr_tofree++;
hlist_add_head(&obj->node, &obj_to_free);
+ if (lookahead_count) {
+ obj_nr_tofree += lookahead_count;
+ obj_pool_used -= lookahead_count;
+ while (lookahead_count) {
+ hlist_add_head(&objs[--lookahead_count]->node,
+ &obj_to_free);
+ }
+ }
+
+ if ((obj_pool_free > debug_objects_pool_size) &&
+ (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
+ int i;
+
+ /*
+ * Free one more batch of objects from obj_pool.
+ */
+ for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
+ obj = __alloc_object(&obj_pool);
+ hlist_add_head(&obj->node, &obj_to_free);
+ obj_pool_free--;
+ obj_nr_tofree++;
+ }
+ }
} else {
obj_pool_free++;
hlist_add_head(&obj->node, &obj_pool);
+ if (lookahead_count) {
+ obj_pool_free += lookahead_count;
+ obj_pool_used -= lookahead_count;
+ while (lookahead_count) {
+ hlist_add_head(&objs[--lookahead_count]->node,
+ &obj_pool);
+ }
+ }
}
- raw_spin_unlock_irqrestore(&pool_lock, flags);
- return work;
+ raw_spin_unlock(&pool_lock);
+ local_irq_restore(flags);
}
/*
@@ -269,8 +422,11 @@ static bool __free_object(struct debug_obj *obj)
*/
static void free_object(struct debug_obj *obj)
{
- if (__free_object(obj))
- schedule_work(&debug_obj_work);
+ __free_object(obj);
+ if (!obj_freeing && obj_nr_tofree) {
+ WRITE_ONCE(obj_freeing, true);
+ schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
+ }
}
/*
@@ -372,6 +528,7 @@ static void
__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
+ bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -391,7 +548,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
debug_objects_oom();
return;
}
- debug_object_is_on_stack(addr, onstack);
+ check_stack = true;
}
switch (obj->state) {
@@ -402,20 +559,23 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "init");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "init");
debug_object_fixup(descr->fixup_init, addr, state);
return;
case ODEBUG_STATE_DESTROYED:
+ raw_spin_unlock_irqrestore(&db->lock, flags);
debug_print_object(obj, "init");
- break;
+ return;
default:
break;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (check_stack)
+ debug_object_is_on_stack(addr, onstack);
}
/**
@@ -473,6 +633,8 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
obj = lookup_object(addr, db);
if (obj) {
+ bool print_object = false;
+
switch (obj->state) {
case ODEBUG_STATE_INIT:
case ODEBUG_STATE_INACTIVE:
@@ -481,14 +643,14 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "activate");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "activate");
ret = debug_object_fixup(descr->fixup_activate, addr, state);
return ret ? 0 : -EINVAL;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "activate");
+ print_object = true;
ret = -EINVAL;
break;
default:
@@ -496,10 +658,13 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
break;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (print_object)
+ debug_print_object(obj, "activate");
return ret;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+
/*
* We are here when a static object is activated. We
* let the type specific code confirm whether this is
@@ -531,6 +696,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
@@ -548,24 +714,27 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
if (!obj->astate)
obj->state = ODEBUG_STATE_INACTIVE;
else
- debug_print_object(obj, "deactivate");
+ print_object = true;
break;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "deactivate");
+ print_object = true;
break;
default:
break;
}
- } else {
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (!obj) {
struct debug_obj o = { .object = addr,
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };
debug_print_object(&o, "deactivate");
+ } else if (print_object) {
+ debug_print_object(obj, "deactivate");
}
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
}
EXPORT_SYMBOL_GPL(debug_object_deactivate);
@@ -580,6 +749,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
@@ -599,20 +769,22 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
obj->state = ODEBUG_STATE_DESTROYED;
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "destroy");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "destroy");
debug_object_fixup(descr->fixup_destroy, addr, state);
return;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "destroy");
+ print_object = true;
break;
default:
break;
}
out_unlock:
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (print_object)
+ debug_print_object(obj, "destroy");
}
EXPORT_SYMBOL_GPL(debug_object_destroy);
@@ -641,9 +813,9 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
switch (obj->state) {
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "free");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "free");
debug_object_fixup(descr->fixup_free, addr, state);
return;
default:
@@ -716,6 +888,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
@@ -731,22 +904,25 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
if (obj->astate == expect)
obj->astate = next;
else
- debug_print_object(obj, "active_state");
+ print_object = true;
break;
default:
- debug_print_object(obj, "active_state");
+ print_object = true;
break;
}
- } else {
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (!obj) {
struct debug_obj o = { .object = addr,
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };
debug_print_object(&o, "active_state");
+ } else if (print_object) {
+ debug_print_object(obj, "active_state");
}
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
}
EXPORT_SYMBOL_GPL(debug_object_active_state);
@@ -760,7 +936,6 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
struct hlist_node *tmp;
struct debug_obj *obj;
int cnt, objs_checked = 0;
- bool work = false;
saddr = (unsigned long) address;
eaddr = saddr + size;
@@ -782,16 +957,16 @@ repeat:
switch (obj->state) {
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "free");
descr = obj->descr;
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "free");
debug_object_fixup(descr->fixup_free,
(void *) oaddr, state);
goto repeat;
default:
hlist_del(&obj->node);
- work |= __free_object(obj);
+ __free_object(obj);
break;
}
}
@@ -807,8 +982,10 @@ repeat:
debug_objects_maxchecked = objs_checked;
/* Schedule work to actually kmem_cache_free() objects */
- if (work)
- schedule_work(&debug_obj_work);
+ if (!obj_freeing && obj_nr_tofree) {
+ WRITE_ONCE(obj_freeing, true);
+ schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
+ }
}
void debug_check_no_obj_freed(const void *address, unsigned long size)
@@ -822,13 +999,19 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
static int debug_stats_show(struct seq_file *m, void *v)
{
+ int cpu, obj_percpu_free = 0;
+
+ for_each_possible_cpu(cpu)
+ obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu);
+
seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
seq_printf(m, "warnings :%d\n", debug_objects_warnings);
seq_printf(m, "fixups :%d\n", debug_objects_fixups);
- seq_printf(m, "pool_free :%d\n", obj_pool_free);
+ seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free);
+ seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free);
seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
- seq_printf(m, "pool_used :%d\n", obj_pool_used);
+ seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free);
seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
@@ -850,26 +1033,16 @@ static const struct file_operations debug_stats_fops = {
static int __init debug_objects_init_debugfs(void)
{
- struct dentry *dbgdir, *dbgstats;
+ struct dentry *dbgdir;
if (!debug_objects_enabled)
return 0;
dbgdir = debugfs_create_dir("debug_objects", NULL);
- if (!dbgdir)
- return -ENOMEM;
- dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL,
- &debug_stats_fops);
- if (!dbgstats)
- goto err;
+ debugfs_create_file("stats", 0444, dbgdir, NULL, &debug_stats_fops);
return 0;
-
-err:
- debugfs_remove(dbgdir);
-
- return -ENOMEM;
}
__initcall(debug_objects_init_debugfs);
@@ -1175,9 +1348,20 @@ free:
*/
void __init debug_objects_mem_init(void)
{
+ int cpu, extras;
+
if (!debug_objects_enabled)
return;
+ /*
+ * Initialize the percpu object pools
+ *
+ * Initialization is not strictly necessary, but was done for
+ * completeness.
+ */
+ for_each_possible_cpu(cpu)
+ INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu));
+
obj_cache = kmem_cache_create("debug_objects_cache",
sizeof (struct debug_obj), 0,
SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
@@ -1194,6 +1378,7 @@ void __init debug_objects_mem_init(void)
* Increase the thresholds for allocating and freeing objects
* according to the number of possible CPUs available in the system.
*/
- debug_objects_pool_size += num_possible_cpus() * 32;
- debug_objects_pool_min_level += num_possible_cpus() * 4;
+ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
+ debug_objects_pool_size += extras;
+ debug_objects_pool_min_level += extras;
}
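As a worked check of the rate limit introduced above: with HZ = 250, ODEBUG_FREE_WORK_DELAY = DIV_ROUND_UP(250, 10) = 25 jiffies, so the delayed work runs at most 10 times per second, and each run frees at most ODEBUG_FREE_WORK_MAX = 1024 objects. That gives a ceiling of 10 * 1024 = 10240, roughly the "10k debug objects per second" quoted in the new comment.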
diff --git a/lib/devres.c b/lib/devres.c
index 69bed2f38306..6a0e9bd6524a 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -131,7 +131,8 @@ EXPORT_SYMBOL(devm_iounmap);
* if (IS_ERR(base))
* return PTR_ERR(base);
*/
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
+void __iomem *devm_ioremap_resource(struct device *dev,
+ const struct resource *res)
{
resource_size_t size;
void __iomem *dest_ptr;
diff --git a/lib/digsig.c b/lib/digsig.c
index 3cf89c775ab2..ab0800f98eaf 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -218,13 +218,13 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen,
/* search in specific keyring */
key_ref_t kref;
kref = keyring_search(make_key_ref(keyring, 1UL),
- &key_type_user, name);
+ &key_type_user, name, true);
if (IS_ERR(kref))
key = ERR_CAST(kref);
else
key = key_ref_to_ptr(kref);
} else {
- key = request_key(&key_type_user, name, NULL);
+ key = request_key(&key_type_user, name, NULL, NULL);
}
if (IS_ERR(key)) {
pr_err("key not found, id: %s\n", name);
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
index 82b19e4f1189..2fd7a46d55ec 100644
--- a/lib/mpi/mpi-pow.c
+++ b/lib/mpi/mpi-pow.c
@@ -24,6 +24,7 @@
int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
{
mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+ struct karatsuba_ctx karactx = {};
mpi_ptr_t xp_marker = NULL;
mpi_ptr_t tspace = NULL;
mpi_ptr_t rp, ep, mp, bp;
@@ -150,13 +151,11 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
- struct karatsuba_ctx karactx;
xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
if (!xp)
goto enomem;
- memset(&karactx, 0, sizeof karactx);
negative_result = (ep[0] & 1) && base->sign;
i = esize - 1;
@@ -281,8 +280,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
if (mod_shift_cnt)
mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE(rp, rsize);
-
- mpihelp_release_karatsuba_ctx(&karactx);
}
if (negative_result && rsize) {
@@ -299,6 +296,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
leave:
rc = 0;
enomem:
+ mpihelp_release_karatsuba_ctx(&karactx);
if (assign_rp)
mpi_assign_limb_space(res, rp, size);
if (mp_marker)
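The leak fix above follows the standard init-at-declaration plus common-exit-cleanup pattern: because karactx is zero-initialized where it is declared, mpihelp_release_karatsuba_ctx() is safe on every path, including the enomem path that previously skipped the release. A hedged sketch of the pattern in isolation (example_alloc_ok() is a hypothetical failure point):

	/* Hedged sketch of the cleanup pattern applied in mpi_powm() above. */
	static int example(void)
	{
		struct karatsuba_ctx karactx = {};	/* releasable even if unused */
		int rc = -ENOMEM;

		if (!example_alloc_ok())
			goto out;			/* no longer leaks karactx */

		/* ... work that may populate karactx ... */
		rc = 0;
	out:
		mpihelp_release_karatsuba_ctx(&karactx);	/* runs on all paths */
		return rc;
	}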
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index 914ebe98fc21..9e597e1f91a4 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -60,7 +60,7 @@ static inline void LOAD_DATA(int x, u8 *ptr)
typedef struct { u8 _[16 * $#]; } addrtype;
register addrtype *__ptr asm("1") = (addrtype *) ptr;
- asm volatile ("VLM %2,%3,0,%r1"
+ asm volatile ("VLM %2,%3,0,%1"
: : "m" (*__ptr), "a" (__ptr), "i" (x),
"i" (x + $# - 1));
}
diff --git a/lib/reed_solomon/Makefile b/lib/reed_solomon/Makefile
index ba9d7a3329eb..5d4fa68f26cb 100644
--- a/lib/reed_solomon/Makefile
+++ b/lib/reed_solomon/Makefile
@@ -4,4 +4,4 @@
#
obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o
-
+obj-$(CONFIG_REED_SOLOMON_TEST) += test_rslib.o
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index 1db74eb098d0..805de84ae83d 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -22,6 +22,7 @@
uint16_t *index_of = rs->index_of;
uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
int count = 0;
+ int num_corrected;
uint16_t msk = (uint16_t) rs->nn;
/*
@@ -39,11 +40,21 @@
/* Check length parameter for validity */
pad = nn - nroots - len;
- BUG_ON(pad < 0 || pad >= nn);
+ BUG_ON(pad < 0 || pad >= nn - nroots);
/* Does the caller provide the syndrome ? */
- if (s != NULL)
- goto decode;
+ if (s != NULL) {
+ for (i = 0; i < nroots; i++) {
+ /* The syndrome is in index form,
+ * so nn represents zero
+ */
+ if (s[i] != nn)
+ goto decode;
+ }
+
+ /* syndrome is zero, no errors to correct */
+ return 0;
+ }
/* form the syndromes; i.e., evaluate data(x) at roots of
* g(x) */
@@ -88,8 +99,7 @@
/* if syndrome is zero, data[] is a codeword and there are no
* errors to correct. So return data[] unmodified
*/
- count = 0;
- goto finish;
+ return 0;
}
decode:
@@ -99,9 +109,9 @@
if (no_eras > 0) {
/* Init lambda to be the erasure locator polynomial */
lambda[1] = alpha_to[rs_modnn(rs,
- prim * (nn - 1 - eras_pos[0]))];
+ prim * (nn - 1 - (eras_pos[0] + pad)))];
for (i = 1; i < no_eras; i++) {
- u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i]));
+ u = rs_modnn(rs, prim * (nn - 1 - (eras_pos[i] + pad)));
for (j = i + 1; j > 0; j--) {
tmp = index_of[lambda[j - 1]];
if (tmp != nn) {
@@ -175,6 +185,15 @@
if (lambda[i] != nn)
deg_lambda = i;
}
+
+ if (deg_lambda == 0) {
+ /*
+ * deg(lambda) is zero even though the syndrome is non-zero
+ * => uncorrectable error detected
+ */
+ return -EBADMSG;
+ }
+
/* Find roots of error+erasure locator polynomial by Chien search */
memcpy(&reg[1], &lambda[1], nroots * sizeof(reg[0]));
count = 0; /* Number of roots of lambda(x) */
@@ -188,6 +207,12 @@
}
if (q != 0)
continue; /* Not a root */
+
+ if (k < pad) {
+ /* Impossible error location. Uncorrectable error. */
+ return -EBADMSG;
+ }
+
/* store root (index-form) and error location number */
root[count] = i;
loc[count] = k;
@@ -202,8 +227,7 @@
* deg(lambda) unequal to number of roots => uncorrectable
* error detected
*/
- count = -EBADMSG;
- goto finish;
+ return -EBADMSG;
}
/*
* Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
@@ -223,7 +247,9 @@
/*
* Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
* inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form
+ * Note: we reuse the buffer for b to store the correction pattern
*/
+ num_corrected = 0;
for (j = count - 1; j >= 0; j--) {
num1 = 0;
for (i = deg_omega; i >= 0; i--) {
@@ -231,6 +257,13 @@
num1 ^= alpha_to[rs_modnn(rs, omega[i] +
i * root[j])];
}
+
+ if (num1 == 0) {
+ /* Nothing to correct at this position */
+ b[j] = 0;
+ continue;
+ }
+
num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)];
den = 0;
@@ -242,30 +275,52 @@
i * root[j])];
}
}
- /* Apply error to data */
- if (num1 != 0 && loc[j] >= pad) {
- uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] +
- index_of[num2] +
- nn - index_of[den])];
- /* Store the error correction pattern, if a
- * correction buffer is available */
- if (corr) {
- corr[j] = cor;
- } else {
- /* If a data buffer is given and the
- * error is inside the message,
- * correct it */
- if (data && (loc[j] < (nn - nroots)))
- data[loc[j] - pad] ^= cor;
- }
+
+ b[j] = alpha_to[rs_modnn(rs, index_of[num1] +
+ index_of[num2] +
+ nn - index_of[den])];
+ num_corrected++;
+ }
+
+ /*
+ * We compute the syndrome of the 'error' and check that it matches
+ * the syndrome of the received word
+ */
+ for (i = 0; i < nroots; i++) {
+ tmp = 0;
+ for (j = 0; j < count; j++) {
+ if (b[j] == 0)
+ continue;
+
+ k = (fcr + i) * prim * (nn-loc[j]-1);
+ tmp ^= alpha_to[rs_modnn(rs, index_of[b[j]] + k)];
}
+
+ if (tmp != alpha_to[s[i]])
+ return -EBADMSG;
}
-finish:
- if (eras_pos != NULL) {
- for (i = 0; i < count; i++)
- eras_pos[i] = loc[i] - pad;
+ /*
+ * Store the error correction pattern, if a
+ * correction buffer is available
+ */
+ if (corr && eras_pos) {
+ j = 0;
+ for (i = 0; i < count; i++) {
+ if (b[i]) {
+ corr[j] = b[i];
+ eras_pos[j++] = loc[i] - pad;
+ }
+ }
+ } else if (data && par) {
+ /* Apply error to data and parity */
+ for (i = 0; i < count; i++) {
+ if (loc[i] < (nn - nroots))
+ data[loc[i] - pad] ^= b[i];
+ else
+ par[loc[i] - pad - len] ^= b[i];
+ }
}
- return count;
+ return num_corrected;
}
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index e5fdc8b9e856..bbc01bad3053 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -340,7 +340,8 @@ EXPORT_SYMBOL_GPL(encode_rs8);
* @data: data field of a given type
* @par: received parity data field
* @len: data length
- * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @s: syndrome data field, must be in index form
+ * (if NULL, syndrome is calculated)
* @no_eras: number of erasures
* @eras_pos: position of erasures, can be NULL
* @invmsk: invert data mask (will be xored on data, not on parity!)
@@ -354,7 +355,8 @@ EXPORT_SYMBOL_GPL(encode_rs8);
* decoding, so the caller has to ensure that decoder invocations are
* serialized.
*
- * Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable
+ * errors. The count includes errors in the parity.
*/
int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
@@ -391,7 +393,8 @@ EXPORT_SYMBOL_GPL(encode_rs16);
* @data: data field of a given type
* @par: received parity data field
* @len: data length
- * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @s: syndrome data field, must be in index form
+ * (if NULL, syndrome is calculated)
* @no_eras: number of erasures
* @eras_pos: position of erasures, can be NULL
* @invmsk: invert data mask (will be xored on data, not on parity!)
@@ -403,7 +406,8 @@ EXPORT_SYMBOL_GPL(encode_rs16);
* decoding, so the caller has to ensure that decoder invocations are
* serialized.
*
- * Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable
+ * errors. The count includes errors in the parity.
*/
int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
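Given the clarified return semantics, a caller that supplies a correction buffer applies the corrections itself rather than letting the decoder touch the data. A hedged sketch mirroring the pattern used in the new test module below (buffer layout: dlen data symbols followed by the parity in the same array):

	/* Hedged sketch: decode into a correction buffer, then apply it. */
	static int example_decode(struct rs_control *rsc, uint16_t *r, int dlen,
				  uint16_t *corr, int *errlocs)
	{
		int i, nerrs;

		nerrs = decode_rs16(rsc, r, r + dlen, dlen, NULL, 0,
				    errlocs, 0, corr);
		if (nerrs < 0)
			return nerrs;		/* -EBADMSG: uncorrectable word */

		for (i = 0; i < nerrs; i++)	/* apply each correction pattern */
			r[errlocs[i]] ^= corr[i];

		return nerrs;	/* corrected symbols, parity included */
	}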
diff --git a/lib/reed_solomon/test_rslib.c b/lib/reed_solomon/test_rslib.c
new file mode 100644
index 000000000000..4eb29f365ece
--- /dev/null
+++ b/lib/reed_solomon/test_rslib.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for Generic Reed Solomon encoder / decoder library
+ *
+ * Written by Ferdinand Blomqvist
+ * Based on previous work by Phil Karn, KA9Q
+ */
+#include <linux/rslib.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+enum verbosity {
+ V_SILENT,
+ V_PROGRESS,
+ V_CSUMMARY
+};
+
+enum method {
+ CORR_BUFFER,
+ CALLER_SYNDROME,
+ IN_PLACE
+};
+
+#define __param(type, name, init, msg) \
+ static type name = init; \
+ module_param(name, type, 0444); \
+ MODULE_PARM_DESC(name, msg)
+
+__param(int, v, V_PROGRESS, "Verbosity level");
+__param(int, ewsc, 1, "Erasures without symbol corruption");
+__param(int, bc, 1, "Test for correct behaviour beyond error correction capacity");
+
+struct etab {
+ int symsize;
+ int genpoly;
+ int fcs;
+ int prim;
+ int nroots;
+ int ntrials;
+};
+
+/* List of codes to test */
+static struct etab Tab[] = {
+ {2, 0x7, 1, 1, 1, 100000 },
+ {3, 0xb, 1, 1, 2, 100000 },
+ {3, 0xb, 1, 1, 3, 100000 },
+ {3, 0xb, 2, 1, 4, 100000 },
+ {4, 0x13, 1, 1, 4, 10000 },
+ {5, 0x25, 1, 1, 6, 1000 },
+ {6, 0x43, 3, 1, 8, 1000 },
+ {7, 0x89, 1, 1, 14, 500 },
+ {8, 0x11d, 1, 1, 30, 100 },
+ {8, 0x187, 112, 11, 32, 100 },
+ {9, 0x211, 1, 1, 33, 80 },
+ {0, 0, 0, 0, 0, 0},
+};
+
+
+struct estat {
+ int dwrong;
+ int irv;
+ int wepos;
+ int nwords;
+};
+
+struct bcstat {
+ int rfail;
+ int rsuccess;
+ int noncw;
+ int nwords;
+};
+
+struct wspace {
+ uint16_t *c; /* sent codeword */
+ uint16_t *r; /* received word */
+ uint16_t *s; /* syndrome */
+ uint16_t *corr; /* correction buffer */
+ int *errlocs;
+ int *derrlocs;
+};
+
+struct pad {
+ int mult;
+ int shift;
+};
+
+static struct pad pad_coef[] = {
+ { 0, 0 },
+ { 1, 2 },
+ { 1, 1 },
+ { 3, 2 },
+ { 1, 0 },
+};
+
+static void free_ws(struct wspace *ws)
+{
+ if (!ws)
+ return;
+
+ kfree(ws->errlocs);
+ kfree(ws->c);
+ kfree(ws);
+}
+
+static struct wspace *alloc_ws(struct rs_codec *rs)
+{
+ int nroots = rs->nroots;
+ struct wspace *ws;
+ int nn = rs->nn;
+
+ ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+ if (!ws)
+ return NULL;
+
+ ws->c = kmalloc_array(2 * (nn + nroots),
+ sizeof(uint16_t), GFP_KERNEL);
+ if (!ws->c)
+ goto err;
+
+ ws->r = ws->c + nn;
+ ws->s = ws->r + nn;
+ ws->corr = ws->s + nroots;
+
+ ws->errlocs = kmalloc_array(nn + nroots, sizeof(int), GFP_KERNEL);
+ if (!ws->errlocs)
+ goto err;
+
+ ws->derrlocs = ws->errlocs + nn;
+ return ws;
+
+err:
+ free_ws(ws);
+ return NULL;
+}
+
+
+/*
+ * Generates a random codeword and stores it in c. Generates random errors and
+ * erasures, and stores the random word with errors in r. Erasure positions are
+ * stored in derrlocs, while errlocs has one of three values in every position:
+ *
+ * 0 if there is no error in this position;
+ * 1 if there is a symbol error in this position;
+ * 2 if there is an erasure without symbol corruption.
+ *
+ * Returns the number of corrupted symbols.
+ */
+static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
+ int len, int errs, int eras)
+{
+ int nroots = rs->codec->nroots;
+ int *derrlocs = ws->derrlocs;
+ int *errlocs = ws->errlocs;
+ int dlen = len - nroots;
+ int nn = rs->codec->nn;
+ uint16_t *c = ws->c;
+ uint16_t *r = ws->r;
+ int errval;
+ int errloc;
+ int i;
+
+ /* Load c with random data and encode */
+ for (i = 0; i < dlen; i++)
+ c[i] = prandom_u32() & nn;
+
+ memset(c + dlen, 0, nroots * sizeof(*c));
+ encode_rs16(rs, c, dlen, c + dlen, 0);
+
+ /* Make copy and add errors and erasures */
+ memcpy(r, c, len * sizeof(*r));
+ memset(errlocs, 0, len * sizeof(*errlocs));
+ memset(derrlocs, 0, nroots * sizeof(*derrlocs));
+
+ /* Generating random errors */
+ for (i = 0; i < errs; i++) {
+ do {
+ /* Error value must be nonzero */
+ errval = prandom_u32() & nn;
+ } while (errval == 0);
+
+ do {
+ /* Must not choose the same location twice */
+ errloc = prandom_u32() % len;
+ } while (errlocs[errloc] != 0);
+
+ errlocs[errloc] = 1;
+ r[errloc] ^= errval;
+ }
+
+ /* Generating random erasures */
+ for (i = 0; i < eras; i++) {
+ do {
+ /* Must not choose the same location twice */
+ errloc = prandom_u32() % len;
+ } while (errlocs[errloc] != 0);
+
+ derrlocs[i] = errloc;
+
+ if (ewsc && (prandom_u32() & 1)) {
+ /* Erasure with the symbol intact */
+ errlocs[errloc] = 2;
+ } else {
+ /* Erasure with corrupted symbol */
+ do {
+ /* Error value must be nonzero */
+ errval = prandom_u32() & nn;
+ } while (errval == 0);
+
+ errlocs[errloc] = 1;
+ r[errloc] ^= errval;
+ errs++;
+ }
+ }
+
+ return errs;
+}
+
+static void fix_err(uint16_t *data, int nerrs, uint16_t *corr, int *errlocs)
+{
+ int i;
+
+ for (i = 0; i < nerrs; i++)
+ data[errlocs[i]] ^= corr[i];
+}
+
+static void compute_syndrome(struct rs_control *rsc, uint16_t *data,
+ int len, uint16_t *syn)
+{
+ struct rs_codec *rs = rsc->codec;
+ uint16_t *alpha_to = rs->alpha_to;
+ uint16_t *index_of = rs->index_of;
+ int nroots = rs->nroots;
+ int prim = rs->prim;
+ int fcr = rs->fcr;
+ int i, j;
+
+ /* Calculating syndrome */
+ for (i = 0; i < nroots; i++) {
+ syn[i] = data[0];
+ for (j = 1; j < len; j++) {
+ if (syn[i] == 0) {
+ syn[i] = data[j];
+ } else {
+ syn[i] = data[j] ^
+ alpha_to[rs_modnn(rs, index_of[syn[i]]
+ + (fcr + i) * prim)];
+ }
+ }
+ }
+
+ /* Convert to index form */
+ for (i = 0; i < nroots; i++)
+ syn[i] = rs->index_of[syn[i]];
+}
+
+/* Test up to error correction capacity */
+static void test_uc(struct rs_control *rs, int len, int errs,
+ int eras, int trials, struct estat *stat,
+ struct wspace *ws, int method)
+{
+ int dlen = len - rs->codec->nroots;
+ int *derrlocs = ws->derrlocs;
+ int *errlocs = ws->errlocs;
+ uint16_t *corr = ws->corr;
+ uint16_t *c = ws->c;
+ uint16_t *r = ws->r;
+ uint16_t *s = ws->s;
+ int derrs, nerrs;
+ int i, j;
+
+ for (j = 0; j < trials; j++) {
+ nerrs = get_rcw_we(rs, ws, len, errs, eras);
+
+ switch (method) {
+ case CORR_BUFFER:
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+ break;
+ case CALLER_SYNDROME:
+ compute_syndrome(rs, r, len, s);
+ derrs = decode_rs16(rs, NULL, NULL, dlen,
+ s, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+ break;
+ case IN_PLACE:
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, NULL);
+ break;
+ default:
+ continue;
+ }
+
+ if (derrs != nerrs)
+ stat->irv++;
+
+ if (method != IN_PLACE) {
+ for (i = 0; i < derrs; i++) {
+ if (errlocs[derrlocs[i]] != 1)
+ stat->wepos++;
+ }
+ }
+
+ if (memcmp(r, c, len * sizeof(*r)))
+ stat->dwrong++;
+ }
+ stat->nwords += trials;
+}
+
+static int ex_rs_helper(struct rs_control *rs, struct wspace *ws,
+ int len, int trials, int method)
+{
+ static const char * const desc[] = {
+ "Testing correction buffer interface...",
+ "Testing with caller provided syndrome...",
+ "Testing in-place interface..."
+ };
+
+ struct estat stat = {0, 0, 0, 0};
+ int nroots = rs->codec->nroots;
+ int errs, eras, retval;
+
+ if (v >= V_PROGRESS)
+ pr_info(" %s\n", desc[method]);
+
+ for (errs = 0; errs <= nroots / 2; errs++)
+ for (eras = 0; eras <= nroots - 2 * errs; eras++)
+ test_uc(rs, len, errs, eras, trials, &stat, ws, method);
+
+ if (v >= V_CSUMMARY) {
+ pr_info(" Decodes wrong: %d / %d\n",
+ stat.dwrong, stat.nwords);
+ pr_info(" Wrong return value: %d / %d\n",
+ stat.irv, stat.nwords);
+ if (method != IN_PLACE)
+ pr_info(" Wrong error position: %d\n", stat.wepos);
+ }
+
+ retval = stat.dwrong + stat.wepos + stat.irv;
+ if (retval && v >= V_PROGRESS)
+ pr_warn(" FAIL: %d decoding failures!\n", retval);
+
+ return retval;
+}
+
+static int exercise_rs(struct rs_control *rs, struct wspace *ws,
+ int len, int trials)
+{
+
+ int retval = 0;
+ int i;
+
+ if (v >= V_PROGRESS)
+ pr_info("Testing up to error correction capacity...\n");
+
+ for (i = 0; i <= IN_PLACE; i++)
+ retval |= ex_rs_helper(rs, ws, len, trials, i);
+
+ return retval;
+}
+
+/* Tests for correct behaviour beyond error correction capacity */
+static void test_bc(struct rs_control *rs, int len, int errs,
+ int eras, int trials, struct bcstat *stat,
+ struct wspace *ws)
+{
+ int nroots = rs->codec->nroots;
+ int dlen = len - nroots;
+ int *derrlocs = ws->derrlocs;
+ uint16_t *corr = ws->corr;
+ uint16_t *r = ws->r;
+ int derrs, j;
+
+ for (j = 0; j < trials; j++) {
+ get_rcw_we(rs, ws, len, errs, eras);
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+
+ if (derrs >= 0) {
+ stat->rsuccess++;
+
+ /*
+ * We check that the returned word is actually a
+ * codeword. The obvious way to do this would be to
+ * compute the syndrome, but we don't want to replicate
+ * that code here. However, all the codes are in
+ * systematic form, and therefore we can encode the
+ * returned word, and see whether the parity changes or
+ * not.
+ */
+ memset(corr, 0, nroots * sizeof(*corr));
+ encode_rs16(rs, r, dlen, corr, 0);
+
+ if (memcmp(r + dlen, corr, nroots * sizeof(*corr)))
+ stat->noncw++;
+ } else {
+ stat->rfail++;
+ }
+ }
+ stat->nwords += trials;
+}
+
+static int exercise_rs_bc(struct rs_control *rs, struct wspace *ws,
+ int len, int trials)
+{
+ struct bcstat stat = {0, 0, 0, 0};
+ int nroots = rs->codec->nroots;
+ int errs, eras, cutoff;
+
+ if (v >= V_PROGRESS)
+ pr_info("Testing beyond error correction capacity...\n");
+
+ for (errs = 1; errs <= nroots; errs++) {
+ eras = nroots - 2 * errs + 1;
+ if (eras < 0)
+ eras = 0;
+
+ cutoff = nroots <= len - errs ? nroots : len - errs;
+ for (; eras <= cutoff; eras++)
+ test_bc(rs, len, errs, eras, trials, &stat, ws);
+ }
+
+ if (v >= V_CSUMMARY) {
+ pr_info(" decoder gives up: %d / %d\n",
+ stat.rfail, stat.nwords);
+ pr_info(" decoder returns success: %d / %d\n",
+ stat.rsuccess, stat.nwords);
+ pr_info(" not a codeword: %d / %d\n",
+ stat.noncw, stat.rsuccess);
+ }
+
+ if (stat.noncw && v >= V_PROGRESS)
+ pr_warn(" FAIL: %d silent failures!\n", stat.noncw);
+
+ return stat.noncw;
+}
+
+static int run_exercise(struct etab *e)
+{
+ int nn = (1 << e->symsize) - 1;
+ int kk = nn - e->nroots;
+ struct rs_control *rsc;
+ int retval = -ENOMEM;
+ int max_pad = kk - 1;
+ int prev_pad = -1;
+ struct wspace *ws;
+ int i;
+
+ rsc = init_rs(e->symsize, e->genpoly, e->fcs, e->prim, e->nroots);
+ if (!rsc)
+ return retval;
+
+ ws = alloc_ws(rsc->codec);
+ if (!ws)
+ goto err;
+
+ retval = 0;
+ for (i = 0; i < ARRAY_SIZE(pad_coef); i++) {
+ int pad = (pad_coef[i].mult * max_pad) >> pad_coef[i].shift;
+ int len = nn - pad;
+
+ if (pad == prev_pad)
+ continue;
+
+ prev_pad = pad;
+ if (v >= V_PROGRESS) {
+ pr_info("Testing (%d,%d)_%d code...\n",
+ len, kk - pad, nn + 1);
+ }
+
+ retval |= exercise_rs(rsc, ws, len, e->ntrials);
+ if (bc)
+ retval |= exercise_rs_bc(rsc, ws, len, e->ntrials);
+ }
+
+ free_ws(ws);
+
+err:
+ free_rs(rsc);
+ return retval;
+}
+
+static int __init test_rslib_init(void)
+{
+ int i, fail = 0;
+
+ for (i = 0; Tab[i].symsize != 0 ; i++) {
+ int retval;
+
+ retval = run_exercise(Tab + i);
+ if (retval < 0)
+ return -ENOMEM;
+
+ fail |= retval;
+ }
+
+ if (fail)
+ pr_warn("rslib: test failed\n");
+ else
+ pr_info("rslib: test ok\n");
+
+ return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void __exit test_rslib_exit(void)
+{
+}
+
+module_init(test_rslib_init)
+module_exit(test_rslib_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ferdinand Blomqvist");
+MODULE_DESCRIPTION("Reed-Solomon library test");
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 2882d9ba6607..eacb82468437 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -676,17 +676,18 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
if (!miter->__remaining) {
struct scatterlist *sg;
- unsigned long pgoffset;
if (!__sg_page_iter_next(&miter->piter))
return false;
sg = miter->piter.sg;
- pgoffset = miter->piter.sg_pgoffset;
- miter->__offset = pgoffset ? 0 : sg->offset;
+ miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
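+ /*
+ * sg->offset may span whole pages: fold those pages into the page
+ * iterator and keep only the in-page remainder in __offset.
+ */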
+ miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
+ miter->__offset &= PAGE_SIZE - 1;
miter->__remaining = sg->offset + sg->length -
- (pgoffset << PAGE_SHIFT) - miter->__offset;
+ (miter->piter.sg_pgoffset << PAGE_SHIFT) -
+ miter->__offset;
miter->__remaining = min_t(unsigned long, miter->__remaining,
PAGE_SIZE - miter->__offset);
}
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 157d9e31f6c2..60ba93fc42ce 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
* Kernel threads bound to a single CPU can safely use
* smp_processor_id():
*/
- if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+ if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
goto out;
/*
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
new file mode 100644
index 000000000000..cc00364bd2c2
--- /dev/null
+++ b/lib/vdso/Kconfig
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config HAVE_GENERIC_VDSO
+ bool
+
+if HAVE_GENERIC_VDSO
+
+config GENERIC_GETTIMEOFDAY
+ bool
+ help
+ This is a generic implementation of the gettimeofday vdso.
+ Each architecture that enables this feature has to
+ provide the fallback implementation.
+
+config GENERIC_VDSO_32
+ bool
+ depends on GENERIC_GETTIMEOFDAY && !64BIT
+ help
+ This config option helps to avoid possible performance issues
+ on 32-bit-only architectures.
+
+config GENERIC_COMPAT_VDSO
+ bool
+ help
+ This config option enables the compat VDSO layer.
+
+config CROSS_COMPILE_COMPAT_VDSO
+ string "32 bit Toolchain prefix for compat vDSO"
+ default ""
+ depends on GENERIC_COMPAT_VDSO
+ help
+ Defines the cross-compiler prefix for compiling the compat vDSO.
+ If the 64 bit compiler (e.g. on x86_64) can also compile the vDSO
+ for 32 bit, this parameter does not need to be set.
+
+endif
diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile
new file mode 100644
index 000000000000..c415a685d61b
--- /dev/null
+++ b/lib/vdso/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH))
+
+c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c)
+
+# This cmd checks that the vdso library does not contain absolute relocations.
+# It has to be called after linking the vdso library and takes the library as
+# a parameter.
+#
+# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds
+# to the absolute relocation types printed by "objdump -R" and accepted by the
+# dynamic linker.
+ifndef ARCH_REL_TYPE_ABS
+$(error ARCH_REL_TYPE_ABS is not set)
+endif
+
+quiet_cmd_vdso_check = VDSOCHK $@
+ cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \
+ then (echo >&2 "$@: dynamic relocations are not supported"; \
+ rm -f $@; /bin/false); fi
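+
+# Example (sketch only, the rule name is illustrative): architectures
+# typically chain the check onto their vdso link rule, e.g.
+#	cmd_vdsold_and_check = $(cmd_vdsold); $(cmd_vdso_check)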
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
new file mode 100644
index 000000000000..2d1c1f241fd9
--- /dev/null
+++ b/lib/vdso/gettimeofday.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/compiler.h>
+#include <linux/math64.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/hrtimer_defs.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+
+/*
+ * The generic vDSO implementation requires that gettimeofday.h
+ * provides:
+ * - __arch_get_vdso_data(): to get the vdso datapage.
+ * - __arch_get_hw_counter(): to get the hw counter based on the
+ * clock_mode.
+ * - gettimeofday_fallback(): fallback for gettimeofday.
+ * - clock_gettime_fallback(): fallback for clock_gettime.
+ * - clock_getres_fallback(): fallback for clock_getres.
+ */
+#ifdef ENABLE_COMPAT_VDSO
+#include <asm/vdso/compat_gettimeofday.h>
+#else
+#include <asm/vdso/gettimeofday.h>
+#endif /* ENABLE_COMPAT_VDSO */
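+
+/*
+ * A minimal arch implementation would look roughly like this (sketch only;
+ * the data page symbol and the counter read below are illustrative and
+ * per-architecture):
+ *
+ *	static __always_inline
+ *	const struct vdso_data *__arch_get_vdso_data(void)
+ *	{
+ *		return _vdso_data;
+ *	}
+ *
+ *	static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+ *	{
+ *		return arch_counter_read();
+ *	}
+ */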
+
+#ifndef vdso_calc_delta
+/*
+ * Default implementation which works for all sane clocksources. That
+ * obviously excludes x86/TSC.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return ((cycles - last) & mask) * mult;
+}
+#endif
+
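+/*
+ * Open-coded seqcount read: snapshot the timekeeping data and retry if an
+ * update raced with the read. A negative hw counter value signals an
+ * invalid clock mode, in which case the real syscall is used instead.
+ */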
+static int do_hres(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u64 cycles, last, sec, ns;
+ u32 seq;
+
+ do {
+ seq = vdso_read_begin(vd);
+ cycles = __arch_get_hw_counter(vd->clock_mode);
+ ns = vdso_ts->nsec;
+ last = vd->cycle_last;
+ if (unlikely((s64)cycles < 0))
+ return clock_gettime_fallback(clk, ts);
+
+ ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+ ns >>= vd->shift;
+ sec = vdso_ts->sec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static void do_coarse(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u32 seq;
+
+ do {
+ seq = vdso_read_begin(vd);
+ ts->tv_sec = vdso_ts->sec;
+ ts->tv_nsec = vdso_ts->nsec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+}
+
+static __maybe_unused int
+__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ u32 msk;
+
+ /* Check for negative values or invalid clocks */
+ if (unlikely((u32) clock >= MAX_CLOCKS))
+ goto fallback;
+
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (likely(msk & VDSO_HRES)) {
+ return do_hres(&vd[CS_HRES_COARSE], clock, ts);
+ } else if (msk & VDSO_COARSE) {
+ do_coarse(&vd[CS_HRES_COARSE], clock, ts);
+ return 0;
+ } else if (msk & VDSO_RAW) {
+ return do_hres(&vd[CS_RAW], clock, ts);
+ }
+
+fallback:
+ return clock_gettime_fallback(clock, ts);
+}
+
+static __maybe_unused int
+__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+{
+ struct __kernel_timespec ts;
+ int ret;
+
+ if (res == NULL)
+ goto fallback;
+
+ ret = __cvdso_clock_gettime(clock, &ts);
+
+ if (ret == 0) {
+ res->tv_sec = ts.tv_sec;
+ res->tv_nsec = ts.tv_nsec;
+ }
+
+ return ret;
+
+fallback:
+ return clock_gettime_fallback(clock, (struct __kernel_timespec *)res);
+}
+
+static __maybe_unused int
+__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+
+ if (likely(tv != NULL)) {
+ struct __kernel_timespec ts;
+
+ if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
+ return gettimeofday_fallback(tv, tz);
+
+ tv->tv_sec = ts.tv_sec;
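+ /* tv_nsec is always < NSEC_PER_SEC, so the u32 cast cannot truncate */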
+ tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
+ }
+
+ if (unlikely(tz != NULL)) {
+ tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
+ tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
+ }
+
+ return 0;
+}
+
+#ifdef VDSO_HAS_TIME
+static __maybe_unused time_t __cvdso_time(time_t *time)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
+
+ if (time)
+ *time = t;
+
+ return t;
+}
+#endif /* VDSO_HAS_TIME */
+
+#ifdef VDSO_HAS_CLOCK_GETRES
+static __maybe_unused
+int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ u64 ns;
+ u32 msk;
+ u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
+
+ /* Check for negative values or invalid clocks */
+ if (unlikely((u32) clock >= MAX_CLOCKS))
+ goto fallback;
+
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (msk & VDSO_HRES) {
+ /*
+ * Preserves the behaviour of posix_get_hrtimer_res().
+ */
+ ns = hrtimer_res;
+ } else if (msk & VDSO_COARSE) {
+ /*
+ * Preserves the behaviour of posix_get_coarse_res().
+ */
+ ns = LOW_RES_NSEC;
+ } else if (msk & VDSO_RAW) {
+ /*
+ * Preserves the behaviour of posix_get_hrtimer_res().
+ */
+ ns = hrtimer_res;
+ } else {
+ goto fallback;
+ }
+
+ if (res) {
+ res->tv_sec = 0;
+ res->tv_nsec = ns;
+ }
+
+ return 0;
+
+fallback:
+ return clock_getres_fallback(clock, res);
+}
+
+static __maybe_unused int
+__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+{
+ struct __kernel_timespec ts;
+ int ret;
+
+ if (res == NULL)
+ goto fallback;
+
+ ret = __cvdso_clock_getres(clock, &ts);
+
+ if (ret == 0) {
+ res->tv_sec = ts.tv_sec;
+ res->tv_nsec = ts.tv_nsec;
+ }
+
+ return ret;
+
+fallback:
+ return clock_getres_fallback(clock, (struct __kernel_timespec *)res);
+}
+#endif /* VDSO_HAS_CLOCK_GETRES */
diff --git a/mm/filemap.c b/mm/filemap.c
index df2006ba0cfa..6dd9a2274c80 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -281,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
* @pvec: pagevec with pages to delete
*
* The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
+ * from the mapping. The function expects @pvec to be sorted by page index.
* It tolerates holes in @pvec (mapping entries at those indices are not
* modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
*
* The function expects the i_pages lock to be held.
*/
@@ -294,44 +294,40 @@ static void page_cache_delete_batch(struct address_space *mapping,
{
XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
- int i = 0;
+ int i = 0, tail_pages = 0;
struct page *page;
mapping_set_update(&xas, mapping);
xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec))
+ if (i >= pagevec_count(pvec) && !tail_pages)
break;
-
- /* A swap/dax/shadow entry got inserted? Skip it. */
if (xa_is_value(page))
continue;
- /*
- * A page got inserted in our range? Skip it. We have our
- * pages locked so they are protected from being removed.
- * If we see a page whose index is higher than ours, it
- * means our page has been removed, which shouldn't be
- * possible because we're holding the PageLock.
- */
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
- page);
- continue;
- }
-
- WARN_ON_ONCE(!PageLocked(page));
-
- if (page->index == xas.xa_index)
+ if (!tail_pages) {
+ /*
+ * Some page got inserted in our range? Skip it. We
+ * have our pages locked so they are protected from
+ * being removed.
+ */
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index >
+ pvec->pages[i]->index, page);
+ continue;
+ }
+ WARN_ON_ONCE(!PageLocked(page));
+ if (PageTransHuge(page) && !PageHuge(page))
+ tail_pages = HPAGE_PMD_NR - 1;
page->mapping = NULL;
- /* Leave page->index set: truncation lookup relies on it */
-
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + (1UL << compound_order(page)) - 1 ==
- xas.xa_index)
+ /*
+ * Leave page->index set: truncation lookup relies
+ * upon it
+ */
i++;
+ } else {
+ VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
+ != pvec->pages[i]->index, page);
+ tail_pages--;
+ }
xas_store(&xas, NULL);
total_pages++;
}
@@ -1498,7 +1494,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
XA_STATE(xas, &mapping->i_pages, offset);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
repeat:
@@ -1513,19 +1509,25 @@ repeat:
if (!page || xa_is_value(page))
goto out;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
+ goto repeat;
+
+ /* The page was split under us? */
+ if (compound_head(page) != head) {
+ put_page(head);
goto repeat;
+ }
/*
- * Has the page moved or been split?
+ * Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != xas_reload(&xas))) {
- put_page(page);
+ put_page(head);
goto repeat;
}
- page = find_subpage(page, offset);
out:
rcu_read_unlock();
@@ -1707,6 +1709,7 @@ unsigned find_get_entries(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, page, ULONG_MAX) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1717,13 +1720,17 @@ unsigned find_get_entries(struct address_space *mapping,
if (xa_is_value(page))
goto export;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
@@ -1732,7 +1739,7 @@ export:
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1774,27 +1781,33 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
rcu_read_lock();
xas_for_each(&xas, page, end) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1839,6 +1852,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
rcu_read_lock();
for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1848,19 +1862,24 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
if (xa_is_value(page))
break;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages)
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -1896,6 +1915,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
rcu_read_lock();
xas_for_each_marked(&xas, page, end, tag) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
@@ -1906,21 +1926,26 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*index = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
@@ -2603,7 +2628,7 @@ void filemap_map_pages(struct vm_fault *vmf,
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
@@ -2612,19 +2637,24 @@ void filemap_map_pages(struct vm_fault *vmf,
if (xa_is_value(page))
goto next;
+ head = compound_head(page);
+
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(page))
+ if (PageLocked(head))
goto next;
- if (!page_cache_get_speculative(page))
+ if (!page_cache_get_speculative(head))
goto next;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto skip;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto skip;
- page = find_subpage(page, xas.xa_index);
if (!PageUptodate(page) ||
PageReadahead(page) ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bb8b617e34ed..885642c82aaa 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2496,9 +2496,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
shmem_uncharge(head->mapping->host, 1);
put_page(head + i);
- } else if (!PageAnon(page)) {
- __xa_store(&head->mapping->i_pages, head[i].index,
- head + i, 0);
}
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0f7419938008..eaaa21b23215 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1378,7 +1378,7 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_FAIL;
goto xa_locked;
}
- xas_store(&xas, new_page);
+ xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
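+ /*
+ * Store the matching subpage, not the head: the page cache again
+ * keeps one entry per tail page of a compound page.
+ */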
nr_none++;
continue;
}
@@ -1454,7 +1454,7 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */
- xas_store(&xas, new_page);
+ xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
continue;
out_unlock:
unlock_page(page);
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c..650e65a46b9c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -39,7 +39,6 @@ static void memfd_tag_pins(struct xa_state *xas)
xas_for_each(xas, page, ULONG_MAX) {
if (xa_is_value(page))
continue;
- page = find_subpage(page, xas->xa_index);
if (page_count(page) - page_mapcount(page) > 1)
xas_set_mark(xas, MEMFD_TAG_PINNED);
@@ -89,7 +88,6 @@ static int memfd_wait_for_pins(struct address_space *mapping)
bool clear = true;
if (xa_is_value(page))
continue;
- page = find_subpage(page, xas.xa_index);
if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d9cc6606f409..f045514d8d20 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -218,7 +218,7 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) {
ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr,
- addr_lsb, current);
+ addr_lsb);
} else {
/*
* Don't use force here, it's convenient if the signal
diff --git a/mm/migrate.c b/mm/migrate.c
index f2ecc2855a12..e9594bc0d406 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -463,7 +463,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
for (i = 1; i < HPAGE_PMD_NR; i++) {
xas_next(&xas);
- xas_store(&xas, newpage);
+ xas_store(&xas, newpage + i);
}
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..8e3bc949ebcc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1826,7 +1826,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
first_deferred_pfn)) {
pgdat->first_deferred_pfn = ULONG_MAX;
pgdat_resize_unlock(pgdat, &flags);
- return true;
+ /* Retry only once. */
+ return first_deferred_pfn != ULONG_MAX;
}
/*
diff --git a/mm/page_io.c b/mm/page_io.c
index 189415852077..a39aac2f8c8d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -137,8 +137,10 @@ out:
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
bio_put(bio);
- blk_wake_io_task(waiter);
- put_task_struct(waiter);
+ if (waiter) {
+ blk_wake_io_task(waiter);
+ put_task_struct(waiter);
+ }
}
int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -395,11 +397,12 @@ int swap_readpage(struct page *page, bool synchronous)
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
*/
- get_task_struct(current);
- bio->bi_private = current;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- if (synchronous)
+ if (synchronous) {
bio->bi_opf |= REQ_HIPRI;
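+ /*
+ * Only polled (synchronous) reads stash the submitting task in
+ * bi_private; the completion path must tolerate a NULL waiter.
+ */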
+ get_task_struct(current);
+ bio->bi_private = current;
+ }
count_vm_event(PSWPIN);
bio_get(bio);
qc = submit_bio(bio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1bb3b8dc8bb2..f4dce9c8670d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -614,7 +614,7 @@ static int shmem_add_to_page_cache(struct page *page,
if (xas_error(&xas))
goto unlock;
next:
- xas_store(&xas, page);
+ xas_store(&xas, page + i);
if (++i < nr) {
xas_next(&xas);
goto next;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index eb714165afd2..85245fdec8d9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -132,7 +132,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
for (i = 0; i < nr; i++) {
VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
set_page_private(page + i, entry.val + i);
- xas_store(&xas, page);
+ xas_store(&xas, page + i);
xas_next(&xas);
}
address_space->nrpages += nr;
@@ -167,7 +167,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
for (i = 0; i < nr; i++) {
void *entry = xas_store(&xas, NULL);
- VM_BUG_ON_PAGE(entry != page, entry);
+ VM_BUG_ON_PAGE(entry != page + i, entry);
set_page_private(page + i, 0);
xas_next(&xas);
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0f76cca32a1c..030a544e6602 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2128,17 +2128,6 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
int flush_dmap = 0;
int i;
- /*
- * The below block can be removed when all architectures that have
- * direct map permissions also have set_direct_map_() implementations.
- * This is concerned with resetting the direct map of any vm alias with
- * execute permissions, without leaving a RW+X window.
- */
- if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
- set_memory_nx((unsigned long)area->addr, area->nr_pages);
- set_memory_rw((unsigned long)area->addr, area->nr_pages);
- }
-
remove_vm_area(area->addr);
/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7889f583ced9..910e02c793ff 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3644,19 +3644,18 @@ out:
}
/*
- * pgdat->kswapd_classzone_idx is the highest zone index that a recent
- * allocation request woke kswapd for. When kswapd has not woken recently,
- * the value is MAX_NR_ZONES which is not a valid index. This compares a
- * given classzone and returns it or the highest classzone index kswapd
- * was recently woke for.
+ * The pgdat->kswapd_classzone_idx is used by the waker to pass the highest
+ * zone index to be reclaimed by kswapd. If the value is MAX_NR_ZONES, which is
+ * not a valid index, then either kswapd runs for the first time or kswapd
+ * couldn't sleep after the previous reclaim attempt (the node is still
+ * unbalanced). In that case return the zone index of the previous kswapd
+ * reclaim cycle.
*/
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
- enum zone_type classzone_idx)
+ enum zone_type prev_classzone_idx)
{
if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- return classzone_idx;
-
- return max(pgdat->kswapd_classzone_idx, classzone_idx);
+ return prev_classzone_idx;
+ return pgdat->kswapd_classzone_idx;
}
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
@@ -3797,7 +3796,7 @@ kswapd_try_sleep:
/* Read the new order and classzone_idx */
alloc_order = reclaim_order = pgdat->kswapd_order;
- classzone_idx = kswapd_classzone_idx(pgdat, 0);
+ classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
pgdat->kswapd_order = 0;
pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
@@ -3851,8 +3850,12 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
if (!cpuset_zone_allowed(zone, gfp_flags))
return;
pgdat = zone->zone_pgdat;
- pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
- classzone_idx);
+
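+ /* Don't let the MAX_NR_ZONES "not woken" sentinel feed into max() */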
+ if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
+ pgdat->kswapd_classzone_idx = classzone_idx;
+ else
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
+ classzone_idx);
pgdat->kswapd_order = max(pgdat->kswapd_order, order);
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9f77432dbe38..5406d7cd46ad 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1353,7 +1353,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
* actually encrypted before enforcing a key size.
*/
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
- hcon->enc_key_size > HCI_MIN_ENC_KEY_SIZE);
+ hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE);
}
static void l2cap_do_start(struct l2cap_chan *chan)
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 7ee4fea93637..c0f0990f30b6 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -22,7 +22,7 @@ static void shutdown_umh(void)
tsk = get_pid_task(find_vpid(bpfilter_ops.info.pid), PIDTYPE_PID);
if (tsk) {
- force_sig(SIGKILL, tsk);
+ send_sig(SIGKILL, tsk, 1);
put_task_struct(tsk);
}
}
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 1c811c74bfc0..38de80d01aae 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -306,7 +306,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
int err = 0;
struct ceph_crypto_key *ckey;
- ukey = request_key(&key_type_ceph, name, NULL);
+ ukey = request_key(&key_type_ceph, name, NULL, NULL);
if (IS_ERR(ukey)) {
/* request_key errors don't map nicely to mount(2)
errors; don't even try, but still printk */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index cd0b094468b6..a33402c99321 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1887,7 +1887,8 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
return -EINVAL;
/* do dns_resolve upcall */
- ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL, false);
+ ip_len = dns_query(current->nsproxy->net_ns,
+ NULL, name, end - name, NULL, &ip_addr, NULL, false);
if (ip_len > 0)
ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
else
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 15f68842ac6b..f7b6dda798e0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -39,9 +39,16 @@ EXPORT_SYMBOL_GPL(net_namespace_list);
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);
+#ifdef CONFIG_KEYS
+static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
+#endif
+
struct net init_net = {
.count = REFCOUNT_INIT(1),
.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
+#ifdef CONFIG_KEYS
+ .key_domain = &init_net_key_domain,
+#endif
};
EXPORT_SYMBOL(init_net);
@@ -387,10 +394,22 @@ static struct net *net_alloc(void)
if (!net)
goto out_free;
+#ifdef CONFIG_KEYS
+ net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
+ if (!net->key_domain)
+ goto out_free_2;
+ refcount_set(&net->key_domain->usage, 1);
+#endif
+
rcu_assign_pointer(net->gen, ng);
out:
return net;
+#ifdef CONFIG_KEYS
+out_free_2:
+ kmem_cache_free(net_cachep, net);
+ net = NULL;
+#endif
out_free:
kfree(ng);
goto out;
@@ -567,6 +586,7 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
dec_net_namespaces(net->ucounts);
+ key_remove_domain(net->key_domain);
put_user_ns(net->user_ns);
net_drop_ns(net);
}
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index a65d553e730d..6b201531b165 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -46,6 +46,15 @@ const struct cred *dns_resolver_cache;
#define DNS_ERRORNO_OPTION "dnserror"
+static struct key_acl dns_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_CLEAR),
+ }
+};
+
/*
* Preparse instantiation data for a dns_resolver key.
*
@@ -314,6 +323,7 @@ static long dns_resolver_read(const struct key *key,
struct key_type key_type_dns_resolver = {
.name = "dns_resolver",
+ .flags = KEY_TYPE_NET_DOMAIN,
.preparse = dns_resolver_preparse,
.free_preparse = dns_resolver_free_preparse,
.instantiate = generic_key_instantiate,
@@ -342,8 +352,7 @@ static int __init init_dns_resolver(void)
keyring = keyring_alloc(".dns_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ,
+ &dns_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 2d260432b3be..236baf2bfa4c 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -40,14 +40,26 @@
#include <linux/cred.h>
#include <linux/dns_resolver.h>
#include <linux/err.h>
+#include <net/net_namespace.h>
#include <keys/dns_resolver-type.h>
#include <keys/user-type.h>
#include "internal.h"
+static struct key_acl dns_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_SEARCH | KEY_ACE_READ),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_INVAL),
+ }
+};
+
/**
* dns_query - Query the DNS
+ * @net: The network namespace to operate in.
* @type: Query type (or NULL for straight host->IP lookup)
* @name: Name to look up
* @namelen: Length of name
@@ -69,7 +81,8 @@
*
* Returns the size of the result on success, -ve error code otherwise.
*/
-int dns_query(const char *type, const char *name, size_t namelen,
+int dns_query(struct net *net,
+ const char *type, const char *name, size_t namelen,
const char *options, char **_result, time64_t *_expiry,
bool invalidate)
{
@@ -122,7 +135,8 @@ int dns_query(const char *type, const char *name, size_t namelen,
* add_key() to preinstall malicious redirections
*/
saved_cred = override_creds(dns_resolver_cache);
- rkey = request_key(&key_type_dns_resolver, desc, options);
+ rkey = request_key_net(&key_type_dns_resolver, desc, net, options,
+ &dns_key_acl);
revert_creds(saved_cred);
kfree(desc);
if (IS_ERR(rkey)) {
@@ -132,8 +146,6 @@ int dns_query(const char *type, const char *name, size_t namelen,
down_read(&rkey->sem);
set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
- rkey->perm |= KEY_USR_VIEW;
-
ret = key_validate(rkey);
if (ret < 0)
goto put;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 0227cce9685e..0c93b1b7a826 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -3,7 +3,7 @@ config MAC80211
tristate "Generic IEEE 802.11 Networking Stack (mac80211)"
depends on CFG80211
select CRYPTO
- select CRYPTO_ARC4
+ select CRYPTO_LIB_ARC4
select CRYPTO_AES
select CRYPTO_CCM
select CRYPTO_GCM
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a1973a26c7fc..3fae902937fd 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <linux/rcupdate.h>
+#include <linux/fips.h>
#include <linux/if_ether.h>
#include <net/cfg80211.h>
#include "ieee80211_i.h"
@@ -402,9 +403,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_TKIP:
case WLAN_CIPHER_SUITE_WEP104:
- if (IS_ERR(local->wep_tx_tfm))
+ if (WARN_ON_ONCE(fips_enabled))
return -EINVAL;
- break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
case WLAN_CIPHER_SUITE_AES_CMAC:
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6396d46a9a71..004e2e3adb88 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1255,8 +1255,8 @@ struct ieee80211_local {
struct rate_control_ref *rate_ctrl;
- struct crypto_cipher *wep_tx_tfm;
- struct crypto_cipher *wep_rx_tfm;
+ struct arc4_ctx wep_tx_ctx;
+ struct arc4_ctx wep_rx_ctx;
u32 wep_iv;
/* see iface.c */
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index be118c39433f..b8b9cd743bf4 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/crypto.h>
#include <linux/rcupdate.h>
+#include <crypto/arc4.h>
#include <net/mac80211.h>
#define NUM_DEFAULT_KEYS 4
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 55583b71ffaf..087c90e461b5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -10,6 +10,7 @@
#include <net/mac80211.h>
#include <linux/module.h>
+#include <linux/fips.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/types.h>
@@ -730,8 +731,7 @@ EXPORT_SYMBOL(ieee80211_alloc_hw_nm);
static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
{
- bool have_wep = !(IS_ERR(local->wep_tx_tfm) ||
- IS_ERR(local->wep_rx_tfm));
+ bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */
bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
int n_suites = 0, r = 0, w = 0;
u32 *suites;
@@ -1298,7 +1298,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
fail_rate:
rtnl_unlock();
ieee80211_led_exit(local);
- ieee80211_wep_free(local);
fail_flows:
destroy_workqueue(local->workqueue);
fail_workqueue:
@@ -1355,7 +1354,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
- ieee80211_wep_free(local);
ieee80211_led_exit(local);
kfree(local->int_scan_req);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 379d2ab6d327..a00b703bfdfd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -12,6 +12,7 @@
*/
#include <linux/delay.h>
+#include <linux/fips.h>
#include <linux/if_ether.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
@@ -5045,7 +5046,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
auth_alg = WLAN_AUTH_OPEN;
break;
case NL80211_AUTHTYPE_SHARED_KEY:
- if (IS_ERR(local->wep_tx_tfm))
+ if (fips_enabled)
return -EOPNOTSUPP;
auth_alg = WLAN_AUTH_SHARED_KEY;
break;
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 7914b8e3ce8c..727dc9f3f3b3 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(ieee80211_get_tkip_p2k);
* @payload_len is the length of payload (_not_ including IV/ICV length).
* @ta is the transmitter addresses.
*/
-int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_encrypt_data(struct arc4_ctx *ctx,
struct ieee80211_key *key,
struct sk_buff *skb,
u8 *payload, size_t payload_len)
@@ -228,7 +228,7 @@ int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
ieee80211_get_tkip_p2k(&key->conf, skb, rc4key);
- return ieee80211_wep_encrypt_data(tfm, rc4key, 16,
+ return ieee80211_wep_encrypt_data(ctx, rc4key, 16,
payload, payload_len);
}
@@ -236,7 +236,7 @@ int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
* beginning of the buffer containing IEEE 802.11 header payload, i.e.,
* including IV, Ext. IV, real data, Michael MIC, ICV. @payload_len is the
* length of payload, including IV, Ext. IV, MIC, ICV. */
-int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
struct ieee80211_key *key,
u8 *payload, size_t payload_len, u8 *ta,
u8 *ra, int only_iv, int queue,
@@ -294,7 +294,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
tkip_mixing_phase2(tk, &rx_ctx->ctx, iv16, rc4key);
- res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
+ res = ieee80211_wep_decrypt_data(ctx, rc4key, 16, pos, payload_len - 12);
done:
if (res == TKIP_DECRYPT_OK) {
/*
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index 676a7babdf5d..9d2f8bd36cc7 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -10,7 +10,7 @@
#include <linux/crypto.h>
#include "key.h"
-int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_encrypt_data(struct arc4_ctx *ctx,
struct ieee80211_key *key,
struct sk_buff *skb,
u8 *payload, size_t payload_len);
@@ -21,7 +21,7 @@ enum {
TKIP_DECRYPT_INVALID_KEYIDX = -2,
TKIP_DECRYPT_REPLAY = -3,
};
-int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
+int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx,
struct ieee80211_key *key,
u8 *payload, size_t payload_len, u8 *ta,
u8 *ra, int only_iv, int queue,
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 3d9e92867ef0..b75c2c54e665 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -27,30 +27,9 @@ int ieee80211_wep_init(struct ieee80211_local *local)
/* start WEP IV from a random value */
get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN);
- local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(local->wep_tx_tfm)) {
- local->wep_rx_tfm = ERR_PTR(-EINVAL);
- return PTR_ERR(local->wep_tx_tfm);
- }
-
- local->wep_rx_tfm = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(local->wep_rx_tfm)) {
- crypto_free_cipher(local->wep_tx_tfm);
- local->wep_tx_tfm = ERR_PTR(-EINVAL);
- return PTR_ERR(local->wep_rx_tfm);
- }
-
return 0;
}
-void ieee80211_wep_free(struct ieee80211_local *local)
-{
- if (!IS_ERR(local->wep_tx_tfm))
- crypto_free_cipher(local->wep_tx_tfm);
- if (!IS_ERR(local->wep_rx_tfm))
- crypto_free_cipher(local->wep_rx_tfm);
-}
-
static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen)
{
/*
@@ -128,21 +107,17 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
/* Perform WEP encryption using given key. data buffer must have tailroom
* for 4-byte ICV. data_len must not include this ICV. Note: this function
* does _not_ add IV. data = RC4(data | CRC32(data)) */
-int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_encrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
size_t klen, u8 *data, size_t data_len)
{
__le32 icv;
- int i;
-
- if (IS_ERR(tfm))
- return -1;
icv = cpu_to_le32(~crc32_le(~0, data, data_len));
put_unaligned(icv, (__le32 *)(data + data_len));
- crypto_cipher_setkey(tfm, rc4key, klen);
- for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
- crypto_cipher_encrypt_one(tfm, data + i, data + i);
+ arc4_setkey(ctx, rc4key, klen);
+ arc4_crypt(ctx, data, data, data_len + IEEE80211_WEP_ICV_LEN);
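+ /* Scrub the expanded RC4 state so key material doesn't linger */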
+ memzero_explicit(ctx, sizeof(*ctx));
return 0;
}
@@ -181,7 +156,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
/* Add room for ICV */
skb_put(skb, IEEE80211_WEP_ICV_LEN);
- return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3,
+ return ieee80211_wep_encrypt_data(&local->wep_tx_ctx, rc4key, keylen + 3,
iv + IEEE80211_WEP_IV_LEN, len);
}
@@ -189,18 +164,14 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
/* Perform WEP decryption using given key. data buffer includes encrypted
* payload, including 4-byte ICV, but _not_ IV. data_len must not include ICV.
* Return 0 on success and -1 on ICV mismatch. */
-int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_decrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
size_t klen, u8 *data, size_t data_len)
{
__le32 crc;
- int i;
-
- if (IS_ERR(tfm))
- return -1;
- crypto_cipher_setkey(tfm, rc4key, klen);
- for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++)
- crypto_cipher_decrypt_one(tfm, data + i, data + i);
+ arc4_setkey(ctx, rc4key, klen);
+ arc4_crypt(ctx, data, data, data_len + IEEE80211_WEP_ICV_LEN);
+ memzero_explicit(ctx, sizeof(*ctx));
crc = cpu_to_le32(~crc32_le(~0, data, data_len));
if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0)
@@ -253,7 +224,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
/* Copy rest of the WEP key (the secret part) */
memcpy(rc4key + 3, key->conf.key, key->conf.keylen);
- if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen,
+ if (ieee80211_wep_decrypt_data(&local->wep_rx_ctx, rc4key, klen,
skb->data + hdrlen +
IEEE80211_WEP_IV_LEN, len))
ret = -1;
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
index 866a6798c9ef..997a034233c2 100644
--- a/net/mac80211/wep.h
+++ b/net/mac80211/wep.h
@@ -14,13 +14,12 @@
#include "key.h"
int ieee80211_wep_init(struct ieee80211_local *local);
-void ieee80211_wep_free(struct ieee80211_local *local);
-int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_encrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
size_t klen, u8 *data, size_t data_len);
int ieee80211_wep_encrypt(struct ieee80211_local *local,
struct sk_buff *skb,
const u8 *key, int keylen, int keyidx);
-int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key,
+int ieee80211_wep_decrypt_data(struct arc4_ctx *ctx, u8 *rc4key,
size_t klen, u8 *data, size_t data_len);
ieee80211_rx_result
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index a51c7909366e..ee72779729e5 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -239,7 +239,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
/* Add room for ICV */
skb_put(skb, IEEE80211_TKIP_ICV_LEN);
- return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm,
+ return ieee80211_tkip_encrypt_data(&tx->local->wep_tx_ctx,
key, skb, pos, len);
}
@@ -290,7 +290,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
if (status->flag & RX_FLAG_DECRYPTED)
hwaccel = 1;
- res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
+ res = ieee80211_tkip_decrypt_data(&rx->local->wep_rx_ctx,
key, skb->data + hdrlen,
skb->len - hdrlen, rx->sta->sta.addr,
hdr->addr1, hwaccel, rx->security_idx,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 21025c2c605b..66cc11742355 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -906,7 +906,7 @@ config NETFILTER_XT_TARGET_LED
echo netfilter-ssh > /sys/class/leds/<ledname>/trigger
For more information on the LEDs available on your system, see
- Documentation/leds/leds-class.txt
+ Documentation/leds/leds-class.rst
config NETFILTER_XT_TARGET_LOG
tristate "LOG target support"
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 83e3357a65a6..2032f6a8225e 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -23,6 +23,14 @@
#include <keys/user-type.h>
#include "ar-internal.h"
+static struct key_acl rxrpc_null_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 1,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
+ }
+};
+
static int rxrpc_vet_description_s(const char *);
static int rxrpc_preparse(struct key_preparsed_payload *);
static int rxrpc_preparse_s(struct key_preparsed_payload *);
@@ -39,6 +47,7 @@ static long rxrpc_read(const struct key *, char __user *, size_t);
*/
struct key_type key_type_rxrpc = {
.name = "rxrpc",
+ .flags = KEY_TYPE_NET_DOMAIN,
.preparse = rxrpc_preparse,
.free_preparse = rxrpc_free_preparse,
.instantiate = generic_key_instantiate,
@@ -54,6 +63,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
*/
struct key_type key_type_rxrpc_s = {
.name = "rxrpc_s",
+ .flags = KEY_TYPE_NET_DOMAIN,
.vet_description = rxrpc_vet_description_s,
.preparse = rxrpc_preparse_s,
.free_preparse = rxrpc_free_preparse_s,
@@ -908,7 +918,8 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
if (IS_ERR(description))
return PTR_ERR(description);
- key = request_key(&key_type_rxrpc, description, NULL);
+ key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk),
+ NULL, NULL);
if (IS_ERR(key)) {
kfree(description);
_leave(" = %ld", PTR_ERR(key));
@@ -939,7 +950,8 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
if (IS_ERR(description))
return PTR_ERR(description);
- key = request_key(&key_type_keyring, description, NULL);
+ key = request_key_net(&key_type_keyring, description, sock_net(&rx->sk),
+ NULL, NULL);
if (IS_ERR(key)) {
kfree(description);
_leave(" = %ld", PTR_ERR(key));
@@ -972,7 +984,8 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
_enter("");
key = key_alloc(&key_type_rxrpc, "x",
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
+ &internal_key_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(key)) {
_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
@@ -1020,7 +1033,7 @@ struct key *rxrpc_get_null_key(const char *keyname)
key = key_alloc(&key_type_rxrpc, keyname,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
- KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ &rxrpc_null_key_acl, KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(key))
return key;
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 2e78f0cc7ef1..a4c47d2b7054 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -144,7 +144,7 @@ found_service:
/* look through the service's keyring */
kref = keyring_search(make_key_ref(rx->securities, 1UL),
- &key_type_rxrpc_s, kdesc);
+ &key_type_rxrpc_s, kdesc, true);
if (IS_ERR(kref)) {
read_unlock(&local->services_lock);
_leave(" = %ld [search]", PTR_ERR(kref));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 027a3b07d329..0004535c0188 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -211,9 +211,14 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
/* Save client advertised inbound read limit for use later in accept. */
newxprt->sc_ord = param->initiator_depth;
- /* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ /* The remote port is arbitrary and not under the control of the
+ * client ULP. Set it to a fixed value so that the DRC continues
+ * to be effective after a reconnect.
+ */
+ rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);
+
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6310ddede220..578cce4fbe6c 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -213,12 +213,14 @@ config LIB80211
config LIB80211_CRYPT_WEP
tristate
+ select CRYPTO_LIB_ARC4
config LIB80211_CRYPT_CCMP
tristate
config LIB80211_CRYPT_TKIP
tristate
+ select CRYPTO_LIB_ARC4
config LIB80211_DEBUG
bool "lib80211 debugging messages"
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 62edf5b01953..f5e842ba7673 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/err.h>
+#include <linux/fips.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -25,6 +26,7 @@
#include <linux/ieee80211.h>
#include <net/iw_handler.h>
+#include <crypto/arc4.h>
#include <crypto/hash.h>
#include <linux/crypto.h>
#include <linux/crc32.h>
@@ -60,9 +62,9 @@ struct lib80211_tkip_data {
int key_idx;
- struct crypto_cipher *rx_tfm_arc4;
+ struct arc4_ctx rx_ctx_arc4;
+ struct arc4_ctx tx_ctx_arc4;
struct crypto_shash *rx_tfm_michael;
- struct crypto_cipher *tx_tfm_arc4;
struct crypto_shash *tx_tfm_michael;
/* scratch buffers for virt_to_page() (crypto API) */
@@ -89,30 +91,21 @@ static void *lib80211_tkip_init(int key_idx)
{
struct lib80211_tkip_data *priv;
+ if (fips_enabled)
+ return NULL;
+
priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
if (priv == NULL)
goto fail;
priv->key_idx = key_idx;
- priv->tx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(priv->tx_tfm_arc4)) {
- priv->tx_tfm_arc4 = NULL;
- goto fail;
- }
-
priv->tx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
if (IS_ERR(priv->tx_tfm_michael)) {
priv->tx_tfm_michael = NULL;
goto fail;
}
- priv->rx_tfm_arc4 = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(priv->rx_tfm_arc4)) {
- priv->rx_tfm_arc4 = NULL;
- goto fail;
- }
-
priv->rx_tfm_michael = crypto_alloc_shash("michael_mic", 0, 0);
if (IS_ERR(priv->rx_tfm_michael)) {
priv->rx_tfm_michael = NULL;
@@ -124,9 +117,7 @@ static void *lib80211_tkip_init(int key_idx)
fail:
if (priv) {
crypto_free_shash(priv->tx_tfm_michael);
- crypto_free_cipher(priv->tx_tfm_arc4);
crypto_free_shash(priv->rx_tfm_michael);
- crypto_free_cipher(priv->rx_tfm_arc4);
kfree(priv);
}
@@ -138,11 +129,9 @@ static void lib80211_tkip_deinit(void *priv)
struct lib80211_tkip_data *_priv = priv;
if (_priv) {
crypto_free_shash(_priv->tx_tfm_michael);
- crypto_free_cipher(_priv->tx_tfm_arc4);
crypto_free_shash(_priv->rx_tfm_michael);
- crypto_free_cipher(_priv->rx_tfm_arc4);
}
- kfree(priv);
+ kzfree(priv);
}
static inline u16 RotR1(u16 val)
@@ -341,7 +330,6 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
int len;
u8 rc4key[16], *pos, *icv;
u32 crc;
- int i;
if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -366,9 +354,9 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_cipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
- for (i = 0; i < len + 4; i++)
- crypto_cipher_encrypt_one(tkey->tx_tfm_arc4, pos + i, pos + i);
+ arc4_setkey(&tkey->tx_ctx_arc4, rc4key, 16);
+ arc4_crypt(&tkey->tx_ctx_arc4, pos, pos, len + 4);
+
return 0;
}
@@ -396,7 +384,6 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
u8 icv[4];
u32 crc;
int plen;
- int i;
hdr = (struct ieee80211_hdr *)skb->data;
@@ -449,9 +436,8 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
plen = skb->len - hdr_len - 12;
- crypto_cipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
- for (i = 0; i < plen + 4; i++)
- crypto_cipher_decrypt_one(tkey->rx_tfm_arc4, pos + i, pos + i);
+ arc4_setkey(&tkey->rx_ctx_arc4, rc4key, 16);
+ arc4_crypt(&tkey->rx_ctx_arc4, pos, pos, plen + 4);
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
@@ -636,17 +622,17 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
struct lib80211_tkip_data *tkey = priv;
int keyidx;
struct crypto_shash *tfm = tkey->tx_tfm_michael;
- struct crypto_cipher *tfm2 = tkey->tx_tfm_arc4;
+ struct arc4_ctx *tfm2 = &tkey->tx_ctx_arc4;
struct crypto_shash *tfm3 = tkey->rx_tfm_michael;
- struct crypto_cipher *tfm4 = tkey->rx_tfm_arc4;
+ struct arc4_ctx *tfm4 = &tkey->rx_ctx_arc4;
keyidx = tkey->key_idx;
memset(tkey, 0, sizeof(*tkey));
tkey->key_idx = keyidx;
tkey->tx_tfm_michael = tfm;
- tkey->tx_tfm_arc4 = tfm2;
+ tkey->tx_ctx_arc4 = *tfm2;
tkey->rx_tfm_michael = tfm3;
- tkey->rx_tfm_arc4 = tfm4;
+ tkey->rx_ctx_arc4 = *tfm4;
if (len == TKIP_KEY_LEN) {
memcpy(tkey->key, key, TKIP_KEY_LEN);
tkey->key_set = 1;
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index e127b6f7fc9f..dafc6f3571db 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -7,6 +7,7 @@
*/
#include <linux/err.h>
+#include <linux/fips.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -18,7 +19,7 @@
#include <net/lib80211.h>
-#include <linux/crypto.h>
+#include <crypto/arc4.h>
#include <linux/crc32.h>
MODULE_AUTHOR("Jouni Malinen");
@@ -31,52 +32,31 @@ struct lib80211_wep_data {
u8 key[WEP_KEY_LEN + 1];
u8 key_len;
u8 key_idx;
- struct crypto_cipher *tx_tfm;
- struct crypto_cipher *rx_tfm;
+ struct arc4_ctx tx_ctx;
+ struct arc4_ctx rx_ctx;
};
static void *lib80211_wep_init(int keyidx)
{
struct lib80211_wep_data *priv;
+ if (fips_enabled)
+ return NULL;
+
priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
if (priv == NULL)
- goto fail;
+ return NULL;
priv->key_idx = keyidx;
- priv->tx_tfm = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(priv->tx_tfm)) {
- priv->tx_tfm = NULL;
- goto fail;
- }
-
- priv->rx_tfm = crypto_alloc_cipher("arc4", 0, 0);
- if (IS_ERR(priv->rx_tfm)) {
- priv->rx_tfm = NULL;
- goto fail;
- }
/* start WEP IV from a random value */
get_random_bytes(&priv->iv, 4);
return priv;
-
- fail:
- if (priv) {
- crypto_free_cipher(priv->tx_tfm);
- crypto_free_cipher(priv->rx_tfm);
- kfree(priv);
- }
- return NULL;
}
static void lib80211_wep_deinit(void *priv)
{
- struct lib80211_wep_data *_priv = priv;
- if (_priv) {
- crypto_free_cipher(_priv->tx_tfm);
- crypto_free_cipher(_priv->rx_tfm);
- }
- kfree(priv);
+ kzfree(priv);
}
/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */
@@ -128,7 +108,6 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
u32 crc, klen, len;
u8 *pos, *icv;
u8 key[WEP_KEY_LEN + 3];
- int i;
/* other checks are in lib80211_wep_build_iv */
if (skb_tailroom(skb) < 4)
@@ -156,10 +135,8 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
icv[2] = crc >> 16;
icv[3] = crc >> 24;
- crypto_cipher_setkey(wep->tx_tfm, key, klen);
-
- for (i = 0; i < len + 4; i++)
- crypto_cipher_encrypt_one(wep->tx_tfm, pos + i, pos + i);
+ arc4_setkey(&wep->tx_ctx, key, klen);
+ arc4_crypt(&wep->tx_ctx, pos, pos, len + 4);
return 0;
}
@@ -177,7 +154,6 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
u32 crc, klen, plen;
u8 key[WEP_KEY_LEN + 3];
u8 keyidx, *pos, icv[4];
- int i;
if (skb->len < hdr_len + 8)
return -1;
@@ -198,9 +174,8 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
/* Apply RC4 to data and compute CRC32 over decrypted data */
plen = skb->len - hdr_len - 8;
- crypto_cipher_setkey(wep->rx_tfm, key, klen);
- for (i = 0; i < plen + 4; i++)
- crypto_cipher_decrypt_one(wep->rx_tfm, pos + i, pos + i);
+ arc4_setkey(&wep->rx_ctx, key, klen);
+ arc4_crypt(&wep->rx_ctx, pos, pos, plen + 4);
crc = ~crc32_le(~0, pos, plen);
icv[0] = crc;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4831ad745f91..298fe91557f7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -741,8 +741,7 @@ static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
"asymmetric", NULL, p, plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
+ &internal_key_acl,
KEY_ALLOC_NOT_IN_QUOTA |
KEY_ALLOC_BUILT_IN |
KEY_ALLOC_BYPASS_RESTRICTION);
@@ -768,8 +767,7 @@ static int __init load_builtin_regdb_keys(void)
builtin_regdb_keys =
keyring_alloc(".builtin_regdb_keys",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ &internal_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(builtin_regdb_keys))
return PTR_ERR(builtin_regdb_keys);
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 1da597aa6141..1a72b7d95cdc 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -34,7 +34,7 @@ static void simple_thread_func(int cnt)
/* Silly tracepoints */
trace_foo_bar("hello", cnt, array, random_strings[len],
- &current->cpus_allowed);
+ current->cpus_ptr);
trace_foo_with_template_simple("HELLO", cnt);
diff --git a/scripts/atomic/check-atomics.sh b/scripts/atomic/check-atomics.sh
index cfa0c2f71c84..8378c63a1e09 100755
--- a/scripts/atomic/check-atomics.sh
+++ b/scripts/atomic/check-atomics.sh
@@ -22,7 +22,7 @@ while read header; do
OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
OLDSUM="${OLDSUM#// }"
- NEWSUM="$(head -n -1 ${LINUXDIR}/include/${header} | sha1sum)"
+ NEWSUM="$(sed '$d' ${LINUXDIR}/include/${header} | sha1sum)"
NEWSUM="${NEWSUM%% *}"
if [ "${OLDSUM}" != "${NEWSUM}" ]; then
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 068e93c5d29c..59f1cc2557a7 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -76,7 +76,7 @@ void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new)
AA_BUG(!orig);
AA_BUG(!new);
- lockdep_assert_held_exclusive(&labels_set(orig)->lock);
+ lockdep_assert_held_write(&labels_set(orig)->lock);
tmp = rcu_dereference_protected(orig->proxy->label,
&labels_ns(orig)->lock);
@@ -566,7 +566,7 @@ static bool __label_remove(struct aa_label *label, struct aa_label *new)
AA_BUG(!ls);
AA_BUG(!label);
- lockdep_assert_held_exclusive(&ls->lock);
+ lockdep_assert_held_write(&ls->lock);
if (new)
__aa_proxy_redirect(label, new);
@@ -603,7 +603,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new)
AA_BUG(!ls);
AA_BUG(!old);
AA_BUG(!new);
- lockdep_assert_held_exclusive(&ls->lock);
+ lockdep_assert_held_write(&ls->lock);
AA_BUG(new->flags & FLAG_IN_TREE);
if (!label_is_stale(old))
@@ -640,7 +640,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls,
AA_BUG(!ls);
AA_BUG(!label);
AA_BUG(labels_set(label) != ls);
- lockdep_assert_held_exclusive(&ls->lock);
+ lockdep_assert_held_write(&ls->lock);
AA_BUG(label->flags & FLAG_IN_TREE);
/* Figure out where to put new node */
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index dc28914fa72e..c07196502577 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -509,7 +509,7 @@ static inline int may_allow_all(struct dev_cgroup *parent)
* This is one of the three key functions for hierarchy implementation.
* This function is responsible for re-evaluating all the cgroup's active
* exceptions due to a parent's exception change.
- * Refer to Documentation/cgroup-v1/devices.txt for more details.
+ * Refer to Documentation/cgroup-v1/devices.rst for more details.
*/
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index 4582bc26770a..f9f3c8ffe786 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -47,7 +47,8 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
if (!keyring[id]) {
keyring[id] =
- request_key(&key_type_keyring, keyring_name[id], NULL);
+ request_key(&key_type_keyring, keyring_name[id],
+ NULL, NULL);
if (IS_ERR(keyring[id])) {
int err = PTR_ERR(keyring[id]);
pr_err("no %s keyring: %d\n", keyring_name[id], err);
@@ -69,14 +70,15 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
return -EOPNOTSUPP;
}
-static int __integrity_init_keyring(const unsigned int id, key_perm_t perm,
- struct key_restriction *restriction)
+static int __init __integrity_init_keyring(const unsigned int id,
+ struct key_acl *acl,
+ struct key_restriction *restriction)
{
const struct cred *cred = current_cred();
int err = 0;
keyring[id] = keyring_alloc(keyring_name[id], KUIDT_INIT(0),
- KGIDT_INIT(0), cred, perm,
+ KGIDT_INIT(0), cred, acl,
KEY_ALLOC_NOT_IN_QUOTA, restriction, NULL);
if (IS_ERR(keyring[id])) {
err = PTR_ERR(keyring[id]);
@@ -94,10 +96,7 @@ static int __integrity_init_keyring(const unsigned int id, key_perm_t perm,
int __init integrity_init_keyring(const unsigned int id)
{
struct key_restriction *restriction;
- key_perm_t perm;
-
- perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW
- | KEY_USR_READ | KEY_USR_SEARCH;
+ struct key_acl *acl = &internal_keyring_acl;
if (id == INTEGRITY_KEYRING_PLATFORM) {
restriction = NULL;
@@ -112,14 +111,14 @@ int __init integrity_init_keyring(const unsigned int id)
return -ENOMEM;
restriction->check = restrict_link_to_ima;
- perm |= KEY_USR_WRITE;
+ acl = &internal_writable_keyring_acl;
out:
- return __integrity_init_keyring(id, perm, restriction);
+ return __integrity_init_keyring(id, acl, restriction);
}
-int __init integrity_add_key(const unsigned int id, const void *data,
- off_t size, key_perm_t perm)
+static int __init integrity_add_key(const unsigned int id, const void *data,
+ off_t size, struct key_acl *acl)
{
key_ref_t key;
int rc = 0;
@@ -128,7 +127,7 @@ int __init integrity_add_key(const unsigned int id, const void *data,
return -EINVAL;
key = key_create_or_update(make_key_ref(keyring[id], 1), "asymmetric",
- NULL, data, size, perm,
+ NULL, data, size, acl ?: &internal_key_acl,
KEY_ALLOC_NOT_IN_QUOTA);
if (IS_ERR(key)) {
rc = PTR_ERR(key);
@@ -148,7 +147,6 @@ int __init integrity_load_x509(const unsigned int id, const char *path)
void *data;
loff_t size;
int rc;
- key_perm_t perm;
rc = kernel_read_file_from_path(path, &data, &size, 0,
READING_X509_CERTIFICATE);
@@ -157,21 +155,19 @@ int __init integrity_load_x509(const unsigned int id, const char *path)
return rc;
}
- perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ;
-
pr_info("Loading X.509 certificate: %s\n", path);
- rc = integrity_add_key(id, (const void *)data, size, perm);
+ rc = integrity_add_key(id, data, size, NULL);
vfree(data);
return rc;
}
int __init integrity_load_cert(const unsigned int id, const char *source,
- const void *data, size_t len, key_perm_t perm)
+ const void *data, size_t len, struct key_acl *acl)
{
if (!data)
return -EINVAL;
pr_info("Loading X.509 certificate: %s\n", source);
- return integrity_add_key(id, data, len, perm);
+ return integrity_add_key(id, data, len, acl);
}
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index ad4b323ecea1..a29df775fdd8 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -35,7 +35,7 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid)
key_ref_t kref;
kref = keyring_search(make_key_ref(key, 1),
- &key_type_asymmetric, name);
+ &key_type_asymmetric, name, true);
if (!IS_ERR(kref)) {
pr_err("Key '%s' is in ima_blacklist_keyring\n", name);
return ERR_PTR(-EKEYREJECTED);
@@ -47,13 +47,13 @@ static struct key *request_asymmetric_key(struct key *keyring, uint32_t keyid)
key_ref_t kref;
kref = keyring_search(make_key_ref(keyring, 1),
- &key_type_asymmetric, name);
+ &key_type_asymmetric, name, true);
if (IS_ERR(kref))
key = ERR_CAST(kref);
else
key = key_ref_to_ptr(kref);
} else {
- key = request_key(&key_type_asymmetric, name, NULL);
+ key = request_key(&key_type_asymmetric, name, NULL, NULL);
}
if (IS_ERR(key)) {
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index d485f6fc908e..466eebd3b4aa 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -356,7 +356,7 @@ int evm_init_key(void)
struct encrypted_key_payload *ekp;
int rc;
- evm_key = request_key(&key_type_encrypted, EVMKEY, NULL);
+ evm_key = request_key(&key_type_encrypted, EVMKEY, NULL, NULL);
if (IS_ERR(evm_key))
return -ENOENT;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 5bbd8b4dc29a..f9a81b187fae 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -166,7 +166,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
/* check value type */
switch (xattr_data->type) {
case EVM_XATTR_HMAC:
- if (xattr_len != sizeof(struct evm_ima_xattr_data)) {
+ if (xattr_len != sizeof(struct evm_xattr)) {
evm_status = INTEGRITY_FAIL;
goto out;
}
@@ -176,7 +176,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
xattr_value_len, &digest);
if (rc)
break;
- rc = crypto_memneq(xattr_data->digest, digest.digest,
+ rc = crypto_memneq(xattr_data->data, digest.digest,
SHA1_DIGEST_SIZE);
if (rc)
rc = -EINVAL;
@@ -520,7 +520,7 @@ int evm_inode_init_security(struct inode *inode,
const struct xattr *lsm_xattr,
struct xattr *evm_xattr)
{
- struct evm_ima_xattr_data *xattr_data;
+ struct evm_xattr *xattr_data;
int rc;
if (!evm_key_loaded() || !evm_protected_xattr(lsm_xattr->name))
@@ -530,7 +530,7 @@ int evm_inode_init_security(struct inode *inode,
if (!xattr_data)
return -ENOMEM;
- xattr_data->type = EVM_XATTR_HMAC;
+ xattr_data->data.type = EVM_XATTR_HMAC;
rc = evm_init_hmac(inode, lsm_xattr, xattr_data->digest);
if (rc < 0)
goto out;
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 2692c7358c2c..2ced99dde694 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -160,7 +160,8 @@ config IMA_APPRAISE
config IMA_ARCH_POLICY
bool "Enable loading an IMA architecture specific policy"
- depends on KEXEC_VERIFY_SIG || IMA_APPRAISE && INTEGRITY_ASYMMETRIC_KEYS
+ depends on (KEXEC_VERIFY_SIG && IMA) || IMA_APPRAISE \
+ && INTEGRITY_ASYMMETRIC_KEYS
default n
help
This option enables loading an IMA architecture specific policy
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index ca10917b5f89..011b91c79351 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -61,6 +61,8 @@ struct ima_event_data {
struct evm_ima_xattr_data *xattr_value;
int xattr_len;
const char *violation;
+ const void *buf;
+ int buf_len;
};
/* IMA template field data definition */
@@ -142,7 +144,11 @@ void ima_add_violation(struct file *file, const unsigned char *filename,
int ima_init_crypto(void);
void ima_putc(struct seq_file *m, void *data, int datalen);
void ima_print_digest(struct seq_file *m, u8 *digest, u32 size);
+int template_desc_init_fields(const char *template_fmt,
+ const struct ima_template_field ***fields,
+ int *num_fields);
struct ima_template_desc *ima_template_desc_current(void);
+struct ima_template_desc *lookup_template_desc(const char *name);
int ima_restore_measurement_entry(struct ima_template_entry *entry);
int ima_restore_measurement_list(loff_t bufsize, void *buf);
int ima_measurements_show(struct seq_file *m, void *v);
@@ -150,6 +156,8 @@ unsigned long ima_get_binary_runtime_size(void);
int ima_init_template(void);
void ima_init_template_list(void);
int __init ima_init_digests(void);
+int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data);
/*
* used to protect h_table and sha_table
@@ -180,6 +188,7 @@ static inline unsigned long ima_hash_key(u8 *digest)
hook(KEXEC_KERNEL_CHECK) \
hook(KEXEC_INITRAMFS_CHECK) \
hook(POLICY_CHECK) \
+ hook(KEXEC_CMDLINE) \
hook(MAX_CHECK)
#define __ima_hook_enumify(ENUM) ENUM,
@@ -189,7 +198,8 @@ enum ima_hooks {
/* LIM API function definitions */
int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
- int mask, enum ima_hooks func, int *pcr);
+ int mask, enum ima_hooks func, int *pcr,
+ struct ima_template_desc **template_desc);
int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
int ima_collect_measurement(struct integrity_iint_cache *iint,
struct file *file, void *buf, loff_t size,
@@ -197,11 +207,13 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
- int xattr_len, int pcr);
+ int xattr_len, int pcr,
+ struct ima_template_desc *template_desc);
void ima_audit_measurement(struct integrity_iint_cache *iint,
const unsigned char *filename);
int ima_alloc_init_template(struct ima_event_data *event_data,
- struct ima_template_entry **entry);
+ struct ima_template_entry **entry,
+ struct ima_template_desc *template_desc);
int ima_store_template(struct ima_template_entry *entry, int violation,
struct inode *inode,
const unsigned char *filename, int pcr);
@@ -210,7 +222,8 @@ const char *ima_d_path(const struct path *path, char **pathbuf, char *filename);
/* IMA policy related functions */
int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
- enum ima_hooks func, int mask, int flags, int *pcr);
+ enum ima_hooks func, int mask, int flags, int *pcr,
+ struct ima_template_desc **template_desc);
void ima_init_policy(void);
void ima_update_policy(void);
void ima_update_policy_flag(void);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 35c129cbb7e9..f614e22bf39f 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -34,11 +34,17 @@ void ima_free_template_entry(struct ima_template_entry *entry)
* ima_alloc_init_template - create and initialize a new template entry
*/
int ima_alloc_init_template(struct ima_event_data *event_data,
- struct ima_template_entry **entry)
+ struct ima_template_entry **entry,
+ struct ima_template_desc *desc)
{
- struct ima_template_desc *template_desc = ima_template_desc_current();
+ struct ima_template_desc *template_desc;
int i, result = 0;
+ if (desc)
+ template_desc = desc;
+ else
+ template_desc = ima_template_desc_current();
+
*entry = kzalloc(sizeof(**entry) + template_desc->num_fields *
sizeof(struct ima_field_data), GFP_NOFS);
if (!*entry)
@@ -129,15 +135,17 @@ void ima_add_violation(struct file *file, const unsigned char *filename,
{
struct ima_template_entry *entry;
struct inode *inode = file_inode(file);
- struct ima_event_data event_data = {iint, file, filename, NULL, 0,
- cause};
+ struct ima_event_data event_data = { .iint = iint,
+ .file = file,
+ .filename = filename,
+ .violation = cause };
int violation = 1;
int result;
/* can overflow, only indicator */
atomic_long_inc(&ima_htable.violations);
- result = ima_alloc_init_template(&event_data, &entry);
+ result = ima_alloc_init_template(&event_data, &entry, NULL);
if (result < 0) {
result = -ENOMEM;
goto err_out;
@@ -160,11 +168,13 @@ err_out:
* MAY_APPEND)
* @func: caller identifier
* @pcr: pointer filled in if matched measure policy sets pcr=
+ * @template_desc: pointer filled in if matched measure policy sets template=
*
* The policy is defined in terms of keypairs:
* subj=, obj=, type=, func=, mask=, fsmagic=
* subj,obj, and type: are LSM specific.
* func: FILE_CHECK | BPRM_CHECK | CREDS_CHECK | MMAP_CHECK | MODULE_CHECK
+ * | KEXEC_CMDLINE
* mask: contains the permission mask
* fsmagic: hex value
*
@@ -172,13 +182,15 @@ err_out:
*
*/
int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
- int mask, enum ima_hooks func, int *pcr)
+ int mask, enum ima_hooks func, int *pcr,
+ struct ima_template_desc **template_desc)
{
int flags = IMA_MEASURE | IMA_AUDIT | IMA_APPRAISE | IMA_HASH;
flags &= ima_policy_flag;
- return ima_match_policy(inode, cred, secid, func, mask, flags, pcr);
+ return ima_match_policy(inode, cred, secid, func, mask, flags, pcr,
+ template_desc);
}
/*
@@ -273,21 +285,25 @@ out:
void ima_store_measurement(struct integrity_iint_cache *iint,
struct file *file, const unsigned char *filename,
struct evm_ima_xattr_data *xattr_value,
- int xattr_len, int pcr)
+ int xattr_len, int pcr,
+ struct ima_template_desc *template_desc)
{
static const char op[] = "add_template_measure";
static const char audit_cause[] = "ENOMEM";
int result = -ENOMEM;
struct inode *inode = file_inode(file);
struct ima_template_entry *entry;
- struct ima_event_data event_data = {iint, file, filename, xattr_value,
- xattr_len, NULL};
+ struct ima_event_data event_data = { .iint = iint,
+ .file = file,
+ .filename = filename,
+ .xattr_value = xattr_value,
+ .xattr_len = xattr_len };
int violation = 0;
if (iint->measured_pcrs & (0x1 << pcr))
return;
- result = ima_alloc_init_template(&event_data, &entry);
+ result = ima_alloc_init_template(&event_data, &entry, template_desc);
if (result < 0) {
integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename,
op, audit_cause, result, 0);
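
The ima_event_data conversions above switch from positional to designated initializers. The motivation: this same series adds buf and buf_len members to the struct, and designated initializers let every existing initialization pick up new zero-filled fields without being rewritten. A standalone sketch of the idiom:

    #include <stdio.h>

    struct event {			/* stand-in for struct ima_event_data */
    	const void *iint;
    	const char *filename;
    	const void *buf;	/* newly added ...   */
    	int buf_len;		/* ... fields        */
    };

    int main(void)
    {
    	/* Unnamed members are implicitly zeroed, so adding buf/buf_len
    	 * to the struct does not invalidate this initializer. */
    	struct event ev = { .filename = "boot_aggregate" };

    	printf("%s %p %d\n", ev.filename, ev.buf, ev.buf_len);
    	return 0;
    }
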
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index f0cd67cab6aa..89b83194d1dc 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -54,7 +54,7 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
security_task_getsecid(current, &secid);
return ima_match_policy(inode, current_cred(), secid, func, mask,
- IMA_APPRAISE | IMA_HASH, NULL);
+ IMA_APPRAISE | IMA_HASH, NULL, NULL);
}
static int ima_fix_xattr(struct dentry *dentry,
@@ -165,7 +165,8 @@ enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
return sig->hash_algo;
break;
case IMA_XATTR_DIGEST_NG:
- ret = xattr_value->digest[0];
+ /* first byte contains algorithm id */
+ ret = xattr_value->data[0];
if (ret < HASH_ALGO__LAST)
return ret;
break;
@@ -173,7 +174,7 @@ enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
/* this is for backward compatibility */
if (xattr_len == 21) {
unsigned int zero = 0;
- if (!memcmp(&xattr_value->digest[16], &zero, 4))
+ if (!memcmp(&xattr_value->data[16], &zero, 4))
return HASH_ALGO_MD5;
else
return HASH_ALGO_SHA1;
@@ -272,7 +273,7 @@ int ima_appraise_measurement(enum ima_hooks func,
/* xattr length may be longer. md5 hash in previous
version occupied 20 bytes in xattr, instead of 16
*/
- rc = memcmp(&xattr_value->digest[hash_start],
+ rc = memcmp(&xattr_value->data[hash_start],
iint->ima_hash->digest,
iint->ima_hash->length);
else
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 1e47c1026471..5d55ade5f3b9 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -45,8 +45,8 @@ static int __init ima_add_boot_aggregate(void)
const char *audit_cause = "ENOMEM";
struct ima_template_entry *entry;
struct integrity_iint_cache tmp_iint, *iint = &tmp_iint;
- struct ima_event_data event_data = {iint, NULL, boot_aggregate_name,
- NULL, 0, NULL};
+ struct ima_event_data event_data = { .iint = iint,
+ .filename = boot_aggregate_name };
int result = -ENOMEM;
int violation = 0;
struct {
@@ -68,7 +68,7 @@ static int __init ima_add_boot_aggregate(void)
}
}
- result = ima_alloc_init_template(&event_data, &entry);
+ result = ima_alloc_init_template(&event_data, &entry, NULL);
if (result < 0) {
audit_cause = "alloc_entry";
goto err_out;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f556e6c18f9b..584019728660 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -39,6 +39,10 @@ int ima_appraise;
int ima_hash_algo = HASH_ALGO_SHA1;
static int hash_setup_done;
+static struct notifier_block ima_lsm_policy_notifier = {
+ .notifier_call = ima_lsm_policy_change,
+};
+
static int __init hash_setup(char *str)
{
struct ima_template_desc *template_desc = ima_template_desc_current();
@@ -68,6 +72,27 @@ out:
}
__setup("ima_hash=", hash_setup);
+/* Prevent mmap'ing a file for execute that is already mmap'ed for write */
+static int mmap_violation_check(enum ima_hooks func, struct file *file,
+ char **pathbuf, const char **pathname,
+ char *filename)
+{
+ struct inode *inode;
+ int rc = 0;
+
+ if ((func == MMAP_CHECK) && mapping_writably_mapped(file->f_mapping)) {
+ rc = -ETXTBSY;
+ inode = file_inode(file);
+
+ if (!*pathbuf) /* ima_rdwr_violation possibly pre-fetched */
+ *pathname = ima_d_path(&file->f_path, pathbuf,
+ filename);
+ integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, *pathname,
+ "mmap_file", "mmapped_writers", rc, 0);
+ }
+ return rc;
+}
+
/*
* ima_rdwr_violation_check
*
@@ -170,7 +195,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
{
struct inode *inode = file_inode(file);
struct integrity_iint_cache *iint = NULL;
- struct ima_template_desc *template_desc;
+ struct ima_template_desc *template_desc = NULL;
char *pathbuf = NULL;
char filename[NAME_MAX];
const char *pathname = NULL;
@@ -188,7 +213,8 @@ static int process_measurement(struct file *file, const struct cred *cred,
* bitmask based on the appraise/audit/measurement policy.
* Included is the appraise submask.
*/
- action = ima_get_action(inode, cred, secid, mask, func, &pcr);
+ action = ima_get_action(inode, cred, secid, mask, func, &pcr,
+ &template_desc);
violation_check = ((func == FILE_CHECK || func == MMAP_CHECK) &&
(ima_policy_flag & IMA_MEASURE));
if (!action && !violation_check)
@@ -266,12 +292,15 @@ static int process_measurement(struct file *file, const struct cred *cred,
/* Nothing to do, just return existing appraised status */
if (!action) {
- if (must_appraise)
- rc = ima_get_cache_status(iint, func);
+ if (must_appraise) {
+ rc = mmap_violation_check(func, file, &pathbuf,
+ &pathname, filename);
+ if (!rc)
+ rc = ima_get_cache_status(iint, func);
+ }
goto out_locked;
}
- template_desc = ima_template_desc_current();
if ((action & IMA_APPRAISE_SUBMASK) ||
strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0)
/* read 'security.ima' */
@@ -288,12 +317,16 @@ static int process_measurement(struct file *file, const struct cred *cred,
if (action & IMA_MEASURE)
ima_store_measurement(iint, file, pathname,
- xattr_value, xattr_len, pcr);
+ xattr_value, xattr_len, pcr,
+ template_desc);
if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
inode_lock(inode);
rc = ima_appraise_measurement(func, iint, file, pathname,
xattr_value, xattr_len);
inode_unlock(inode);
+ if (!rc)
+ rc = mmap_violation_check(func, file, &pathbuf,
+ &pathname, filename);
}
if (action & IMA_AUDIT)
ima_audit_measurement(iint, pathname);
@@ -572,6 +605,80 @@ int ima_load_data(enum kernel_load_data_id id)
return 0;
}
+/*
+ * process_buffer_measurement - measure the buffer into the IMA log.
+ * @buf: pointer to the buffer that is to be added to the log.
+ * @size: size of the buffer (in bytes).
+ * @eventname: event name to be used for the buffer entry.
+ * @cred: a pointer to a credentials structure for user validation.
+ * @secid: the secid of the task to be validated.
+ *
+ * Based on policy, the buffer is measured into the IMA log.
+ */
+static void process_buffer_measurement(const void *buf, int size,
+ const char *eventname,
+ const struct cred *cred, u32 secid)
+{
+ int ret = 0;
+ struct ima_template_entry *entry = NULL;
+ struct integrity_iint_cache iint = {};
+ struct ima_event_data event_data = {.iint = &iint,
+ .filename = eventname,
+ .buf = buf,
+ .buf_len = size};
+ struct ima_template_desc *template_desc = NULL;
+ struct {
+ struct ima_digest_data hdr;
+ char digest[IMA_MAX_DIGEST_SIZE];
+ } hash = {};
+ int violation = 0;
+ int pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+ int action = 0;
+
+ action = ima_get_action(NULL, cred, secid, 0, KEXEC_CMDLINE, &pcr,
+ &template_desc);
+ if (!(action & IMA_MEASURE))
+ return;
+
+ iint.ima_hash = &hash.hdr;
+ iint.ima_hash->algo = ima_hash_algo;
+ iint.ima_hash->length = hash_digest_size[ima_hash_algo];
+
+ ret = ima_calc_buffer_hash(buf, size, iint.ima_hash);
+ if (ret < 0)
+ goto out;
+
+ ret = ima_alloc_init_template(&event_data, &entry, template_desc);
+ if (ret < 0)
+ goto out;
+
+ ret = ima_store_template(entry, violation, NULL, buf, pcr);
+
+ if (ret < 0)
+ ima_free_template_entry(entry);
+
+out:
+ return;
+}
+
+/**
+ * ima_kexec_cmdline - measure kexec cmdline boot args
+ * @buf: pointer to buffer
+ * @size: size of buffer
+ *
+ * Buffers can only be measured, not appraised.
+ */
+void ima_kexec_cmdline(const void *buf, int size)
+{
+ u32 secid;
+
+ if (buf && size != 0) {
+ security_task_getsecid(current, &secid);
+ process_buffer_measurement(buf, size, "kexec-cmdline",
+ current_cred(), secid);
+ }
+}
+
static int __init init_ima(void)
{
int error;
@@ -589,6 +696,10 @@ static int __init init_ima(void)
error = ima_init();
}
+ error = register_blocking_lsm_notifier(&ima_lsm_policy_notifier);
+ if (error)
+ pr_warn("Couldn't register LSM notifier, error %d\n", error);
+
if (!error)
ima_update_policy_flag();
diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c
index 36cadadbfba4..b52ae1476ec3 100644
--- a/security/integrity/ima/ima_mok.c
+++ b/security/integrity/ima/ima_mok.c
@@ -16,6 +16,15 @@
#include <keys/system_keyring.h>
+static struct key_acl integrity_blacklist_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE | KEY_ACE_SEARCH),
+ }
+};
+
struct key *ima_blacklist_keyring;
/*
@@ -35,9 +44,7 @@ __init int ima_mok_init(void)
ima_blacklist_keyring = keyring_alloc(".ima_blacklist",
KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
- (KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ |
- KEY_USR_WRITE | KEY_USR_SEARCH,
+ &integrity_blacklist_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA,
restriction, NULL);
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 7b53f2ca58e2..6df7f641ff66 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -76,6 +76,7 @@ struct ima_rule_entry {
int type; /* audit type */
} lsm[MAX_LSM_RULES];
char *fsname;
+ struct ima_template_desc *template;
};
/*
@@ -195,7 +196,7 @@ static struct ima_rule_entry secure_boot_rules[] __ro_after_init = {
};
/* An array of architecture specific rules */
-struct ima_rule_entry *arch_policy_entry __ro_after_init;
+static struct ima_rule_entry *arch_policy_entry __ro_after_init;
static LIST_HEAD(ima_default_rules);
static LIST_HEAD(ima_policy_rules);
@@ -245,31 +246,113 @@ static int __init default_appraise_policy_setup(char *str)
}
__setup("ima_appraise_tcb", default_appraise_policy_setup);
+static void ima_lsm_free_rule(struct ima_rule_entry *entry)
+{
+ int i;
+
+ for (i = 0; i < MAX_LSM_RULES; i++) {
+ kfree(entry->lsm[i].rule);
+ kfree(entry->lsm[i].args_p);
+ }
+ kfree(entry);
+}
+
+static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
+{
+ struct ima_rule_entry *nentry;
+ int i, result;
+
+ nentry = kmalloc(sizeof(*nentry), GFP_KERNEL);
+ if (!nentry)
+ return NULL;
+
+ /*
+ * Immutable elements are copied over as pointers and data; only
+ * lsm rules can change
+ */
+ memcpy(nentry, entry, sizeof(*nentry));
+ memset(nentry->lsm, 0, FIELD_SIZEOF(struct ima_rule_entry, lsm));
+
+ for (i = 0; i < MAX_LSM_RULES; i++) {
+ if (!entry->lsm[i].rule)
+ continue;
+
+ nentry->lsm[i].type = entry->lsm[i].type;
+ nentry->lsm[i].args_p = kstrdup(entry->lsm[i].args_p,
+ GFP_KERNEL);
+ if (!nentry->lsm[i].args_p)
+ goto out_err;
+
+ result = security_filter_rule_init(nentry->lsm[i].type,
+ Audit_equal,
+ nentry->lsm[i].args_p,
+ &nentry->lsm[i].rule);
+ if (result == -EINVAL)
+ pr_warn("ima: rule for LSM \'%d\' is undefined\n",
+ entry->lsm[i].type);
+ }
+ return nentry;
+
+out_err:
+ ima_lsm_free_rule(nentry);
+ return NULL;
+}
+
+static int ima_lsm_update_rule(struct ima_rule_entry *entry)
+{
+ struct ima_rule_entry *nentry;
+
+ nentry = ima_lsm_copy_rule(entry);
+ if (!nentry)
+ return -ENOMEM;
+
+ list_replace_rcu(&entry->list, &nentry->list);
+ synchronize_rcu();
+ ima_lsm_free_rule(entry);
+
+ return 0;
+}
+
/*
* The LSM policy can be reloaded, leaving the IMA LSM based rules referring
* to the old, stale LSM policy. Update the IMA LSM based rules to reflect
- * the reloaded LSM policy. We assume the rules still exist; and BUG_ON() if
- * they don't.
+ * the reloaded LSM policy.
*/
static void ima_lsm_update_rules(void)
{
- struct ima_rule_entry *entry;
- int result;
- int i;
+ struct ima_rule_entry *entry, *e;
+ int i, result, needs_update;
- list_for_each_entry(entry, &ima_policy_rules, list) {
+ list_for_each_entry_safe(entry, e, &ima_policy_rules, list) {
+ needs_update = 0;
for (i = 0; i < MAX_LSM_RULES; i++) {
- if (!entry->lsm[i].rule)
- continue;
- result = security_filter_rule_init(entry->lsm[i].type,
- Audit_equal,
- entry->lsm[i].args_p,
- &entry->lsm[i].rule);
- BUG_ON(!entry->lsm[i].rule);
+ if (entry->lsm[i].rule) {
+ needs_update = 1;
+ break;
+ }
+ }
+ if (!needs_update)
+ continue;
+
+ result = ima_lsm_update_rule(entry);
+ if (result) {
+ pr_err("ima: lsm rule update error %d\n",
+ result);
+ return;
}
}
}
+int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data)
+{
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ ima_lsm_update_rules();
+ return NOTIFY_OK;
+}
+
/**
* ima_match_rules - determine whether an inode matches the measure rule.
* @rule: a pointer to a rule
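
ima_lsm_update_rule() above is the standard RCU publish/retire sequence: readers walk the policy list under rcu_read_lock(), so a rule is never patched in place. Instead a fixed-up copy is substituted and the stale entry freed only after a grace period. A kernel-context sketch of the pattern (not a standalone program; names match the code above):

    	/* Writer, serialized against other policy updates: */
    	nentry = ima_lsm_copy_rule(entry);		/* 1. copy + refresh LSM refs */
    	list_replace_rcu(&entry->list, &nentry->list);	/* 2. publish the copy */
    	synchronize_rcu();				/* 3. wait out old readers */
    	ima_lsm_free_rule(entry);			/* 4. now safe to free */

    	/* Readers (e.g. ima_match_policy()) never block the writer: */
    	rcu_read_lock();
    	list_for_each_entry_rcu(entry, &ima_policy_rules, list)
    		;	/* entry stays live for the whole read side */
    	rcu_read_unlock();
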
@@ -287,6 +370,11 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
{
int i;
+ if (func == KEXEC_CMDLINE) {
+ if ((rule->flags & IMA_FUNC) && (rule->func == func))
+ return true;
+ return false;
+ }
if ((rule->flags & IMA_FUNC) &&
(rule->func != func && func != POST_SETATTR))
return false;
@@ -323,11 +411,10 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
for (i = 0; i < MAX_LSM_RULES; i++) {
int rc = 0;
u32 osid;
- int retried = 0;
if (!rule->lsm[i].rule)
continue;
-retry:
+
switch (i) {
case LSM_OBJ_USER:
case LSM_OBJ_ROLE:
@@ -348,11 +435,6 @@ retry:
default:
break;
}
- if ((rc < 0) && (!retried)) {
- retried = 1;
- ima_lsm_update_rules();
- goto retry;
- }
if (!rc)
return false;
}
@@ -393,6 +475,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
* @func: IMA hook identifier
* @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
* @pcr: set the pcr to extend
+ * @template_desc: the template that should be used for this rule
*
* Measure decision based on func/mask/fsmagic and LSM(subj/obj/type)
* conditions.
@@ -402,7 +485,8 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
* than writes so ima_match_policy() is classical RCU candidate.
*/
int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
- enum ima_hooks func, int mask, int flags, int *pcr)
+ enum ima_hooks func, int mask, int flags, int *pcr,
+ struct ima_template_desc **template_desc)
{
struct ima_rule_entry *entry;
int action = 0, actmask = flags | (flags << 1);
@@ -434,6 +518,11 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
if ((pcr) && (entry->flags & IMA_PCR))
*pcr = entry->pcr;
+ if (template_desc && entry->template)
+ *template_desc = entry->template;
+ else if (template_desc)
+ *template_desc = ima_template_desc_current();
+
if (!actmask)
break;
}
@@ -672,7 +761,7 @@ enum {
Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt,
Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt,
Opt_appraise_type, Opt_permit_directio,
- Opt_pcr, Opt_err
+ Opt_pcr, Opt_template, Opt_err
};
static const match_table_t policy_tokens = {
@@ -706,6 +795,7 @@ static const match_table_t policy_tokens = {
{Opt_appraise_type, "appraise_type=%s"},
{Opt_permit_directio, "permit_directio"},
{Opt_pcr, "pcr=%s"},
+ {Opt_template, "template=%s"},
{Opt_err, NULL}
};
@@ -759,6 +849,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
char *from;
char *p;
bool uid_token;
+ struct ima_template_desc *template_desc;
int result = 0;
ab = integrity_audit_log_start(audit_context(), GFP_KERNEL,
@@ -866,6 +957,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
entry->func = KEXEC_INITRAMFS_CHECK;
else if (strcmp(args[0].from, "POLICY_CHECK") == 0)
entry->func = POLICY_CHECK;
+ else if (strcmp(args[0].from, "KEXEC_CMDLINE") == 0)
+ entry->func = KEXEC_CMDLINE;
else
result = -EINVAL;
if (!result)
@@ -1055,6 +1148,28 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
entry->flags |= IMA_PCR;
break;
+ case Opt_template:
+ ima_log_string(ab, "template", args[0].from);
+ if (entry->action != MEASURE) {
+ result = -EINVAL;
+ break;
+ }
+ template_desc = lookup_template_desc(args[0].from);
+ if (!template_desc || entry->template) {
+ result = -EINVAL;
+ break;
+ }
+
+ /*
+ * template_desc_init_fields() does nothing if
+ * the template is already initialised, so
+ * it's safe to do this unconditionally
+ */
+ template_desc_init_fields(template_desc->fmt,
+ &(template_desc->fields),
+ &(template_desc->num_fields));
+ entry->template = template_desc;
+ break;
case Opt_err:
ima_log_string(ab, "UNKNOWN", p);
result = -EINVAL;
@@ -1330,6 +1445,8 @@ int ima_policy_show(struct seq_file *m, void *v)
}
}
}
+ if (entry->template)
+ seq_printf(m, "template=%s ", entry->template->name);
if (entry->flags & IMA_DIGSIG_REQUIRED)
seq_puts(m, "appraise_type=imasig ");
if (entry->flags & IMA_PERMIT_DIRECTIO)
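
Taken together, the ima_policy.c changes let a measurement rule pin both the new hook and a template. As an illustrative example (not taken from this patch), writing the following rule to the securityfs policy file would measure the kexec command line with the new ima-buf template:

    measure func=KEXEC_CMDLINE template=ima-buf

ima_policy_show() would then print the rule back with the "template=ima-buf " token appended, as the hunk above arranges.
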
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index f4354c267396..cb349d7b2601 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -22,6 +22,7 @@ static struct ima_template_desc builtin_templates[] = {
{.name = IMA_TEMPLATE_IMA_NAME, .fmt = IMA_TEMPLATE_IMA_FMT},
{.name = "ima-ng", .fmt = "d-ng|n-ng"},
{.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
+ {.name = "ima-buf", .fmt = "d-ng|n-ng|buf"},
{.name = "", .fmt = ""}, /* placeholder for a custom format */
};
@@ -39,14 +40,18 @@ static const struct ima_template_field supported_fields[] = {
.field_show = ima_show_template_string},
{.field_id = "sig", .field_init = ima_eventsig_init,
.field_show = ima_show_template_sig},
+ {.field_id = "buf", .field_init = ima_eventbuf_init,
+ .field_show = ima_show_template_buf},
};
-#define MAX_TEMPLATE_NAME_LEN 15
+
+/*
+ * Used when restoring measurements carried over from a kexec. 'd' and 'n' don't
+ * need to be accounted for since they shouldn't be defined in the same template
+ * description as 'd-ng' and 'n-ng' respectively.
+ */
+#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf")
static struct ima_template_desc *ima_template;
-static struct ima_template_desc *lookup_template_desc(const char *name);
-static int template_desc_init_fields(const char *template_fmt,
- const struct ima_template_field ***fields,
- int *num_fields);
static int __init ima_template_setup(char *str)
{
@@ -104,7 +109,7 @@ static int __init ima_template_fmt_setup(char *str)
}
__setup("ima_template_fmt=", ima_template_fmt_setup);
-static struct ima_template_desc *lookup_template_desc(const char *name)
+struct ima_template_desc *lookup_template_desc(const char *name)
{
struct ima_template_desc *template_desc;
int found = 0;
@@ -149,9 +154,9 @@ static int template_fmt_size(const char *template_fmt)
return j + 1;
}
-static int template_desc_init_fields(const char *template_fmt,
- const struct ima_template_field ***fields,
- int *num_fields)
+int template_desc_init_fields(const char *template_fmt,
+ const struct ima_template_field ***fields,
+ int *num_fields)
{
const char *template_fmt_ptr;
const struct ima_template_field *found_fields[IMA_TEMPLATE_NUM_FIELDS_MAX];
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 9fe0ef7f91e2..2fb9a10bc6b7 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -158,6 +158,12 @@ void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
}
+void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data)
+{
+ ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
+}
+
/**
* ima_parse_buf() - Parses lengths and data from an input buffer
* @bufstartp: Buffer start address.
@@ -385,3 +391,18 @@ int ima_eventsig_init(struct ima_event_data *event_data,
return ima_write_template_field_data(xattr_value, event_data->xattr_len,
DATA_FMT_HEX, field_data);
}
+
+/*
+ * ima_eventbuf_init - include the buffer (e.g. the kexec cmdline) as
+ * part of the template data.
+ */
+int ima_eventbuf_init(struct ima_event_data *event_data,
+ struct ima_field_data *field_data)
+{
+ if ((!event_data->buf) || (event_data->buf_len == 0))
+ return 0;
+
+ return ima_write_template_field_data(event_data->buf,
+ event_data->buf_len, DATA_FMT_HEX,
+ field_data);
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index e515955456a3..652aa5de81ef 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -25,6 +25,8 @@ void ima_show_template_string(struct seq_file *m, enum ima_show_type show,
struct ima_field_data *field_data);
void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
struct ima_field_data *field_data);
+void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
+ struct ima_field_data *field_data);
int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
int maxfields, struct ima_field_data *fields, int *curfields,
unsigned long *len_mask, int enforce_mask, char *bufname);
@@ -38,4 +40,6 @@ int ima_eventname_ng_init(struct ima_event_data *event_data,
struct ima_field_data *field_data);
int ima_eventsig_init(struct ima_event_data *event_data,
struct ima_field_data *field_data);
+int ima_eventbuf_init(struct ima_event_data *event_data,
+ struct ima_field_data *field_data);
#endif /* __LINUX_IMA_TEMPLATE_LIB_H */
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 65377848fbc5..875c6a7a5af1 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -12,6 +12,8 @@
#include <linux/key.h>
#include <linux/audit.h>
+struct key_acl;
+
/* iint action cache flags */
#define IMA_MEASURE 0x00000001
#define IMA_MEASURED 0x00000002
@@ -74,6 +76,12 @@ enum evm_ima_xattr_type {
struct evm_ima_xattr_data {
u8 type;
+ u8 data[];
+} __packed;
+
+/* Only used in the EVM HMAC code. */
+struct evm_xattr {
+ struct evm_ima_xattr_data data;
u8 digest[SHA1_DIGEST_SIZE];
} __packed;
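
This struct split is what drives the sizeof() change in evm_verify_hmac() earlier in the diff: evm_ima_xattr_data shrinks to a bare type byte plus a flexible array, so the fixed-size HMAC layout needs its own wrapper. A user-space sketch of the two layouts, assuming SHA1_DIGEST_SIZE is 20 as in <crypto/sha.h>:

    #include <stdio.h>
    #include <stdint.h>

    #define SHA1_DIGEST_SIZE 20	/* assumption, matches the kernel constant */

    struct evm_ima_xattr_data {
    	uint8_t type;
    	uint8_t data[];		/* flexible: contributes nothing to sizeof */
    } __attribute__((packed));

    struct evm_xattr {		/* fixed-size HMAC form */
    	struct evm_ima_xattr_data data;
    	uint8_t digest[SHA1_DIGEST_SIZE];
    } __attribute__((packed));

    int main(void)
    {
    	/* prints "1 21": the HMAC length check must use evm_xattr */
    	printf("%zu %zu\n", sizeof(struct evm_ima_xattr_data),
    	       sizeof(struct evm_xattr));
    	return 0;
    }
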
@@ -149,7 +157,7 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen,
int __init integrity_init_keyring(const unsigned int id);
int __init integrity_load_x509(const unsigned int id, const char *path);
int __init integrity_load_cert(const unsigned int id, const char *source,
- const void *data, size_t len, key_perm_t perm);
+ const void *data, size_t len, struct key_acl *acl);
#else
static inline int integrity_digsig_verify(const unsigned int id,
@@ -167,7 +175,7 @@ static inline int integrity_init_keyring(const unsigned int id)
static inline int __init integrity_load_cert(const unsigned int id,
const char *source,
const void *data, size_t len,
- key_perm_t perm)
+ struct key_acl *acl)
{
return 0;
}
diff --git a/security/integrity/platform_certs/platform_keyring.c b/security/integrity/platform_certs/platform_keyring.c
index bcafd7387729..7646e35f2d91 100644
--- a/security/integrity/platform_certs/platform_keyring.c
+++ b/security/integrity/platform_certs/platform_keyring.c
@@ -14,6 +14,15 @@
#include <linux/slab.h>
#include "../integrity.h"
+static struct key_acl platform_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_READ),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
/**
* add_to_platform_keyring - Add to platform keyring without validation.
* @source: Source of key
@@ -26,13 +35,10 @@
void __init add_to_platform_keyring(const char *source, const void *data,
size_t len)
{
- key_perm_t perm;
int rc;
- perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW;
-
rc = integrity_load_cert(INTEGRITY_KEYRING_PLATFORM, source, data, len,
- perm);
+ &platform_key_acl);
if (rc)
pr_info("Error adding keys to platform keyring %s\n", source);
}
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index ee502e4d390b..dd313438fecf 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -25,6 +25,24 @@ config KEYS_COMPAT
def_bool y
depends on COMPAT && KEYS
+config KEYS_REQUEST_CACHE
+ bool "Enable temporary caching of the last request_key() result"
+ depends on KEYS
+ help
+ This option causes the result of the last successful request_key()
+ call that didn't upcall to the kernel to be cached temporarily in the
+ task_struct. The cache is cleared by exit and just prior to the
+ resumption of userspace.
+
+ This allows multi-step processes, in which each step requests a key
+ that is likely the same as the one requested by the previous step,
+ to save on the searching.
+
+ An example of such a process is a pathwalk through a network
+ filesystem in which each method needs to request an authentication
+ key. Pathwalk will call multiple methods for each dentry traversed
+ (permission, d_revalidate, lookup, getxattr, getacl, ...).
+
config PERSISTENT_KEYRINGS
bool "Enable register of persistent per-UID keyrings"
depends on KEYS
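
A hedged sketch of the fast path this option buys, assuming a one-slot task_struct member; the member name and the surrounding locking are illustrative rather than lifted from the patch:

    	/* In request_key(), before walking the keyrings: */
    	struct key *key = READ_ONCE(current->cached_requested_key);

    	if (key && key->type == type &&
    	    strcmp(key->description, description) == 0 &&
    	    key_validate(key) == 0)
    		return key_get(key);	/* hit: no keyring search needed */

    	/* ...otherwise do the full search; on success, cache the result: */
    	key_put(current->cached_requested_key);
    	current->cached_requested_key = key_get(key);
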
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 35ce47ce2285..b0e59546e7bd 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -155,6 +155,14 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3),
compat_ptr(arg4), compat_ptr(arg5));
+ case KEYCTL_MOVE:
+ return keyctl_keyring_move(arg2, arg3, arg4, arg5);
+ case KEYCTL_GRANT_PERMISSION:
+ return keyctl_grant_permission(arg2, arg3, arg4, arg5);
+
+ case KEYCTL_CAPABILITIES:
+ return keyctl_capabilities(compat_ptr(arg2), arg3);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 60720f58cbe0..9df560e477c2 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -304,7 +304,7 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k
const struct user_key_payload *upayload;
struct key *ukey;
- ukey = request_key(&key_type_user, master_desc, NULL);
+ ukey = request_key(&key_type_user, master_desc, NULL, NULL);
if (IS_ERR(ukey))
goto error;
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index c68528aa49c6..d649f2f29475 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -30,7 +30,7 @@ struct key *request_trusted_key(const char *trusted_desc,
struct trusted_key_payload *tpayload;
struct key *tkey;
- tkey = request_key(&key_type_trusted, trusted_desc, NULL);
+ tkey = request_key(&key_type_trusted, trusted_desc, NULL, NULL);
if (IS_ERR(tkey))
goto error;
diff --git a/security/keys/gc.c b/security/keys/gc.c
index 44e58a3e5663..48c3e124c272 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -150,7 +150,8 @@ static noinline void key_gc_unused_keys(struct list_head *keys)
atomic_dec(&key->user->nikeys);
key_user_put(key->user);
-
+ key_put_tag(key->domain_tag);
+ key_put_acl(rcu_access_pointer(key->acl));
kfree(key->description);
memzero_explicit(key, sizeof(*key));
@@ -220,7 +221,6 @@ continue_scanning:
if (key->type == key_gc_dead_keytype) {
gc_state |= KEY_GC_FOUND_DEAD_KEY;
set_bit(KEY_FLAG_DEAD, &key->flags);
- key->perm = 0;
goto skip_dead_key;
} else if (key->type == &key_type_keyring &&
key->restrict_link) {
diff --git a/security/keys/internal.h b/security/keys/internal.h
index d59bc25a9249..e0c5bb8b1685 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -84,11 +84,18 @@ extern struct rb_root key_serial_tree;
extern spinlock_t key_serial_lock;
extern struct mutex key_construction_mutex;
extern wait_queue_head_t request_key_conswq;
+extern struct key_acl default_key_acl;
+extern struct key_acl joinable_keyring_acl;
+extern void key_set_index_key(struct keyring_index_key *index_key);
extern struct key_type *key_type_lookup(const char *type);
extern void key_type_put(struct key_type *ktype);
+extern int __key_link_lock(struct key *keyring,
+ const struct keyring_index_key *index_key);
+extern int __key_move_lock(struct key *l_keyring, struct key *u_keyring,
+ const struct keyring_index_key *index_key);
extern int __key_link_begin(struct key *keyring,
const struct keyring_index_key *index_key,
struct assoc_array_edit **_edit);
@@ -119,6 +126,7 @@ struct keyring_search_context {
#define KEYRING_SEARCH_NO_CHECK_PERM 0x0008 /* Don't check permissions */
#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0010 /* Give an error on excessive depth */
#define KEYRING_SEARCH_SKIP_EXPIRED 0x0020 /* Ignore expired keys (intention to replace) */
+#define KEYRING_SEARCH_RECURSE 0x0040 /* Search child keyrings also */
int (*iterator)(const void *object, void *iterator_data);
@@ -131,24 +139,27 @@ struct keyring_search_context {
extern bool key_default_cmp(const struct key *key,
const struct key_match_data *match_data);
-extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+extern key_ref_t keyring_search_rcu(key_ref_t keyring_ref,
struct keyring_search_context *ctx);
-extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
-extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_cred_keyrings_rcu(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings_rcu(struct keyring_search_context *ctx);
extern struct key *find_keyring_by_name(const char *name, bool uid_keyring);
-extern int install_user_keyrings(void);
+extern int look_up_user_keyrings(struct key **, struct key **);
+extern struct key *get_user_session_keyring_rcu(const struct cred *);
extern int install_thread_keyring_to_cred(struct cred *);
extern int install_process_keyring_to_cred(struct cred *);
extern int install_session_keyring_to_cred(struct cred *, struct key *);
extern struct key *request_key_and_link(struct key_type *type,
const char *description,
+ struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
void *aux,
+ struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags);
@@ -172,7 +183,10 @@ extern void key_gc_keytype(struct key_type *ktype);
extern int key_task_permission(const key_ref_t key_ref,
const struct cred *cred,
- key_perm_t perm);
+ u32 desired_perm);
+extern unsigned int key_acl_to_perm(const struct key_acl *acl);
+extern long key_set_acl(struct key *key, struct key_acl *acl);
+extern void key_put_acl(struct key_acl *acl);
/*
* Check to see whether permission is granted to use a key in the desired way.
@@ -199,7 +213,8 @@ static inline bool key_is_dead(const struct key *key, time64_t limit)
return
key->flags & ((1 << KEY_FLAG_DEAD) |
(1 << KEY_FLAG_INVALIDATED)) ||
- (key->expiry > 0 && key->expiry <= limit);
+ (key->expiry > 0 && key->expiry <= limit) ||
+ key->domain_tag->removed;
}
/*
@@ -211,13 +226,14 @@ extern long keyctl_update_key(key_serial_t, const void __user *, size_t);
extern long keyctl_revoke_key(key_serial_t);
extern long keyctl_keyring_clear(key_serial_t);
extern long keyctl_keyring_link(key_serial_t, key_serial_t);
+extern long keyctl_keyring_move(key_serial_t, key_serial_t, key_serial_t, unsigned int);
extern long keyctl_keyring_unlink(key_serial_t, key_serial_t);
extern long keyctl_describe_key(key_serial_t, char __user *, size_t);
extern long keyctl_keyring_search(key_serial_t, const char __user *,
const char __user *, key_serial_t);
extern long keyctl_read_key(key_serial_t, char __user *, size_t);
extern long keyctl_chown_key(key_serial_t, uid_t, gid_t);
-extern long keyctl_setperm_key(key_serial_t, key_perm_t);
+extern long keyctl_setperm_key(key_serial_t, unsigned int);
extern long keyctl_instantiate_key(key_serial_t, const void __user *,
size_t, key_serial_t);
extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t);
@@ -320,6 +336,13 @@ static inline long keyctl_pkey_e_d_s(int op,
}
#endif
+extern long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen);
+
+extern long keyctl_grant_permission(key_serial_t keyid,
+ enum key_ace_subject_type type,
+ unsigned int subject,
+ unsigned int perm);
+
/*
* Debugging key validation
*/
diff --git a/security/keys/key.c b/security/keys/key.c
index 9a6108aefae9..519211a996e7 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -195,7 +195,7 @@ serial_exists:
* @uid: The owner of the new key.
* @gid: The group ID for the new key's group permissions.
* @cred: The credentials specifying UID namespace.
- * @perm: The permissions mask of the new key.
+ * @acl: The ACL to attach to the new key.
* @flags: Flags specifying quota properties.
* @restrict_link: Optional link restriction for new keyrings.
*
@@ -223,7 +223,7 @@ serial_exists:
*/
struct key *key_alloc(struct key_type *type, const char *desc,
kuid_t uid, kgid_t gid, const struct cred *cred,
- key_perm_t perm, unsigned long flags,
+ struct key_acl *acl, unsigned long flags,
struct key_restriction *restrict_link)
{
struct key_user *user = NULL;
@@ -246,6 +246,9 @@ struct key *key_alloc(struct key_type *type, const char *desc,
desclen = strlen(desc);
quotalen = desclen + 1 + type->def_datalen;
+ if (!acl)
+ acl = &default_key_acl;
+
/* get hold of the key tracking for this user */
user = key_user_lookup(uid);
if (!user)
@@ -281,17 +284,19 @@ struct key *key_alloc(struct key_type *type, const char *desc,
key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
if (!key->index_key.description)
goto no_memory_3;
+ key->index_key.type = type;
+ key_set_index_key(&key->index_key);
refcount_set(&key->usage, 1);
init_rwsem(&key->sem);
lockdep_set_class(&key->sem, &type->lock_class);
- key->index_key.type = type;
key->user = user;
key->quotalen = quotalen;
key->datalen = type->def_datalen;
key->uid = uid;
key->gid = gid;
- key->perm = perm;
+ refcount_inc(&acl->usage);
+ rcu_assign_pointer(key->acl, acl);
key->restrict_link = restrict_link;
key->last_used_at = ktime_get_real_seconds();
@@ -312,6 +317,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
goto security_error;
/* publish the key by giving it a serial number */
+ refcount_inc(&key->domain_tag->usage);
atomic_inc(&user->nkeys);
key_alloc_serial(key);
@@ -455,7 +461,7 @@ static int __key_instantiate_and_link(struct key *key,
/* disable the authorisation key */
if (authkey)
- key_revoke(authkey);
+ key_invalidate(authkey);
if (prep->expiry != TIME64_MAX) {
key->expiry = prep->expiry;
@@ -496,7 +502,7 @@ int key_instantiate_and_link(struct key *key,
struct key *authkey)
{
struct key_preparsed_payload prep;
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
int ret;
memset(&prep, 0, sizeof(prep));
@@ -511,10 +517,14 @@ int key_instantiate_and_link(struct key *key,
}
if (keyring) {
- ret = __key_link_begin(keyring, &key->index_key, &edit);
+ ret = __key_link_lock(keyring, &key->index_key);
if (ret < 0)
goto error;
+ ret = __key_link_begin(keyring, &key->index_key, &edit);
+ if (ret < 0)
+ goto error_link_end;
+
if (keyring->restrict_link && keyring->restrict_link->check) {
struct key_restriction *keyres = keyring->restrict_link;
@@ -566,7 +576,7 @@ int key_reject_and_link(struct key *key,
struct key *keyring,
struct key *authkey)
{
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
int ret, awaken, link_ret = 0;
key_check(key);
@@ -579,7 +589,12 @@ int key_reject_and_link(struct key *key,
if (keyring->restrict_link)
return -EPERM;
- link_ret = __key_link_begin(keyring, &key->index_key, &edit);
+ link_ret = __key_link_lock(keyring, &key->index_key);
+ if (link_ret == 0) {
+ link_ret = __key_link_begin(keyring, &key->index_key, &edit);
+ if (link_ret < 0)
+ __key_link_end(keyring, &key->index_key, edit);
+ }
}
mutex_lock(&key_construction_mutex);
@@ -603,7 +618,7 @@ int key_reject_and_link(struct key *key,
/* disable the authorisation key */
if (authkey)
- key_revoke(authkey);
+ key_invalidate(authkey);
}
mutex_unlock(&key_construction_mutex);
@@ -776,7 +791,7 @@ error:
* @description: The searchable description for the key.
* @payload: The data to use to instantiate or update the key.
* @plen: The length of @payload.
- * @perm: The permissions mask for a new key.
+ * @acl: The ACL to attach if a key is created.
* @flags: The quota flags for a new key.
*
* Search the destination keyring for a key of the same description and if one
@@ -799,14 +814,14 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
const char *description,
const void *payload,
size_t plen,
- key_perm_t perm,
+ struct key_acl *acl,
unsigned long flags)
{
struct keyring_index_key index_key = {
.description = description,
};
struct key_preparsed_payload prep;
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
const struct cred *cred = current_cred();
struct key *keyring, *key = NULL;
key_ref_t key_ref;
@@ -855,13 +870,20 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
goto error_free_prep;
}
index_key.desc_len = strlen(index_key.description);
+ key_set_index_key(&index_key);
- ret = __key_link_begin(keyring, &index_key, &edit);
+ ret = __key_link_lock(keyring, &index_key);
if (ret < 0) {
key_ref = ERR_PTR(ret);
goto error_free_prep;
}
+ ret = __key_link_begin(keyring, &index_key, &edit);
+ if (ret < 0) {
+ key_ref = ERR_PTR(ret);
+ goto error_link_end;
+ }
+
if (restrict_link && restrict_link->check) {
ret = restrict_link->check(keyring, index_key.type,
&prep.payload, restrict_link->key);
@@ -889,22 +911,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
goto found_matching_key;
}
- /* if the client doesn't provide, decide on the permissions we want */
- if (perm == KEY_PERM_UNDEF) {
- perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
- perm |= KEY_USR_VIEW;
-
- if (index_key.type->read)
- perm |= KEY_POS_READ;
-
- if (index_key.type == &key_type_keyring ||
- index_key.type->update)
- perm |= KEY_POS_WRITE;
- }
-
/* allocate a new key */
key = key_alloc(index_key.type, index_key.description,
- cred->fsuid, cred->fsgid, cred, perm, flags, NULL);
+ cred->fsuid, cred->fsgid, cred, acl, flags, NULL);
if (IS_ERR(key)) {
key_ref = ERR_CAST(key);
goto error_link_end;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 5aa605ef8d9d..c2dd66d556d4 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -26,6 +26,21 @@
#define KEY_MAX_DESC_SIZE 4096
+static const unsigned char keyrings_capabilities[2] = {
+ [0] = (KEYCTL_CAPS0_CAPABILITIES |
+ (IS_ENABLED(CONFIG_PERSISTENT_KEYRINGS) ? KEYCTL_CAPS0_PERSISTENT_KEYRINGS : 0) |
+ (IS_ENABLED(CONFIG_KEY_DH_OPERATIONS) ? KEYCTL_CAPS0_DIFFIE_HELLMAN : 0) |
+ (IS_ENABLED(CONFIG_ASYMMETRIC_KEY_TYPE) ? KEYCTL_CAPS0_PUBLIC_KEY : 0) |
+ (IS_ENABLED(CONFIG_BIG_KEYS) ? KEYCTL_CAPS0_BIG_KEY : 0) |
+ KEYCTL_CAPS0_INVALIDATE |
+ KEYCTL_CAPS0_RESTRICT_KEYRING |
+ KEYCTL_CAPS0_MOVE
+ ),
+ [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME |
+ KEYCTL_CAPS1_NS_KEY_TAG |
+ KEYCTL_CAPS1_ACL_ALTERABLE),
+};
+
static int key_get_type_from_user(char *type,
const char __user *_type,
unsigned len)
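
keyctl_capabilities() later copies as much of this array as the caller asks for, giving userspace a probe-free feature test. A hedged user-space sketch of querying it via the raw syscall (the constant's value is an assumption taken from the UAPI header this series adds):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef KEYCTL_CAPABILITIES
    #define KEYCTL_CAPABILITIES 31	/* assumption: UAPI value */
    #endif

    int main(void)
    {
    	unsigned char caps[2] = { 0, 0 };
    	long n = syscall(__NR_keyctl, KEYCTL_CAPABILITIES, caps, sizeof(caps));

    	if (n < 0) {
    		perror("keyctl");
    		return 1;
    	}
    	/* e.g. the KEYCTL_CAPS0_MOVE bit in caps[0] => KEYCTL_MOVE exists */
    	printf("returned %ld bytes: %02x %02x\n", n, caps[0], caps[1]);
    	return 0;
    }
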
@@ -116,8 +131,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
/* create or update the requested key and add it to the target
* keyring */
key_ref = key_create_or_update(keyring_ref, type, description,
- payload, plen, KEY_PERM_UNDEF,
- KEY_ALLOC_IN_QUOTA);
+ payload, plen, NULL, KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key_ref)) {
ret = key_ref_to_ptr(key_ref)->serial;
key_ref_put(key_ref);
@@ -206,8 +220,9 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
}
/* do the search */
- key = request_key_and_link(ktype, description, callout_info,
- callout_len, NULL, key_ref_to_ptr(dest_ref),
+ key = request_key_and_link(ktype, description, NULL, callout_info,
+ callout_len, NULL, NULL,
+ key_ref_to_ptr(dest_ref),
KEY_ALLOC_IN_QUOTA);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
@@ -369,16 +384,10 @@ long keyctl_revoke_key(key_serial_t id)
struct key *key;
long ret;
- key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_REVOKE);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
- if (ret != -EACCES)
- goto error;
- key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
- if (IS_ERR(key_ref)) {
- ret = PTR_ERR(key_ref);
- goto error;
- }
+ goto error;
}
key = key_ref_to_ptr(key_ref);
@@ -412,7 +421,7 @@ long keyctl_invalidate_key(key_serial_t id)
kenter("%d", id);
- key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_INVAL);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
@@ -457,7 +466,7 @@ long keyctl_keyring_clear(key_serial_t ringid)
struct key *keyring;
long ret;
- keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
+ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_CLEAR);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
@@ -569,6 +578,52 @@ error:
}
/*
+ * Move a link to a key from one keyring to another, displacing any matching
+ * key from the destination keyring.
+ *
+ * The key must grant the caller Link permission and both keyrings must grant
+ * the caller Write permission. There must also be a link in the from keyring
+ * to the key. If both keyrings are the same, nothing is done.
+ *
+ * If successful, 0 will be returned.
+ */
+long keyctl_keyring_move(key_serial_t id, key_serial_t from_ringid,
+ key_serial_t to_ringid, unsigned int flags)
+{
+ key_ref_t key_ref, from_ref, to_ref;
+ long ret;
+
+ if (flags & ~KEYCTL_MOVE_EXCL)
+ return -EINVAL;
+
+ key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+ if (IS_ERR(key_ref))
+ return PTR_ERR(key_ref);
+
+ from_ref = lookup_user_key(from_ringid, 0, KEY_NEED_WRITE);
+ if (IS_ERR(from_ref)) {
+ ret = PTR_ERR(from_ref);
+ goto error2;
+ }
+
+ to_ref = lookup_user_key(to_ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE);
+ if (IS_ERR(to_ref)) {
+ ret = PTR_ERR(to_ref);
+ goto error3;
+ }
+
+ ret = key_move(key_ref_to_ptr(key_ref), key_ref_to_ptr(from_ref),
+ key_ref_to_ptr(to_ref), flags);
+
+ key_ref_put(to_ref);
+error3:
+ key_ref_put(from_ref);
+error2:
+ key_ref_put(key_ref);
+ return ret;
+}
+
+/*
* Return a description of a key to userspace.
*
* The key must grant the caller View permission for this to work.
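
For reference, the userspace side of the new move operation is a single keyctl call. A hedged sketch using the raw syscall (the constants' values are assumptions taken from the UAPI header; keyutils would normally wrap this):

    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef KEYCTL_MOVE
    #define KEYCTL_MOVE      30	/* assumption: UAPI value */
    #define KEYCTL_MOVE_EXCL 0x1	/* fail rather than displace a match */
    #endif

    /* Move 'key' from one keyring to another in one operation. */
    static long move_key(int key, int from_ring, int to_ring)
    {
    	return syscall(__NR_keyctl, KEYCTL_MOVE, key, from_ring, to_ring,
    		       KEYCTL_MOVE_EXCL);
    }
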
@@ -586,6 +641,7 @@ long keyctl_describe_key(key_serial_t keyid,
size_t buflen)
{
struct key *key, *instkey;
+ unsigned int perm;
key_ref_t key_ref;
char *infobuf;
long ret;
@@ -615,6 +671,10 @@ okay:
key = key_ref_to_ptr(key_ref);
desclen = strlen(key->description);
+ rcu_read_lock();
+ perm = key_acl_to_perm(rcu_dereference(key->acl));
+ rcu_read_unlock();
+
/* calculate how much information we're going to return */
ret = -ENOMEM;
infobuf = kasprintf(GFP_KERNEL,
@@ -622,7 +682,7 @@ okay:
key->type->name,
from_kuid_munged(current_user_ns(), key->uid),
from_kgid_munged(current_user_ns(), key->gid),
- key->perm);
+ perm);
if (!infobuf)
goto error2;
infolen = strlen(infobuf);
@@ -700,7 +760,7 @@ long keyctl_keyring_search(key_serial_t ringid,
}
/* do the search */
- key_ref = keyring_search(keyring_ref, ktype, description);
+ key_ref = keyring_search(keyring_ref, ktype, description, true);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
@@ -839,7 +899,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
goto error;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETATTR);
+ KEY_NEED_SETSEC);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
@@ -934,18 +994,25 @@ quota_overrun:
* the key need not be fully instantiated yet. If the caller does not have
* sysadmin capability, it may only change the permission on keys that it owns.
*/
-long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
+long keyctl_setperm_key(key_serial_t id, unsigned int perm)
{
+ struct key_acl *acl;
struct key *key;
key_ref_t key_ref;
long ret;
+ int nr, i, j;
- ret = -EINVAL;
if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
- goto error;
+ return -EINVAL;
+
+ nr = 0;
+ if (perm & KEY_POS_ALL) nr++;
+ if (perm & KEY_USR_ALL) nr++;
+ if (perm & KEY_GRP_ALL) nr++;
+ if (perm & KEY_OTH_ALL) nr++;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETATTR);
+ KEY_NEED_SETSEC);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
@@ -953,17 +1020,45 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
key = key_ref_to_ptr(key_ref);
- /* make the changes with the locks held to prevent chown/chmod races */
- ret = -EACCES;
- down_write(&key->sem);
+ ret = -EOPNOTSUPP;
+ if (test_bit(KEY_FLAG_HAS_ACL, &key->flags))
+ goto error_key;
- /* if we're not the sysadmin, we can only change a key that we own */
- if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
- key->perm = perm;
- ret = 0;
+ ret = -ENOMEM;
+ acl = kzalloc(struct_size(acl, aces, nr), GFP_KERNEL);
+ if (!acl)
+ goto error_key;
+
+ refcount_set(&acl->usage, 1);
+ acl->nr_ace = nr;
+ j = 0;
+ for (i = 0; i < 4; i++) {
+ struct key_ace *ace = &acl->aces[j];
+ unsigned int subset = (perm >> (i * 8)) & KEY_OTH_ALL;
+
+ if (!subset)
+ continue;
+ ace->type = KEY_ACE_SUBJ_STANDARD;
+ ace->subject_id = KEY_ACE_EVERYONE + i;
+ ace->perm = subset;
+ if (subset & (KEY_OTH_WRITE | KEY_OTH_SETATTR))
+ ace->perm |= KEY_ACE_REVOKE;
+ if (subset & KEY_OTH_SEARCH)
+ ace->perm |= KEY_ACE_INVAL;
+ if (key->type == &key_type_keyring) {
+ if (subset & KEY_OTH_SEARCH)
+ ace->perm |= KEY_ACE_JOIN;
+ if (subset & KEY_OTH_WRITE)
+ ace->perm |= KEY_ACE_CLEAR;
+ }
+ j++;
}
+ /* make the changes with the locks held to prevent chown/chmod races */
+ down_write(&key->sem);
+ ret = key_set_acl(key, acl);
up_write(&key->sem);
+error_key:
key_put(key);
error:
return ret;
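To see how the mask-to-ACL conversion above behaves, here is a small userspace model of the subset extraction loop; the numeric masks mirror the old keyctl ABI (0x3f per subject byte, subjects ordered everyone, group, owner, possessor as in KEY_ACE_EVERYONE + i). KEY_POS_ALL | KEY_USR_VIEW thus yields nr == 2 ACEs:

    #include <stdio.h>

    int main(void)
    {
            /* KEY_POS_ALL | KEY_USR_VIEW under the old permissions mask */
            unsigned int perm = 0x3f000000 | 0x00010000;
            static const char *subject[] = {
                    "everyone", "group", "owner", "possessor"
            };
            int i, nr = 0;

            for (i = 0; i < 4; i++) {
                    unsigned int subset = (perm >> (i * 8)) & 0x3f;

                    if (!subset)
                            continue;  /* no ACE for this subject */
                    printf("ACE %d: %s gets %#04x\n", nr++, subject[i], subset);
            }
            /* prints: ACE 0: owner gets 0x01
             *         ACE 1: possessor gets 0x3f */
            return 0;
    }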
@@ -1328,7 +1423,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
long ret;
key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
- KEY_NEED_SETATTR);
+ KEY_NEED_SETSEC);
if (IS_ERR(key_ref)) {
/* setting the timeout on a key under construction is permitted
* if we have the authorisation token handy */
@@ -1479,7 +1574,7 @@ long keyctl_get_security(key_serial_t keyid,
* Attempt to install the calling process's session keyring on the process's
* parent process.
*
- * The keyring must exist and must grant the caller LINK permission, and the
+ * The keyring must exist and must grant the caller JOIN permission, and the
* parent process must be single-threaded and must have the same effective
* ownership as this process and mustn't be SUID/SGID.
*
@@ -1496,7 +1591,7 @@ long keyctl_session_to_parent(void)
struct cred *cred;
int ret;
- keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK);
+ keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_JOIN);
if (IS_ERR(keyring_r))
return PTR_ERR(keyring_r);
@@ -1520,7 +1615,8 @@ long keyctl_session_to_parent(void)
ret = -EPERM;
oldwork = NULL;
- parent = me->real_parent;
+ parent = rcu_dereference_protected(me->real_parent,
+ lockdep_is_held(&tasklist_lock));
/* the parent mustn't be init and mustn't be a kernel thread */
if (parent->pid <= 1 || !parent->mm)
@@ -1597,7 +1693,7 @@ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type,
char *restriction = NULL;
long ret;
- key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR);
+ key_ref = lookup_user_key(id, 0, KEY_NEED_SETSEC);
if (IS_ERR(key_ref))
return PTR_ERR(key_ref);
@@ -1628,6 +1724,26 @@ error:
}
/*
+ * Get keyrings subsystem capabilities.
+ */
+long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen)
+{
+ size_t size = buflen;
+
+ if (size > 0) {
+ if (size > sizeof(keyrings_capabilities))
+ size = sizeof(keyrings_capabilities);
+ if (copy_to_user(_buffer, keyrings_capabilities, size) != 0)
+ return -EFAULT;
+ if (size < buflen &&
+ clear_user(_buffer + size, buflen - size) != 0)
+ return -EFAULT;
+ }
+
+ return sizeof(keyrings_capabilities);
+}
+
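A probing sketch, assuming this series' UAPI constants: a zero-length call returns the full blob size without copying anything, so userspace can size its buffer first (a short buffer is truncated, a long one is zero-padded, per the clear_user() above):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/keyctl.h>

    int main(void)
    {
            unsigned char caps[256] = { 0 };
            long size = syscall(__NR_keyctl, KEYCTL_CAPABILITIES, NULL, 0);

            if (size < 0 || syscall(__NR_keyctl, KEYCTL_CAPABILITIES,
                                    caps, sizeof(caps)) < 0) {
                    perror("keyctl(KEYCTL_CAPABILITIES)");
                    return 1;
            }
            printf("%ld capability bytes; caps[0] = %#04x\n", size, caps[0]);
            return 0;
    }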
+/*
* The key control system call
*/
SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -1683,7 +1799,7 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
case KEYCTL_SETPERM:
return keyctl_setperm_key((key_serial_t) arg2,
- (key_perm_t) arg3);
+ (unsigned int)arg3);
case KEYCTL_INSTANTIATE:
return keyctl_instantiate_key((key_serial_t) arg2,
@@ -1767,6 +1883,20 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
(const void __user *)arg4,
(const void __user *)arg5);
+ case KEYCTL_MOVE:
+ return keyctl_keyring_move((key_serial_t)arg2,
+ (key_serial_t)arg3,
+ (key_serial_t)arg4,
+ (unsigned int)arg5);
+ case KEYCTL_GRANT_PERMISSION:
+ return keyctl_grant_permission((key_serial_t)arg2,
+ (enum key_ace_subject_type)arg3,
+ (unsigned int)arg4,
+ (unsigned int)arg5);
+
+ case KEYCTL_CAPABILITIES:
+ return keyctl_capabilities((unsigned char __user *)arg2, (size_t)arg3);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index e311cc5df358..3b5458f23a95 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -12,10 +12,13 @@
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/err.h>
+#include <linux/user_namespace.h>
+#include <linux/nsproxy.h>
#include <keys/keyring-type.h>
#include <keys/user-type.h>
#include <linux/assoc_array_priv.h>
#include <linux/uaccess.h>
+#include <net/net_namespace.h>
#include "internal.h"
/*
@@ -25,11 +28,6 @@
#define KEYRING_SEARCH_MAX_DEPTH 6
/*
- * We keep all named keyrings in a hash to speed looking them up.
- */
-#define KEYRING_NAME_HASH_SIZE (1 << 5)
-
-/*
* We mark pointers we pass to the associative array with bit 1 set if
* they're keyrings and clear otherwise.
*/
@@ -51,17 +49,21 @@ static inline void *keyring_key_to_ptr(struct key *key)
return key;
}
-static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE];
static DEFINE_RWLOCK(keyring_name_lock);
-static inline unsigned keyring_hash(const char *desc)
+/*
+ * Clean up the bits of user_namespace that belong to us.
+ */
+void key_free_user_ns(struct user_namespace *ns)
{
- unsigned bucket = 0;
-
- for (; *desc; desc++)
- bucket += (unsigned char)*desc;
-
- return bucket & (KEYRING_NAME_HASH_SIZE - 1);
+ write_lock(&keyring_name_lock);
+ list_del_init(&ns->keyring_name_list);
+ write_unlock(&keyring_name_lock);
+
+ key_put(ns->user_keyring_register);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+ key_put(ns->persistent_keyring_register);
+#endif
}
/*
@@ -96,27 +98,21 @@ EXPORT_SYMBOL(key_type_keyring);
* Semaphore to serialise link/link calls to prevent two link calls in parallel
* introducing a cycle.
*/
-static DECLARE_RWSEM(keyring_serialise_link_sem);
+static DEFINE_MUTEX(keyring_serialise_link_lock);
/*
* Publish the name of a keyring so that it can be found by name (if it has
- * one).
+ * one and it doesn't begin with a dot).
*/
static void keyring_publish_name(struct key *keyring)
{
- int bucket;
-
- if (keyring->description) {
- bucket = keyring_hash(keyring->description);
+ struct user_namespace *ns = current_user_ns();
+ if (keyring->description &&
+ keyring->description[0] &&
+ keyring->description[0] != '.') {
write_lock(&keyring_name_lock);
-
- if (!keyring_name_hash[bucket].next)
- INIT_LIST_HEAD(&keyring_name_hash[bucket]);
-
- list_add_tail(&keyring->name_link,
- &keyring_name_hash[bucket]);
-
+ list_add_tail(&keyring->name_link, &ns->keyring_name_list);
write_unlock(&keyring_name_lock);
}
}
@@ -164,7 +160,7 @@ static u64 mult_64x32_and_fold(u64 x, u32 y)
/*
* Hash a key type and description.
*/
-static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
+static void hash_key_type_and_desc(struct keyring_index_key *index_key)
{
const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
const unsigned long fan_mask = ASSOC_ARRAY_FAN_MASK;
@@ -175,9 +171,12 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde
int n, desc_len = index_key->desc_len;
type = (unsigned long)index_key->type;
-
acc = mult_64x32_and_fold(type, desc_len + 13);
acc = mult_64x32_and_fold(acc, 9207);
+ piece = (unsigned long)index_key->domain_tag;
+ acc = mult_64x32_and_fold(acc, piece);
+ acc = mult_64x32_and_fold(acc, 9207);
+
for (;;) {
n = desc_len;
if (n <= 0)
@@ -202,24 +201,67 @@ static unsigned long hash_key_type_and_desc(const struct keyring_index_key *inde
* zero for keyrings and non-zero otherwise.
*/
if (index_key->type != &key_type_keyring && (hash & fan_mask) == 0)
- return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
- if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
- return (hash + (hash << level_shift)) & ~fan_mask;
- return hash;
+ hash |= (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+ else if (index_key->type == &key_type_keyring && (hash & fan_mask) != 0)
+ hash = (hash + (hash << level_shift)) & ~fan_mask;
+ index_key->hash = hash;
}
/*
- * Build the next index key chunk.
- *
- * On 32-bit systems the index key is laid out as:
- *
- * 0 4 5 9...
- * hash desclen typeptr desc[]
+ * Finalise an index key: embed the leading part of the description in the
+ * index key itself, set the domain tag and calculate the hash.
+ */
+void key_set_index_key(struct keyring_index_key *index_key)
+{
+ static struct key_tag default_domain_tag = { .usage = REFCOUNT_INIT(1), };
+ size_t n = min_t(size_t, index_key->desc_len, sizeof(index_key->desc));
+
+ memcpy(index_key->desc, index_key->description, n);
+
+ if (!index_key->domain_tag) {
+ if (index_key->type->flags & KEY_TYPE_NET_DOMAIN)
+ index_key->domain_tag = current->nsproxy->net_ns->key_domain;
+ else
+ index_key->domain_tag = &default_domain_tag;
+ }
+
+ hash_key_type_and_desc(index_key);
+}
+
+/**
+ * key_put_tag - Release a ref on a tag.
+ * @tag: The tag to release.
*
- * On 64-bit systems:
+ * This releases a reference on the given tag and returns true if that ref was the
+ * last one.
+ */
+bool key_put_tag(struct key_tag *tag)
+{
+ if (refcount_dec_and_test(&tag->usage)) {
+ kfree_rcu(tag, rcu);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * key_remove_domain - Kill off a key domain and gc its keys
+ * @domain_tag: The domain tag to release.
*
- * 0 8 9 17...
- * hash desclen typeptr desc[]
+ * This marks a domain tag as being dead and releases a ref on it. If that
+ * wasn't the last reference, the garbage collector is poked to try and delete
+ * all keys that were in the domain.
+ */
+void key_remove_domain(struct key_tag *domain_tag)
+{
+ domain_tag->removed = true;
+ if (!key_put_tag(domain_tag))
+ key_schedule_gc_links();
+}
+
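The intended usage pattern, sketched for a hypothetical namespace-like owner (the net_ns hookup elsewhere in this series follows the same shape, and the key_tag fields are those used in this file): take one ref at creation time, then hand the tag to key_remove_domain() at teardown so the link gc reaps any keys still carrying it:

    /* Hypothetical owner of a key domain; not part of this patch. */
    struct example_ns {
            struct key_tag *key_domain;
    };

    static int example_ns_init(struct example_ns *ns)
    {
            ns->key_domain = kzalloc(sizeof(*ns->key_domain), GFP_KERNEL);
            if (!ns->key_domain)
                    return -ENOMEM;
            refcount_set(&ns->key_domain->usage, 1);
            return 0;
    }

    static void example_ns_exit(struct example_ns *ns)
    {
            /* Marks the domain dead and drops our ref; the gc then
             * deletes any keys indexed under this tag. */
            key_remove_domain(ns->key_domain);
    }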
+/*
+ * Build the next index key chunk.
*
* We return it one word-sized chunk at a time.
*/
@@ -227,41 +269,33 @@ static unsigned long keyring_get_key_chunk(const void *data, int level)
{
const struct keyring_index_key *index_key = data;
unsigned long chunk = 0;
- long offset = 0;
+ const u8 *d;
int desc_len = index_key->desc_len, n = sizeof(chunk);
level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
switch (level) {
case 0:
- return hash_key_type_and_desc(index_key);
+ return index_key->hash;
case 1:
- return ((unsigned long)index_key->type << 8) | desc_len;
+ return index_key->x;
case 2:
- if (desc_len == 0)
- return (u8)((unsigned long)index_key->type >>
- (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
- n--;
- offset = 1;
- /* fall through */
+ return (unsigned long)index_key->type;
+ case 3:
+ return (unsigned long)index_key->domain_tag;
default:
- offset += sizeof(chunk) - 1;
- offset += (level - 3) * sizeof(chunk);
- if (offset >= desc_len)
+ level -= 4;
+ if (desc_len <= sizeof(index_key->desc))
return 0;
- desc_len -= offset;
+
+ d = index_key->description + sizeof(index_key->desc);
+ d += level * sizeof(long);
+ desc_len -= sizeof(index_key->desc);
if (desc_len > n)
desc_len = n;
- offset += desc_len;
do {
chunk <<= 8;
- chunk |= ((u8*)index_key->description)[--offset];
+ chunk |= *d++;
} while (--desc_len > 0);
-
- if (level == 2) {
- chunk <<= 8;
- chunk |= (u8)((unsigned long)index_key->type >>
- (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
- }
return chunk;
}
}
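The `x` word consumed at level 1 packs desc_len together with the first sizeof(long)-2 description characters, which key_set_index_key() copies in above. A little-endian userspace model, with the union shape assumed from this series' keyring_index_key:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* Model of the level-1 index-key word on little-endian machines. */
    union index_word {
            struct {
                    uint16_t desc_len;
                    char desc[sizeof(unsigned long) - 2];
            };
            unsigned long x;
    };

    int main(void)
    {
            union index_word w = { .desc_len = 9 };

            /* Only the prefix fits; the tail is compared at levels 4+. */
            memcpy(w.desc, "_uid.1000", sizeof(w.desc));
            printf("level-1 chunk = %#lx\n", w.x);
            return 0;
    }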
@@ -278,6 +312,7 @@ static bool keyring_compare_object(const void *object, const void *data)
const struct key *key = keyring_ptr_to_key(object);
return key->index_key.type == index_key->type &&
+ key->index_key.domain_tag == index_key->domain_tag &&
key->index_key.desc_len == index_key->desc_len &&
memcmp(key->index_key.description, index_key->description,
index_key->desc_len) == 0;
@@ -296,43 +331,38 @@ static int keyring_diff_objects(const void *object, const void *data)
int level, i;
level = 0;
- seg_a = hash_key_type_and_desc(a);
- seg_b = hash_key_type_and_desc(b);
+ seg_a = a->hash;
+ seg_b = b->hash;
if ((seg_a ^ seg_b) != 0)
goto differ;
+ level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
/* The number of bits contributed by the hash is controlled by a
* constant in the assoc_array headers. Everything else thereafter we
* can deal with as being machine word-size dependent.
*/
- level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
- seg_a = a->desc_len;
- seg_b = b->desc_len;
+ seg_a = a->x;
+ seg_b = b->x;
if ((seg_a ^ seg_b) != 0)
goto differ;
+ level += sizeof(unsigned long);
/* The next bit may not work on big endian */
- level++;
seg_a = (unsigned long)a->type;
seg_b = (unsigned long)b->type;
if ((seg_a ^ seg_b) != 0)
goto differ;
+ level += sizeof(unsigned long);
+ seg_a = (unsigned long)a->domain_tag;
+ seg_b = (unsigned long)b->domain_tag;
+ if ((seg_a ^ seg_b) != 0)
+ goto differ;
level += sizeof(unsigned long);
- if (a->desc_len == 0)
- goto same;
- i = 0;
- if (((unsigned long)a->description | (unsigned long)b->description) &
- (sizeof(unsigned long) - 1)) {
- do {
- seg_a = *(unsigned long *)(a->description + i);
- seg_b = *(unsigned long *)(b->description + i);
- if ((seg_a ^ seg_b) != 0)
- goto differ_plus_i;
- i += sizeof(unsigned long);
- } while (i < (a->desc_len & (sizeof(unsigned long) - 1)));
- }
+ i = sizeof(a->desc);
+ if (a->desc_len <= i)
+ goto same;
for (; i < a->desc_len; i++) {
seg_a = *(unsigned char *)(a->description + i);
@@ -485,11 +515,19 @@ static long keyring_read(const struct key *keyring,
return ret;
}
-/*
- * Allocate a keyring and link into the destination keyring.
+/**
+ * keyring_alloc - Allocate a keyring and link into the destination
+ * @description: The key description to allow the key to be searched out.
+ * @uid: The owner of the new key.
+ * @gid: The group ID for the new key's group permissions.
+ * @cred: The credentials specifying UID namespace.
+ * @acl: The ACL to attach to the new key.
+ * @flags: Flags specifying quota properties.
+ * @restrict_link: Optional link restriction for new keyrings.
+ * @dest: Destination keyring.
*/
struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
- const struct cred *cred, key_perm_t perm,
+ const struct cred *cred, struct key_acl *acl,
unsigned long flags,
struct key_restriction *restrict_link,
struct key *dest)
@@ -498,7 +536,7 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
int ret;
keyring = key_alloc(&key_type_keyring, description,
- uid, gid, cred, perm, flags, restrict_link);
+ uid, gid, cred, acl, flags, restrict_link);
if (!IS_ERR(keyring)) {
ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
if (ret < 0) {
@@ -516,7 +554,7 @@ EXPORT_SYMBOL(keyring_alloc);
* @keyring: The keyring being added to.
* @type: The type of key being added.
* @payload: The payload of the key intended to be added.
- * @data: Additional data for evaluating restriction.
+ * @restriction_key: Keys providing additional data for evaluating restriction.
*
* Reject the addition of any links to a keyring. It can be overridden by
* passing KEY_ALLOC_BYPASS_RESTRICTION to key_instantiate_and_link() when
@@ -658,6 +696,9 @@ static bool search_nested_keyrings(struct key *keyring,
BUG_ON((ctx->flags & STATE_CHECKS) == 0 ||
(ctx->flags & STATE_CHECKS) == STATE_CHECKS);
+ if (ctx->index_key.description)
+ key_set_index_key(&ctx->index_key);
+
/* Check to see if this top-level keyring is what we are looking for
* and whether it is valid or not.
*/
@@ -697,6 +738,9 @@ descend_to_keyring:
* Non-keyrings avoid the leftmost branch of the root entirely (root
* slots 1-15).
*/
+ if (!(ctx->flags & KEYRING_SEARCH_RECURSE))
+ goto not_this_keyring;
+
ptr = READ_ONCE(keyring->keys.root);
if (!ptr)
goto not_this_keyring;
@@ -831,7 +875,7 @@ found:
}
/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * keyring_search_rcu - Search a keyring tree for a matching key under RCU
* @keyring_ref: A pointer to the keyring with possession indicator.
* @ctx: The keyring search context.
*
@@ -843,7 +887,9 @@ found:
* addition, the LSM gets to forbid keyring searches and key matches.
*
* The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
+ * limit (KEYRING_SEARCH_MAX_DEPTH). The caller must hold the RCU read lock to
+ * prevent keyrings from being destroyed or rearranged whilst they are being
+ * searched.
*
* Keys are matched to the type provided and are then filtered by the match
* function, which is given the description to use in any way it sees fit. The
@@ -862,7 +908,7 @@ found:
* In the case of a successful return, the possession attribute from
* @keyring_ref is propagated to the returned key reference.
*/
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+key_ref_t keyring_search_rcu(key_ref_t keyring_ref,
struct keyring_search_context *ctx)
{
struct key *keyring;
@@ -884,11 +930,9 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
return ERR_PTR(err);
}
- rcu_read_lock();
ctx->now = ktime_get_real_seconds();
if (search_nested_keyrings(keyring, ctx))
__key_get(key_ref_to_ptr(ctx->result));
- rcu_read_unlock();
return ctx->result;
}
@@ -897,13 +941,15 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
* @keyring: The root of the keyring tree to be searched.
* @type: The type of keyring we want to find.
* @description: The name of the keyring we want to find.
+ * @recurse: True to search the children of @keyring also
*
- * As keyring_search_aux() above, but using the current task's credentials and
+ * As keyring_search_rcu() above, but using the current task's credentials and
* type's default matching function and preferred search method.
*/
key_ref_t keyring_search(key_ref_t keyring,
struct key_type *type,
- const char *description)
+ const char *description,
+ bool recurse)
{
struct keyring_search_context ctx = {
.index_key.type = type,
@@ -918,13 +964,17 @@ key_ref_t keyring_search(key_ref_t keyring,
key_ref_t key;
int ret;
+ if (recurse)
+ ctx.flags |= KEYRING_SEARCH_RECURSE;
if (type->match_preparse) {
ret = type->match_preparse(&ctx.match_data);
if (ret < 0)
return ERR_PTR(ret);
}
- key = keyring_search_aux(keyring, &ctx);
+ rcu_read_lock();
+ key = keyring_search_rcu(keyring, &ctx);
+ rcu_read_unlock();
if (type->match_free)
type->match_free(&ctx.match_data);
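The new @recurse flag lets callers pin a search to a single keyring. For instance, the UID-keyring lookup added to process_keys.c later in this patch searches only the register itself:

    /* Sketch: find "_uid.1000" directly in the register, without
     * descending into any keyrings linked beneath it. */
    key_ref_t ref = keyring_search(make_key_ref(reg_keyring, true),
                                   &key_type_keyring, "_uid.1000", false);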
@@ -972,9 +1022,13 @@ static bool keyring_detect_restriction_cycle(const struct key *dest_keyring,
/**
* keyring_restrict - Look up and apply a restriction to a keyring
- *
- * @keyring: The keyring to be restricted
+ * @keyring_ref: The keyring to be restricted
+ * @type: The key type that will provide the restriction checker.
* @restriction: The restriction options to apply to the keyring
+ *
+ * Look up a keyring and apply a restriction to it. The restriction is managed
+ * by the specific key type, but can be configured by the options specified in
+ * the restriction string.
*/
int keyring_restrict(key_ref_t keyring_ref, const char *type,
const char *restriction)
@@ -1086,60 +1140,55 @@ found:
/*
* Find a keyring with the specified name.
*
- * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
- * user in the current user namespace are considered. If @uid_keyring is %true,
- * the keyring additionally must have been allocated as a user or user session
- * keyring; otherwise, it must grant Search permission directly to the caller.
+ * Only keyrings that have nonzero refcount, are not revoked, and are owned by
+ * a user in the current user namespace are considered. If @uid_keyring is
+ * %true, the keyring additionally must have been allocated as a user or user
+ * session keyring; otherwise, it must grant JOIN permission directly to the
+ * caller (ie. not through possession).
*
* Returns a pointer to the keyring with the keyring's refcount having been
* incremented on success. -ENOKEY is returned if a key could not be found.
*/
struct key *find_keyring_by_name(const char *name, bool uid_keyring)
{
+ struct user_namespace *ns = current_user_ns();
struct key *keyring;
- int bucket;
if (!name)
return ERR_PTR(-EINVAL);
- bucket = keyring_hash(name);
-
read_lock(&keyring_name_lock);
- if (keyring_name_hash[bucket].next) {
- /* search this hash bucket for a keyring with a matching name
- * that's readable and that hasn't been revoked */
- list_for_each_entry(keyring,
- &keyring_name_hash[bucket],
- name_link
- ) {
- if (!kuid_has_mapping(current_user_ns(), keyring->user->uid))
- continue;
-
- if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
- continue;
+ /* Search this namespace's list of keyrings for one with a matching name
+ * that grants Join permission and that hasn't been revoked
+ */
+ list_for_each_entry(keyring, &ns->keyring_name_list, name_link) {
+ if (!kuid_has_mapping(ns, keyring->user->uid))
+ continue;
- if (strcmp(keyring->description, name) != 0)
- continue;
+ if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
+ continue;
- if (uid_keyring) {
- if (!test_bit(KEY_FLAG_UID_KEYRING,
- &keyring->flags))
- continue;
- } else {
- if (key_permission(make_key_ref(keyring, 0),
- KEY_NEED_SEARCH) < 0)
- continue;
- }
+ if (strcmp(keyring->description, name) != 0)
+ continue;
- /* we've got a match but we might end up racing with
- * key_cleanup() if the keyring is currently 'dead'
- * (ie. it has a zero usage count) */
- if (!refcount_inc_not_zero(&keyring->usage))
+ if (uid_keyring) {
+ if (!test_bit(KEY_FLAG_UID_KEYRING,
+ &keyring->flags))
+ continue;
+ } else {
+ if (key_permission(make_key_ref(keyring, 0),
+ KEY_NEED_JOIN) < 0)
continue;
- keyring->last_used_at = ktime_get_real_seconds();
- goto out;
}
+
+ /* we've got a match but we might end up racing with
+ * key_cleanup() if the keyring is currently 'dead'
+ * (ie. it has a zero usage count) */
+ if (!refcount_inc_not_zero(&keyring->usage))
+ continue;
+ keyring->last_used_at = ktime_get_real_seconds();
+ goto out;
}
keyring = ERR_PTR(-ENOKEY);
@@ -1182,7 +1231,8 @@ static int keyring_detect_cycle(struct key *A, struct key *B)
.flags = (KEYRING_SEARCH_NO_STATE_CHECK |
KEYRING_SEARCH_NO_UPDATE_TIME |
KEYRING_SEARCH_NO_CHECK_PERM |
- KEYRING_SEARCH_DETECT_TOO_DEEP),
+ KEYRING_SEARCH_DETECT_TOO_DEEP |
+ KEYRING_SEARCH_RECURSE),
};
rcu_read_lock();
@@ -1192,13 +1242,67 @@ static int keyring_detect_cycle(struct key *A, struct key *B)
}
/*
+ * Lock keyring for link.
+ */
+int __key_link_lock(struct key *keyring,
+ const struct keyring_index_key *index_key)
+ __acquires(&keyring->sem)
+ __acquires(&keyring_serialise_link_lock)
+{
+ if (keyring->type != &key_type_keyring)
+ return -ENOTDIR;
+
+ down_write(&keyring->sem);
+
+ /* Serialise link/link calls to prevent parallel calls causing a cycle
+ * when linking two keyrings in opposite orders.
+ */
+ if (index_key->type == &key_type_keyring)
+ mutex_lock(&keyring_serialise_link_lock);
+
+ return 0;
+}
+
+/*
+ * Lock keyrings for move (link/unlink combination).
+ */
+int __key_move_lock(struct key *l_keyring, struct key *u_keyring,
+ const struct keyring_index_key *index_key)
+ __acquires(&l_keyring->sem)
+ __acquires(&u_keyring->sem)
+ __acquires(&keyring_serialise_link_lock)
+{
+ if (l_keyring->type != &key_type_keyring ||
+ u_keyring->type != &key_type_keyring)
+ return -ENOTDIR;
+
+ /* We have to be very careful here to take the keyring locks in the
+ * right order, lest we open ourselves to deadlocking against another
+ * move operation.
+ */
+ if (l_keyring < u_keyring) {
+ down_write(&l_keyring->sem);
+ down_write_nested(&u_keyring->sem, 1);
+ } else {
+ down_write(&u_keyring->sem);
+ down_write_nested(&l_keyring->sem, 1);
+ }
+
+ /* Serialise link/link calls to prevent parallel calls causing a cycle
+ * when linking two keyrings in opposite orders.
+ */
+ if (index_key->type == &key_type_keyring)
+ mutex_lock(&keyring_serialise_link_lock);
+
+ return 0;
+}
+
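The address comparison is the standard way to impose a global order on a pair of locks chosen at runtime. The same idiom in a self-contained userspace form, with pthread mutexes standing in for the keyring semaphores:

    #include <pthread.h>
    #include <stdint.h>

    /* Acquire two mutexes in address order so that concurrent callers
     * locking the same pair in opposite orders cannot ABBA-deadlock. */
    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
            if (a == b) {
                    pthread_mutex_lock(a);
                    return;
            }
            if ((uintptr_t)a > (uintptr_t)b) {
                    pthread_mutex_t *tmp = a;
                    a = b;
                    b = tmp;
            }
            pthread_mutex_lock(a);   /* lower address first */
            pthread_mutex_lock(b);
    }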
+/*
* Preallocate memory so that a key can be linked into a keyring.
*/
int __key_link_begin(struct key *keyring,
const struct keyring_index_key *index_key,
struct assoc_array_edit **_edit)
- __acquires(&keyring->sem)
- __acquires(&keyring_serialise_link_sem)
{
struct assoc_array_edit *edit;
int ret;
@@ -1207,20 +1311,13 @@ int __key_link_begin(struct key *keyring,
keyring->serial, index_key->type->name, index_key->description);
BUG_ON(index_key->desc_len == 0);
+ BUG_ON(*_edit != NULL);
- if (keyring->type != &key_type_keyring)
- return -ENOTDIR;
-
- down_write(&keyring->sem);
+ *_edit = NULL;
ret = -EKEYREVOKED;
if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
- goto error_krsem;
-
- /* serialise link/link calls to prevent parallel calls causing a cycle
- * when linking two keyring in opposite orders */
- if (index_key->type == &key_type_keyring)
- down_write(&keyring_serialise_link_sem);
+ goto error;
/* Create an edit script that will insert/replace the key in the
* keyring tree.
@@ -1231,7 +1328,7 @@ int __key_link_begin(struct key *keyring,
NULL);
if (IS_ERR(edit)) {
ret = PTR_ERR(edit);
- goto error_sem;
+ goto error;
}
/* If we're not replacing a link in-place then we're going to need some
@@ -1250,11 +1347,7 @@ int __key_link_begin(struct key *keyring,
error_cancel:
assoc_array_cancel_edit(edit);
-error_sem:
- if (index_key->type == &key_type_keyring)
- up_write(&keyring_serialise_link_sem);
-error_krsem:
- up_write(&keyring->sem);
+error:
kleave(" = %d", ret);
return ret;
}
@@ -1299,14 +1392,11 @@ void __key_link_end(struct key *keyring,
const struct keyring_index_key *index_key,
struct assoc_array_edit *edit)
__releases(&keyring->sem)
- __releases(&keyring_serialise_link_sem)
+ __releases(&keyring_serialise_link_lock)
{
BUG_ON(index_key->type == NULL);
kenter("%d,%s,", keyring->serial, index_key->type->name);
- if (index_key->type == &key_type_keyring)
- up_write(&keyring_serialise_link_sem);
-
if (edit) {
if (!edit->dead_leaf) {
key_payload_reserve(keyring,
@@ -1315,6 +1405,9 @@ void __key_link_end(struct key *keyring,
assoc_array_cancel_edit(edit);
}
up_write(&keyring->sem);
+
+ if (index_key->type == &key_type_keyring)
+ mutex_unlock(&keyring_serialise_link_lock);
}
/*
@@ -1350,7 +1443,7 @@ static int __key_link_check_restriction(struct key *keyring, struct key *key)
*/
int key_link(struct key *keyring, struct key *key)
{
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
int ret;
kenter("{%d,%d}", keyring->serial, refcount_read(&keyring->usage));
@@ -1358,22 +1451,88 @@ int key_link(struct key *keyring, struct key *key)
key_check(keyring);
key_check(key);
+ ret = __key_link_lock(keyring, &key->index_key);
+ if (ret < 0)
+ goto error;
+
ret = __key_link_begin(keyring, &key->index_key, &edit);
- if (ret == 0) {
- kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage));
- ret = __key_link_check_restriction(keyring, key);
- if (ret == 0)
- ret = __key_link_check_live_key(keyring, key);
- if (ret == 0)
- __key_link(key, &edit);
- __key_link_end(keyring, &key->index_key, edit);
- }
+ if (ret < 0)
+ goto error_end;
+ kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage));
+ ret = __key_link_check_restriction(keyring, key);
+ if (ret == 0)
+ ret = __key_link_check_live_key(keyring, key);
+ if (ret == 0)
+ __key_link(key, &edit);
+
+error_end:
+ __key_link_end(keyring, &key->index_key, edit);
+error:
kleave(" = %d {%d,%d}", ret, keyring->serial, refcount_read(&keyring->usage));
return ret;
}
EXPORT_SYMBOL(key_link);
+/*
+ * Lock a keyring for unlink.
+ */
+static int __key_unlink_lock(struct key *keyring)
+ __acquires(&keyring->sem)
+{
+ if (keyring->type != &key_type_keyring)
+ return -ENOTDIR;
+
+ down_write(&keyring->sem);
+ return 0;
+}
+
+/*
+ * Begin the process of unlinking a key from a keyring.
+ */
+static int __key_unlink_begin(struct key *keyring, struct key *key,
+ struct assoc_array_edit **_edit)
+{
+ struct assoc_array_edit *edit;
+
+ BUG_ON(*_edit != NULL);
+
+ edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+ &key->index_key);
+ if (IS_ERR(edit))
+ return PTR_ERR(edit);
+
+ if (!edit)
+ return -ENOENT;
+
+ *_edit = edit;
+ return 0;
+}
+
+/*
+ * Apply an unlink change.
+ */
+static void __key_unlink(struct key *keyring, struct key *key,
+ struct assoc_array_edit **_edit)
+{
+ assoc_array_apply_edit(*_edit);
+ *_edit = NULL;
+ key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
+}
+
+/*
+ * Finish unlinking a key from a keyring.
+ */
+static void __key_unlink_end(struct key *keyring,
+ struct key *key,
+ struct assoc_array_edit *edit)
+ __releases(&keyring->sem)
+{
+ if (edit)
+ assoc_array_cancel_edit(edit);
+ up_write(&keyring->sem);
+}
+
/**
* key_unlink - Unlink the first link to a key from a keyring.
* @keyring: The keyring to remove the link from.
@@ -1393,36 +1552,97 @@ EXPORT_SYMBOL(key_link);
*/
int key_unlink(struct key *keyring, struct key *key)
{
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
int ret;
key_check(keyring);
key_check(key);
- if (keyring->type != &key_type_keyring)
- return -ENOTDIR;
+ ret = __key_unlink_lock(keyring);
+ if (ret < 0)
+ return ret;
- down_write(&keyring->sem);
+ ret = __key_unlink_begin(keyring, key, &edit);
+ if (ret == 0)
+ __key_unlink(keyring, key, &edit);
+ __key_unlink_end(keyring, key, edit);
+ return ret;
+}
+EXPORT_SYMBOL(key_unlink);
- edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
- &key->index_key);
- if (IS_ERR(edit)) {
- ret = PTR_ERR(edit);
+/**
+ * key_move - Move a key from one keyring to another
+ * @key: The key to move
+ * @from_keyring: The keyring to remove the link from.
+ * @to_keyring: The keyring to make the link in.
+ * @flags: Qualifying flags, such as KEYCTL_MOVE_EXCL.
+ *
+ * Make a link in @to_keyring to a key, such that the keyring holds a reference
+ * on that key and the key can potentially be found by searching that keyring
+ * whilst simultaneously removing a link to the key from @from_keyring.
+ *
+ * This function will write-lock both keyrings' semaphores and will consume
+ * some of the user's key data quota to hold the link on @to_keyring.
+ *
+ * Returns 0 if successful, -ENOTDIR if either keyring isn't a keyring,
+ * -EKEYREVOKED if either keyring has been revoked, -ENFILE if the second
+ * keyring is full, -EDQUOT if there is insufficient key data quota remaining
+ * to add another link or -ENOMEM if there's insufficient memory. If
+ * KEYCTL_MOVE_EXCL is set, then -EEXIST will be returned if there's already a
+ * matching key in @to_keyring.
+ *
+ * It is assumed that the caller has checked that it is permitted for a link to
+ * be made (the keyring should have Write permission and the key Link
+ * permission).
+ */
+int key_move(struct key *key,
+ struct key *from_keyring,
+ struct key *to_keyring,
+ unsigned int flags)
+{
+ struct assoc_array_edit *from_edit = NULL, *to_edit = NULL;
+ int ret;
+
+ kenter("%d,%d,%d", key->serial, from_keyring->serial, to_keyring->serial);
+
+ if (from_keyring == to_keyring)
+ return 0;
+
+ key_check(key);
+ key_check(from_keyring);
+ key_check(to_keyring);
+
+ ret = __key_move_lock(from_keyring, to_keyring, &key->index_key);
+ if (ret < 0)
+ goto out;
+ ret = __key_unlink_begin(from_keyring, key, &from_edit);
+ if (ret < 0)
goto error;
- }
- ret = -ENOENT;
- if (edit == NULL)
+ ret = __key_link_begin(to_keyring, &key->index_key, &to_edit);
+ if (ret < 0)
goto error;
- assoc_array_apply_edit(edit);
- key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES);
- ret = 0;
+ ret = -EEXIST;
+ if (to_edit->dead_leaf && (flags & KEYCTL_MOVE_EXCL))
+ goto error;
+ ret = __key_link_check_restriction(to_keyring, key);
+ if (ret < 0)
+ goto error;
+ ret = __key_link_check_live_key(to_keyring, key);
+ if (ret < 0)
+ goto error;
+
+ __key_unlink(from_keyring, key, &from_edit);
+ __key_link(key, &to_edit);
error:
- up_write(&keyring->sem);
+ __key_link_end(to_keyring, &key->index_key, to_edit);
+ __key_unlink_end(from_keyring, key, from_edit);
+out:
+ kleave(" = %d", ret);
return ret;
}
-EXPORT_SYMBOL(key_unlink);
+EXPORT_SYMBOL(key_move);
/**
* keyring_clear - Clear a keyring
diff --git a/security/keys/permission.c b/security/keys/permission.c
index 085f907b64ac..fd8a5dc6910a 100644
--- a/security/keys/permission.c
+++ b/security/keys/permission.c
@@ -7,13 +7,67 @@
#include <linux/export.h>
#include <linux/security.h>
+#include <linux/user_namespace.h>
+#include <linux/uaccess.h>
#include "internal.h"
+struct key_acl default_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+EXPORT_SYMBOL(default_key_acl);
+
+struct key_acl joinable_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_LINK | KEY_ACE_JOIN),
+ }
+};
+EXPORT_SYMBOL(joinable_keyring_acl);
+
+struct key_acl internal_key_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH),
+ }
+};
+EXPORT_SYMBOL(internal_key_acl);
+
+struct key_acl internal_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH),
+ }
+};
+EXPORT_SYMBOL(internal_keyring_acl);
+
+struct key_acl internal_writable_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE | KEY_ACE_SEARCH),
+ }
+};
+EXPORT_SYMBOL(internal_writable_keyring_acl);
+
/**
* key_task_permission - Check a key can be used
* @key_ref: The key to check.
* @cred: The credentials to use.
- * @perm: The permissions to check for.
+ * @desired_perm: The permission to check for.
*
* Check to see whether permission is granted to use a key in the desired way,
* but permit the security modules to override.
@@ -24,53 +78,73 @@
* permissions bits or the LSM check.
*/
int key_task_permission(const key_ref_t key_ref, const struct cred *cred,
- unsigned perm)
+ unsigned int desired_perm)
{
- struct key *key;
- key_perm_t kperm;
- int ret;
+ const struct key_acl *acl;
+ const struct key *key;
+ unsigned int allow = 0;
+ int i;
+
+ BUILD_BUG_ON(KEY_NEED_VIEW != KEY_ACE_VIEW ||
+ KEY_NEED_READ != KEY_ACE_READ ||
+ KEY_NEED_WRITE != KEY_ACE_WRITE ||
+ KEY_NEED_SEARCH != KEY_ACE_SEARCH ||
+ KEY_NEED_LINK != KEY_ACE_LINK ||
+ KEY_NEED_SETSEC != KEY_ACE_SET_SECURITY ||
+ KEY_NEED_INVAL != KEY_ACE_INVAL ||
+ KEY_NEED_REVOKE != KEY_ACE_REVOKE ||
+ KEY_NEED_JOIN != KEY_ACE_JOIN ||
+ KEY_NEED_CLEAR != KEY_ACE_CLEAR);
key = key_ref_to_ptr(key_ref);
- /* use the second 8-bits of permissions for keys the caller owns */
- if (uid_eq(key->uid, cred->fsuid)) {
- kperm = key->perm >> 16;
- goto use_these_perms;
- }
+ rcu_read_lock();
- /* use the third 8-bits of permissions for keys the caller has a group
- * membership in common with */
- if (gid_valid(key->gid) && key->perm & KEY_GRP_ALL) {
- if (gid_eq(key->gid, cred->fsgid)) {
- kperm = key->perm >> 8;
- goto use_these_perms;
- }
+ acl = rcu_dereference(key->acl);
+ if (!acl || acl->nr_ace == 0)
+ goto no_access_rcu;
- ret = groups_search(cred->group_info, key->gid);
- if (ret) {
- kperm = key->perm >> 8;
- goto use_these_perms;
+ for (i = 0; i < acl->nr_ace; i++) {
+ const struct key_ace *ace = &acl->aces[i];
+
+ switch (ace->type) {
+ case KEY_ACE_SUBJ_STANDARD:
+ switch (ace->subject_id) {
+ case KEY_ACE_POSSESSOR:
+ if (is_key_possessed(key_ref))
+ allow |= ace->perm;
+ break;
+ case KEY_ACE_OWNER:
+ if (uid_eq(key->uid, cred->fsuid))
+ allow |= ace->perm;
+ break;
+ case KEY_ACE_GROUP:
+ if (gid_valid(key->gid)) {
+ if (gid_eq(key->gid, cred->fsgid))
+ allow |= ace->perm;
+ else if (groups_search(cred->group_info, key->gid))
+ allow |= ace->perm;
+ }
+ break;
+ case KEY_ACE_EVERYONE:
+ allow |= ace->perm;
+ break;
+ }
+ break;
}
}
- /* otherwise use the least-significant 8-bits */
- kperm = key->perm;
-
-use_these_perms:
+ rcu_read_unlock();
- /* use the top 8-bits of permissions for keys the caller possesses
- * - possessor permissions are additive with other permissions
- */
- if (is_key_possessed(key_ref))
- kperm |= key->perm >> 24;
+ if (!(allow & desired_perm))
+ goto no_access;
- kperm = kperm & perm & KEY_NEED_ALL;
+ return security_key_permission(key_ref, cred, desired_perm);
- if (kperm != perm)
- return -EACCES;
-
- /* let LSM be the final arbiter */
- return security_key_permission(key_ref, cred, perm);
+no_access_rcu:
+ rcu_read_unlock();
+no_access:
+ return -EACCES;
}
EXPORT_SYMBOL(key_task_permission);
@@ -104,3 +178,218 @@ int key_validate(const struct key *key)
return 0;
}
EXPORT_SYMBOL(key_validate);
+
+/*
+ * Roughly render an ACL to an old-style permissions mask. We cannot
+ * accurately render what the ACL grants, particularly if it has ACEs that
+ * represent subjects outside of { possessor, user, group, other }.
+ */
+unsigned int key_acl_to_perm(const struct key_acl *acl)
+{
+ unsigned int perm = 0, tperm;
+ int i;
+
+ BUILD_BUG_ON(KEY_OTH_VIEW != KEY_ACE_VIEW ||
+ KEY_OTH_READ != KEY_ACE_READ ||
+ KEY_OTH_WRITE != KEY_ACE_WRITE ||
+ KEY_OTH_SEARCH != KEY_ACE_SEARCH ||
+ KEY_OTH_LINK != KEY_ACE_LINK ||
+ KEY_OTH_SETATTR != KEY_ACE_SET_SECURITY);
+
+ if (!acl || acl->nr_ace == 0)
+ return 0;
+
+ for (i = 0; i < acl->nr_ace; i++) {
+ const struct key_ace *ace = &acl->aces[i];
+
+ switch (ace->type) {
+ case KEY_ACE_SUBJ_STANDARD:
+ tperm = ace->perm & KEY_OTH_ALL;
+
+ /* Invalidation and joining were allowed by SEARCH */
+ if (ace->perm & (KEY_ACE_INVAL | KEY_ACE_JOIN))
+ tperm |= KEY_OTH_SEARCH;
+
+ /* Revocation was allowed by either SETATTR or WRITE */
+ if ((ace->perm & KEY_ACE_REVOKE) && !(tperm & KEY_OTH_SETATTR))
+ tperm |= KEY_OTH_WRITE;
+
+ /* Clearing was allowed by WRITE */
+ if (ace->perm & KEY_ACE_CLEAR)
+ tperm |= KEY_OTH_WRITE;
+
+ switch (ace->subject_id) {
+ case KEY_ACE_POSSESSOR:
+ perm |= tperm << 24;
+ break;
+ case KEY_ACE_OWNER:
+ perm |= tperm << 16;
+ break;
+ case KEY_ACE_GROUP:
+ perm |= tperm << 8;
+ break;
+ case KEY_ACE_EVERYONE:
+ perm |= tperm << 0;
+ break;
+ }
+ }
+ }
+
+ return perm;
+}
+
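Worked example: default_key_acl above carries a possessor ACE of KEY_ACE__PERMS & ~KEY_ACE_JOIN and an owner ACE of KEY_ACE_VIEW. The possessor ACE already contains all six old-style bits, so the Revoke/Inval/Clear upgrades change nothing, and the rendered mask comes out as roughly KEY_POS_ALL | KEY_USR_VIEW (old ABI mask values assumed for illustration):

    #include <stdio.h>

    int main(void)
    {
            /* default_key_acl rendered by the logic above: possessor keeps
             * the six old-style bits (0x3f), owner keeps View (0x01). */
            unsigned int perm = (0x3fu << 24) | (0x01u << 16);

            printf("%#010x\n", perm);  /* 0x3f010000 = KEY_POS_ALL|KEY_USR_VIEW */
            return 0;
    }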
+/*
+ * Release a reference on a key's ACL, destroying it if that was the last one.
+ */
+void key_put_acl(struct key_acl *acl)
+{
+ if (acl && refcount_dec_and_test(&acl->usage))
+ kfree_rcu(acl, rcu);
+}
+
+/*
+ * Try to set the ACL. This either attaches or discards the proposed ACL.
+ */
+long key_set_acl(struct key *key, struct key_acl *acl)
+{
+ int i;
+
+ /* If we're not the sysadmin, we can only change a key that we own. */
+ if (!capable(CAP_SYS_ADMIN) && !uid_eq(key->uid, current_fsuid())) {
+ key_put_acl(acl);
+ return -EACCES;
+ }
+
+ for (i = 0; i < acl->nr_ace; i++) {
+ const struct key_ace *ace = &acl->aces[i];
+ if (ace->type == KEY_ACE_SUBJ_STANDARD &&
+ ace->subject_id == KEY_ACE_POSSESSOR) {
+ if (ace->perm & KEY_ACE_VIEW)
+ acl->possessor_viewable = true;
+ break;
+ }
+ }
+
+ rcu_swap_protected(key->acl, acl, lockdep_is_held(&key->sem));
+ key_put_acl(acl);
+ return 0;
+}
+
+/*
+ * Allocate a new ACL, adjusting the number of ACE slots by @nr and omitting
+ * the ACE at index @skip (if @skip is not -1).
+ */
+static struct key_acl *key_alloc_acl(const struct key_acl *old_acl, int nr, int skip)
+{
+ struct key_acl *acl;
+ int nr_ace, i, j = 0;
+
+ nr_ace = old_acl->nr_ace + nr;
+ if (nr_ace > 16)
+ return ERR_PTR(-EINVAL);
+
+ acl = kzalloc(struct_size(acl, aces, nr_ace), GFP_KERNEL);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&acl->usage, 1);
+ acl->nr_ace = nr_ace;
+ for (i = 0; i < old_acl->nr_ace; i++) {
+ if (i == skip)
+ continue;
+ acl->aces[j] = old_acl->aces[i];
+ j++;
+ }
+ return acl;
+}
+
+/*
+ * Generate the revised ACL.
+ */
+static long key_change_acl(struct key *key, struct key_ace *new_ace)
+{
+ struct key_acl *acl, *old;
+ int i;
+
+ old = rcu_dereference_protected(key->acl, lockdep_is_held(&key->sem));
+
+ for (i = 0; i < old->nr_ace; i++)
+ if (old->aces[i].type == new_ace->type &&
+ old->aces[i].subject_id == new_ace->subject_id)
+ goto found_match;
+
+ if (new_ace->perm == 0)
+ return 0; /* No permissions to remove. Add deny record? */
+
+ acl = key_alloc_acl(old, 1, -1);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ acl->aces[i] = *new_ace;
+ goto change;
+
+found_match:
+ if (new_ace->perm == 0)
+ goto delete_ace;
+ if (new_ace->perm == old->aces[i].perm)
+ return 0;
+ acl = key_alloc_acl(old, 0, -1);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ acl->aces[i].perm = new_ace->perm;
+ goto change;
+
+delete_ace:
+ acl = key_alloc_acl(old, -1, i);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ goto change;
+
+change:
+ return key_set_acl(key, acl);
+}
+
+/*
+ * Add, alter or remove (if perm == 0) an ACE in a key's ACL.
+ */
+long keyctl_grant_permission(key_serial_t keyid,
+ enum key_ace_subject_type type,
+ unsigned int subject,
+ unsigned int perm)
+{
+ struct key_ace new_ace;
+ struct key *key;
+ key_ref_t key_ref;
+ long ret;
+
+ new_ace.type = type;
+ new_ace.perm = perm;
+
+ switch (type) {
+ case KEY_ACE_SUBJ_STANDARD:
+ if (subject >= nr__key_ace_standard_subject)
+ return -ENOENT;
+ new_ace.subject_id = subject;
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_SETSEC);
+ if (IS_ERR(key_ref)) {
+ ret = PTR_ERR(key_ref);
+ goto error;
+ }
+
+ key = key_ref_to_ptr(key_ref);
+
+ down_write(&key->sem);
+
+ /* If we're not the sysadmin, we can only change a key that we own */
+ ret = -EACCES;
+ if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid()))
+ ret = key_change_acl(key, &new_ace);
+ up_write(&key->sem);
+ key_put(key);
+error:
+ return ret;
+}
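A userspace sketch of invoking this, with KEYCTL_GRANT_PERMISSION and the KEY_ACE_* constants assumed to come from this series' UAPI header; passing perm == 0 deletes the subject's ACE instead, per the comment above:

    #include <stdint.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/keyctl.h>

    /* Grant Everyone View permission on a key. */
    static long grant_view_to_everyone(int32_t key)
    {
            return syscall(__NR_keyctl, KEYCTL_GRANT_PERMISSION, key,
                           KEY_ACE_SUBJ_STANDARD, KEY_ACE_EVERYONE,
                           KEY_ACE_VIEW);
    }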
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
index da9a0f42b795..8171c90d4c9a 100644
--- a/security/keys/persistent.c
+++ b/security/keys/persistent.c
@@ -12,6 +12,27 @@
unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+static struct key_acl persistent_register_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_SEARCH | KEY_ACE_WRITE),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
+static struct key_acl persistent_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE |
+ KEY_ACE_SEARCH | KEY_ACE_LINK |
+ KEY_ACE_CLEAR | KEY_ACE_INVAL),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
/*
* Create the persistent keyring register for the current user namespace.
*
@@ -22,8 +43,7 @@ static int key_create_persistent_register(struct user_namespace *ns)
struct key *reg = keyring_alloc(".persistent_register",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
+ &persistent_register_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -56,8 +76,7 @@ static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
persistent = keyring_alloc(index_key->description,
uid, INVALID_GID, current_cred(),
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
+ &persistent_keyring_acl,
KEY_ALLOC_NOT_IN_QUOTA, NULL,
ns->persistent_keyring_register);
if (IS_ERR(persistent))
@@ -80,15 +99,17 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
long ret;
/* Look in the register if it exists */
+ memset(&index_key, 0, sizeof(index_key));
index_key.type = &key_type_keyring;
index_key.description = buf;
index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+ key_set_index_key(&index_key);
if (ns->persistent_keyring_register) {
reg_ref = make_key_ref(ns->persistent_keyring_register, true);
- down_read(&ns->persistent_keyring_register_sem);
+ down_read(&ns->keyring_sem);
persistent_ref = find_key_to_update(reg_ref, &index_key);
- up_read(&ns->persistent_keyring_register_sem);
+ up_read(&ns->keyring_sem);
if (persistent_ref)
goto found;
@@ -97,9 +118,9 @@ static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
/* It wasn't in the register, so we'll need to create it. We might
* also need to create the register.
*/
- down_write(&ns->persistent_keyring_register_sem);
+ down_write(&ns->keyring_sem);
persistent_ref = key_create_persistent(ns, uid, &index_key);
- up_write(&ns->persistent_keyring_register_sem);
+ up_write(&ns->keyring_sem);
if (!IS_ERR(persistent_ref))
goto found;
diff --git a/security/keys/proc.c b/security/keys/proc.c
index 4e3266a2529e..b394ad1e874b 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -110,11 +110,13 @@ static struct key *find_ge_key(struct seq_file *p, key_serial_t id)
}
static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
+ __acquires(rcu)
__acquires(key_serial_lock)
{
key_serial_t pos = *_pos;
struct key *key;
+ rcu_read_lock();
spin_lock(&key_serial_lock);
if (*_pos > INT_MAX)
@@ -144,12 +146,15 @@ static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
static void proc_keys_stop(struct seq_file *p, void *v)
__releases(key_serial_lock)
+ __releases(rcu)
{
spin_unlock(&key_serial_lock);
+ rcu_read_unlock();
}
static int proc_keys_show(struct seq_file *m, void *v)
{
+ const struct key_acl *acl;
struct rb_node *_p = v;
struct key *key = rb_entry(_p, struct key, serial_node);
unsigned long flags;
@@ -157,6 +162,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
time64_t now, expiry;
char xbuf[16];
short state;
+ bool check_pos;
u64 timo;
int rc;
@@ -166,16 +172,19 @@ static int proc_keys_show(struct seq_file *m, void *v)
.match_data.cmp = lookup_user_key_possessed,
.match_data.raw_data = key,
.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
- .flags = KEYRING_SEARCH_NO_STATE_CHECK,
+ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
+ KEYRING_SEARCH_RECURSE),
};
- key_ref = make_key_ref(key, 0);
+ acl = rcu_dereference(key->acl);
+ check_pos = acl->possessor_viewable;
/* determine if the key is possessed by this process (a test we can
* skip if the key does not indicate the possessor can view it)
*/
- if (key->perm & KEY_POS_VIEW) {
- skey_ref = search_my_process_keyrings(&ctx);
+ key_ref = make_key_ref(key, 0);
+ if (check_pos) {
+ skey_ref = search_cred_keyrings_rcu(&ctx);
if (!IS_ERR(skey_ref)) {
key_ref_put(skey_ref);
key_ref = make_key_ref(key, 1);
@@ -185,12 +194,10 @@ static int proc_keys_show(struct seq_file *m, void *v)
/* check whether the current task is allowed to view the key */
rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
if (rc < 0)
- return 0;
+ goto out;
now = ktime_get_real_seconds();
- rcu_read_lock();
-
/* come up with a suitable timeout value */
expiry = READ_ONCE(key->expiry);
if (expiry == 0) {
@@ -229,7 +236,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
showflag(flags, 'i', KEY_FLAG_INVALIDATED),
refcount_read(&key->usage),
xbuf,
- key->perm,
+ key_acl_to_perm(acl),
from_kuid_munged(seq_user_ns(m), key->uid),
from_kgid_munged(seq_user_ns(m), key->gid),
key->type->name);
@@ -240,7 +247,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
key->type->describe(key, m);
seq_putc(m, '\n');
- rcu_read_unlock();
+out:
return 0;
}
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 0b9406bf60e5..aa3bfcadbc66 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -15,15 +15,13 @@
#include <linux/security.h>
#include <linux/user_namespace.h>
#include <linux/uaccess.h>
+#include <linux/init_task.h>
#include <keys/request_key_auth-type.h>
#include "internal.h"
/* Session keyring create vs join semaphore */
static DEFINE_MUTEX(key_session_mutex);
-/* User keyring creation semaphore */
-static DEFINE_MUTEX(key_user_keyring_mutex);
-
/* The root user's tracking struct */
struct key_user root_key_user = {
.usage = REFCOUNT_INIT(3),
@@ -34,100 +32,222 @@ struct key_user root_key_user = {
.uid = GLOBAL_ROOT_UID,
};
+static struct key_acl user_reg_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .possessor_viewable = true,
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_WRITE | KEY_ACE_SEARCH),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
+static struct key_acl user_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .possessor_viewable = true,
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_WRITE |
+ KEY_ACE_SEARCH | KEY_ACE_LINK),
+ KEY_OWNER_ACE(KEY_ACE__PERMS & ~(KEY_ACE_JOIN | KEY_ACE_SET_SECURITY)),
+ }
+};
+
+static struct key_acl session_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .possessor_viewable = true,
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~KEY_ACE_JOIN),
+ KEY_OWNER_ACE(KEY_ACE_VIEW | KEY_ACE_READ),
+ }
+};
+
+static struct key_acl thread_and_process_keyring_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .possessor_viewable = true,
+ .nr_ace = 2,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE__PERMS & ~(KEY_ACE_JOIN | KEY_ACE_SET_SECURITY)),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
/*
- * Install the user and user session keyrings for the current process's UID.
+ * Get or create a user register keyring.
*/
-int install_user_keyrings(void)
+static struct key *get_user_register(struct user_namespace *user_ns)
{
- struct user_struct *user;
- const struct cred *cred;
- struct key *uid_keyring, *session_keyring;
- key_perm_t user_keyring_perm;
- char buf[20];
- int ret;
- uid_t uid;
+ struct key *reg_keyring = READ_ONCE(user_ns->user_keyring_register);
- user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL;
- cred = current_cred();
- user = cred->user;
- uid = from_kuid(cred->user_ns, user->uid);
+ if (reg_keyring)
+ return reg_keyring;
- kenter("%p{%u}", user, uid);
+ down_write(&user_ns->keyring_sem);
- if (READ_ONCE(user->uid_keyring) && READ_ONCE(user->session_keyring)) {
- kleave(" = 0 [exist]");
- return 0;
+ /* Make sure there's a register keyring. It gets owned by the
+ * user_namespace's owner.
+ */
+ reg_keyring = user_ns->user_keyring_register;
+ if (!reg_keyring) {
+ reg_keyring = keyring_alloc(".user_reg",
+ user_ns->owner, INVALID_GID,
+ &init_cred, &user_reg_keyring_acl,
+ 0, NULL, NULL);
+ if (!IS_ERR(reg_keyring))
+ smp_store_release(&user_ns->user_keyring_register,
+ reg_keyring);
}
- mutex_lock(&key_user_keyring_mutex);
- ret = 0;
+ up_write(&user_ns->keyring_sem);
+
+ /* We don't return a ref since the keyring is pinned by the user_ns */
+ return reg_keyring;
+}
+
+/*
+ * Look up the user and user session keyrings for the current process's UID,
+ * creating them if they don't exist.
+ */
+int look_up_user_keyrings(struct key **_user_keyring,
+ struct key **_user_session_keyring)
+{
+ const struct cred *cred = current_cred();
+ struct user_namespace *user_ns = current_user_ns();
+ struct key *reg_keyring, *uid_keyring, *session_keyring;
+ key_ref_t uid_keyring_r, session_keyring_r;
+ uid_t uid = from_kuid(user_ns, cred->user->uid);
+ char buf[20];
+ int ret;
- if (!user->uid_keyring) {
- /* get the UID-specific keyring
- * - there may be one in existence already as it may have been
- * pinned by a session, but the user_struct pointing to it
- * may have been destroyed by setuid */
- sprintf(buf, "_uid.%u", uid);
+ kenter("%u", uid);
+
+ reg_keyring = get_user_register(user_ns);
+ if (IS_ERR(reg_keyring))
+ return PTR_ERR(reg_keyring);
+
+ down_write(&user_ns->keyring_sem);
+ ret = 0;
- uid_keyring = find_keyring_by_name(buf, true);
+ /* Get the user keyring. Note that there may be one in existence
+ * already as it may have been pinned by a session, but the user_struct
+ * pointing to it may have been destroyed by setuid.
+ */
+ snprintf(buf, sizeof(buf), "_uid.%u", uid);
+ uid_keyring_r = keyring_search(make_key_ref(reg_keyring, true),
+ &key_type_keyring, buf, false);
+ kdebug("_uid %p", uid_keyring_r);
+ if (uid_keyring_r == ERR_PTR(-EAGAIN)) {
+ uid_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID,
+ cred, &user_keyring_acl,
+ KEY_ALLOC_UID_KEYRING |
+ KEY_ALLOC_IN_QUOTA,
+ NULL, reg_keyring);
if (IS_ERR(uid_keyring)) {
- uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
- cred, user_keyring_perm,
- KEY_ALLOC_UID_KEYRING |
- KEY_ALLOC_IN_QUOTA,
- NULL, NULL);
- if (IS_ERR(uid_keyring)) {
- ret = PTR_ERR(uid_keyring);
- goto error;
- }
+ ret = PTR_ERR(uid_keyring);
+ goto error;
}
+ } else if (IS_ERR(uid_keyring_r)) {
+ ret = PTR_ERR(uid_keyring_r);
+ goto error;
+ } else {
+ uid_keyring = key_ref_to_ptr(uid_keyring_r);
+ }
- /* get a default session keyring (which might also exist
- * already) */
- sprintf(buf, "_uid_ses.%u", uid);
-
- session_keyring = find_keyring_by_name(buf, true);
+ /* Get a default session keyring (which might also exist already) */
+ snprintf(buf, sizeof(buf), "_uid_ses.%u", uid);
+ session_keyring_r = keyring_search(make_key_ref(reg_keyring, true),
+ &key_type_keyring, buf, false);
+ kdebug("_uid_ses %p", session_keyring_r);
+ if (session_keyring_r == ERR_PTR(-EAGAIN)) {
+ session_keyring = keyring_alloc(buf, cred->user->uid, INVALID_GID,
+ cred, &user_keyring_acl,
+ KEY_ALLOC_UID_KEYRING |
+ KEY_ALLOC_IN_QUOTA,
+ NULL, NULL);
if (IS_ERR(session_keyring)) {
- session_keyring =
- keyring_alloc(buf, user->uid, INVALID_GID,
- cred, user_keyring_perm,
- KEY_ALLOC_UID_KEYRING |
- KEY_ALLOC_IN_QUOTA,
- NULL, NULL);
- if (IS_ERR(session_keyring)) {
- ret = PTR_ERR(session_keyring);
- goto error_release;
- }
-
- /* we install a link from the user session keyring to
- * the user keyring */
- ret = key_link(session_keyring, uid_keyring);
- if (ret < 0)
- goto error_release_both;
+ ret = PTR_ERR(session_keyring);
+ goto error_release;
}
- /* install the keyrings */
- /* paired with READ_ONCE() */
- smp_store_release(&user->uid_keyring, uid_keyring);
- /* paired with READ_ONCE() */
- smp_store_release(&user->session_keyring, session_keyring);
+ /* We install a link from the user session keyring to
+ * the user keyring.
+ */
+ ret = key_link(session_keyring, uid_keyring);
+ if (ret < 0)
+ goto error_release_session;
+
+ /* And only then link the user-session keyring to the
+ * register.
+ */
+ ret = key_link(reg_keyring, session_keyring);
+ if (ret < 0)
+ goto error_release_session;
+ } else if (IS_ERR(session_keyring_r)) {
+ ret = PTR_ERR(session_keyring_r);
+ goto error_release;
+ } else {
+ session_keyring = key_ref_to_ptr(session_keyring_r);
}
- mutex_unlock(&key_user_keyring_mutex);
+ up_write(&user_ns->keyring_sem);
+
+ if (_user_session_keyring)
+ *_user_session_keyring = session_keyring;
+ else
+ key_put(session_keyring);
+ if (_user_keyring)
+ *_user_keyring = uid_keyring;
+ else
+ key_put(uid_keyring);
kleave(" = 0");
return 0;
-error_release_both:
+error_release_session:
key_put(session_keyring);
error_release:
key_put(uid_keyring);
error:
- mutex_unlock(&key_user_keyring_mutex);
+ up_write(&user_ns->keyring_sem);
kleave(" = %d", ret);
return ret;
}
/*
+ * Get the user session keyring if it exists, but don't create it if it
+ * doesn't.
+ */
+struct key *get_user_session_keyring_rcu(const struct cred *cred)
+{
+ struct key *reg_keyring = READ_ONCE(cred->user_ns->user_keyring_register);
+ key_ref_t session_keyring_r;
+ char buf[20];
+
+ struct keyring_search_context ctx = {
+ .index_key.type = &key_type_keyring,
+ .index_key.description = buf,
+ .cred = cred,
+ .match_data.cmp = key_default_cmp,
+ .match_data.raw_data = buf,
+ .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+ .flags = KEYRING_SEARCH_DO_STATE_CHECK,
+ };
+
+ if (!reg_keyring)
+ return NULL;
+
+ ctx.index_key.desc_len = snprintf(buf, sizeof(buf), "_uid_ses.%u",
+ from_kuid(cred->user_ns,
+ cred->user->uid));
+
+ session_keyring_r = keyring_search_rcu(make_key_ref(reg_keyring, true),
+ &ctx);
+ if (IS_ERR(session_keyring_r))
+ return NULL;
+ return key_ref_to_ptr(session_keyring_r);
+}
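
get_user_session_keyring_rcu() is a lookup-only helper: it never creates the
keyring and must be called with the RCU read lock held. On success it returns
its own counted reference, which survives rcu_read_unlock() and must be
dropped with key_put(). A sketch of the expected calling convention,
mirroring the call site in search_cred_keyrings_rcu() below:

	/* Illustrative only; assumes the caller has a valid cred. */
	struct key *user_session;

	rcu_read_lock();
	user_session = get_user_session_keyring_rcu(current_cred());
	rcu_read_unlock();

	if (user_session) {
		/* ... search or link against it ... */
		key_put(user_session);
	}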
+
+/*
* Install a thread keyring to the given credentials struct if it didn't have
* one already. This is allowed to overrun the quota.
*
@@ -141,7 +261,7 @@ int install_thread_keyring_to_cred(struct cred *new)
return 0;
keyring = keyring_alloc("_tid", new->uid, new->gid, new,
- KEY_POS_ALL | KEY_USR_VIEW,
+ &thread_and_process_keyring_acl,
KEY_ALLOC_QUOTA_OVERRUN,
NULL, NULL);
if (IS_ERR(keyring))
@@ -188,7 +308,7 @@ int install_process_keyring_to_cred(struct cred *new)
return 0;
keyring = keyring_alloc("_pid", new->uid, new->gid, new,
- KEY_POS_ALL | KEY_USR_VIEW,
+ &thread_and_process_keyring_acl,
KEY_ALLOC_QUOTA_OVERRUN,
NULL, NULL);
if (IS_ERR(keyring))
@@ -243,8 +363,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
flags = KEY_ALLOC_IN_QUOTA;
keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
- KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
- flags, NULL, NULL);
+ &session_keyring_acl, flags, NULL, NULL);
if (IS_ERR(keyring))
return PTR_ERR(keyring);
} else {
@@ -289,34 +408,33 @@ static int install_session_keyring(struct key *keyring)
/*
* Handle the fsuid changing.
*/
-void key_fsuid_changed(struct task_struct *tsk)
+void key_fsuid_changed(struct cred *new_cred)
{
/* update the ownership of the thread keyring */
- BUG_ON(!tsk->cred);
- if (tsk->cred->thread_keyring) {
- down_write(&tsk->cred->thread_keyring->sem);
- tsk->cred->thread_keyring->uid = tsk->cred->fsuid;
- up_write(&tsk->cred->thread_keyring->sem);
+ if (new_cred->thread_keyring) {
+ down_write(&new_cred->thread_keyring->sem);
+ new_cred->thread_keyring->uid = new_cred->fsuid;
+ up_write(&new_cred->thread_keyring->sem);
}
}
/*
* Handle the fsgid changing.
*/
-void key_fsgid_changed(struct task_struct *tsk)
+void key_fsgid_changed(struct cred *new_cred)
{
/* update the ownership of the thread keyring */
- BUG_ON(!tsk->cred);
- if (tsk->cred->thread_keyring) {
- down_write(&tsk->cred->thread_keyring->sem);
- tsk->cred->thread_keyring->gid = tsk->cred->fsgid;
- up_write(&tsk->cred->thread_keyring->sem);
+ if (new_cred->thread_keyring) {
+ down_write(&new_cred->thread_keyring->sem);
+ new_cred->thread_keyring->gid = new_cred->fsgid;
+ up_write(&new_cred->thread_keyring->sem);
}
}
/*
* Search the process keyrings attached to the supplied cred for the first
- * matching key.
+ * matching key under RCU conditions (the caller must be holding the RCU read
+ * lock).
*
* The search criteria are the type and the match function. The description is
* given to the match function as a parameter, but doesn't otherwise influence
@@ -335,8 +453,9 @@ void key_fsgid_changed(struct task_struct *tsk)
* In the case of a successful return, the possession attribute is set on the
* returned key reference.
*/
-key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
+key_ref_t search_cred_keyrings_rcu(struct keyring_search_context *ctx)
{
+ struct key *user_session;
key_ref_t key_ref, ret, err;
const struct cred *cred = ctx->cred;
@@ -353,7 +472,7 @@ key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
/* search the thread keyring first */
if (cred->thread_keyring) {
- key_ref = keyring_search_aux(
+ key_ref = keyring_search_rcu(
make_key_ref(cred->thread_keyring, 1), ctx);
if (!IS_ERR(key_ref))
goto found;
@@ -371,7 +490,7 @@ key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
/* search the process keyring second */
if (cred->process_keyring) {
- key_ref = keyring_search_aux(
+ key_ref = keyring_search_rcu(
make_key_ref(cred->process_keyring, 1), ctx);
if (!IS_ERR(key_ref))
goto found;
@@ -392,7 +511,7 @@ key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
/* search the session keyring */
if (cred->session_keyring) {
- key_ref = keyring_search_aux(
+ key_ref = keyring_search_rcu(
make_key_ref(cred->session_keyring, 1), ctx);
if (!IS_ERR(key_ref))
@@ -412,10 +531,11 @@ key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
}
}
/* or search the user-session keyring */
- else if (READ_ONCE(cred->user->session_keyring)) {
- key_ref = keyring_search_aux(
- make_key_ref(READ_ONCE(cred->user->session_keyring), 1),
- ctx);
+ else if ((user_session = get_user_session_keyring_rcu(cred))) {
+ key_ref = keyring_search_rcu(make_key_ref(user_session, 1),
+ ctx);
+ key_put(user_session);
+
if (!IS_ERR(key_ref))
goto found;
@@ -446,16 +566,16 @@ found:
* the keys attached to the assumed authorisation key using its credentials if
* one is available.
*
- * Return same as search_my_process_keyrings().
+ * The caller must be holding the RCU read lock.
+ *
+ * Return same as search_cred_keyrings_rcu().
*/
-key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
+key_ref_t search_process_keyrings_rcu(struct keyring_search_context *ctx)
{
struct request_key_auth *rka;
key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
- might_sleep();
-
- key_ref = search_my_process_keyrings(ctx);
+ key_ref = search_cred_keyrings_rcu(ctx);
if (!IS_ERR(key_ref))
goto found;
err = key_ref;
@@ -470,24 +590,17 @@ key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
) {
const struct cred *cred = ctx->cred;
- /* defend against the auth key being revoked */
- down_read(&cred->request_key_auth->sem);
-
- if (key_validate(ctx->cred->request_key_auth) == 0) {
+ if (key_validate(cred->request_key_auth) == 0) {
rka = ctx->cred->request_key_auth->payload.data[0];
+			/* was search_process_keyrings() [i.e. recursive] */
ctx->cred = rka->cred;
- key_ref = search_process_keyrings(ctx);
+ key_ref = search_cred_keyrings_rcu(ctx);
ctx->cred = cred;
- up_read(&cred->request_key_auth->sem);
-
if (!IS_ERR(key_ref))
goto found;
-
ret = key_ref;
- } else {
- up_read(&cred->request_key_auth->sem);
}
}
@@ -502,7 +615,6 @@ key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
found:
return key_ref;
}
-
/*
* See if the key we're looking at is the target key.
*/
@@ -531,15 +643,16 @@ bool lookup_user_key_possessed(const struct key *key,
* returned key reference.
*/
key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
- key_perm_t perm)
+ unsigned int desired_perm)
{
struct keyring_search_context ctx = {
.match_data.cmp = lookup_user_key_possessed,
.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
- .flags = KEYRING_SEARCH_NO_STATE_CHECK,
+ .flags = (KEYRING_SEARCH_NO_STATE_CHECK |
+ KEYRING_SEARCH_RECURSE),
};
struct request_key_auth *rka;
- struct key *key;
+ struct key *key, *user_session;
key_ref_t key_ref, skey_ref;
int ret;
@@ -588,20 +701,20 @@ try_again:
if (!ctx.cred->session_keyring) {
/* always install a session keyring upon access if one
* doesn't exist yet */
- ret = install_user_keyrings();
+ ret = look_up_user_keyrings(NULL, &user_session);
if (ret < 0)
goto error;
if (lflags & KEY_LOOKUP_CREATE)
ret = join_session_keyring(NULL);
else
- ret = install_session_keyring(
- ctx.cred->user->session_keyring);
+ ret = install_session_keyring(user_session);
+ key_put(user_session);
if (ret < 0)
goto error;
goto reget_creds;
- } else if (ctx.cred->session_keyring ==
- READ_ONCE(ctx.cred->user->session_keyring) &&
+ } else if (test_bit(KEY_FLAG_UID_KEYRING,
+ &ctx.cred->session_keyring->flags) &&
lflags & KEY_LOOKUP_CREATE) {
ret = join_session_keyring(NULL);
if (ret < 0)
@@ -615,26 +728,16 @@ try_again:
break;
case KEY_SPEC_USER_KEYRING:
- if (!READ_ONCE(ctx.cred->user->uid_keyring)) {
- ret = install_user_keyrings();
- if (ret < 0)
- goto error;
- }
-
- key = ctx.cred->user->uid_keyring;
- __key_get(key);
+ ret = look_up_user_keyrings(&key, NULL);
+ if (ret < 0)
+ goto error;
key_ref = make_key_ref(key, 1);
break;
case KEY_SPEC_USER_SESSION_KEYRING:
- if (!READ_ONCE(ctx.cred->user->session_keyring)) {
- ret = install_user_keyrings();
- if (ret < 0)
- goto error;
- }
-
- key = ctx.cred->user->session_keyring;
- __key_get(key);
+ ret = look_up_user_keyrings(NULL, &key);
+ if (ret < 0)
+ goto error;
key_ref = make_key_ref(key, 1);
break;
@@ -686,12 +789,12 @@ try_again:
key_ref = make_key_ref(key, 0);
/* check to see if we possess the key */
- ctx.index_key.type = key->type;
- ctx.index_key.description = key->description;
- ctx.index_key.desc_len = strlen(key->description);
+ ctx.index_key = key->index_key;
ctx.match_data.raw_data = key;
kdebug("check possessed");
- skey_ref = search_process_keyrings(&ctx);
+ rcu_read_lock();
+ skey_ref = search_process_keyrings_rcu(&ctx);
+ rcu_read_unlock();
kdebug("possessed=%p", skey_ref);
if (!IS_ERR(skey_ref)) {
@@ -715,12 +818,12 @@ try_again:
case -ERESTARTSYS:
goto invalid_key;
default:
- if (perm)
+ if (desired_perm)
goto invalid_key;
case 0:
break;
}
- } else if (perm) {
+ } else if (desired_perm) {
ret = key_validate(key);
if (ret < 0)
goto invalid_key;
@@ -732,9 +835,11 @@ try_again:
goto invalid_key;
/* check the permissions */
- ret = key_task_permission(key_ref, ctx.cred, perm);
- if (ret < 0)
- goto invalid_key;
+ if (desired_perm) {
+ ret = key_task_permission(key_ref, ctx.cred, desired_perm);
+ if (ret < 0)
+ goto invalid_key;
+ }
key->last_used_at = ktime_get_real_seconds();
@@ -799,13 +904,13 @@ long join_session_keyring(const char *name)
if (PTR_ERR(keyring) == -ENOKEY) {
/* not found - try and create a new one */
keyring = keyring_alloc(
- name, old->uid, old->gid, old,
- KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
+ name, old->uid, old->gid, old, &joinable_keyring_acl,
KEY_ALLOC_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto error2;
}
+ goto no_perm_test;
} else if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto error2;
@@ -814,6 +919,12 @@ long join_session_keyring(const char *name)
goto error3;
}
+ ret = key_task_permission(make_key_ref(keyring, false), old,
+ KEY_NEED_JOIN);
+ if (ret < 0)
+ goto error3;
+
+no_perm_test:
/* we've got a keyring - now to install it */
ret = install_session_keyring_to_cred(new, keyring);
if (ret < 0)
@@ -883,7 +994,7 @@ void key_change_session_keyring(struct callback_head *twork)
*/
static int __init init_root_keyring(void)
{
- return install_user_keyrings();
+ return look_up_user_keyrings(NULL, NULL);
}
late_initcall(init_root_keyring);
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 8ae3b7b18801..46c5187ce03f 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -13,14 +13,40 @@
#include <linux/err.h>
#include <linux/keyctl.h>
#include <linux/slab.h>
+#include <net/net_namespace.h>
#include "internal.h"
#include <keys/request_key_auth-type.h>
#define key_negative_timeout 60 /* default timeout on a negative key's existence */
+static struct key *check_cached_key(struct keyring_search_context *ctx)
+{
+#ifdef CONFIG_KEYS_REQUEST_CACHE
+ struct key *key = current->cached_requested_key;
+
+ if (key &&
+ ctx->match_data.cmp(key, &ctx->match_data) &&
+ !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED))))
+ return key_get(key);
+#endif
+ return NULL;
+}
+
+static void cache_requested_key(struct key *key)
+{
+#ifdef CONFIG_KEYS_REQUEST_CACHE
+ struct task_struct *t = current;
+
+ key_put(t->cached_requested_key);
+ t->cached_requested_key = key_get(key);
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+#endif
+}
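
check_cached_key() and cache_requested_key() implement a one-slot, per-task
cache of the most recently requested key. The cache holds a real reference,
and setting TIF_NOTIFY_RESUME arranges for it to be discarded on the way back
to userspace. A sketch of the kind of resume-path cleanup this implies (the
exact hook, e.g. tracehook_notify_resume(), is an assumption and is not part
of this hunk):

	#ifdef CONFIG_KEYS_REQUEST_CACHE
		/* Drop the per-task cached key before returning to userspace. */
		if (unlikely(current->cached_requested_key)) {
			key_put(current->cached_requested_key);
			current->cached_requested_key = NULL;
		}
	#endif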
+
/**
* complete_request_key - Complete the construction of a key.
- * @auth_key: The authorisation key.
+ * @authkey: The authorisation key.
 * @error: The success or failure of the construction.
*
* Complete the attempt to construct a key. The key will be negated
@@ -92,7 +118,7 @@ static int call_sbin_request_key(struct key *authkey, void *aux)
struct request_key_auth *rka = get_request_key_auth(authkey);
const struct cred *cred = current_cred();
key_serial_t prkey, sskey;
- struct key *key = rka->target_key, *keyring, *session;
+ struct key *key = rka->target_key, *keyring, *session, *user_session;
char *argv[9], *envp[3], uid_str[12], gid_str[12];
char key_str[12], keyring_str[3][12];
char desc[20];
@@ -100,17 +126,16 @@ static int call_sbin_request_key(struct key *authkey, void *aux)
kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op);
- ret = install_user_keyrings();
+ ret = look_up_user_keyrings(NULL, &user_session);
if (ret < 0)
- goto error_alloc;
+ goto error_us;
/* allocate a new session keyring */
sprintf(desc, "_req.%u", key->serial);
cred = get_current_cred();
keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
- KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
- KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL);
+ NULL, KEY_ALLOC_QUOTA_OVERRUN, NULL, NULL);
put_cred(cred);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
@@ -140,7 +165,7 @@ static int call_sbin_request_key(struct key *authkey, void *aux)
session = cred->session_keyring;
if (!session)
- session = cred->user->session_keyring;
+ session = user_session;
sskey = session->serial;
sprintf(keyring_str[2], "%d", sskey);
@@ -182,6 +207,8 @@ error_link:
key_put(keyring);
error_alloc:
+ key_put(user_session);
+error_us:
complete_request_key(authkey, ret);
kleave(" = %d", ret);
return ret;
@@ -218,7 +245,7 @@ static int construct_key(struct key *key, const void *callout_info,
/* check that the actor called complete_request_key() prior to
* returning an error */
WARN_ON(ret < 0 &&
- !test_bit(KEY_FLAG_REVOKED, &authkey->flags));
+ !test_bit(KEY_FLAG_INVALIDATED, &authkey->flags));
key_put(authkey);
kleave(" = %d", ret);
@@ -288,13 +315,15 @@ static int construct_get_dest_keyring(struct key **_dest_keyring)
/* fall through */
case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
- dest_keyring =
- key_get(READ_ONCE(cred->user->session_keyring));
+ ret = look_up_user_keyrings(NULL, &dest_keyring);
+ if (ret < 0)
+ return ret;
break;
case KEY_REQKEY_DEFL_USER_KEYRING:
- dest_keyring =
- key_get(READ_ONCE(cred->user->uid_keyring));
+ ret = look_up_user_keyrings(&dest_keyring, NULL);
+ if (ret < 0)
+ return ret;
break;
case KEY_REQKEY_DEFL_GROUP_KEYRING:
@@ -337,11 +366,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
struct key *dest_keyring,
unsigned long flags,
struct key_user *user,
+ struct key_acl *acl,
struct key **_key)
{
- struct assoc_array_edit *edit;
+ struct assoc_array_edit *edit = NULL;
struct key *key;
- key_perm_t perm;
key_ref_t key_ref;
int ret;
@@ -351,23 +380,18 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
*_key = NULL;
mutex_lock(&user->cons_lock);
- perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
- perm |= KEY_USR_VIEW;
- if (ctx->index_key.type->read)
- perm |= KEY_POS_READ;
- if (ctx->index_key.type == &key_type_keyring ||
- ctx->index_key.type->update)
- perm |= KEY_POS_WRITE;
-
key = key_alloc(ctx->index_key.type, ctx->index_key.description,
ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
- perm, flags, NULL);
+ acl, flags, NULL);
if (IS_ERR(key))
goto alloc_failed;
set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
if (dest_keyring) {
+ ret = __key_link_lock(dest_keyring, &ctx->index_key);
+ if (ret < 0)
+ goto link_lock_failed;
ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
if (ret < 0)
goto link_prealloc_failed;
@@ -378,7 +402,9 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
* waited for locks */
mutex_lock(&key_construction_mutex);
- key_ref = search_process_keyrings(ctx);
+ rcu_read_lock();
+ key_ref = search_process_keyrings_rcu(ctx);
+ rcu_read_unlock();
if (!IS_ERR(key_ref))
goto key_already_present;
@@ -419,6 +445,8 @@ link_check_failed:
return ret;
link_prealloc_failed:
+ __key_link_end(dest_keyring, &ctx->index_key, edit);
+link_lock_failed:
mutex_unlock(&user->cons_lock);
key_put(key);
kleave(" = %d [prelink]", ret);
@@ -437,6 +465,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
const char *callout_info,
size_t callout_len,
void *aux,
+ struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags)
{
@@ -459,7 +488,7 @@ static struct key *construct_key_and_link(struct keyring_search_context *ctx,
goto error_put_dest_keyring;
}
- ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
+ ret = construct_alloc_key(ctx, dest_keyring, flags, user, acl, &key);
key_user_put(user);
if (ret == 0) {
@@ -493,16 +522,19 @@ error:
* request_key_and_link - Request a key and cache it in a keyring.
* @type: The type of key we want.
* @description: The searchable description of the key.
+ * @domain_tag: The domain in which the key operates.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
* @callout_len: The length of callout_info.
* @aux: Auxiliary data for the upcall.
+ * @acl: The ACL to attach if a new key is created.
* @dest_keyring: Where to cache the key.
* @flags: Flags to key_alloc().
*
- * A key matching the specified criteria is searched for in the process's
- * keyrings and returned with its usage count incremented if found. Otherwise,
- * if callout_info is not NULL, a key will be allocated and some service
- * (probably in userspace) will be asked to instantiate it.
+ * A key matching the specified criteria (type, description, domain_tag) is
+ * searched for in the process's keyrings and returned with its usage count
+ * incremented if found. Otherwise, if callout_info is not NULL, a key will be
+ * allocated and some service (probably in userspace) will be asked to
+ * instantiate it.
*
* If successfully found or created, the key will be linked to the destination
* keyring if one is provided.
@@ -518,14 +550,17 @@ error:
*/
struct key *request_key_and_link(struct key_type *type,
const char *description,
+ struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
void *aux,
+ struct key_acl *acl,
struct key *dest_keyring,
unsigned long flags)
{
struct keyring_search_context ctx = {
.index_key.type = type,
+ .index_key.domain_tag = domain_tag,
.index_key.description = description,
.index_key.desc_len = strlen(description),
.cred = current_cred(),
@@ -533,7 +568,8 @@ struct key *request_key_and_link(struct key_type *type,
.match_data.raw_data = description,
.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
.flags = (KEYRING_SEARCH_DO_STATE_CHECK |
- KEYRING_SEARCH_SKIP_EXPIRED),
+ KEYRING_SEARCH_SKIP_EXPIRED |
+ KEYRING_SEARCH_RECURSE),
};
struct key *key;
key_ref_t key_ref;
@@ -551,10 +587,26 @@ struct key *request_key_and_link(struct key_type *type,
}
}
+ key = check_cached_key(&ctx);
+ if (key)
+ return key;
+
/* search all the process keyrings for a key */
- key_ref = search_process_keyrings(&ctx);
+ rcu_read_lock();
+ key_ref = search_process_keyrings_rcu(&ctx);
+ rcu_read_unlock();
if (!IS_ERR(key_ref)) {
+ if (dest_keyring) {
+ ret = key_task_permission(key_ref, current_cred(),
+ KEY_NEED_LINK);
+ if (ret < 0) {
+ key_ref_put(key_ref);
+ key = ERR_PTR(ret);
+ goto error_free;
+ }
+ }
+
key = key_ref_to_ptr(key_ref);
if (dest_keyring) {
ret = key_link(dest_keyring, key);
@@ -564,6 +616,9 @@ struct key *request_key_and_link(struct key_type *type,
goto error_free;
}
}
+
+ /* Only cache the key on immediate success */
+ cache_requested_key(key);
} else if (PTR_ERR(key_ref) != -EAGAIN) {
key = ERR_CAST(key_ref);
} else {
@@ -574,7 +629,7 @@ struct key *request_key_and_link(struct key_type *type,
goto error_free;
key = construct_key_and_link(&ctx, callout_info, callout_len,
- aux, dest_keyring, flags);
+ aux, acl, dest_keyring, flags);
}
error_free:
@@ -612,10 +667,12 @@ int wait_for_key_construction(struct key *key, bool intr)
EXPORT_SYMBOL(wait_for_key_construction);
/**
- * request_key - Request a key and wait for construction
+ * request_key_tag - Request a key and wait for construction
* @type: Type of key.
* @description: The searchable description of the key.
+ * @domain_tag: The domain in which the key operates.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
+ * @acl: The ACL to attach if a new key is created.
*
* As for request_key_and_link() except that it does not add the returned key
* to a keyring if found, new keys are always allocated in the user's quota,
@@ -625,9 +682,11 @@ EXPORT_SYMBOL(wait_for_key_construction);
* Furthermore, it then works as wait_for_key_construction() to wait for the
* completion of keys undergoing construction with a non-interruptible wait.
*/
-struct key *request_key(struct key_type *type,
- const char *description,
- const char *callout_info)
+struct key *request_key_tag(struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag,
+ const char *callout_info,
+ struct key_acl *acl)
{
struct key *key;
size_t callout_len = 0;
@@ -635,8 +694,9 @@ struct key *request_key(struct key_type *type,
if (callout_info)
callout_len = strlen(callout_info);
- key = request_key_and_link(type, description, callout_info, callout_len,
- NULL, NULL, KEY_ALLOC_IN_QUOTA);
+ key = request_key_and_link(type, description, domain_tag,
+ callout_info, callout_len,
+ NULL, acl, NULL, KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key)) {
ret = wait_for_key_construction(key, false);
if (ret < 0) {
@@ -646,15 +706,17 @@ struct key *request_key(struct key_type *type,
}
return key;
}
-EXPORT_SYMBOL(request_key);
+EXPORT_SYMBOL(request_key_tag);
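
request_key_tag() extends the old request_key() interface with a domain tag
and an ACL for newly created keys. A hypothetical caller, assuming that a
NULL domain_tag selects the default domain and a NULL acl selects a default
ACL (as the keyring_alloc() call sites elsewhere in this patch suggest):

	/* Illustrative caller; description and payload are made up. */
	struct key *key;

	key = request_key_tag(&key_type_user, "example:desc", NULL,
			      "callout data", NULL);
	if (IS_ERR(key))
		return PTR_ERR(key);
	/* ... use the key's payload ... */
	key_put(key);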
/**
* request_key_with_auxdata - Request a key with auxiliary data for the upcaller
* @type: The type of key we want.
* @description: The searchable description of the key.
+ * @domain_tag: The domain in which the key operates.
* @callout_info: The data to pass to the instantiation upcall (or NULL).
* @callout_len: The length of callout_info.
* @aux: Auxiliary data for the upcall.
+ * @acl: The ACL to attach if a new key is created.
*
* As for request_key_and_link() except that it does not add the returned key
* to a keyring if found and new keys are always allocated in the user's quota.
@@ -664,15 +726,18 @@ EXPORT_SYMBOL(request_key);
*/
struct key *request_key_with_auxdata(struct key_type *type,
const char *description,
+ struct key_tag *domain_tag,
const void *callout_info,
size_t callout_len,
- void *aux)
+ void *aux,
+ struct key_acl *acl)
{
struct key *key;
int ret;
- key = request_key_and_link(type, description, callout_info, callout_len,
- aux, NULL, KEY_ALLOC_IN_QUOTA);
+ key = request_key_and_link(type, description, domain_tag,
+ callout_info, callout_len,
+ aux, acl, NULL, KEY_ALLOC_IN_QUOTA);
if (!IS_ERR(key)) {
ret = wait_for_key_construction(key, false);
if (ret < 0) {
@@ -684,52 +749,55 @@ struct key *request_key_with_auxdata(struct key_type *type,
}
EXPORT_SYMBOL(request_key_with_auxdata);
-/*
- * request_key_async - Request a key (allow async construction)
- * @type: Type of key.
- * @description: The searchable description of the key.
- * @callout_info: The data to pass to the instantiation upcall (or NULL).
- * @callout_len: The length of callout_info.
+/**
+ * request_key_rcu - Request key from RCU-read-locked context
+ * @type: The type of key we want.
+ * @description: The name of the key we want.
+ * @domain_tag: The domain in which the key operates.
*
- * As for request_key_and_link() except that it does not add the returned key
- * to a keyring if found, new keys are always allocated in the user's quota and
- * no auxiliary data can be passed.
+ * Request a key from a context that we may not sleep in (such as RCU-mode
+ * pathwalk). Keys under construction are ignored.
*
- * The caller should call wait_for_key_construction() to wait for the
- * completion of the returned key if it is still undergoing construction.
+ * Return a pointer to the found key if successful, -ENOKEY if we couldn't find
+ * a key, or some other error if the key found was unsuitable or inaccessible.
*/
-struct key *request_key_async(struct key_type *type,
- const char *description,
- const void *callout_info,
- size_t callout_len)
+struct key *request_key_rcu(struct key_type *type,
+ const char *description,
+ struct key_tag *domain_tag)
{
- return request_key_and_link(type, description, callout_info,
- callout_len, NULL, NULL,
- KEY_ALLOC_IN_QUOTA);
-}
-EXPORT_SYMBOL(request_key_async);
+ struct keyring_search_context ctx = {
+ .index_key.type = type,
+ .index_key.domain_tag = domain_tag,
+ .index_key.description = description,
+ .index_key.desc_len = strlen(description),
+ .cred = current_cred(),
+ .match_data.cmp = key_default_cmp,
+ .match_data.raw_data = description,
+ .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
+ .flags = (KEYRING_SEARCH_DO_STATE_CHECK |
+ KEYRING_SEARCH_SKIP_EXPIRED),
+ };
+ struct key *key;
+ key_ref_t key_ref;
-/*
- * request a key with auxiliary data for the upcaller (allow async construction)
- * @type: Type of key.
- * @description: The searchable description of the key.
- * @callout_info: The data to pass to the instantiation upcall (or NULL).
- * @callout_len: The length of callout_info.
- * @aux: Auxiliary data for the upcall.
- *
- * As for request_key_and_link() except that it does not add the returned key
- * to a keyring if found and new keys are always allocated in the user's quota.
- *
- * The caller should call wait_for_key_construction() to wait for the
- * completion of the returned key if it is still undergoing construction.
- */
-struct key *request_key_async_with_auxdata(struct key_type *type,
- const char *description,
- const void *callout_info,
- size_t callout_len,
- void *aux)
-{
- return request_key_and_link(type, description, callout_info,
- callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA);
+ kenter("%s,%s", type->name, description);
+
+ key = check_cached_key(&ctx);
+ if (key)
+ return key;
+
+ /* search all the process keyrings for a key */
+ key_ref = search_process_keyrings_rcu(&ctx);
+ if (IS_ERR(key_ref)) {
+ key = ERR_CAST(key_ref);
+ if (PTR_ERR(key_ref) == -EAGAIN)
+ key = ERR_PTR(-ENOKEY);
+ } else {
+ key = key_ref_to_ptr(key_ref);
+ cache_requested_key(key);
+ }
+
+ kleave(" = %p", key);
+ return key;
}
-EXPORT_SYMBOL(request_key_async_with_auxdata);
+EXPORT_SYMBOL(request_key_rcu);
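
request_key_rcu() only searches; it never sleeps and never upcalls, and keys
still under construction come back as -ENOKEY. The returned reference is
counted and outlives the RCU read lock. A hypothetical caller in the style of
RCU-mode pathwalk:

	/* Illustrative only; the type and description are assumptions. */
	struct key *key;

	rcu_read_lock();
	key = request_key_rcu(&key_type_user, "example:desc", NULL);
	rcu_read_unlock();

	if (IS_ERR(key))
		return PTR_ERR(key);	/* -ENOKEY if absent or incomplete */
	/* ... non-sleeping use of the key ... */
	key_put(key);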
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index e45b5cf3b97f..27e437d94b81 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -24,6 +24,17 @@ static void request_key_auth_revoke(struct key *);
static void request_key_auth_destroy(struct key *);
static long request_key_auth_read(const struct key *, char __user *, size_t);
+static struct key_acl request_key_auth_acl = {
+ .usage = REFCOUNT_INIT(1),
+ .nr_ace = 2,
+ .possessor_viewable = true,
+ .aces = {
+ KEY_POSSESSOR_ACE(KEY_ACE_VIEW | KEY_ACE_READ | KEY_ACE_SEARCH |
+ KEY_ACE_LINK),
+ KEY_OWNER_ACE(KEY_ACE_VIEW),
+ }
+};
+
/*
* The request-key authorisation key type definition.
*/
@@ -54,7 +65,7 @@ static void request_key_auth_free_preparse(struct key_preparsed_payload *prep)
static int request_key_auth_instantiate(struct key *key,
struct key_preparsed_payload *prep)
{
- key->payload.data[0] = (struct request_key_auth *)prep->data;
+ rcu_assign_keypointer(key, (struct request_key_auth *)prep->data);
return 0;
}
@@ -64,7 +75,7 @@ static int request_key_auth_instantiate(struct key *key,
static void request_key_auth_describe(const struct key *key,
struct seq_file *m)
{
- struct request_key_auth *rka = get_request_key_auth(key);
+ struct request_key_auth *rka = dereference_key_rcu(key);
seq_puts(m, "key:");
seq_puts(m, key->description);
@@ -79,7 +90,7 @@ static void request_key_auth_describe(const struct key *key,
static long request_key_auth_read(const struct key *key,
char __user *buffer, size_t buflen)
{
- struct request_key_auth *rka = get_request_key_auth(key);
+ struct request_key_auth *rka = dereference_key_locked(key);
size_t datalen;
long ret;
@@ -98,23 +109,6 @@ static long request_key_auth_read(const struct key *key,
return ret;
}
-/*
- * Handle revocation of an authorisation token key.
- *
- * Called with the key sem write-locked.
- */
-static void request_key_auth_revoke(struct key *key)
-{
- struct request_key_auth *rka = get_request_key_auth(key);
-
- kenter("{%d}", key->serial);
-
- if (rka->cred) {
- put_cred(rka->cred);
- rka->cred = NULL;
- }
-}
-
static void free_request_key_auth(struct request_key_auth *rka)
{
if (!rka)
@@ -128,15 +122,42 @@ static void free_request_key_auth(struct request_key_auth *rka)
}
/*
+ * Dispose of the request_key_auth record under RCU conditions
+ */
+static void request_key_auth_rcu_disposal(struct rcu_head *rcu)
+{
+ struct request_key_auth *rka =
+ container_of(rcu, struct request_key_auth, rcu);
+
+ free_request_key_auth(rka);
+}
+
+/*
+ * Handle revocation of an authorisation token key.
+ *
+ * Called with the key sem write-locked.
+ */
+static void request_key_auth_revoke(struct key *key)
+{
+ struct request_key_auth *rka = dereference_key_locked(key);
+
+ kenter("{%d}", key->serial);
+ rcu_assign_keypointer(key, NULL);
+ call_rcu(&rka->rcu, request_key_auth_rcu_disposal);
+}
+
+/*
* Destroy an instantiation authorisation token key.
*/
static void request_key_auth_destroy(struct key *key)
{
- struct request_key_auth *rka = get_request_key_auth(key);
+ struct request_key_auth *rka = rcu_access_pointer(key->payload.rcu_data0);
kenter("{%d}", key->serial);
-
- free_request_key_auth(rka);
+ if (rka) {
+ rcu_assign_keypointer(key, NULL);
+ call_rcu(&rka->rcu, request_key_auth_rcu_disposal);
+ }
}
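
Both the revoke and destroy paths above now follow the standard RCU retire
idiom: unpublish the payload pointer with rcu_assign_keypointer(), then defer
the free past a grace period with call_rcu(), so lockless readers such as
request_key_auth_describe() never see freed memory. The generic shape of the
idiom, with names assumed for illustration:

	/* Generic sketch; 'payload' and 'payload_rcu_free' are hypothetical. */
	struct payload *p = dereference_key_locked(key);	/* key sem held */

	rcu_assign_keypointer(key, NULL);			/* unpublish */
	if (p)
		call_rcu(&p->rcu, payload_rcu_free);		/* free later */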
/*
@@ -148,7 +169,7 @@ struct key *request_key_auth_new(struct key *target, const char *op,
struct key *dest_keyring)
{
struct request_key_auth *rka, *irka;
- const struct cred *cred = current->cred;
+ const struct cred *cred = current_cred();
struct key *authkey = NULL;
char desc[20];
int ret = -ENOMEM;
@@ -200,8 +221,8 @@ struct key *request_key_auth_new(struct key *target, const char *op,
authkey = key_alloc(&key_type_request_key_auth, desc,
cred->fsuid, cred->fsgid, cred,
- KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
- KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ &request_key_auth_acl,
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(authkey)) {
ret = PTR_ERR(authkey);
goto error_free_rka;
@@ -238,14 +259,17 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id)
.match_data.cmp = key_default_cmp,
.match_data.raw_data = description,
.match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
- .flags = KEYRING_SEARCH_DO_STATE_CHECK,
+ .flags = (KEYRING_SEARCH_DO_STATE_CHECK |
+ KEYRING_SEARCH_RECURSE),
};
struct key *authkey;
key_ref_t authkey_ref;
ctx.index_key.desc_len = sprintf(description, "%x", target_id);
- authkey_ref = search_process_keyrings(&ctx);
+ rcu_read_lock();
+ authkey_ref = search_process_keyrings_rcu(&ctx);
+ rcu_read_unlock();
if (IS_ERR(authkey_ref)) {
authkey = ERR_CAST(authkey_ref);
diff --git a/security/safesetid/lsm.c b/security/safesetid/lsm.c
index cecd38e2ac80..06d4259f9ab1 100644
--- a/security/safesetid/lsm.c
+++ b/security/safesetid/lsm.c
@@ -111,7 +111,7 @@ static int check_uid_transition(kuid_t parent, kuid_t child)
* that could arise from a missing whitelist entry preventing a
* privileged process from dropping to a lesser-privileged one.
*/
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return -EACCES;
}
@@ -203,7 +203,7 @@ static int safesetid_task_fix_setuid(struct cred *new,
break;
default:
pr_warn("Unknown setid state %d\n", flags);
- force_sig(SIGKILL, current);
+ force_sig(SIGKILL);
return -EINVAL;
}
return 0;
diff --git a/security/security.c b/security/security.c
index f493db0bf62a..250ee2d76406 100644
--- a/security/security.c
+++ b/security/security.c
@@ -35,7 +35,7 @@
#define LSM_COUNT (__end_lsm_info - __start_lsm_info)
struct security_hook_heads security_hook_heads __lsm_ro_after_init;
-static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
+static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
static struct kmem_cache *lsm_file_cache;
static struct kmem_cache *lsm_inode_cache;
@@ -426,23 +426,26 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count,
panic("%s - Cannot get early memory.\n", __func__);
}
-int call_lsm_notifier(enum lsm_event event, void *data)
+int call_blocking_lsm_notifier(enum lsm_event event, void *data)
{
- return atomic_notifier_call_chain(&lsm_notifier_chain, event, data);
+ return blocking_notifier_call_chain(&blocking_lsm_notifier_chain,
+ event, data);
}
-EXPORT_SYMBOL(call_lsm_notifier);
+EXPORT_SYMBOL(call_blocking_lsm_notifier);
-int register_lsm_notifier(struct notifier_block *nb)
+int register_blocking_lsm_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&lsm_notifier_chain, nb);
+ return blocking_notifier_chain_register(&blocking_lsm_notifier_chain,
+ nb);
}
-EXPORT_SYMBOL(register_lsm_notifier);
+EXPORT_SYMBOL(register_blocking_lsm_notifier);
-int unregister_lsm_notifier(struct notifier_block *nb)
+int unregister_blocking_lsm_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&lsm_notifier_chain, nb);
+ return blocking_notifier_chain_unregister(&blocking_lsm_notifier_chain,
+ nb);
}
-EXPORT_SYMBOL(unregister_lsm_notifier);
+EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
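
The LSM notifier chain becomes a blocking chain, so callbacks may now sleep
but must no longer be invoked from atomic context. Registration uses the
standard notifier_block API; a hypothetical listener (the callback name is
illustrative, LSM_POLICY_CHANGE is the event SELinux raises below):

	static int example_lsm_event(struct notifier_block *nb,
				     unsigned long event, void *data)
	{
		if (event == LSM_POLICY_CHANGE) {
			/* May sleep here now that the chain is blocking. */
		}
		return NOTIFY_OK;
	}

	static struct notifier_block example_lsm_nb = {
		.notifier_call = example_lsm_event,
	};

	/* ... */
	register_blocking_lsm_notifier(&example_lsm_nb);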
/**
* lsm_cred_alloc - allocate a composite cred blob
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 94de51628fdc..4bef86ed463b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -194,7 +194,7 @@ static int selinux_lsm_notifier_avc_callback(u32 event)
{
if (event == AVC_CALLBACK_RESET) {
sel_ib_pkey_flush();
- call_lsm_notifier(LSM_POLICY_CHANGE, NULL);
+ call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL);
}
return 0;
@@ -6351,11 +6351,12 @@ static int selinux_setprocattr(const char *name, void *value, size_t size)
} else if (!strcmp(name, "fscreate")) {
tsec->create_sid = sid;
} else if (!strcmp(name, "keycreate")) {
- error = avc_has_perm(&selinux_state,
- mysid, sid, SECCLASS_KEY, KEY__CREATE,
- NULL);
- if (error)
- goto abort_change;
+ if (sid) {
+ error = avc_has_perm(&selinux_state, mysid, sid,
+ SECCLASS_KEY, KEY__CREATE, NULL);
+ if (error)
+ goto abort_change;
+ }
tsec->keycreate_sid = sid;
} else if (!strcmp(name, "sockcreate")) {
tsec->sockcreate_sid = sid;
@@ -6501,6 +6502,7 @@ static int selinux_key_permission(key_ref_t key_ref,
{
struct key *key;
struct key_security_struct *ksec;
+ unsigned oldstyle_perm;
u32 sid;
/* if no specific permissions are requested, we skip the
@@ -6509,13 +6511,26 @@ static int selinux_key_permission(key_ref_t key_ref,
if (perm == 0)
return 0;
+ oldstyle_perm = perm & (KEY_NEED_VIEW | KEY_NEED_READ | KEY_NEED_WRITE |
+ KEY_NEED_SEARCH | KEY_NEED_LINK);
+ if (perm & KEY_NEED_SETSEC)
+ oldstyle_perm |= OLD_KEY_NEED_SETATTR;
+ if (perm & KEY_NEED_INVAL)
+ oldstyle_perm |= KEY_NEED_SEARCH;
+ if (perm & KEY_NEED_REVOKE && !(perm & OLD_KEY_NEED_SETATTR))
+ oldstyle_perm |= KEY_NEED_WRITE;
+ if (perm & KEY_NEED_JOIN)
+ oldstyle_perm |= KEY_NEED_SEARCH;
+ if (perm & KEY_NEED_CLEAR)
+ oldstyle_perm |= KEY_NEED_WRITE;
+
sid = cred_sid(cred);
key = key_ref_to_ptr(key_ref);
ksec = key->security;
return avc_has_perm(&selinux_state,
- sid, ksec->sid, SECCLASS_KEY, perm, NULL);
+ sid, ksec->sid, SECCLASS_KEY, oldstyle_perm, NULL);
}
static int selinux_key_getsecurity(struct key *key, char **_buffer)
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 1884f34bb983..6f195c7915de 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -178,7 +178,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
selnl_notify_setenforce(new_value);
selinux_status_update_setenforce(state, new_value);
if (!new_value)
- call_lsm_notifier(LSM_POLICY_CHANGE, NULL);
+ call_blocking_lsm_notifier(LSM_POLICY_CHANGE, NULL);
}
length = count;
out:
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 8f624f80055b..09929fc5ab47 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -347,7 +347,9 @@ int ebitmap_read(struct ebitmap *e, void *fp)
{
struct ebitmap_node *n = NULL;
u32 mapunit, count, startbit, index;
+ __le32 ebitmap_start;
u64 map;
+ __le64 mapbits;
__le32 buf[3];
int rc, i;
@@ -381,12 +383,12 @@ int ebitmap_read(struct ebitmap *e, void *fp)
goto bad;
for (i = 0; i < count; i++) {
- rc = next_entry(&startbit, fp, sizeof(u32));
+ rc = next_entry(&ebitmap_start, fp, sizeof(u32));
if (rc < 0) {
pr_err("SELinux: ebitmap: truncated map\n");
goto bad;
}
- startbit = le32_to_cpu(startbit);
+ startbit = le32_to_cpu(ebitmap_start);
if (startbit & (mapunit - 1)) {
pr_err("SELinux: ebitmap start bit (%d) is "
@@ -423,12 +425,12 @@ int ebitmap_read(struct ebitmap *e, void *fp)
goto bad;
}
- rc = next_entry(&map, fp, sizeof(u64));
+ rc = next_entry(&mapbits, fp, sizeof(u64));
if (rc < 0) {
pr_err("SELinux: ebitmap: truncated map\n");
goto bad;
}
- map = le64_to_cpu(map);
+ map = le64_to_cpu(mapbits);
index = (startbit - n->startbit) / EBITMAP_UNIT_SIZE;
while (map) {
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d3f5568c1f60..d61563a3695e 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -649,9 +649,7 @@ static void context_struct_compute_av(struct policydb *policydb,
avkey.target_class = tclass;
avkey.specified = AVTAB_AV | AVTAB_XPERMS;
sattr = &policydb->type_attr_map_array[scontext->type - 1];
- BUG_ON(!sattr);
tattr = &policydb->type_attr_map_array[tcontext->type - 1];
- BUG_ON(!tattr);
ebitmap_for_each_positive_bit(sattr, snode, i) {
ebitmap_for_each_positive_bit(tattr, tnode, j) {
avkey.source_type = i + 1;
@@ -1057,9 +1055,7 @@ void security_compute_xperms_decision(struct selinux_state *state,
avkey.target_class = tclass;
avkey.specified = AVTAB_XPERMS;
sattr = &policydb->type_attr_map_array[scontext->type - 1];
- BUG_ON(!sattr);
tattr = &policydb->type_attr_map_array[tcontext->type - 1];
- BUG_ON(!tattr);
ebitmap_for_each_positive_bit(sattr, snode, i) {
ebitmap_for_each_positive_bit(tattr, tnode, j) {
avkey.source_type = i + 1;
@@ -1586,6 +1582,7 @@ static int compute_sid_handle_invalid_context(
struct policydb *policydb = &state->ss->policydb;
char *s = NULL, *t = NULL, *n = NULL;
u32 slen, tlen, nlen;
+ struct audit_buffer *ab;
if (context_struct_to_string(policydb, scontext, &s, &slen))
goto out;
@@ -1593,12 +1590,14 @@ static int compute_sid_handle_invalid_context(
goto out;
if (context_struct_to_string(policydb, newcontext, &n, &nlen))
goto out;
- audit_log(audit_context(), GFP_ATOMIC, AUDIT_SELINUX_ERR,
- "op=security_compute_sid invalid_context=%s"
- " scontext=%s"
- " tcontext=%s"
- " tclass=%s",
- n, s, t, sym_name(policydb, SYM_CLASSES, tclass-1));
+ ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_SELINUX_ERR);
+ audit_log_format(ab,
+ "op=security_compute_sid invalid_context=");
+ /* no need to record the NUL with untrusted strings */
+ audit_log_n_untrustedstring(ab, n, nlen - 1);
+ audit_log_format(ab, " scontext=%s tcontext=%s tclass=%s",
+ s, t, sym_name(policydb, SYM_CLASSES, tclass-1));
+ audit_log_end(ab);
out:
kfree(s);
kfree(t);
@@ -3005,10 +3004,16 @@ int security_sid_mls_copy(struct selinux_state *state,
if (rc) {
if (!context_struct_to_string(policydb, &newcon, &s,
&len)) {
- audit_log(audit_context(),
- GFP_ATOMIC, AUDIT_SELINUX_ERR,
- "op=security_sid_mls_copy "
- "invalid_context=%s", s);
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(audit_context(),
+ GFP_ATOMIC,
+ AUDIT_SELINUX_ERR);
+ audit_log_format(ab,
+ "op=security_sid_mls_copy invalid_context=");
+ /* don't record NUL with untrusted strings */
+ audit_log_n_untrustedstring(ab, s, len - 1);
+ audit_log_end(ab);
kfree(s);
}
goto out_unlock;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 4c5e5a438f8b..50c536cad85b 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4284,7 +4284,8 @@ static int smack_key_permission(key_ref_t key_ref,
#endif
if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW))
request |= MAY_READ;
- if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR))
+ if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETSEC |
+ KEY_NEED_INVAL | KEY_NEED_REVOKE | KEY_NEED_CLEAR))
request |= MAY_WRITE;
rc = smk_access(tkp, keyp->security, request, &ad);
rc = smk_bu_note("key access", tkp, keyp->security, request, rc);
diff --git a/sound/core/seq/oss/seq_oss_ioctl.c b/sound/core/seq/oss/seq_oss_ioctl.c
index 96ad01fb668c..ccf682689ec9 100644
--- a/sound/core/seq/oss/seq_oss_ioctl.c
+++ b/sound/core/seq/oss/seq_oss_ioctl.c
@@ -49,7 +49,7 @@ static int snd_seq_oss_oob_user(struct seq_oss_devinfo *dp, void __user *arg)
if (copy_from_user(ev, arg, 8))
return -EFAULT;
memset(&tmpev, 0, sizeof(tmpev));
- snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.port, dp->addr.client);
+ snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port);
tmpev.time.tick = 0;
if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) {
snd_seq_oss_dispatch(dp, &tmpev, 0, 0);
diff --git a/sound/core/seq/oss/seq_oss_rw.c b/sound/core/seq/oss/seq_oss_rw.c
index 79ef430e56e1..537d5f423e20 100644
--- a/sound/core/seq/oss/seq_oss_rw.c
+++ b/sound/core/seq/oss/seq_oss_rw.c
@@ -161,7 +161,7 @@ insert_queue(struct seq_oss_devinfo *dp, union evrec *rec, struct file *opt)
memset(&event, 0, sizeof(event));
/* set dummy -- to be sure */
event.type = SNDRV_SEQ_EVENT_NOTEOFF;
- snd_seq_oss_fill_addr(dp, &event, dp->addr.port, dp->addr.client);
+ snd_seq_oss_fill_addr(dp, &event, dp->addr.client, dp->addr.port);
if (snd_seq_oss_process_event(dp, rec, &event))
return 0; /* invalid event - no need to insert queue */
diff --git a/sound/firewire/amdtp-am824.c b/sound/firewire/amdtp-am824.c
index cc6eb30f03a2..71168728940a 100644
--- a/sound/firewire/amdtp-am824.c
+++ b/sound/firewire/amdtp-am824.c
@@ -320,7 +320,7 @@ static void read_midi_messages(struct amdtp_stream *s,
u8 *b;
for (f = 0; f < frames; f++) {
- port = (s->data_block_counter + f) % 8;
+ port = (8 - s->tx_first_dbc + s->data_block_counter + f) % 8;
b = (u8 *)&buffer[p->midi_position];
len = b[0] - 0x80;
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 6907dbefd08c..3842f9d34b7c 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -400,27 +400,33 @@ static void setup_fg_nodes(struct hdac_device *codec)
int snd_hdac_refresh_widgets(struct hdac_device *codec, bool sysfs)
{
hda_nid_t start_nid;
- int nums, err;
+ int nums, err = 0;
+ /*
+ * Serialize against multiple threads trying to update the sysfs
+ * widgets array.
+ */
+ mutex_lock(&codec->widget_lock);
nums = snd_hdac_get_sub_nodes(codec, codec->afg, &start_nid);
if (!start_nid || nums <= 0 || nums >= 0xff) {
dev_err(&codec->dev, "cannot read sub nodes for FG 0x%02x\n",
codec->afg);
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
if (sysfs) {
- mutex_lock(&codec->widget_lock);
err = hda_widget_sysfs_reinit(codec, start_nid, nums);
- mutex_unlock(&codec->widget_lock);
if (err < 0)
- return err;
+ goto unlock;
}
codec->num_nodes = nums;
codec->start_nid = start_nid;
codec->end_nid = start_nid + nums;
- return 0;
+unlock:
+ mutex_unlock(&codec->widget_lock);
+ return err;
}
EXPORT_SYMBOL_GPL(snd_hdac_refresh_widgets);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5b3c26991f26..6f3a35949cdd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2448,9 +2448,10 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
- SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
- SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
- SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
@@ -7074,6 +7075,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+ SND_PCI_QUIRK(0x17aa, 0x3111, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c
index 21127e4958b2..2c03e0f6bf72 100644
--- a/sound/usb/line6/pcm.c
+++ b/sound/usb/line6/pcm.c
@@ -556,6 +556,11 @@ int line6_init_pcm(struct usb_line6 *line6,
line6pcm->max_packet_size_out =
usb_maxpacket(line6->usbdev,
usb_sndisocpipe(line6->usbdev, ep_write), 1);
+ if (!line6pcm->max_packet_size_in || !line6pcm->max_packet_size_out) {
+ dev_err(line6pcm->line6->ifcdev,
+ "cannot get proper max packet size\n");
+ return -EINVAL;
+ }
spin_lock_init(&line6pcm->out.lock);
spin_lock_init(&line6pcm->in.lock);
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 1f6011f36bb0..199fa157a411 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -741,7 +741,7 @@ static int snd_ni_control_init_val(struct usb_mixer_interface *mixer,
return err;
}
- kctl->private_value |= (value << 24);
+ kctl->private_value |= ((unsigned int)value << 24);
return 0;
}
@@ -902,7 +902,7 @@ static int snd_ftu_eff_switch_init(struct usb_mixer_interface *mixer,
if (err < 0)
return err;
- kctl->private_value |= value[0] << 24;
+ kctl->private_value |= (unsigned int)value[0] << 24;
return 0;
}
diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore
index 9e9dd4b681b2..a94c0e83b209 100644
--- a/tools/gpio/.gitignore
+++ b/tools/gpio/.gitignore
@@ -1,4 +1,4 @@
gpio-event-mon
gpio-hammer
lsgpio
-
+include/linux/gpio.h
diff --git a/tools/include/linux/rcu.h b/tools/include/linux/rcu.h
index 7d02527e5bce..9554d3fa54f3 100644
--- a/tools/include/linux/rcu.h
+++ b/tools/include/linux/rcu.h
@@ -19,7 +19,7 @@ static inline bool rcu_is_watching(void)
return false;
}
-#define rcu_assign_pointer(p, v) ((p) = (v))
-#define RCU_INIT_POINTER(p, v) p=(v)
+#define rcu_assign_pointer(p, v) do { (p) = (v); } while (0)
+#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a8b823c30b43..489e118b69d2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -785,7 +785,7 @@ union bpf_attr {
* based on a user-provided identifier for all traffic coming from
* the tasks belonging to the related cgroup. See also the related
* kernel documentation, available from the Linux sources in file
- * *Documentation/cgroup-v1/net_cls.txt*.
+ * *Documentation/cgroup-v1/net_cls.rst*.
*
* The Linux kernel has two versions for cgroups: there are
* cgroups v1 and cgroups v2. Both are available to users, who can
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index def9131d3d8e..5be86b1025e8 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -24,6 +24,7 @@ instructions RMW[{'once,'acquire,'release}]
enum Barriers = 'wmb (*smp_wmb*) ||
'rmb (*smp_rmb*) ||
'mb (*smp_mb*) ||
+ 'barrier (*barrier*) ||
'rcu-lock (*rcu_read_lock*) ||
'rcu-unlock (*rcu_read_unlock*) ||
'sync-rcu (*synchronize_rcu*) ||
@@ -76,3 +77,8 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
(* Validate SRCU dynamic match *)
flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
+
+(* Compute marked and plain memory accesses *)
+let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
+ LKR | LKW | UL | LF | RL | RU
+let Plain = M \ Marked
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 8dcb37835b61..ea2ff4b94074 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -24,8 +24,14 @@ include "lock.cat"
(* Basic relations *)
(*******************)
+(* Release Acquire *)
+let acq-po = [Acquire] ; po ; [M]
+let po-rel = [M] ; po ; [Release]
+let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+
(* Fences *)
-let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
+let R4rmb = R \ Noreturn (* Reads for which rmb works *)
+let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
@@ -34,13 +40,14 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; po ; [UL] ; (co | po) ; [LKW] ;
fencerel(After-unlock-lock) ; [M])
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
-
let strong-fence = mb | gp
-(* Release Acquire *)
-let acq-po = [Acquire] ; po ; [M]
-let po-rel = [M] ; po ; [Release]
-let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+let nonrw-fence = strong-fence | po-rel | acq-po
+let fence = nonrw-fence | wmb | rmb
+let barrier = fencerel(Barrier | Rmb | Wmb | Mb | Sync-rcu | Sync-srcu |
+ Before-atomic | After-atomic | Acquire | Release |
+ Rcu-lock | Rcu-unlock | Srcu-lock | Srcu-unlock) |
+ (po ; [Release]) | ([Acquire] ; po)
(**********************************)
(* Fundamental coherence ordering *)
@@ -61,21 +68,22 @@ empty rmw & (fre ; coe) as atomic
let dep = addr | data
let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
-let to-w = rwdep | (overwrite & int)
-let to-r = addr | (dep ; rfi)
-let fence = strong-fence | wmb | po-rel | rmb | acq-po
+let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
+let to-r = addr | (dep ; [Marked] ; rfi)
let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
-let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | po-unlock-rf-lock-po
-let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
+let A-cumul(r) = (rfe ; [Marked])? ; r
+let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
+ po-unlock-rf-lock-po) ; [Marked]
+let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
+ [Marked] ; rfe? ; [Marked]
(*
* Happens Before: Ordering from the passage of time.
* No fences needed here for prop because relation confined to one process.
*)
-let hb = ppo | rfe | ((prop \ id) & int)
+let hb = [Marked] ; (ppo | rfe | ((prop \ id) & int)) ; [Marked]
acyclic hb as happens-before
(****************************************)
@@ -83,7 +91,7 @@ acyclic hb as happens-before
(****************************************)
(* Propagation: Each non-rf link needs a strong fence. *)
-let pb = prop ; strong-fence ; hb*
+let pb = prop ; strong-fence ; hb* ; [Marked]
acyclic pb as propagation
(*******)
@@ -114,24 +122,28 @@ let rcu-link = po? ; hb* ; pb* ; prop ; po
(*
* Any sequence containing at least as many grace periods as RCU read-side
- * critical sections (joined by rcu-link) acts as a generalized strong fence.
+ * critical sections (joined by rcu-link) induces order like a generalized
+ * inter-CPU strong fence.
* Likewise for SRCU grace periods and read-side critical sections, provided
* the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
* struct srcu_struct location.
*)
-let rec rcu-fence = rcu-gp | srcu-gp |
+let rec rcu-order = rcu-gp | srcu-gp |
(rcu-gp ; rcu-link ; rcu-rscsi) |
((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
(rcu-rscsi ; rcu-link ; rcu-gp) |
((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
- (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) |
- ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) |
- (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) |
- ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) |
- (rcu-fence ; rcu-link ; rcu-fence)
+ (rcu-gp ; rcu-link ; rcu-order ; rcu-link ; rcu-rscsi) |
+ ((srcu-gp ; rcu-link ; rcu-order ; rcu-link ; srcu-rscsi) & loc) |
+ (rcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; rcu-gp) |
+ ((srcu-rscsi ; rcu-link ; rcu-order ; rcu-link ; srcu-gp) & loc) |
+ (rcu-order ; rcu-link ; rcu-order)
+let rcu-fence = po ; rcu-order ; po?
+let fence = fence | rcu-fence
+let strong-fence = strong-fence | rcu-fence
(* rb orders instructions just as pb does *)
-let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb*
+let rb = prop ; rcu-fence ; hb* ; pb* ; [Marked]
irreflexive rb as rcu
@@ -143,3 +155,49 @@ irreflexive rb as rcu
* let xb = hb | pb | rb
* acyclic xb as executes-before
*)
+
+(*********************************)
+(* Plain accesses and data races *)
+(*********************************)
+
+(* Warn about plain writes and marked accesses in the same region *)
+let mixed-accesses = ([Plain & W] ; (po-loc \ barrier) ; [Marked]) |
+ ([Marked] ; (po-loc \ barrier) ; [Plain & W])
+flag ~empty mixed-accesses as mixed-accesses
+
+(* Executes-before and visibility *)
+let xbstar = (hb | pb | rb)*
+let vis = cumul-fence* ; rfe? ; [Marked] ;
+ ((strong-fence ; [Marked] ; xbstar) | (xbstar & int))
+
+(* Boundaries for lifetimes of plain accesses *)
+let w-pre-bounded = [Marked] ; (addr | fence)?
+let r-pre-bounded = [Marked] ; (addr | nonrw-fence |
+ ([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))?
+let w-post-bounded = fence? ; [Marked]
+let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ;
+ [Marked]
+
+(* Visibility and executes-before for plain accesses *)
+let ww-vis = fence | (strong-fence ; xbstar ; w-pre-bounded) |
+ (w-post-bounded ; vis ; w-pre-bounded)
+let wr-vis = fence | (strong-fence ; xbstar ; r-pre-bounded) |
+ (w-post-bounded ; vis ; r-pre-bounded)
+let rw-xbstar = fence | (r-post-bounded ; xbstar ; w-pre-bounded)
+
+(* Potential races *)
+let pre-race = ext & ((Plain * M) | ((M \ IW) * Plain))
+
+(* Coherence requirements for plain accesses *)
+let wr-incoh = pre-race & rf & rw-xbstar^-1
+let rw-incoh = pre-race & fr & wr-vis^-1
+let ww-incoh = pre-race & co & ww-vis^-1
+empty (wr-incoh | rw-incoh | ww-incoh) as plain-coherence
+
+(* Actual races *)
+let ww-nonrace = ww-vis & ((Marked * W) | rw-xbstar) & ((W * Marked) | wr-vis)
+let ww-race = (pre-race & co) \ ww-nonrace
+let wr-race = (pre-race & (co? ; rf)) \ wr-vis
+let rw-race = (pre-race & fr) \ rw-xbstar
+
+flag ~empty (ww-race | wr-race | rw-race) as data-race
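
With the Marked/Plain distinction in place, herd7 can now flag data races
directly. A hypothetical litmus test in the style of the files below (not
part of this patch), in which a plain write races with a marked read; the
wr-race/rw-race rules above should report it as data-race:

	C Plain-race-sketch

	(*
	 * Result: Sometimes
	 *
	 * P0's plain write to x races with P1's READ_ONCE() of x, so the
	 * data-race flag should fire.
	 *)

	{}

	P0(int *x)
	{
		*x = 1;
	}

	P1(int *x)
	{
		int r0;

		r0 = READ_ONCE(*x);
	}

	exists (1:r0=1)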
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 551eeaa389d4..ef0f3c1850de 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -24,6 +24,7 @@ smp_mb__before_atomic() { __fence{before-atomic}; }
smp_mb__after_atomic() { __fence{after-atomic}; }
smp_mb__after_spinlock() { __fence{after-spinlock}; }
smp_mb__after_unlock_lock() { __fence{after-unlock-lock}; }
+barrier() { __fence{barrier}; }
// Exchange
xchg(X,V) __xchg{mb}(X,V)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index b2b60b84fb9d..172f0145301c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -1,7 +1,7 @@
C MP+poonceonces
(*
- * Result: Maybe
+ * Result: Sometimes
*
* Can the counter-intuitive message-passing outcome be prevented with
* no ordering at all?
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 5ee08f129094..681f9067fa9e 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -244,7 +244,7 @@ produce the name:
Adding the ".litmus" suffix: SB+rfionceonce-poonceonces.litmus
The descriptors that describe connections between consecutive accesses
-within the cycle through a given litmus test can be provided by the herd
+within the cycle through a given litmus test can be provided by the herd7
tool (Rfi, Po, Fre, and so on) or by the linux-kernel.bell file (Once,
Release, Acquire, and so on).
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index a059d1a6d8a2..6b52f365d73a 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -11,7 +11,7 @@
include "cross.cat"
(*
- * The lock-related events generated by herd are as follows:
+ * The lock-related events generated by herd7 are as follows:
*
* LKR Lock-Read: the read part of a spin_lock() or successful
* spin_trylock() read-modify-write event pair
diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README
index 29375a1fbbfa..095c7eb36f9f 100644
--- a/tools/memory-model/scripts/README
+++ b/tools/memory-model/scripts/README
@@ -22,7 +22,7 @@ checklitmushist.sh
Run all litmus tests having .litmus.out files from previous
initlitmushist.sh or newlitmushist.sh runs, comparing the
- herd output to that of the original runs.
+ herd7 output to that of the original runs.
checklitmus.sh
@@ -43,7 +43,7 @@ initlitmushist.sh
judgelitmus.sh
- Given a .litmus file and its .litmus.out herd output, check the
+ Given a .litmus file and its .litmus.out herd7 output, check the
.litmus.out file against the .litmus file's "Result:" comment to
judge whether the test ran correctly. Not normally run manually,
provided instead for use by other scripts.
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
index b35fcd61ecf6..3c0c7fbbd223 100755
--- a/tools/memory-model/scripts/checkalllitmus.sh
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Run herd tests on all .litmus files in the litmus-tests directory
+# Run herd7 tests on all .litmus files in the litmus-tests directory
# and check each file's result against a "Result:" comment within that
# litmus test. If the verification result does not match that specified
# in the litmus test, this script prints an error message prefixed with
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
index dd08801a30b0..11461ed40b5e 100755
--- a/tools/memory-model/scripts/checklitmus.sh
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
-# Run a herd test and invokes judgelitmus.sh to check the result against
+# Run a herd7 test and invoke judgelitmus.sh to check the result against
# a "Result:" comment within the litmus test. It also outputs verification
# results to a file whose name is that of the specified litmus test, but
# with ".out" appended.
diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh
index 859e1d581e05..40f52080fdbd 100644
--- a/tools/memory-model/scripts/parseargs.sh
+++ b/tools/memory-model/scripts/parseargs.sh
@@ -91,7 +91,7 @@ do
shift
;;
--herdopts|--herdopt)
- checkarg --destdir "(herd options)" "$#" "$2" '.*' '^--'
+ checkarg --destdir "(herd7 options)" "$#" "$2" '.*' '^--'
LKMM_HERD_OPTIONS="$2"
shift
;;
diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh
index e507f5f933d5..6ed376f495bb 100644
--- a/tools/memory-model/scripts/runlitmushist.sh
+++ b/tools/memory-model/scripts/runlitmushist.sh
@@ -79,7 +79,7 @@ then
echo ' ---' Summary: 1>&2
grep '!!!' $T/*.sh.out 1>&2
nfail="`grep '!!!' $T/*.sh.out | wc -l`"
- echo 'Number of failed herd runs (e.g., timeout): ' $nfail 1>&2
+ echo 'Number of failed herd7 runs (e.g., timeout): ' $nfail 1>&2
exit 1
else
echo All runs completed successfully. 1>&2
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index fd280b070fdb..fed468fb0c78 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -7,6 +7,6 @@
#define rcu_dereference_raw(p) rcu_dereference(p)
#define rcu_dereference_protected(p, cond) rcu_dereference(p)
#define rcu_dereference_check(p, cond) rcu_dereference(p)
-#define RCU_INIT_POINTER(p, v) (p) = (v)
+#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif
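
The do { ... } while (0) wrapper gives this stub the same statement-only semantics as the kernel's real RCU_INIT_POINTER(): the bare assignment form is an expression, so it could silently be used as an rvalue. A sketch of the difference, with hypothetical macro names:

    #define INIT_EXPR(p, v) (p) = (v)
    #define INIT_STMT(p, v) do { (p) = (v); } while (0)

    void example(int **gp, int *newp)
    {
            int *q;

            q = INIT_EXPR(*gp, newp);  /* compiles, but is almost certainly a bug */
            INIT_STMT(*gp, newp);      /* statement only, as intended */
            /* q = INIT_STMT(*gp, newp); would fail to compile */
            (void)q;
    }
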
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index b38260e29775..241919ef1eac 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -146,6 +146,7 @@ int main(int argc, char *argv[])
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
free(state);
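
The extra vcpu_ioctl() is needed because kvm_vm_restart() plus vm_vcpu_add() recreates the vCPU, discarding per-vCPU capabilities such as the enlightened VMCS, so they must be re-enabled before vcpu_load_state(). In raw KVM API terms this is a KVM_ENABLE_CAP ioctl on the vCPU fd; a sketch only, with the capability-specific args[] left zeroed:

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    /* Re-enable a per-vCPU capability after the vCPU has been recreated. */
    static void reenable_cap(int vcpu_fd, __u32 cap_nr)
    {
            struct kvm_enable_cap cap = {
                    .cap = cap_nr,  /* e.g. KVM_CAP_HYPERV_ENLIGHTENED_VMCS */
                    /* .args[] semantics are capability-specific */
            };

            if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
                    perror("KVM_ENABLE_CAP");
    }
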
diff --git a/tools/testing/selftests/rcutorture/Makefile b/tools/testing/selftests/rcutorture/Makefile
new file mode 100644
index 000000000000..5202dc666206
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0+
+all:
+ ( cd ../../../..; tools/testing/selftests/rcutorture/bin/kvm.sh --duration 10 --configs TREE01 )
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 40359486b3a8..93e80a42249a 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Usage: configinit.sh config-spec-file build-output-dir results-dir
+# Usage: configinit.sh config-spec-file results-dir
#
# Create a .config file from the spec file. Run from the kernel source tree.
# Exits with 0 if all went well, with 1 if all went well but the config
@@ -11,10 +11,6 @@
# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
# this should be a full pathname.
#
-# The second argument is a optional path to a build output directory,
-# for example, "O=/tmp/foo". If this argument is omitted, the .config
-# file will be generated directly in the current directory.
-#
# Copyright (C) IBM Corporation, 2013
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -26,34 +22,23 @@ mkdir $T
# Capture config spec file.
c=$1
-buildloc=$2
-resdir=$3
-builddir=
-if echo $buildloc | grep -q '^O='
-then
- builddir=`echo $buildloc | sed -e 's/^O=//'`
- if test ! -d $builddir
- then
- mkdir $builddir
- fi
-else
- echo Bad build directory: \"$buildloc\"
- exit 2
-fi
+resdir=$2
sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
grep '^grep' < $T/u.sh > $T/upd.sh
echo "cat - $c" >> $T/upd.sh
-make mrproper
-make $buildloc distclean > $resdir/Make.distclean 2>&1
-make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
-mv $builddir/.config $builddir/.config.sav
-sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
-cp $builddir/.config $builddir/.config.new
-yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+if test -z "$TORTURE_TRUST_MAKE"
+then
+ make clean > $resdir/Make.clean 2>&1
+fi
+make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+mv .config .config.sav
+sh $T/upd.sh < .config.sav > .config
+cp .config .config.new
+yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
# verify new config matches specification.
-configcheck.sh $builddir/.config $c
+configcheck.sh .config $c
exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index ff7102212703..4e9485590c10 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -9,6 +9,11 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+if test -n "$TORTURE_ALLOTED_CPUS"
+then
+ echo $TORTURE_ALLOTED_CPUS
+ exit 0
+fi
ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
idlecpus=`mpstat | tail -1 | \
awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 6bcb8b5b2ff2..c3a49fb4d6f6 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -172,7 +172,7 @@ identify_qemu_append () {
local console=ttyS0
case "$1" in
qemu-system-x86_64|qemu-system-i386)
- echo noapic selinux=0 initcall_debug debug
+ echo selinux=0 initcall_debug debug
;;
qemu-system-aarch64)
console=ttyAMA0
@@ -191,8 +191,19 @@ identify_qemu_append () {
# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
# and TORTURE_QEMU_INTERACTIVE environment variables.
identify_qemu_args () {
+ local KVM_CPU=""
+ case "$1" in
+ qemu-system-x86_64)
+ KVM_CPU=kvm64
+ ;;
+ qemu-system-i386)
+ KVM_CPU=kvm32
+ ;;
+ esac
case "$1" in
qemu-system-x86_64|qemu-system-i386)
+ echo -machine q35,accel=kvm
+ echo -cpu ${KVM_CPU}
;;
qemu-system-aarch64)
echo -machine virt,gic-version=host -cpu host
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 435b60933985..dc49a3ba6111 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -34,10 +34,15 @@ do
exit 0;
fi
- # Set affinity to randomly selected CPU
- cpus=`ls /sys/devices/system/cpu/*/online |
- sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
- grep -v '^0*$'`
+ # Set affinity to randomly selected online CPU
+ cpus=`grep 1 /sys/devices/system/cpu/*/online |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
+
+ # Do not leave out poor old cpu0 which may not be hot-pluggable
+ if [ ! -f "/sys/devices/system/cpu/cpu0/online" ]; then
+ cpus="0 $cpus"
+ fi
+
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index c27a0bbb9c02..18d6518504ee 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -3,7 +3,7 @@
#
# Build a kvm-ready Linux kernel from the tree in the current directory.
#
-# Usage: kvm-build.sh config-template build-dir resdir
+# Usage: kvm-build.sh config-template resdir
#
# Copyright (C) IBM Corporation, 2011
#
@@ -15,8 +15,7 @@ then
echo "kvm-build.sh :$config_template: Not a readable file"
exit 1
fi
-builddir=${2}
-resdir=${3}
+resdir=${2}
T=${TMPDIR-/tmp}/test-linux.sh.$$
trap 'rm -rf $T' 0
@@ -29,14 +28,14 @@ CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_CONSOLE=y
___EOF___
-configinit.sh $T/config O=$builddir $resdir
+configinit.sh $T/config $resdir
retval=$?
if test $retval -gt 1
then
exit 2
fi
ncpus=`cpus2use.sh`
-make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 8426fe1f15ee..1871d00bccd7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -11,6 +11,7 @@
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+# Returns an exit status reflecting the success (or not) of the specified run.
#
# Copyright (C) IBM Corporation, 2018
#
@@ -56,6 +57,8 @@ done
if test -n "$files"
then
$editor $files
+ exit 1
else
echo No errors in console logs.
+ exit 0
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 2adde6aaafdb..e5edd5198725 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -7,6 +7,8 @@
#
# Usage: kvm-recheck.sh resdir ...
#
+# Returns a status reflecting the success (or not) of the last run specified.
+#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@@ -28,8 +30,16 @@ do
TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
rm -f $i/console.log.*.diags
kvm-recheck-${TORTURE_SUITE}.sh $i
- if test -f "$i/console.log"
+ if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
+ then
+ echo QEMU error, output:
+ cat $i/qemu-output
+ elif test -f "$i/console.log"
then
+ if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137
+ then
+ echo QEMU killed
+ fi
configcheck.sh $i/.config $i/ConfigFragment
if test -r $i/Make.oldconfig.err
then
@@ -58,3 +68,4 @@ do
fi
done
done
+EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0eb1ec16d78a..27b7b5693ede 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -36,11 +36,6 @@ config_template=${1}
config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
title=`echo $config_template | sed -e 's/^.*\///'`
builddir=${2}
-if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
-then
- echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
- exit 1
-fi
resdir=${3}
if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
then
@@ -85,18 +80,18 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $builddir $resdir
+elif kvm-build.sh $T/Kc2 $resdir
then
# Had to build a kernel for this test.
- QEMU="`identify_qemu $builddir/vmlinux`"
+ QEMU="`identify_qemu vmlinux`"
BOOT_IMAGE="`identify_boot_image $QEMU`"
- cp $builddir/vmlinux $resdir
- cp $builddir/.config $resdir
- cp $builddir/Module.symvers $resdir > /dev/null || :
- cp $builddir/System.map $resdir > /dev/null || :
+ cp vmlinux $resdir
+ cp .config $resdir
+ cp Module.symvers $resdir > /dev/null || :
+ cp System.map $resdir > /dev/null || :
if test -n "$BOOT_IMAGE"
then
- cp $builddir/$BOOT_IMAGE $resdir
+ cp $BOOT_IMAGE $resdir
KERNEL=$resdir/${BOOT_IMAGE##*/}
# Arch-independent indicator
touch $resdir/builtkernel
@@ -107,7 +102,7 @@ then
parse-build.sh $resdir/Make.out $title
else
# Build failed.
- cp $builddir/.config $resdir || :
+ cp .config $resdir || :
echo Build failed, not running KVM, see $resdir.
if test -f $builddir.wait
then
@@ -165,7 +160,7 @@ then
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu_pid"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 8f1e337b9b54..72518580df23 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -24,6 +24,7 @@ dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
+TORTURE_ALLOTED_CPUS=""
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
@@ -32,6 +33,7 @@ TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
+TORTURE_TRUST_MAKE=""
resdir=""
configs=""
cpus=0
@@ -62,6 +64,7 @@ usage () {
echo " --qemu-cmd qemu-system-..."
echo " --results absolute-pathname"
echo " --torture rcu"
+ echo " --trust-make"
exit 1
}
@@ -89,6 +92,7 @@ do
--cpus)
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
+ TORTURE_ALLOTED_CPUS="$2"
shift
;;
--datestamp)
@@ -173,6 +177,9 @@ do
jitter=0
fi
;;
+ --trust-make)
+ TORTURE_TRUST_MAKE="y"
+ ;;
*)
echo Unknown argument $1
usage
@@ -285,6 +292,7 @@ cat << ___EOF___ > $T/script
CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
KVM="$KVM"; export KVM
PATH="$PATH"; export PATH
+TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
@@ -297,6 +305,7 @@ TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
+TORTURE_TRUST_MAKE="$TORTURE_TRUST_MAKE"; export TORTURE_TRUST_MAKE
if ! test -e $resdir
then
mkdir -p "$resdir" || :
@@ -342,7 +351,7 @@ function dump(first, pastlast, batchnum)
print "needqemurun="
jn=1
for (j = first; j < pastlast; j++) {
- builddir=KVM "/b1"
+ builddir=KVM "/b" j - first + 1
cpusr[jn] = cpus[j];
if (cfrep[cf[j]] == "") {
cfr[jn] = cf[j];
@@ -358,7 +367,6 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "rm -f " builddir ".*";
print "touch " builddir ".wait";
- print "mkdir " builddir " > /dev/null 2>&1 || :";
print "mkdir " rd cfr[jn] " || :";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
@@ -464,3 +472,5 @@ else
fi
# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
+# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
+# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 0701b3bf6ade..09155c15ea65 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -21,7 +21,7 @@ mkdir $T
. functions.sh
-if grep -q CC < $F
+if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE"
then
:
else
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4508373a922f..4bf62d7b1cbc 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -106,6 +106,7 @@ fi | tee -a $file.diags
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
grep -v 'ODEBUG: ' |
+grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' > $T.diags
if test -s $T.diags
then
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index d2d2a86139db..e19a444a0684 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,2 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index ea47da95374b..d6da9a61d44a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcu_nocbs=0
+rcutorture.fwd_progress=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
new file mode 100644
index 000000000000..4d8eb5bfb6f6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL
@@ -0,0 +1,14 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
new file mode 100644
index 000000000000..7017f5f5a55f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL.boot
@@ -0,0 +1,3 @@
+rcutorture.torture_type=trivial
+rcutorture.onoff_interval=0
+rcutorture.shuffle_interval=0
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
index 8cd10662ffba..4b76450d78d1 100644
--- a/tools/testing/selftests/timers/freq-step.c
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -21,9 +21,9 @@
#define SAMPLE_READINGS 10
#define MEAN_SAMPLE_INTERVAL 0.1
#define STEP_INTERVAL 1.0
-#define MAX_PRECISION 100e-9
-#define MAX_FREQ_ERROR 10e-6
-#define MAX_STDDEV 1000e-9
+#define MAX_PRECISION 500e-9
+#define MAX_FREQ_ERROR 0.02e-6
+#define MAX_STDDEV 50e-9
#ifndef ADJ_SETOFFSET
#define ADJ_SETOFFSET 0x0100
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 186520198de7..fa07d526fe39 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,8 +12,9 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
- protection_keys test_vdso test_vsyscall mov_ss_trap
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+ protection_keys test_vdso test_vsyscall mov_ss_trap \
+ syscall_arg_fault
+TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index af85bd4752a5..5ab4c60c100e 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -23,6 +23,10 @@
#include <pthread.h>
#include <asm/ldt.h>
#include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <setjmp.h>
#ifndef __x86_64__
# error This test is 64-bit only
@@ -31,6 +35,8 @@
static volatile sig_atomic_t want_segv;
static volatile unsigned long segv_addr;
+static unsigned short *shared_scratch;
+
static int nerrs;
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
@@ -71,6 +77,43 @@ static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
}
+static jmp_buf jmpbuf;
+
+static void sigill(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static bool have_fsgsbase;
+
+static inline unsigned long rdgsbase(void)
+{
+ unsigned long gsbase;
+
+ asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
+
+ return gsbase;
+}
+
+static inline unsigned long rdfsbase(void)
+{
+ unsigned long fsbase;
+
+ asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
+
+ return fsbase;
+}
+
+static inline void wrgsbase(unsigned long gsbase)
+{
+ asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
+}
+
+static inline void wrfsbase(unsigned long fsbase)
+{
+ asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
+}
+
enum which_base { FS, GS };
static unsigned long read_base(enum which_base which)
@@ -199,16 +242,13 @@ static void do_remote_base()
to_set, hard_zero ? " and clear gs" : "", sel);
}
-void do_unexpected_base(void)
+static __thread int set_thread_area_entry_number = -1;
+
+static unsigned short load_gs(void)
{
/*
- * The goal here is to try to arrange for GS == 0, GSBASE !=
- * 0, and for the the kernel the think that GSBASE == 0.
- *
- * To make the test as reliable as possible, this uses
- * explicit descriptorss. (This is not the only way. This
- * could use ARCH_SET_GS with a low, nonzero base, but the
- * relevant side effect of ARCH_SET_GS could change.)
+ * Sets GS != 0 and GSBASE != 0 but arranges for the kernel to think
+ * that GSBASE == 0 (i.e. thread.gsbase == 0).
*/
/* Step 1: tell the kernel that we have GSBASE == 0. */
@@ -228,8 +268,9 @@ void do_unexpected_base(void)
.useable = 0
};
if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
- printf("\tother thread: using LDT slot 0\n");
+ printf("\tusing LDT slot 0\n");
asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+ return 0x7;
} else {
/* No modify_ldt for us (configured out, perhaps) */
@@ -239,7 +280,7 @@ void do_unexpected_base(void)
MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
memcpy(low_desc, &desc, sizeof(desc));
- low_desc->entry_number = -1;
+ low_desc->entry_number = set_thread_area_entry_number;
/* 32-bit set_thread_area */
long ret;
@@ -251,18 +292,43 @@ void do_unexpected_base(void)
if (ret != 0) {
printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
- return;
+ return 0;
}
- printf("\tother thread: using GDT slot %d\n", desc.entry_number);
- asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+ printf("\tusing GDT slot %d\n", desc.entry_number);
+ set_thread_area_entry_number = desc.entry_number;
+
+ unsigned short gs = (unsigned short)((desc.entry_number << 3) | 0x3);
+ asm volatile ("mov %0, %%gs" : : "rm" (gs));
+ return gs;
}
+}
- /*
- * Step 3: set the selector back to zero. On AMD chips, this will
- * preserve GSBASE.
- */
+void test_wrbase(unsigned short index, unsigned long base)
+{
+ unsigned short newindex;
+ unsigned long newbase;
- asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+ printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
+
+ asm volatile ("mov %0, %%gs" : : "rm" (index));
+ wrgsbase(base);
+
+ remote_base = 0;
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ asm volatile ("mov %%gs, %0" : "=rm" (newindex));
+ newbase = rdgsbase();
+
+ if (newindex == index && newbase == base) {
+ printf("[OK]\tIndex and base were preserved\n");
+ } else {
+ printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
+ newindex, newbase);
+ nerrs++;
+ }
}
static void *threadproc(void *ctx)
@@ -273,12 +339,19 @@ static void *threadproc(void *ctx)
if (ftx == 3)
return NULL;
- if (ftx == 1)
+ if (ftx == 1) {
do_remote_base();
- else if (ftx == 2)
- do_unexpected_base();
- else
+ } else if (ftx == 2) {
+ /*
+ * On AMD chips, this causes GSBASE != 0, GS == 0, and
+ * thread.gsbase == 0.
+ */
+
+ load_gs();
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+ } else {
errx(1, "helper thread got bad command");
+ }
ftx = 0;
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
@@ -367,10 +440,99 @@ static void test_unexpected_base(void)
}
}
+#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+
+static void test_ptrace_write_gsbase(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
+
+ *shared_scratch = load_gs();
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long gs, base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
+
+ if (gs != *shared_scratch) {
+ nerrs++;
+ printf("[FAIL]\tGS is not prepared with nonzero\n");
+ goto END;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ /*
+ * In a non-FSGSBASE system, the nonzero selector will load
+ * GSBASE (again). But what is tested here is whether the
+ * selector value is changed or not by the GSBASE write in
+ * a ptracer.
+ */
+ if (gs != *shared_scratch) {
+ nerrs++;
+ printf("[FAIL]\tGS changed to %lx\n", gs);
+
+ /*
+ * On older kernels, poking a nonzero value into the
+ * base would zero the selector. On newer kernels,
+ * this behavior has changed -- poking the base
+ * changes only the base and, if FSGSBASE is not
+ * available, this may have no effect.
+ */
+ if (gs == 0)
+ printf("\tNote: this is expected behavior on older kernels.\n");
+ } else if (have_fsgsbase && (base != 0xFF)) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ } else {
+ printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
+ printf("\n");
+ }
+ }
+
+END:
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+}
+
int main()
{
pthread_t thread;
+ shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+
+ /* Probe FSGSBASE */
+ sethandler(SIGILL, sigill, 0);
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ rdfsbase();
+ have_fsgsbase = true;
+ printf("\tFSGSBASE instructions are enabled\n");
+ } else {
+ printf("\tFSGSBASE instructions are disabled\n");
+ }
+ clearhandler(SIGILL);
+
sethandler(SIGSEGV, sigsegv, 0);
check_gs_value(0);
@@ -417,11 +579,28 @@ int main()
test_unexpected_base();
+ if (have_fsgsbase) {
+ unsigned short ss;
+
+ asm volatile ("mov %%ss, %0" : "=rm" (ss));
+
+ test_wrbase(0, 0);
+ test_wrbase(0, 1);
+ test_wrbase(0, 0x200000000);
+ test_wrbase(0, 0xffffffffffffffff);
+ test_wrbase(ss, 0);
+ test_wrbase(ss, 1);
+ test_wrbase(ss, 0x200000000);
+ test_wrbase(ss, 0xffffffffffffffff);
+ }
+
ftx = 3; /* Kill the thread. */
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
if (pthread_join(thread, NULL) != 0)
err(1, "pthread_join");
+ test_ptrace_write_gsbase();
+
return nerrs == 0 ? 0 : 1;
}
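
The SIGILL/sigsetjmp probe above is deliberate: CPUID only advertises that the CPU implements FSGSBASE, while executing RDFSBASE additionally requires the kernel to have set CR4.FSGSBASE. For contrast, the hardware-only check would look like this sketch (using GCC's <cpuid.h>):

    #include <cpuid.h>
    #include <stdbool.h>

    /* CPUID.(EAX=7, ECX=0):EBX bit 0 advertises FSGSBASE in hardware. */
    static bool cpu_has_fsgsbase(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                    return false;
            return ebx & 1;
    }
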
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 4e25d38c8bbd..bc0ecc2e862e 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,9 +15,30 @@
#include <setjmp.h>
#include <errno.h>
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
/* Our sigaltstack scratch space. */
static unsigned char altstack_data[SIGSTKSZ];
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -35,13 +56,22 @@ static sigjmp_buf jmpbuf;
static volatile sig_atomic_t n_errs;
+#ifdef __x86_64__
+#define REG_AX REG_RAX
+#define REG_IP REG_RIP
+#else
+#define REG_AX REG_EAX
+#define REG_IP REG_EIP
+#endif
+
static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
+ long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
- if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
- printf("[FAIL]\tAX had the wrong value: 0x%x\n",
- ctx->uc_mcontext.gregs[REG_EAX]);
+ if (ax != -EFAULT && ax != -ENOSYS) {
+ printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
+ (unsigned long)ax);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
@@ -50,9 +80,42 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
siglongjmp(jmpbuf, 1);
}
+static volatile sig_atomic_t sigtrap_consecutive_syscalls;
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ /*
+ * KVM has some bugs that can cause us to stop making progress.
+ * Detect them and complain, but don't loop forever or fail the
+ * test.
+ */
+
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+ unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+ if (*ip == 0x340f || *ip == 0x050f) {
+ /* The trap was on SYSCALL or SYSENTER */
+ sigtrap_consecutive_syscalls++;
+ if (sigtrap_consecutive_syscalls > 3) {
+ printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
+ siglongjmp(jmpbuf, 1);
+ }
+ } else {
+ sigtrap_consecutive_syscalls = 0;
+ }
+}
+
static void sigill(int sig, siginfo_t *info, void *ctx_void)
{
- printf("[SKIP]\tIllegal instruction\n");
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+ unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
+
+ if (*ip == 0x0b0f) {
+ /* one of the ud2 instructions faulted */
+ printf("[OK]\tSYSCALL returned normally\n");
+ } else {
+ printf("[SKIP]\tIllegal instruction\n");
+ }
siglongjmp(jmpbuf, 1);
}
@@ -120,9 +183,48 @@ int main()
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"syscall\n\t"
- "pushl $0" /* make sure we segfault cleanly */
+ "ud2" /* make sure we recover cleanly */
+ : : : "memory", "flags");
+ }
+
+ printf("[RUN]\tSYSENTER with TF and invalid state\n");
+ sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ printf("[RUN]\tSYSCALL with TF and invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "syscall\n\t"
+ "ud2" /* make sure we recover cleanly */
: : : "memory", "flags");
}
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
return 0;
}
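
The relaxed AX check (accepting -ENOSYS as well as -EFAULT) reflects that, depending on the entry path, the kernel may fail to fetch arguments (-EFAULT) or refuse to dispatch the call at all (-ENOSYS). The same return convention is visible from ordinary C through errno, since libc turns a raw negative value in AX into -1 plus errno; a sketch using a syscall that is not vDSO-accelerated:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            /* uname() with a bad pointer: the kernel returns -EFAULT in AX,
             * which the syscall(2) wrapper converts to -1 with errno set. */
            long ret = syscall(SYS_uname, (void *)-1);

            printf("ret = %ld, errno = %d (EFAULT = %d)\n", ret, errno, EFAULT);
            return 0;
    }
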
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 0b4f1cc2291c..4602326b8f5b 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -18,6 +18,7 @@
#include <sched.h>
#include <stdbool.h>
#include <setjmp.h>
+#include <sys/uio.h>
#ifdef __x86_64__
# define VSYS(x) (x)
@@ -49,21 +50,21 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
}
/* vsyscalls and vDSO */
-bool should_read_vsyscall = false;
+bool vsyscall_map_r = false, vsyscall_map_x = false;
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
-gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+const gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
gtod_t vdso_gtod;
typedef int (*vgettime_t)(clockid_t, struct timespec *);
vgettime_t vdso_gettime;
typedef long (*time_func_t)(time_t *t);
-time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+const time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
time_func_t vdso_time;
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
getcpu_t vdso_getcpu;
static void init_vdso(void)
@@ -107,7 +108,7 @@ static int init_vsys(void)
maps = fopen("/proc/self/maps", "r");
if (!maps) {
printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
- should_read_vsyscall = true;
+ vsyscall_map_r = true;
return 0;
}
@@ -133,12 +134,8 @@ static int init_vsys(void)
}
printf("\tvsyscall permissions are %c-%c\n", r, x);
- should_read_vsyscall = (r == 'r');
- if (x != 'x') {
- vgtod = NULL;
- vtime = NULL;
- vgetcpu = NULL;
- }
+ vsyscall_map_r = (r == 'r');
+ vsyscall_map_x = (x == 'x');
found = true;
break;
@@ -148,10 +145,8 @@ static int init_vsys(void)
if (!found) {
printf("\tno vsyscall map in /proc/self/maps\n");
- should_read_vsyscall = false;
- vgtod = NULL;
- vtime = NULL;
- vgetcpu = NULL;
+ vsyscall_map_r = false;
+ vsyscall_map_x = false;
}
return nerrs;
@@ -183,9 +178,13 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node,
}
static jmp_buf jmpbuf;
+static volatile unsigned long segv_err;
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+
+ segv_err = ctx->uc_mcontext.gregs[REG_ERR];
siglongjmp(jmpbuf, 1);
}
@@ -238,7 +237,7 @@ static int test_gtod(void)
err(1, "syscall gettimeofday");
if (vdso_gtod)
ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
- if (vgtod)
+ if (vsyscall_map_x)
ret_vsys = vgtod(&tv_vsys, &tz_vsys);
if (sys_gtod(&tv_sys2, &tz_sys) != 0)
err(1, "syscall gettimeofday");
@@ -252,7 +251,7 @@ static int test_gtod(void)
}
}
- if (vgtod) {
+ if (vsyscall_map_x) {
if (ret_vsys == 0) {
nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
} else {
@@ -273,7 +272,7 @@ static int test_time(void) {
t_sys1 = sys_time(&t2_sys1);
if (vdso_time)
t_vdso = vdso_time(&t2_vdso);
- if (vtime)
+ if (vsyscall_map_x)
t_vsys = vtime(&t2_vsys);
t_sys2 = sys_time(&t2_sys2);
if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
@@ -294,7 +293,7 @@ static int test_time(void) {
}
}
- if (vtime) {
+ if (vsyscall_map_x) {
if (t_vsys < 0 || t_vsys != t2_vsys) {
printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
nerrs++;
@@ -330,7 +329,7 @@ static int test_getcpu(int cpu)
ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
if (vdso_getcpu)
ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
- if (vgetcpu)
+ if (vsyscall_map_x)
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
if (ret_sys == 0) {
@@ -369,7 +368,7 @@ static int test_getcpu(int cpu)
}
}
- if (vgetcpu) {
+ if (vsyscall_map_x) {
if (ret_vsys) {
printf("[FAIL]\tvsyscall getcpu() failed\n");
nerrs++;
@@ -410,20 +409,88 @@ static int test_vsys_r(void)
can_read = false;
}
- if (can_read && !should_read_vsyscall) {
+ if (can_read && !vsyscall_map_r) {
printf("[FAIL]\tWe have read access, but we shouldn't\n");
return 1;
- } else if (!can_read && should_read_vsyscall) {
+ } else if (!can_read && vsyscall_map_r) {
printf("[FAIL]\tWe don't have read access, but we should\n");
return 1;
+ } else if (can_read) {
+ printf("[OK]\tWe have read access\n");
} else {
- printf("[OK]\tgot expected result\n");
+ printf("[OK]\tWe do not have read access: #PF(0x%lx)\n",
+ segv_err);
}
#endif
return 0;
}
+static int test_vsys_x(void)
+{
+#ifdef __x86_64__
+ if (vsyscall_map_x) {
+ /* We already tested this adequately. */
+ return 0;
+ }
+
+ printf("[RUN]\tMake sure that vsyscalls really page fault\n");
+
+ bool can_exec;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ vgtod(NULL, NULL);
+ can_exec = true;
+ } else {
+ can_exec = false;
+ }
+
+ if (can_exec) {
+ printf("[FAIL]\tExecuting the vsyscall did not page fault\n");
+ return 1;
+ } else if (segv_err & (1 << 4)) { /* INSTR */
+ printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n",
+ segv_err);
+ } else {
+ printf("[FAILT]\tExecution failed with the wrong error: #PF(0x%lx)\n",
+ segv_err);
+ return 1;
+ }
+#endif
+
+ return 0;
+}
+
+static int test_process_vm_readv(void)
+{
+#ifdef __x86_64__
+ char buf[4096];
+ struct iovec local, remote;
+ int ret;
+
+ printf("[RUN]\tprocess_vm_readv() from vsyscall page\n");
+
+ local.iov_base = buf;
+ local.iov_len = 4096;
+ remote.iov_base = (void *)0xffffffffff600000;
+ remote.iov_len = 4096;
+ ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
+ if (ret != 4096) {
+ printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
+ return 0;
+ }
+
+ if (vsyscall_map_r) {
+ if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+ printf("[OK]\tIt worked and read correct data\n");
+ } else {
+ printf("[FAIL]\tIt worked but returned incorrect data\n");
+ return 1;
+ }
+ }
+#endif
+
+ return 0;
+}
#ifdef __x86_64__
#define X86_EFLAGS_TF (1UL << 8)
@@ -455,7 +522,7 @@ static int test_emulation(void)
time_t tmp;
bool is_native;
- if (!vtime)
+ if (!vsyscall_map_x)
return 0;
printf("[RUN]\tchecking that vsyscalls are emulated\n");
@@ -497,6 +564,9 @@ int main(int argc, char **argv)
sethandler(SIGSEGV, sigsegv, 0);
nerrs += test_vsys_r();
+ nerrs += test_vsys_x();
+
+ nerrs += test_process_vm_readv();
#ifdef __x86_64__
nerrs += test_emulation();