summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/Design/Requirements/2013-08-is-it-dead.pngbin0 -> 100825 bytes
-rw-r--r--Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg374
-rw-r--r--Documentation/RCU/Design/Requirements/RCUApplicability.svg237
-rw-r--r--Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg639
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html2897
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.htmlx2741
-rwxr-xr-xDocumentation/RCU/Design/htmlqqz.sh108
-rw-r--r--Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt4
-rw-r--r--Documentation/devicetree/bindings/ata/sata_rcar.txt1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt (renamed from Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt)2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt74
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt16
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt (renamed from Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt)9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt80
-rw-r--r--Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt110
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt9
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt199
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt3
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt1
-rw-r--r--Documentation/filesystems/Locking6
-rw-r--r--Documentation/filesystems/porting17
-rw-r--r--Documentation/filesystems/vfs.txt21
-rw-r--r--Documentation/kernel-parameters.txt47
-rw-r--r--Documentation/memory-barriers.txt12
-rw-r--r--Documentation/sysctl/kernel.txt15
-rw-r--r--Documentation/trace/events-msr.txt37
-rw-r--r--Documentation/trace/postprocess/decode_msr.py37
-rw-r--r--MAINTAINERS10
-rw-r--r--Makefile2
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/boot/dts/ste-nomadik-stn8815.dtsi6
-rw-r--r--arch/arm/boot/dts/versatile-ab.dts10
-rw-r--r--arch/arm/boot/dts/versatile-pb.dts20
-rw-r--r--arch/arm/boot/dts/wm8650.dtsi9
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/sunxi_defconfig1
-rw-r--r--arch/arm/mach-exynos/Kconfig1
-rw-r--r--arch/arm/mach-omap2/gpmc-onenand.c14
-rw-r--r--arch/arm/mach-sti/Kconfig1
-rw-r--r--arch/arm/mach-ux500/Kconfig1
-rw-r--r--arch/arm/net/bpf_jit_32.c19
-rw-r--r--arch/blackfin/include/asm/cmpxchg.h1
-rw-r--r--arch/c6x/include/asm/cmpxchg.h2
-rw-r--r--arch/frv/include/asm/cmpxchg.h2
-rw-r--r--arch/h8300/Kconfig1
-rw-r--r--arch/h8300/include/asm/io.h39
-rw-r--r--arch/h8300/kernel/setup.c8
-rw-r--r--arch/ia64/include/asm/barrier.h2
-rw-r--r--arch/ia64/include/asm/percpu.h2
-rw-r--r--arch/mips/net/bpf_jit.c16
-rw-r--r--arch/powerpc/include/asm/barrier.h2
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c13
-rw-r--r--arch/s390/include/asm/barrier.h2
-rw-r--r--arch/sparc/net/bpf_jit_comp.c17
-rw-r--r--arch/tile/Kconfig11
-rw-r--r--arch/tile/include/asm/cmpxchg.h2
-rw-r--r--arch/tile/include/asm/page.h8
-rw-r--r--arch/x86/Kconfig19
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/crypto/chacha20_glue.c2
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c2
-rw-r--r--arch/x86/entry/calling.h15
-rw-r--r--arch/x86/entry/common.c6
-rw-r--r--arch/x86/entry/entry_32.S15
-rw-r--r--arch/x86/entry/entry_64.S8
-rw-r--r--arch/x86/entry/entry_64_compat.S40
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c151
-rw-r--r--arch/x86/entry/vdso/vdso-layout.lds.S3
-rw-r--r--arch/x86/entry/vdso/vdso2c.c3
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S54
-rw-r--r--arch/x86/entry/vdso/vma.c14
-rw-r--r--arch/x86/include/asm/apic.h6
-rw-r--r--arch/x86/include/asm/atomic.h1
-rw-r--r--arch/x86/include/asm/atomic64_32.h1
-rw-r--r--arch/x86/include/asm/calgary.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h2
-rw-r--r--arch/x86/include/asm/cpu.h3
-rw-r--r--arch/x86/include/asm/cpufeature.h116
-rw-r--r--arch/x86/include/asm/fixmap.h5
-rw-r--r--arch/x86/include/asm/fpu/internal.h173
-rw-r--r--arch/x86/include/asm/intel_pt.h10
-rw-r--r--arch/x86/include/asm/ipi.h2
-rw-r--r--arch/x86/include/asm/jump_label.h63
-rw-r--r--arch/x86/include/asm/microcode.h39
-rw-r--r--arch/x86/include/asm/msi.h6
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/msr-trace.h57
-rw-r--r--arch/x86/include/asm/msr.h43
-rw-r--r--arch/x86/include/asm/page_types.h6
-rw-r--r--arch/x86/include/asm/paravirt.h44
-rw-r--r--arch/x86/include/asm/paravirt_types.h40
-rw-r--r--arch/x86/include/asm/pgtable.h15
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/pvclock.h14
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h59
-rw-r--r--arch/x86/include/asm/reboot.h1
-rw-r--r--arch/x86/include/asm/smp.h12
-rw-r--r--arch/x86/include/asm/suspend_32.h1
-rw-r--r--arch/x86/include/asm/suspend_64.h1
-rw-r--r--arch/x86/include/asm/uaccess.h9
-rw-r--r--arch/x86/include/asm/vdso.h1
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xor_32.h2
-rw-r--r--arch/x86/include/uapi/asm/mce.h2
-rw-r--r--arch/x86/kernel/apic/apic.c45
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c19
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c7
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c10
-rw-r--r--arch/x86/kernel/apic/ipi.c18
-rw-r--r--arch/x86/kernel/apic/msi.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c9
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c9
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/common.c65
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c93
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c12
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c16
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c11
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c36
-rw-r--r--arch/x86/kernel/cpu/perf_event.h21
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_uncore.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c115
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c39
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c42
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_pt.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c25
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c17
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c635
-rw-r--r--arch/x86/kernel/cpu/rdrand.c25
-rw-r--r--arch/x86/kernel/cpu/scattered.c20
-rw-r--r--arch/x86/kernel/cpu/transmeta.c4
-rw-r--r--arch/x86/kernel/crash.c11
-rw-r--r--arch/x86/kernel/fpu/init.c21
-rw-r--r--arch/x86/kernel/fpu/xstate.c4
-rw-r--r--arch/x86/kernel/hw_breakpoint.c6
-rw-r--r--arch/x86/kernel/kvmclock.c11
-rw-r--r--arch/x86/kernel/nmi.c32
-rw-r--r--arch/x86/kernel/paravirt.c36
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c3
-rw-r--r--arch/x86/kernel/pci-calgary_64.c4
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/ptrace.c15
-rw-r--r--arch/x86/kernel/pvclock.c24
-rw-r--r--arch/x86/kernel/reboot.c30
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smp.c4
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/vm86_32.c4
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/cpuid.h34
-rw-r--r--arch/x86/kvm/i8254.c1
-rw-r--r--arch/x86/kvm/svm.c17
-rw-r--r--arch/x86/kvm/x86.c3
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/cpu.c35
-rw-r--r--arch/x86/lib/msr.c26
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/debug_pagetables.c46
-rw-r--r--arch/x86/mm/dump_pagetables.c34
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/pageattr.c10
-rw-r--r--arch/x86/mm/pat.c12
-rw-r--r--arch/x86/mm/pat_rbtree.c52
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/setup_nx.c4
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c1
-rw-r--r--arch/x86/power/cpu.c92
-rw-r--r--arch/x86/xen/enlighten.c18
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/xen-asm_32.S14
-rw-r--r--arch/x86/xen/xen-asm_64.S19
-rw-r--r--arch/x86/xen/xen-ops.h3
-rw-r--r--block/blk-merge.c2
-rw-r--r--crypto/async_tx/async_memcpy.c2
-rw-r--r--crypto/async_tx/async_pq.c4
-rw-r--r--crypto/async_tx/async_raid6_recov.c4
-rw-r--r--crypto/async_tx/async_xor.c4
-rw-r--r--drivers/acpi/device_sysfs.c2
-rw-r--r--drivers/ata/Kconfig2
-rw-r--r--drivers/ata/ahci.c67
-rw-r--r--drivers/ata/ahci.h27
-rw-r--r--drivers/ata/ahci_brcmstb.c61
-rw-r--r--drivers/ata/ahci_qoriq.c31
-rw-r--r--drivers/ata/libahci.c46
-rw-r--r--drivers/ata/libata-core.c20
-rw-r--r--drivers/ata/sata_rcar.c19
-rw-r--r--drivers/ata/sata_sx4.c2
-rw-r--r--drivers/base/platform-msi.c256
-rw-r--r--drivers/base/platform.c20
-rw-r--r--drivers/char/hw_random/via-rng.c5
-rw-r--r--drivers/clocksource/Kconfig132
-rw-r--r--drivers/clocksource/Makefile2
-rw-r--r--drivers/clocksource/acpi_pm.c27
-rw-r--r--drivers/clocksource/arm_global_timer.c21
-rw-r--r--drivers/clocksource/dw_apb_timer.c46
-rw-r--r--drivers/clocksource/dw_apb_timer_of.c16
-rw-r--r--drivers/clocksource/h8300_timer16.c222
-rw-r--r--drivers/clocksource/h8300_timer8.c264
-rw-r--r--drivers/clocksource/h8300_tpu.c159
-rw-r--r--drivers/clocksource/mtk_timer.c20
-rw-r--r--drivers/clocksource/rockchip_timer.c23
-rw-r--r--drivers/clocksource/tango_xtal.c18
-rw-r--r--drivers/clocksource/tegra20_timer.c3
-rw-r--r--drivers/clocksource/time-lpc32xx.c4
-rw-r--r--drivers/clocksource/time-pistachio.c2
-rw-r--r--drivers/clocksource/timer-sun5i.c16
-rw-r--r--drivers/clocksource/vt8500_timer.c1
-rw-r--r--drivers/connector/connector.c11
-rw-r--r--drivers/crypto/padlock-aes.c2
-rw-r--r--drivers/crypto/padlock-sha.c2
-rw-r--r--drivers/dma/mic_x100_dma.c15
-rw-r--r--drivers/dma/xgene-dma.c4
-rw-r--r--drivers/firmware/dmi_scan.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c1
-rw-r--r--drivers/iommu/dma-iommu.c11
-rw-r--r--drivers/iommu/intel_irq_remapping.c2
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/irqchip/Kconfig19
-rw-r--r--drivers/irqchip/Makefile3
-rw-r--r--drivers/irqchip/irq-bcm2836.c55
-rw-r--r--drivers/irqchip/irq-gic-realview.c43
-rw-r--r--drivers/irqchip/irq-gic-v2m.c165
-rw-r--r--drivers/irqchip/irq-gic.c64
-rw-r--r--drivers/irqchip/irq-mbigen.c297
-rw-r--r--drivers/irqchip/irq-omap-intc.c28
-rw-r--r--drivers/irqchip/irq-renesas-h8300h.c8
-rw-r--r--drivers/irqchip/irq-renesas-intc-irqpin.c96
-rw-r--r--drivers/irqchip/irq-sunxi-nmi.c13
-rw-r--r--drivers/irqchip/irq-ts4800.c163
-rw-r--r--drivers/irqchip/irq-zevio.c3
-rw-r--r--drivers/mmc/card/block.c11
-rw-r--r--drivers/mmc/core/bus.c2
-rw-r--r--drivers/mmc/core/core.c85
-rw-r--r--drivers/mmc/core/core.h8
-rw-r--r--drivers/mmc/core/host.c11
-rw-r--r--drivers/mmc/core/mmc.c23
-rw-r--r--drivers/mmc/core/mmc_ops.c9
-rw-r--r--drivers/mmc/core/pwrseq.h2
-rw-r--r--drivers/mmc/core/pwrseq_emmc.c2
-rw-r--r--drivers/mmc/core/pwrseq_simple.c2
-rw-r--r--drivers/mmc/core/sd.c17
-rw-r--r--drivers/mmc/core/sdio.c2
-rw-r--r--drivers/mmc/core/sdio_bus.c1
-rw-r--r--drivers/mmc/host/Kconfig1
-rw-r--r--drivers/mmc/host/atmel-mci-regs.h171
-rw-r--r--drivers/mmc/host/atmel-mci.c165
-rw-r--r--drivers/mmc/host/cb710-mmc.h3
-rw-r--r--drivers/mmc/host/dw_mmc-pltfm.c2
-rw-r--r--drivers/mmc/host/dw_mmc-rockchip.c8
-rw-r--r--drivers/mmc/host/dw_mmc.c21
-rw-r--r--drivers/mmc/host/mtk-sd.c9
-rw-r--r--drivers/mmc/host/mvsdio.c58
-rw-r--r--drivers/mmc/host/of_mmc_spi.c4
-rw-r--r--drivers/mmc/host/omap_hsmmc.c6
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c7
-rw-r--r--drivers/mmc/host/sdhci-of-at91.c75
-rw-r--r--drivers/mmc/host/sdhci-of-esdhc.c10
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c6
-rw-r--r--drivers/mmc/host/sdhci-pltfm.c3
-rw-r--r--drivers/mmc/host/sdhci-tegra.c178
-rw-r--r--drivers/mmc/host/sdhci.c124
-rw-r--r--drivers/mmc/host/sdhci.h21
-rw-r--r--drivers/mmc/host/sh_mmcif.c84
-rw-r--r--drivers/mmc/host/usdhi6rol0.c3
-rw-r--r--drivers/mtd/mtdcore.c26
-rw-r--r--drivers/mtd/spi-nor/spi-nor.c10
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c4
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c6
-rw-r--r--drivers/net/hamradio/6pack.c6
-rw-r--r--drivers/net/hamradio/mkiss.c5
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/usb/r8152.c10
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c8
-rw-r--r--drivers/net/vmxnet3/vmxnet3_int.h4
-rw-r--r--drivers/net/vrf.c10
-rw-r--r--drivers/pci/host/Kconfig1
-rw-r--r--drivers/pci/msi.c4
-rw-r--r--drivers/pci/pci-acpi.c42
-rw-r--r--drivers/pci/probe.c2
-rw-r--r--drivers/pinctrl/Kconfig3
-rw-r--r--drivers/pinctrl/Makefile7
-rw-r--r--drivers/pinctrl/bcm/Kconfig48
-rw-r--r--drivers/pinctrl/bcm/Makefile3
-rw-r--r--drivers/pinctrl/bcm/pinctrl-bcm2835.c2
-rw-r--r--drivers/pinctrl/bcm/pinctrl-iproc-gpio.c (renamed from drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c)358
-rw-r--r--drivers/pinctrl/bcm/pinctrl-nsp-gpio.c749
-rw-r--r--drivers/pinctrl/berlin/Makefile2
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mt8127.c2
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mt8135.c2
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mt8173.c2
-rw-r--r--drivers/pinctrl/mediatek/pinctrl-mtk-common.c28
-rw-r--r--drivers/pinctrl/mvebu/Makefile2
-rw-r--r--drivers/pinctrl/mvebu/pinctrl-mvebu.c29
-rw-r--r--drivers/pinctrl/pinconf-generic.c1
-rw-r--r--drivers/pinctrl/pinctrl-adi2.c24
-rw-r--r--drivers/pinctrl/pinctrl-at91-pio4.c17
-rw-r--r--drivers/pinctrl/pinctrl-at91.c14
-rw-r--r--drivers/pinctrl/pinctrl-lantiq.h8
-rw-r--r--drivers/pinctrl/pinctrl-rockchip.c58
-rw-r--r--drivers/pinctrl/pinctrl-single.c5
-rw-r--r--drivers/pinctrl/pinctrl-tegra-xusb.c4
-rw-r--r--drivers/pinctrl/pinctrl-tegra.c1
-rw-r--r--drivers/pinctrl/pinctrl-xway.c1187
-rw-r--r--drivers/pinctrl/pxa/Kconfig17
-rw-r--r--drivers/pinctrl/pxa/Makefile2
-rw-r--r--drivers/pinctrl/pxa/pinctrl-pxa27x.c566
-rw-r--r--drivers/pinctrl/pxa/pinctrl-pxa2xx.c436
-rw-r--r--drivers/pinctrl/pxa/pinctrl-pxa2xx.h92
-rw-r--r--drivers/pinctrl/qcom/Kconfig8
-rw-r--r--drivers/pinctrl/qcom/Makefile1
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm8996.c1942
-rw-r--r--drivers/pinctrl/qcom/pinctrl-qdf2xxx.c14
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-gpio.c15
-rw-r--r--drivers/pinctrl/qcom/pinctrl-spmi-mpp.c14
-rw-r--r--drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c21
-rw-r--r--drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c21
-rw-r--r--drivers/pinctrl/samsung/pinctrl-exynos.c103
-rw-r--r--drivers/pinctrl/samsung/pinctrl-samsung.c2
-rw-r--r--drivers/pinctrl/samsung/pinctrl-samsung.h1
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-emev2.c136
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7740.c2
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7778.c22
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7779.c62
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7790.c58
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7791.c167
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7794.c86
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-r8a7795.c1505
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-sh73a0.c548
-rw-r--r--drivers/pinctrl/sh-pfc/pfc-sh7734.c29
-rw-r--r--drivers/pinctrl/sh-pfc/pinctrl.c4
-rw-r--r--drivers/pinctrl/sh-pfc/sh_pfc.h74
-rw-r--r--drivers/pinctrl/sirf/pinctrl-atlas7.c134
-rw-r--r--drivers/pinctrl/sirf/pinctrl-sirf.c8
-rw-r--r--drivers/pinctrl/spear/Makefile2
-rw-r--r--drivers/pinctrl/sunxi/Kconfig9
-rw-r--r--drivers/pinctrl/sunxi/Makefile2
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c515
-rw-r--r--drivers/pinctrl/sunxi/pinctrl-sun9i-a80-r.c181
-rw-r--r--drivers/pinctrl/uniphier/Kconfig23
-rw-r--r--drivers/scsi/sd.c9
-rw-r--r--drivers/staging/lustre/lustre/llite/symlink.c24
-rw-r--r--drivers/staging/lustre/lustre/llite/xattr.c4
-rw-r--r--drivers/tty/sysrq.c6
-rw-r--r--fs/9p/acl.c24
-rw-r--r--fs/9p/vfs_inode.c24
-rw-r--r--fs/9p/vfs_inode_dotl.c21
-rw-r--r--fs/9p/xattr.c4
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/affs/namei.c1
-rw-r--r--fs/affs/symlink.c9
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/autofs4/symlink.c14
-rw-r--r--fs/befs/linuxvfs.c40
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/inode.c21
-rw-r--r--fs/btrfs/xattr.c166
-rw-r--r--fs/btrfs/xattr.h2
-rw-r--r--fs/ceph/acl.c16
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsfs.h5
-rw-r--r--fs/cifs/link.c10
-rw-r--r--fs/cifs/xattr.c16
-rw-r--r--fs/coda/cnode.c5
-rw-r--r--fs/coda/symlink.c4
-rw-r--r--fs/compat_ioctl.c253
-rw-r--r--fs/configfs/symlink.c22
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/ecryptfs/inode.c17
-rw-r--r--fs/efs/inode.c1
-rw-r--r--fs/efs/symlink.c4
-rw-r--r--fs/exofs/inode.c1
-rw-r--r--fs/exofs/namei.c1
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext2/namei.c1
-rw-r--r--fs/ext2/symlink.c5
-rw-r--r--fs/ext2/xattr.c15
-rw-r--r--fs/ext2/xattr_security.c21
-rw-r--r--fs/ext2/xattr_trusted.c23
-rw-r--r--fs/ext2/xattr_user.c23
-rw-r--r--fs/ext4/inode.c1
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/symlink.c29
-rw-r--r--fs/ext4/xattr.c17
-rw-r--r--fs/ext4/xattr_security.c22
-rw-r--r--fs/ext4/xattr_trusted.c23
-rw-r--r--fs/ext4/xattr_user.c23
-rw-r--r--fs/f2fs/inode.c1
-rw-r--r--fs/f2fs/namei.c31
-rw-r--r--fs/f2fs/xattr.c92
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/freevxfs/vxfs_inode.c1
-rw-r--r--fs/fuse/dir.c17
-rw-r--r--fs/gfs2/acl.c4
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/inode.c19
-rw-r--r--fs/gfs2/xattr.c50
-rw-r--r--fs/gfs2/xattr.h1
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hfsplus/posix_acl.c8
-rw-r--r--fs/hfsplus/xattr.c12
-rw-r--r--fs/hostfs/hostfs_kern.c22
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/hpfs/namei.c5
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h7
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jffs2/security.c22
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jffs2/xattr.c26
-rw-r--r--fs/jffs2/xattr_trusted.c21
-rw-r--r--fs/jffs2/xattr_user.c20
-rw-r--r--fs/jfs/acl.c8
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/jfs/namei.c1
-rw-r--r--fs/jfs/symlink.c5
-rw-r--r--fs/kernfs/inode.c4
-rw-r--r--fs/kernfs/symlink.c24
-rw-r--r--fs/libfs.c22
-rw-r--r--fs/logfs/dir.c9
-rw-r--r--fs/logfs/inode.c3
-rw-r--r--fs/logfs/logfs.h1
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c126
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/inode.c26
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs4proc.c75
-rw-r--r--fs/nfs/symlink.c39
-rw-r--r--fs/nilfs2/inode.c1
-rw-r--r--fs/nilfs2/namei.c4
-rw-r--r--fs/ocfs2/inode.c1
-rw-r--r--fs/ocfs2/namei.c1
-rw-r--r--fs/ocfs2/symlink.c3
-rw-r--r--fs/ocfs2/xattr.c168
-rw-r--r--fs/overlayfs/inode.c53
-rw-r--r--fs/posix_acl.c25
-rw-r--r--fs/proc/base.c24
-rw-r--r--fs/proc/inode.c21
-rw-r--r--fs/proc/namespaces.c10
-rw-r--r--fs/proc/self.c18
-rw-r--r--fs/proc/thread_self.c19
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/xattr.c16
-rw-r--r--fs/reiserfs/xattr_acl.c8
-rw-r--r--fs/reiserfs/xattr_security.c16
-rw-r--r--fs/reiserfs/xattr_trusted.c15
-rw-r--r--fs/reiserfs/xattr_user.c14
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/squashfs/inode.c2
-rw-r--r--fs/squashfs/symlink.c3
-rw-r--r--fs/squashfs/xattr.c38
-rw-r--r--fs/sysv/inode.c4
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/udf/inode.c3
-rw-r--r--fs/udf/namei.c8
-rw-r--r--fs/udf/symlink.c4
-rw-r--r--fs/udf/udfdecl.h1
-rw-r--r--fs/ufs/Makefile2
-rw-r--r--fs/ufs/inode.c5
-rw-r--r--fs/ufs/namei.c5
-rw-r--r--fs/ufs/symlink.c42
-rw-r--r--fs/ufs/ufs.h4
-rw-r--r--fs/xattr.c165
-rw-r--r--fs/xfs/xfs_acl.c23
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_iops.c14
-rw-r--r--fs/xfs/xfs_xattr.c143
-rw-r--r--include/asm-generic/barrier.h2
-rw-r--r--include/asm-generic/pgtable.h10
-rw-r--r--include/asm-generic/qspinlock.h9
-rw-r--r--include/linux/bootmem.h4
-rw-r--r--include/linux/clocksource.h16
-rw-r--r--include/linux/compiler.h17
-rw-r--r--include/linux/context_tracking.h4
-rw-r--r--include/linux/context_tracking_state.h4
-rw-r--r--include/linux/delayed_call.h34
-rw-r--r--include/linux/filter.h19
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/ftrace.h1
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/interrupt.h1
-rw-r--r--include/linux/irqchip/arm-gic.h13
-rw-r--r--include/linux/irqdesc.h6
-rw-r--r--include/linux/irqdomain.h13
-rw-r--r--include/linux/kernel.h28
-rw-r--r--include/linux/kexec.h2
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/list_bl.h2
-rw-r--r--include/linux/list_nulls.h2
-rw-r--r--include/linux/mmc/dw_mmc.h12
-rw-r--r--include/linux/mmc/host.h6
-rw-r--r--include/linux/msi.h18
-rw-r--r--include/linux/mtd/spi-nor.h2
-rw-r--r--include/linux/nfs_fs.h1
-rw-r--r--include/linux/pci.h10
-rw-r--r--include/linux/percpu-refcount.h2
-rw-r--r--include/linux/percpu.h6
-rw-r--r--include/linux/platform_data/irq-renesas-intc-irqpin.h29
-rw-r--r--include/linux/platform_data/mmc-mvsdio.h18
-rw-r--r--include/linux/platform_device.h1
-rw-r--r--include/linux/posix_acl_xattr.h6
-rw-r--r--include/linux/rculist.h105
-rw-r--r--include/linux/rcupdate.h21
-rw-r--r--include/linux/rcutiny.h8
-rw-r--r--include/linux/rcutree.h4
-rw-r--r--include/linux/sched.h40
-rw-r--r--include/linux/sched_clock.h12
-rw-r--r--include/linux/stop_machine.h7
-rw-r--r--include/linux/time.h26
-rw-r--r--include/linux/tracepoint-defs.h27
-rw-r--r--include/linux/tracepoint.h20
-rw-r--r--include/linux/vtime.h25
-rw-r--r--include/linux/wait.h30
-rw-r--r--include/linux/workqueue.h6
-rw-r--r--include/linux/xattr.h20
-rw-r--r--include/net/l3mdev.h16
-rw-r--r--include/net/route.h7
-rw-r--r--include/sound/soc.h2
-rw-r--r--include/uapi/linux/perf_event.h6
-rw-r--r--init/main.c2
-rw-r--r--kernel/context_tracking.c4
-rw-r--r--kernel/events/core.c321
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/futex.c83
-rw-r--r--kernel/irq/chip.c9
-rw-r--r--kernel/irq/irqdesc.c19
-rw-r--r--kernel/irq/irqdomain.c12
-rw-r--r--kernel/irq/manage.c31
-rw-r--r--kernel/irq/msi.c58
-rw-r--r--kernel/kexec_core.c30
-rw-r--r--kernel/ksysfs.c26
-rw-r--r--kernel/locking/qspinlock.c82
-rw-r--r--kernel/locking/qspinlock_paravirt.h252
-rw-r--r--kernel/locking/qspinlock_stat.h300
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/panic.c33
-rw-r--r--kernel/rcu/rcutorture.c24
-rw-r--r--kernel/rcu/srcu.c2
-rw-r--r--kernel/rcu/tree.c313
-rw-r--r--kernel/rcu/tree.h61
-rw-r--r--kernel/rcu/tree_plugin.h66
-rw-r--r--kernel/rcu/tree_trace.c39
-rw-r--r--kernel/rcu/update.c22
-rw-r--r--kernel/sched/auto_group.c2
-rw-r--r--kernel/sched/clock.c2
-rw-r--r--kernel/sched/core.c181
-rw-r--r--kernel/sched/cputime.c74
-rw-r--r--kernel/sched/deadline.c59
-rw-r--r--kernel/sched/fair.c314
-rw-r--r--kernel/sched/idle_task.c1
-rw-r--r--kernel/sched/sched.h70
-rw-r--r--kernel/stop_machine.c84
-rw-r--r--kernel/time/alarmtimer.c17
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/ntp.c44
-rw-r--r--kernel/time/ntp_internal.h2
-rw-r--r--kernel/time/posix-clock.c4
-rw-r--r--kernel/time/tick-sched.c36
-rw-r--r--kernel/time/timekeeping.c49
-rw-r--r--kernel/time/timekeeping_internal.h8
-rw-r--r--kernel/trace/trace_printk.c1
-rw-r--r--kernel/watchdog.c20
-rw-r--r--kernel/workqueue.c220
-rw-r--r--lib/Kconfig.debug14
-rw-r--r--lib/atomic64_test.c124
-rw-r--r--lib/list_debug.c2
-rw-r--r--mm/bootmem.c1
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nobootmem.c1
-rw-r--r--mm/shmem.c181
-rw-r--r--mm/vmstat.c2
-rw-r--r--net/bridge/br_stp_if.c5
-rw-r--r--net/core/dst.c3
-rw-r--r--net/ipv4/raw.c7
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/unix/af_unix.c66
-rw-r--r--scripts/recordmcount.c2
-rw-r--r--security/smack/smack_lsm.c2
-rw-r--r--sound/pci/hda/patch_realtek.c101
-rw-r--r--sound/soc/codecs/arizona.c2
-rw-r--r--sound/soc/codecs/rt5645.c4
-rw-r--r--sound/soc/codecs/rt5645.h4
-rw-r--r--sound/soc/intel/skylake/skl-topology.c3
-rw-r--r--sound/soc/intel/skylake/skl.c4
-rw-r--r--sound/soc/intel/skylake/skl.h2
-rw-r--r--tools/Makefile2
-rw-r--r--tools/build/Makefile2
-rw-r--r--tools/build/Makefile.feature44
-rw-r--r--tools/build/Makefile.include2
-rw-r--r--tools/build/feature/Makefile93
-rw-r--r--tools/include/linux/bitmap.h (renamed from tools/perf/util/include/linux/bitmap.h)2
-rw-r--r--tools/include/linux/string.h15
-rw-r--r--tools/lib/bitmap.c (renamed from tools/perf/util/bitmap.c)0
-rw-r--r--tools/lib/bpf/Makefile14
-rw-r--r--tools/lib/bpf/bpf.c14
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/libbpf.c412
-rw-r--r--tools/lib/bpf/libbpf.h88
-rw-r--r--tools/lib/find_bit.c84
-rw-r--r--tools/lib/string.c89
-rw-r--r--tools/lib/subcmd/Build7
-rw-r--r--tools/lib/subcmd/Makefile48
-rw-r--r--tools/lib/subcmd/exec-cmd.c209
-rw-r--r--tools/lib/subcmd/exec-cmd.h16
-rw-r--r--tools/lib/subcmd/help.c (renamed from tools/perf/util/help.c)179
-rw-r--r--tools/lib/subcmd/help.h (renamed from tools/perf/util/help.h)13
-rw-r--r--tools/lib/subcmd/pager.c (renamed from tools/perf/util/pager.c)23
-rw-r--r--tools/lib/subcmd/pager.h9
-rw-r--r--tools/lib/subcmd/parse-options.c (renamed from tools/perf/util/parse-options.c)250
-rw-r--r--tools/lib/subcmd/parse-options.h (renamed from tools/perf/util/parse-options.h)26
-rw-r--r--tools/lib/subcmd/run-command.c (renamed from tools/perf/util/run-command.c)24
-rw-r--r--tools/lib/subcmd/run-command.h (renamed from tools/perf/util/run-command.h)12
-rw-r--r--tools/lib/subcmd/sigchain.c (renamed from tools/perf/util/sigchain.c)3
-rw-r--r--tools/lib/subcmd/sigchain.h (renamed from tools/perf/util/sigchain.h)6
-rw-r--r--tools/lib/subcmd/subcmd-config.c11
-rw-r--r--tools/lib/subcmd/subcmd-config.h14
-rw-r--r--tools/lib/subcmd/subcmd-util.h91
-rw-r--r--tools/lib/traceevent/event-parse.c134
-rw-r--r--tools/lib/traceevent/event-parse.h4
-rw-r--r--tools/lib/util/find_next_bit.c89
-rw-r--r--tools/perf/Build8
-rw-r--r--tools/perf/Documentation/perf-config.txt103
-rw-r--r--tools/perf/Documentation/perf-evlist.txt3
-rw-r--r--tools/perf/Documentation/perf-record.txt24
-rw-r--r--tools/perf/Documentation/perf-report.txt41
-rw-r--r--tools/perf/Documentation/perf-stat.txt34
-rw-r--r--tools/perf/Documentation/perf-top.txt3
-rw-r--r--tools/perf/Documentation/tips.txt14
-rw-r--r--tools/perf/MANIFEST7
-rw-r--r--tools/perf/Makefile.perf44
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h8
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c2
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c4
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c3
-rw-r--r--tools/perf/arch/x86/tests/rdpmc.c2
-rw-r--r--tools/perf/arch/x86/util/Build1
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c4
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c6
-rw-r--r--tools/perf/bench/futex-hash.c2
-rw-r--r--tools/perf/bench/futex-lock-pi.c2
-rw-r--r--tools/perf/bench/futex-requeue.c2
-rw-r--r--tools/perf/bench/futex-wake-parallel.c2
-rw-r--r--tools/perf/bench/futex-wake.c2
-rw-r--r--tools/perf/bench/mem-functions.c2
-rw-r--r--tools/perf/bench/numa.c2
-rw-r--r--tools/perf/bench/sched-messaging.c2
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c44
-rw-r--r--tools/perf/builtin-bench.c2
-rw-r--r--tools/perf/builtin-buildid-cache.c2
-rw-r--r--tools/perf/builtin-buildid-list.c4
-rw-r--r--tools/perf/builtin-config.c66
-rw-r--r--tools/perf/builtin-data.c2
-rw-r--r--tools/perf/builtin-diff.c15
-rw-r--r--tools/perf/builtin-evlist.c13
-rw-r--r--tools/perf/builtin-help.c10
-rw-r--r--tools/perf/builtin-inject.c2
-rw-r--r--tools/perf/builtin-kmem.c2
-rw-r--r--tools/perf/builtin-kvm.c5
-rw-r--r--tools/perf/builtin-list.c2
-rw-r--r--tools/perf/builtin-lock.c2
-rw-r--r--tools/perf/builtin-mem.c2
-rw-r--r--tools/perf/builtin-probe.c17
-rw-r--r--tools/perf/builtin-record.c48
-rw-r--r--tools/perf/builtin-report.c52
-rw-r--r--tools/perf/builtin-sched.c2
-rw-r--r--tools/perf/builtin-script.c245
-rw-r--r--tools/perf/builtin-stat.c679
-rw-r--r--tools/perf/builtin-timechart.c2
-rw-r--r--tools/perf/builtin-top.c75
-rw-r--r--tools/perf/builtin-trace.c4
-rw-r--r--tools/perf/builtin-version.c10
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt3
-rw-r--r--tools/perf/config/Makefile24
-rw-r--r--tools/perf/config/utilities.mak19
-rw-r--r--tools/perf/perf.c24
-rw-r--r--tools/perf/scripts/python/stat-cpi.py77
-rw-r--r--tools/perf/tests/.gitignore1
-rw-r--r--tools/perf/tests/Build16
-rw-r--r--tools/perf/tests/attr.c6
-rw-r--r--tools/perf/tests/bp_signal.c2
-rw-r--r--tools/perf/tests/bp_signal_overflow.c2
-rw-r--r--tools/perf/tests/bpf-script-test-prologue.c35
-rw-r--r--tools/perf/tests/bpf.c93
-rw-r--r--tools/perf/tests/builtin-test.c141
-rw-r--r--tools/perf/tests/code-reading.c16
-rw-r--r--tools/perf/tests/cpumap.c88
-rw-r--r--tools/perf/tests/dso-data.c6
-rw-r--r--tools/perf/tests/dwarf-unwind.c37
-rw-r--r--tools/perf/tests/event_update.c117
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c5
-rw-r--r--tools/perf/tests/evsel-tp-sched.c2
-rw-r--r--tools/perf/tests/fdarray.c4
-rw-r--r--tools/perf/tests/hists_common.c6
-rw-r--r--tools/perf/tests/hists_cumulate.c10
-rw-r--r--tools/perf/tests/hists_filter.c4
-rw-r--r--tools/perf/tests/hists_link.c10
-rw-r--r--tools/perf/tests/hists_output.c12
-rw-r--r--tools/perf/tests/keep-tracking.c5
-rw-r--r--tools/perf/tests/kmod-path.c2
-rw-r--r--tools/perf/tests/llvm.c75
-rw-r--r--tools/perf/tests/llvm.h2
-rw-r--r--tools/perf/tests/make3
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c8
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/openat-syscall.c2
-rw-r--r--tools/perf/tests/parse-events.c2
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c2
-rw-r--r--tools/perf/tests/perf-record.c8
-rw-r--r--tools/perf/tests/pmu.c2
-rw-r--r--tools/perf/tests/python-use.c3
-rw-r--r--tools/perf/tests/sample-parsing.c2
-rw-r--r--tools/perf/tests/stat.c111
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c8
-rw-r--r--tools/perf/tests/task-exit.c2
-rw-r--r--tools/perf/tests/tests.h95
-rw-r--r--tools/perf/tests/thread-map.c45
-rw-r--r--tools/perf/tests/thread-mg-share.c2
-rw-r--r--tools/perf/tests/topology.c2
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c2
-rw-r--r--tools/perf/ui/browser.c2
-rw-r--r--tools/perf/ui/browsers/hists.c335
-rw-r--r--tools/perf/ui/gtk/hists.c152
-rw-r--r--tools/perf/ui/hist.c14
-rw-r--r--tools/perf/ui/stdio/hist.c100
-rw-r--r--tools/perf/util/Build28
-rw-r--r--tools/perf/util/annotate.c23
-rw-r--r--tools/perf/util/auxtrace.c2
-rw-r--r--tools/perf/util/bpf-loader.c433
-rw-r--r--tools/perf/util/bpf-loader.h4
-rw-r--r--tools/perf/util/bpf-prologue.c455
-rw-r--r--tools/perf/util/bpf-prologue.h34
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/cache.h14
-rw-r--r--tools/perf/util/callchain.c164
-rw-r--r--tools/perf/util/callchain.h30
-rw-r--r--tools/perf/util/cgroup.c2
-rw-r--r--tools/perf/util/color.c2
-rw-r--r--tools/perf/util/config.c2
-rw-r--r--tools/perf/util/cpumap.c51
-rw-r--r--tools/perf/util/cpumap.h1
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/dso.c2
-rw-r--r--tools/perf/util/env.c9
-rw-r--r--tools/perf/util/environment.c8
-rw-r--r--tools/perf/util/event.c308
-rw-r--r--tools/perf/util/event.h150
-rw-r--r--tools/perf/util/evlist.c112
-rw-r--r--tools/perf/util/evlist.h10
-rw-r--r--tools/perf/util/evsel.c53
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/exec_cmd.c148
-rw-r--r--tools/perf/util/exec_cmd.h12
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh15
-rw-r--r--tools/perf/util/header.c207
-rw-r--r--tools/perf/util/header.h17
-rw-r--r--tools/perf/util/help-unknown-cmd.c103
-rw-r--r--tools/perf/util/help-unknown-cmd.h0
-rw-r--r--tools/perf/util/hist.c118
-rw-r--r--tools/perf/util/hist.h24
-rw-r--r--tools/perf/util/include/linux/string.h3
-rw-r--r--tools/perf/util/intel-pt.c4
-rw-r--r--tools/perf/util/machine.c74
-rw-r--r--tools/perf/util/map.c7
-rw-r--r--tools/perf/util/parse-branch-options.c2
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/parse-regs-options.c2
-rw-r--r--tools/perf/util/path.c18
-rw-r--r--tools/perf/util/pmu.c1
-rw-r--r--tools/perf/util/probe-event.c7
-rw-r--r--tools/perf/util/probe-finder.c6
-rw-r--r--tools/perf/util/python-ext-sources2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c115
-rw-r--r--tools/perf/util/session.c200
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/sort.c601
-rw-r--r--tools/perf/util/sort.h14
-rw-r--r--tools/perf/util/stat.c62
-rw-r--r--tools/perf/util/stat.h10
-rw-r--r--tools/perf/util/string.c16
-rw-r--r--tools/perf/util/symbol-elf.c9
-rw-r--r--tools/perf/util/symbol.c66
-rw-r--r--tools/perf/util/symbol.h4
-rw-r--r--tools/perf/util/term.c35
-rw-r--r--tools/perf/util/term.h10
-rw-r--r--tools/perf/util/thread.c10
-rw-r--r--tools/perf/util/thread_map.c28
-rw-r--r--tools/perf/util/thread_map.h3
-rw-r--r--tools/perf/util/tool.h8
-rw-r--r--tools/perf/util/trace-event.h4
-rw-r--r--tools/perf/util/unwind-libdw.c63
-rw-r--r--tools/perf/util/unwind-libdw.h2
-rw-r--r--tools/perf/util/unwind-libunwind.c80
-rw-r--r--tools/perf/util/util.c67
-rw-r--r--tools/perf/util/util.h20
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh41
-rw-r--r--tools/testing/selftests/rcutorture/doc/TINY_RCU.txt1
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt4
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
844 files changed, 33086 insertions, 7624 deletions
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
new file mode 100644
index 000000000000..7496a55e4e7b
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
Binary files differ
diff --git a/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
new file mode 100644
index 000000000000..4b4014fda770
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/GPpartitionReaders1.svg
@@ -0,0 +1,374 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="447.99197"
+ height="428.19299"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="GPpartitionReaders1.svg">
+ <defs
+ id="defs4">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend"
+ style="overflow:visible">
+ <path
+ id="path3792"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart"
+ style="overflow:visible">
+ <path
+ id="path3789"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="1.6184291"
+ inkscape:cx="223.99599"
+ inkscape:cy="214.0965"
+ inkscape:document-units="px"
+ inkscape:current-layer="layer1"
+ showgrid="false"
+ inkscape:window-width="979"
+ inkscape:window-height="836"
+ inkscape:window-x="571"
+ inkscape:window-y="335"
+ inkscape:window-maximized="0"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-28.441125,-185.60612)">
+ <flowRoot
+ xml:space="preserve"
+ id="flowRoot2985"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ id="flowRegion2987"><rect
+ id="rect2989"
+ width="82.85714"
+ height="11.428572"
+ x="240"
+ y="492.36218" /></flowRegion><flowPara
+ id="flowPara2991"></flowPara></flowRoot> <g
+ id="g4433"
+ transform="translate(2,0)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993"
+ y="-261.66608"
+ x="412.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="412.12299"
+ id="tspan2995"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="m 97.580736,477.4048 183.140664,0"
+ id="path2997"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 281.54942,465.38397 0,22.62742"
+ id="path4397-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="268.18076"
+ id="text4429"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431"
+ x="112.04738"
+ y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="439.13766"
+ id="text4441"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4443"
+ x="112.04738"
+ y="439.13766">WRITE_ONCE(b, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.60869"
+ y="309.29346"
+ id="text4445"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447"
+ x="255.60869"
+ y="309.29346">r1 = READ_ONCE(a);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.14423"
+ y="520.61786"
+ id="text4449"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451"
+ x="255.14423"
+ y="520.61786">WRITE_ONCE(c, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="384.71124"
+ id="text4453"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455"
+ x="396.10254"
+ y="384.71124">r2 = READ_ONCE(b);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="582.13617"
+ id="text4457"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459"
+ x="396.10254"
+ y="582.13617">r3 = READ_ONCE(c);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.08231"
+ y="213.91006"
+ id="text4461"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463"
+ x="112.08231"
+ y="213.91006">thread0()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="252.34512"
+ y="213.91006"
+ id="text4461-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-0"
+ x="252.34512"
+ y="213.91006">thread1()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.42557"
+ y="213.91006"
+ id="text4461-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2"
+ x="396.42557"
+ y="213.91006">thread2()</tspan></text>
+ <rect
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="rect4495"
+ width="436.28488"
+ height="416.4859"
+ x="34.648232"
+ y="191.10612" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 183.14066,191.10612 0,417.193 -0.70711,0"
+ id="path4497"
+ inkscape:connector-curvature="0" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 325.13867,191.10612 0,417.193 -0.70711,0"
+ id="path4497-5"
+ inkscape:connector-curvature="0" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="251.53981"
+ id="text4429-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9"
+ x="111.75929"
+ y="251.53981">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="367.91556"
+ id="text4429-8-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4"
+ x="396.10254"
+ y="367.91556">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="597.40289"
+ id="text4429-8-9-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4"
+ x="396.10254"
+ y="597.40289">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="453.15311"
+ id="text4429-8-9-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-6"
+ x="111.75929"
+ y="453.15311">rcu_read_unlock();</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 33.941125,227.87568 436.284885,0 0,0.7071"
+ id="path4608"
+ inkscape:connector-curvature="0" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="394.94427"
+ y="345.66351"
+ id="text4648"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650"
+ x="394.94427"
+ y="345.66351">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(36.441125,199.60612)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.11968"
+ y="475.77856"
+ id="text4648-4"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4"
+ x="112.11968"
+ y="475.77856">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-246.38346,329.72117)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-103.65246,202.90878)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="254.85066"
+ y="348.96619"
+ id="text4648-4-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5"
+ x="254.85066"
+ y="348.96619">QS</tspan></text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
new file mode 100644
index 000000000000..ebcbeee391ed
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
@@ -0,0 +1,237 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
+
+<!-- CreationDate: Tue Mar 4 18:34:25 2014 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="1089.1382"
+ height="668.21368"
+ viewBox="-2121 -36 14554.634 8876.4061"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="RCUApplicability.svg">
+ <metadata
+ id="metadata40">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs38" />
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="849"
+ inkscape:window-height="639"
+ id="namedview36"
+ showgrid="false"
+ inkscape:zoom="0.51326165"
+ inkscape:cx="544.56912"
+ inkscape:cy="334.10686"
+ inkscape:window-x="149"
+ inkscape:window-y="448"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <g
+ style="fill:none;stroke-width:0.025in"
+ id="g4"
+ transform="translate(-2043.6828,14.791398)">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="14400"
+ height="8775"
+ rx="0"
+ style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="0"
+ width="11700"
+ height="6075"
+ rx="0"
+ style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="0"
+ width="9000"
+ height="4275"
+ rx="0"
+ style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="4050"
+ y="0"
+ width="6300"
+ height="2475"
+ rx="0"
+ style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
+ id="rect12" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="900"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text14"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3017">Read-Mostly, Stale &amp;</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="1350"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text16"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3019">Inconsistent Data OK</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="1800"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text18"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3021">(RCU Works Great!!!)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="3825"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text20"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3023">(RCU Works Well)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="3375"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text22"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="5175"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text24"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="6975"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text26"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="5625"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text28"
+ sodipodi:linespacing="125%"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ id="tspan3029">(RCU Might Be OK...)</tspan></text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="7875"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text30"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="8325"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text32"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7200"
+ y="7425"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ id="text34"
+ style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
+ sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
new file mode 100644
index 000000000000..48cd1623d4d4
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/ReadersPartitionGP1.svg
@@ -0,0 +1,639 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="735.25"
+ height="516.21875"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.3.1 r9886"
+ sodipodi:docname="ReadersPartitionGP1.svg">
+ <defs
+ id="defs4">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend"
+ style="overflow:visible">
+ <path
+ id="path3792"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart"
+ style="overflow:visible">
+ <path
+ id="path3789"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lstart"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lstart-4"
+ style="overflow:visible">
+ <path
+ id="path3789-9"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(1.1,0,0,1.1,1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0"
+ refX="0"
+ id="Arrow2Lend-4"
+ style="overflow:visible">
+ <path
+ id="path3792-4"
+ style="fill-rule:evenodd;stroke-width:0.625;stroke-linejoin:round"
+ d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
+ transform="matrix(-1.1,0,0,-1.1,-1.1,0)"
+ inkscape:connector-curvature="0" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ id="base"
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1.0"
+ inkscape:pageopacity="0.0"
+ inkscape:pageshadow="2"
+ inkscape:zoom="1.3670394"
+ inkscape:cx="367.26465"
+ inkscape:cy="258.46182"
+ inkscape:document-units="px"
+ inkscape:current-layer="g4433-6"
+ showgrid="false"
+ inkscape:window-width="1351"
+ inkscape:window-height="836"
+ inkscape:window-x="438"
+ inkscape:window-y="335"
+ inkscape:window-maximized="0"
+ fit-margin-top="5"
+ fit-margin-left="5"
+ fit-margin-right="5"
+ fit-margin-bottom="5" />
+ <metadata
+ id="metadata7">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <g
+ inkscape:label="Layer 1"
+ inkscape:groupmode="layer"
+ id="layer1"
+ transform="translate(-29.15625,-185.59375)">
+ <flowRoot
+ xml:space="preserve"
+ id="flowRoot2985"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"><flowRegion
+ id="flowRegion2987"><rect
+ id="rect2989"
+ width="82.85714"
+ height="11.428572"
+ x="240"
+ y="492.36218" /></flowRegion><flowPara
+ id="flowPara2991" /></flowRoot> <g
+ id="g4433"
+ transform="translate(2,-12)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993"
+ y="-261.66608"
+ x="436.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="436.12299"
+ id="tspan2995"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="M 97.580736,477.4048 327.57913,476.09759"
+ id="path2997"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 328.40703,465.38397 0,22.62742"
+ id="path4397-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="268.18076"
+ id="text4429"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431"
+ x="112.04738"
+ y="268.18076">WRITE_ONCE(a, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.04738"
+ y="487.13766"
+ id="text4441"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4443"
+ x="112.04738"
+ y="487.13766">WRITE_ONCE(b, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.60869"
+ y="297.29346"
+ id="text4445"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447"
+ x="255.60869"
+ y="297.29346">r1 = READ_ONCE(a);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="255.14423"
+ y="554.61786"
+ id="text4449"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451"
+ x="255.14423"
+ y="554.61786">WRITE_ONCE(c, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="370.71124"
+ id="text4453"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455"
+ x="396.10254"
+ y="370.71124">WRITE_ONCE(d, 1);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="572.13617"
+ id="text4457"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459"
+ x="396.10254"
+ y="572.13617">r2 = READ_ONCE(c);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.08231"
+ y="213.91006"
+ id="text4461"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463"
+ x="112.08231"
+ y="213.91006">thread0()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="252.34512"
+ y="213.91006"
+ id="text4461-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-0"
+ x="252.34512"
+ y="213.91006">thread1()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.42557"
+ y="213.91006"
+ id="text4461-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2"
+ x="396.42557"
+ y="213.91006">thread2()</tspan></text>
+ <rect
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="rect4495"
+ width="724.25244"
+ height="505.21201"
+ x="34.648232"
+ y="191.10612" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 183.14066,191.10612 0,504.24243"
+ id="path4497"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 325.13867,191.10612 0,504.24243"
+ id="path4497-5"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="251.53981"
+ id="text4429-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9"
+ x="111.75929"
+ y="251.53981">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="353.91556"
+ id="text4429-8-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4"
+ x="396.10254"
+ y="353.91556">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="396.10254"
+ y="587.40289"
+ id="text4429-8-9-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4"
+ x="396.10254"
+ y="587.40289">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="111.75929"
+ y="501.15311"
+ id="text4429-8-9-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-6"
+ x="111.75929"
+ y="501.15311">rcu_read_unlock();</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 33.941125,227.87568 724.941765,0"
+ id="path4608"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="394.94427"
+ y="331.66351"
+ id="text4648"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650"
+ x="394.94427"
+ y="331.66351">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(36.441125,185.60612)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="112.11968"
+ y="523.77856"
+ id="text4648-4"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4"
+ x="112.11968"
+ y="523.77856">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-246.38346,377.72117)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-103.65246,190.90878)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="254.85066"
+ y="336.96619"
+ id="text4648-4-3"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5"
+ x="254.85066"
+ y="336.96619">QS</tspan></text>
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 470.93311,190.39903 0,504.24243"
+ id="path4497-5-6"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ d="m 616.22755,190.38323 0,504.24243"
+ id="path4497-5-2"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <g
+ id="g4433-6"
+ transform="translate(288.0964,78.32827)">
+ <text
+ sodipodi:linespacing="125%"
+ id="text2993-7"
+ y="-261.66608"
+ x="440.12299"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ xml:space="preserve"
+ transform="matrix(0,1,-1,0,0,0)"><tspan
+ y="-261.66608"
+ x="440.12299"
+ id="tspan2995-1"
+ sodipodi:role="line">synchronize_rcu()</tspan></text>
+ <g
+ id="g4417-1"
+ transform="matrix(0,1,-1,0,730.90257,222.4928)">
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow2Lstart);marker-end:url(#Arrow2Lend)"
+ d="M 97.580736,477.4048 328.5624,477.07246"
+ id="path2997-2"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 96.752718,465.38398 0,22.62742"
+ id="path4397-3"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ <path
+ style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+ d="m 329.39039,465.38397 0,22.62742"
+ id="path4397-5-4"
+ inkscape:connector-curvature="0"
+ sodipodi:nodetypes="cc" />
+ </g>
+ </g>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="541.70508"
+ y="387.6217"
+ id="text4445-0"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4447-5"
+ x="541.70508"
+ y="387.6217">r3 = READ_ONCE(d);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="541.2406"
+ y="646.94611"
+ id="text4449-6"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4451-6"
+ x="541.2406"
+ y="646.94611">WRITE_ONCE(e, 1);</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-7-7-5"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(182.44393,281.23704)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="540.94702"
+ y="427.29443"
+ id="text4648-4-3-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-4-5-7"
+ x="540.94702"
+ y="427.29443">QS</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="461.83929"
+ id="text4453-7"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4455-1"
+ x="686.27747"
+ y="461.83929">r4 = READ_ONCE(b);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="669.26422"
+ id="text4457-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4459-2"
+ x="686.27747"
+ y="669.26422">r5 = READ_ONCE(e);</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="445.04358"
+ id="text4429-8-9-33"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-2"
+ x="686.27747"
+ y="445.04358">rcu_read_lock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="686.27747"
+ y="684.53094"
+ id="text4429-8-9-3-8"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4431-9-4-4-5"
+ x="686.27747"
+ y="684.53094">rcu_read_unlock();</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="685.11914"
+ y="422.79153"
+ id="text4648-9"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-7"
+ x="685.11914"
+ y="422.79153">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-8"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(326.61602,276.73415)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="397.85934"
+ y="609.59003"
+ id="text4648-5"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-77"
+ x="397.85934"
+ y="609.59003">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-80"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(39.356201,463.53264)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="256.75986"
+ y="586.99133"
+ id="text4648-5-2"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4650-77-7"
+ x="256.75986"
+ y="586.99133">QS</tspan></text>
+ <path
+ sodipodi:type="arc"
+ style="fill:none;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
+ id="path4652-80-5"
+ sodipodi:cx="358.85669"
+ sodipodi:cy="142.87541"
+ sodipodi:rx="10.960155"
+ sodipodi:ry="10.253048"
+ d="m 358.86939,132.62237 a 10.960155,10.253048 0 1 1 -0.0228,0"
+ transform="translate(-101.74328,440.93395)"
+ sodipodi:start="4.7135481"
+ sodipodi:end="10.994651"
+ sodipodi:open="true" />
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="546.22791"
+ y="213.91006"
+ id="text4461-2-5"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2-6"
+ x="546.22791"
+ y="213.91006">thread3()</tspan></text>
+ <text
+ xml:space="preserve"
+ style="font-size:10px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;font-family:Symbol;-inkscape-font-specification:Symbol"
+ x="684.00067"
+ y="213.91006"
+ id="text4461-2-1"
+ sodipodi:linespacing="125%"><tspan
+ sodipodi:role="line"
+ id="tspan4463-2-0"
+ x="684.00067"
+ y="213.91006">thread4()</tspan></text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
new file mode 100644
index 000000000000..a725f9900ec8
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -0,0 +1,2897 @@
+<!-- DO NOT HAND EDIT. -->
+<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+
+<h1>A Tour Through RCU's Requirements</h1>
+
+<p>Copyright IBM Corporation, 2015</p>
+<p>Author: Paul E.&nbsp;McKenney</p>
+<p><i>The initial version of this document appeared in the
+<a href="https://lwn.net/">LWN</a> articles
+<a href="https://lwn.net/Articles/652156/">here</a>,
+<a href="https://lwn.net/Articles/652677/">here</a>, and
+<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
+
+<h2>Introduction</h2>
+
+<p>
+Read-copy update (RCU) is a synchronization mechanism that is often
+used as a replacement for reader-writer locking.
+RCU is unusual in that updaters do not block readers,
+which means that RCU's read-side primitives can be exceedingly fast
+and scalable.
+In addition, updaters can make useful forward progress concurrently
+with readers.
+However, all this concurrency between RCU readers and updaters does raise
+the question of exactly what RCU readers are doing, which in turn
+raises the question of exactly what RCU's requirements are.
+
+<p>
+This document therefore summarizes RCU's requirements, and can be thought
+of as an informal, high-level specification for RCU.
+It is important to understand that RCU's specification is primarily
+empirical in nature;
+in fact, I learned about many of these requirements the hard way.
+This situation might cause some consternation, however, not only
+has this learning process been a lot of fun, but it has also been
+a great privilege to work with so many people willing to apply
+technologies in interesting new ways.
+
+<p>
+All that aside, here are the categories of currently known RCU requirements:
+</p>
+
+<ol>
+<li> <a href="#Fundamental Requirements">
+ Fundamental Requirements</a>
+<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
+<li> <a href="#Parallelism Facts of Life">
+ Parallelism Facts of Life</a>
+<li> <a href="#Quality-of-Implementation Requirements">
+ Quality-of-Implementation Requirements</a>
+<li> <a href="#Linux Kernel Complications">
+ Linux Kernel Complications</a>
+<li> <a href="#Software-Engineering Requirements">
+ Software-Engineering Requirements</a>
+<li> <a href="#Other RCU Flavors">
+ Other RCU Flavors</a>
+<li> <a href="#Possible Future Changes">
+ Possible Future Changes</a>
+</ol>
+
+<p>
+This is followed by a <a href="#Summary">summary</a>,
+which is in turn followed by the inevitable
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
+
+<p>
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements.
+These are:
+
+<ol>
+<li> <a href="#Grace-Period Guarantee">
+ Grace-Period Guarantee</a>
+<li> <a href="#Publish-Subscribe Guarantee">
+ Publish-Subscribe Guarantee</a>
+<li> <a href="#Memory-Barrier Guarantees">
+ Memory-Barrier Guarantees</a>
+<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
+ RCU Primitives Guaranteed to Execute Unconditionally</a>
+<li> <a href="#Guaranteed Read-to-Write Upgrade">
+ Guaranteed Read-to-Write Upgrade</a>
+</ol>
+
+<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
+
+<p>
+RCU's grace-period guarantee is unusual in being premeditated:
+Jack Slingwine and I had this guarantee firmly in mind when we started
+work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
+That said, the past two decades of experience with RCU have produced
+a much more detailed understanding of this guarantee.
+
+<p>
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections.
+An RCU read-side critical section
+begins with the marker <tt>rcu_read_lock()</tt> and ends with
+the marker <tt>rcu_read_unlock()</tt>.
+These markers may be nested, and RCU treats a nested set as one
+big RCU read-side critical section.
+Production-quality implementations of <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
+fact have exactly zero overhead in Linux kernels built for production
+use with <tt>CONFIG_PREEMPT=n</tt>.
+
+<p>
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+<blockquote>
+<pre>
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5 rcu_read_lock();
+ 6 r1 = READ_ONCE(x);
+ 7 r2 = READ_ONCE(y);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 WRITE_ONCE(x, 1);
+14 synchronize_rcu();
+15 WRITE_ONCE(y, 1);
+16 }
+</pre>
+</blockquote>
+
+<p>
+Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
+all pre-existing readers, any instance of <tt>thread0()</tt> that
+loads a value of zero from <tt>x</tt> must complete before
+<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
+also load a value of zero from <tt>y</tt>.
+Similarly, any instance of <tt>thread0()</tt> that loads a value of
+one from <tt>y</tt> must have started after the
+<tt>synchronize_rcu()</tt> started, and must therefore also load
+a value of one from <tt>x</tt>.
+Therefore, the outcome:
+<blockquote>
+<pre>
+(r1 == 0 &amp;&amp; r2 == 1)
+</pre>
+</blockquote>
+cannot happen.
+
+<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+<br><a href="#qq1answer">Answer</a>
+
+<p>
+This scenario resembles one of the first uses of RCU in
+<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
+which managed a distributed lock manager's transition into
+a state suitable for handling recovery from node failure,
+more or less as follows:
+
+<blockquote>
+<pre>
+ 1 #define STATE_NORMAL 0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING 2
+ 4 #define STATE_WANT_NORMAL 3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10 int state_snap;
+11
+12 rcu_read_lock();
+13 state_snap = READ_ONCE(state);
+14 if (state_snap == STATE_NORMAL)
+15 do_something();
+16 else
+17 do_something_carefully();
+18 rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24 synchronize_rcu();
+25 WRITE_ONCE(state, STATE_RECOVERING);
+26 recovery();
+27 WRITE_ONCE(state, STATE_WANT_NORMAL);
+28 synchronize_rcu();
+29 WRITE_ONCE(state, STATE_NORMAL);
+30 }
+</pre>
+</blockquote>
+
+<p>
+The RCU read-side critical section in <tt>do_something_dlm()</tt>
+works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
+to guarantee that <tt>do_something()</tt> never runs concurrently
+with <tt>recovery()</tt>, but with little or no synchronization
+overhead in <tt>do_something_dlm()</tt>.
+
+<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+<br><a href="#qq2answer">Answer</a>
+
+<p>
+In order to avoid fatal problems such as deadlocks,
+an RCU read-side critical section must not contain calls to
+<tt>synchronize_rcu()</tt>.
+Similarly, an RCU read-side critical section must not
+contain anything that waits, directly or indirectly, on completion of
+an invocation of <tt>synchronize_rcu()</tt>.
+
+<p>
+Although RCU's grace-period guarantee is useful in and of itself, with
+<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
+it would be good to be able to use RCU to coordinate read-side
+access to linked data structures.
+For this, the grace-period guarantee is not sufficient, as can
+be seen in function <tt>add_gp_buggy()</tt> below.
+We will look at the reader's code later, but in the meantime, just think of
+the reader as locklessly picking up the <tt>gp</tt> pointer,
+and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
+<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = a;
+13 gp = p; /* ORDERING BUG */
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+<b>11 gp = p; /* ORDERING BUG */
+12 p-&gt;a = a;
+13 p-&gt;b = a;</b>
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+If an RCU reader fetches <tt>gp</tt> just after
+<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
+it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
+fields.
+And this is but one of many ways in which compiler and hardware optimizations
+could cause trouble.
+Therefore, we clearly need some way to prevent the compiler and the CPU from
+reordering in this manner, which brings us to the publish-subscribe
+guarantee discussed in the next section.
+
+<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
+
+<p>
+RCU's publish-subscribe guarantee allows data to be inserted
+into a linked data structure without disrupting RCU readers.
+The updater uses <tt>rcu_assign_pointer()</tt> to insert the
+new data, and readers use <tt>rcu_dereference()</tt> to
+access data, whether new or old.
+The following shows an example of insertion:
+
+<blockquote>
+<pre>
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = a;
+13 rcu_assign_pointer(gp, p);
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
+equivalent to a simple assignment statement, but also guarantees
+that its assignment will
+happen after the two assignments in lines&nbsp;11 and&nbsp;12,
+similar to the C11 <tt>memory_order_release</tt> store operation.
+It also prevents any number of &ldquo;interesting&rdquo; compiler
+optimizations, for example, the use of <tt>gp</tt> as a scratch
+location immediately preceding the assignment.
+
+<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+<br><a href="#qq3answer">Answer</a>
+
+<p>
+It is tempting to assume that the reader need not do anything special
+to control its accesses to the RCU-protected data,
+as shown in <tt>do_something_gp_buggy()</tt> below:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler
+(to say nothing of
+<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
+can trip this code up.
+For but one example, if the compiler were short of registers, it
+might choose to refetch from <tt>gp</tt> rather than keeping
+a separate copy in <tt>p</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
+<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
+ 6 rcu_read_unlock();
+ 7 return true;
+ 8 }
+ 9 rcu_read_unlock();
+10 return false;
+11 }
+</pre>
+</blockquote>
+
+<p>
+If this function ran concurrently with a series of updates that
+replaced the current structure with a new one,
+the fetches of <tt>gp-&gt;a</tt>
+and <tt>gp-&gt;b</tt> might well come from two different structures,
+which could cause serious confusion.
+To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
+<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = rcu_dereference(gp);
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
+memory barriers in the Linux kernel.
+Should a
+<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
+ever appear, then <tt>rcu_dereference()</tt> could be implemented
+as a <tt>memory_order_consume</tt> load.
+Regardless of the exact implementation, a pointer fetched by
+<tt>rcu_dereference()</tt> may not be used outside of the
+outermost RCU read-side critical section containing that
+<tt>rcu_dereference()</tt>, unless protection of
+the corresponding data element has been passed from RCU to some
+other synchronization mechanism, most commonly locking or
+<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
+
+<p>
+In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
+use <tt>rcu_dereference()</tt>, and these two RCU API elements
+work together to ensure that readers have a consistent view of
+newly added data elements.
+
+<p>
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+<ol>
+<li> Remove the data element from the enclosing structure.
+<li> Wait for all pre-existing RCU read-side critical sections
+ to complete (because only pre-existing readers can possibly have
+ a reference to the newly removed data element).
+<li> At this point, only the updater has a reference to the
+ newly removed data element, so it can safely reclaim
+ the data element, for example, by passing it to <tt>kfree()</tt>.
+</ol>
+
+This process is implemented by <tt>remove_gp_synchronous()</tt>:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3 struct foo *p;
+ 4
+ 5 spin_lock(&amp;gp_lock);
+ 6 p = rcu_access_pointer(gp);
+ 7 if (!p) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 rcu_assign_pointer(gp, NULL);
+12 spin_unlock(&amp;gp_lock);
+13 synchronize_rcu();
+14 kfree(p);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+This function is straightforward, with line&nbsp;13 waiting for a grace
+period before line&nbsp;14 frees the old data element.
+This waiting ensures that readers will reach line&nbsp;7 of
+<tt>do_something_gp()</tt> before the data element referenced by
+<tt>p</tt> is freed.
+The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
+<tt>rcu_dereference()</tt>, except that:
+
+<ol>
+<li> The value returned by <tt>rcu_access_pointer()</tt>
+ cannot be dereferenced.
+ If you want to access the value pointed to as well as
+ the pointer itself, use <tt>rcu_dereference()</tt>
+ instead of <tt>rcu_access_pointer()</tt>.
+<li> The call to <tt>rcu_access_pointer()</tt> need not be
+ protected.
+ In contrast, <tt>rcu_dereference()</tt> must either be
+ within an RCU read-side critical section or in a code
+ segment where the pointer cannot change, for example, in
+ code protected by the corresponding update-side lock.
+</ol>
+
+<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+<br><a href="#qq4answer">Answer</a>
+
+<p>
+In short, RCU's publish-subscribe guarantee is provided by the combination
+of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
+This guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers.
+This guarantee can be used in combination with the grace-period
+guarantee to also allow data elements to be removed from RCU-protected
+linked data structures, again without disrupting RCU readers.
+
+<p>
+This guarantee was only partially premeditated.
+DYNIX/ptx used an explicit memory barrier for publication, but had nothing
+resembling <tt>rcu_dereference()</tt> for subscription, nor did it
+have anything resembling the <tt>smp_read_barrier_depends()</tt>
+that was later subsumed into <tt>rcu_dereference()</tt>.
+The need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company.
+It took the Alpha architects a good hour to convince me that any sort
+of barrier would ever be needed, and it then took me a good <i>two</i> hours
+to convince them that their documentation did not make this point clear.
+More recent work with the C and C++ standards committees have provided
+much education on tricks and traps from the compiler.
+In short, compilers were much less tricky in the early 1990s, but in
+2015, don't even think about omitting <tt>rcu_dereference()</tt>!
+
+<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
+
+<p>
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+<ol>
+<li> Each CPU that has an RCU read-side critical section that
+ begins before <tt>synchronize_rcu()</tt> starts is
+ guaranteed to execute a full memory barrier between the time
+ that the RCU read-side critical section ends and the time that
+ <tt>synchronize_rcu()</tt> returns.
+ Without this guarantee, a pre-existing RCU read-side critical section
+ might hold a reference to the newly removed <tt>struct foo</tt>
+ after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt>.
+<li> Each CPU that has an RCU read-side critical section that ends
+ after <tt>synchronize_rcu()</tt> returns is guaranteed
+ to execute a full memory barrier between the time that
+ <tt>synchronize_rcu()</tt> begins and the time that the RCU
+ read-side critical section begins.
+ Without this guarantee, a later RCU read-side critical section
+ running after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt> might
+ later run <tt>do_something_gp()</tt> and find the
+ newly deleted <tt>struct foo</tt>.
+<li> If the task invoking <tt>synchronize_rcu()</tt> remains
+ on a given CPU, then that CPU is guaranteed to execute a full
+ memory barrier sometime during the execution of
+ <tt>synchronize_rcu()</tt>.
+ This guarantee ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on line&nbsp;11.
+<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
+ among a group of CPUs during that invocation, then each of the
+ CPUs in that group is guaranteed to execute a full memory barrier
+ sometime during the execution of <tt>synchronize_rcu()</tt>.
+ This guarantee also ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on
+ line&nbsp;11, but also in the case where the thread executing the
+ <tt>synchronize_rcu()</tt> migrates in the meantime.
+</ol>
+
+<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+<br><a href="#qq5answer">Answer</a>
+
+<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i> really</i> required?
+<br><a href="#qq6answer">Answer</a>
+
+<p>
+Note that these memory-barrier requirements do not replace the fundamental
+RCU requirement that a grace period wait for all pre-existing readers.
+On the contrary, the memory barriers called out in this section must operate in
+such a way as to <i>enforce</i> this fundamental requirement.
+Of course, different implementations enforce this requirement in different
+ways, but enforce it they must.
+
+<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
+
+<p>
+The common-case RCU primitives are unconditional.
+They are invoked, they do their job, and they return, with no possibility
+of error, and no need to retry.
+This is a key RCU design philosophy.
+
+<p>
+However, this philosophy is pragmatic rather than pigheaded.
+If someone comes up with a good justification for a particular conditional
+RCU primitive, it might well be implemented and added.
+After all, this guarantee was reverse-engineered, not premeditated.
+The unconditional nature of the RCU primitives was initially an
+accident of implementation, and later experience with synchronization
+primitives with conditional primitives caused me to elevate this
+accident to a guarantee.
+Therefore, the justification for adding a conditional primitive to
+RCU would need to be based on detailed and compelling use cases.
+
+<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
+
+<p>
+As far as RCU is concerned, it is always possible to carry out an
+update within an RCU read-side critical section.
+For example, that RCU read-side critical section might search for
+a given data element, and then might acquire the update-side
+spinlock in order to update that element, all while remaining
+in that RCU read-side critical section.
+Of course, it is necessary to exit the RCU read-side critical section
+before invoking <tt>synchronize_rcu()</tt>, however, this
+inconvenience can be avoided through use of the
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
+described later in this document.
+
+<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
+But how does the upgrade-to-write operation exclude other readers?
+<br><a href="#qq7answer">Answer</a>
+
+<p>
+This guarantee allows lookup code to be shared between read-side
+and update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
+
+<p>
+RCU provides extremely lightweight readers, and its read-side guarantees,
+though quite useful, are correspondingly lightweight.
+It is therefore all too easy to assume that RCU is guaranteeing more
+than it really is.
+Of course, the list of things that RCU does not guarantee is infinitely
+long, however, the following sections list a few non-guarantees that
+have caused confusion.
+Except where otherwise noted, these non-guarantees were premeditated.
+
+<ol>
+<li> <a href="#Readers Impose Minimal Ordering">
+ Readers Impose Minimal Ordering</a>
+<li> <a href="#Readers Do Not Exclude Updaters">
+ Readers Do Not Exclude Updaters</a>
+<li> <a href="#Updaters Only Wait For Old Readers">
+ Updaters Only Wait For Old Readers</a>
+<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
+ Grace Periods Don't Partition Read-Side Critical Sections</a>
+<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
+ Read-Side Critical Sections Don't Partition Grace Periods</a>
+<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
+ Disabling Preemption Does Not Block Grace Periods</a>
+</ol>
+
+<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
+
+<p>
+Reader-side markers such as <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
+except through their interaction with the grace-period APIs such as
+<tt>synchronize_rcu()</tt>.
+To see this, consider the following pair of threads:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(x, 1);
+ 5 rcu_read_unlock();
+ 6 rcu_read_lock();
+ 7 WRITE_ONCE(y, 1);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 rcu_read_lock();
+14 r1 = READ_ONCE(y);
+15 rcu_read_unlock();
+16 rcu_read_lock();
+17 r2 = READ_ONCE(x);
+18 rcu_read_unlock();
+19 }
+</pre>
+</blockquote>
+
+<p>
+After <tt>thread0()</tt> and <tt>thread1()</tt> execute
+concurrently, it is quite possible to have
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0)
+</pre>
+</blockquote>
+
+(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
+which would not be possible if <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> had much in the way of ordering
+properties.
+But they do not, so the CPU is within its rights
+to do significant reordering.
+This is by design: Any significant ordering constraints would slow down
+these fast-path APIs.
+
+<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
+Can't the compiler also reorder this code?
+<br><a href="#qq8answer">Answer</a>
+
+<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
+
+<p>
+Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
+exclude updates.
+All they do is to prevent grace periods from ending.
+The following example illustrates this:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 r1 = READ_ONCE(y);
+ 5 if (r1) {
+ 6 do_something_with_nonzero_x();
+ 7 r2 = READ_ONCE(x);
+ 8 WARN_ON(!r2); /* BUG!!! */
+ 9 }
+10 rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15 spin_lock(&amp;my_lock);
+16 WRITE_ONCE(x, 1);
+17 WRITE_ONCE(y, 1);
+18 spin_unlock(&amp;my_lock);
+19 }
+</pre>
+</blockquote>
+
+<p>
+If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
+excluded the <tt>thread1()</tt> function's update,
+the <tt>WARN_ON()</tt> could never fire.
+But the fact is that <tt>rcu_read_lock()</tt> does not exclude
+much of anything aside from subsequent grace periods, of which
+<tt>thread1()</tt> has none, so the
+<tt>WARN_ON()</tt> can and does fire.
+
+<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
+
+<p>
+It might be tempting to assume that after <tt>synchronize_rcu()</tt>
+completes, there are no readers executing.
+This temptation must be avoided because
+new readers can start immediately after <tt>synchronize_rcu()</tt>
+starts, and <tt>synchronize_rcu()</tt> is under no
+obligation to wait for these new readers.
+
+<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
+Suppose that synchronize_rcu() did wait until all readers had completed.
+Would the updater be able to rely on this?
+<br><a href="#qq9answer">Answer</a>
+
+<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
+Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
+
+<p>
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections.
+An example of this situation can be illustrated as follows, where
+<tt>x</tt>, <tt>y</tt>, and <tt>z</tt> are initially all zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 r2 = READ_ONCE(b);
+20 r3 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+</pre>
+</blockquote>
+
+<p>
+It turns out that the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
+</pre>
+</blockquote>
+
+is entirely possible.
+The following figure show how this can happen, with each circled
+<tt>QS</tt> indicating the point at which RCU recorded a
+<i>quiescent state</i> for each thread, that is, a state in which
+RCU knows that the thread cannot be in the midst of an RCU read-side
+critical section that started before the current grace period:
+
+<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
+
+<p>
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first
+grace period is known to end before the second grace period starts:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 r2 = READ_ONCE(c);
+19 synchronize_rcu();
+20 WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25 rcu_read_lock();
+26 r3 = READ_ONCE(b);
+27 r4 = READ_ONCE(d);
+28 rcu_read_unlock();
+29 }
+</pre>
+</blockquote>
+
+<p>
+Here, if <tt>(r1 == 1)</tt>, then
+<tt>thread0()</tt>'s write to <tt>b</tt> must happen
+before the end of <tt>thread1()</tt>'s grace period.
+If in addition <tt>(r4 == 1)</tt>, then
+<tt>thread3()</tt>'s read from <tt>b</tt> must happen
+after the beginning of <tt>thread2()</tt>'s grace period.
+If it is also the case that <tt>(r2 == 1)</tt>, then the
+end of <tt>thread1()</tt>'s grace period must precede the
+beginning of <tt>thread2()</tt>'s grace period.
+This mean that the two RCU read-side critical sections cannot overlap,
+guaranteeing that <tt>(r3 == 1)</tt>.
+As a result, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
+</pre>
+</blockquote>
+
+cannot happen.
+
+<p>
+This non-requirement was also non-premeditated, but became apparent
+when studying RCU's interaction with memory ordering.
+
+<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
+Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
+
+<p>
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap.
+However, this temptation leads nowhere good, as can be illustrated by
+the following, with all variables initially zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 WRITE_ONCE(d, 1);
+20 r2 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26 r3 = READ_ONCE(d);
+27 synchronize_rcu();
+28 WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33 rcu_read_lock();
+34 r4 = READ_ONCE(b);
+35 r5 = READ_ONCE(e);
+36 rcu_read_unlock();
+37 }
+</pre>
+</blockquote>
+
+<p>
+In this case, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp&amp; r5 == 1)
+</pre>
+</blockquote>
+
+is entirely possible, as illustrated below:
+
+<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
+
+<p>
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire
+grace period.
+As a result, an RCU read-side critical section cannot partition a pair
+of RCU grace periods.
+
+<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+<br><a href="#qq10answer">Answer</a>
+
+<h3><a name="Disabling Preemption Does Not Block Grace Periods">
+Disabling Preemption Does Not Block Grace Periods</a></h3>
+
+<p>
+There was a time when disabling preemption on any given CPU would block
+subsequent grace periods.
+However, this was an accident of implementation and is not a requirement.
+And in the current Linux-kernel implementation, disabling preemption
+on a given CPU in fact does not block grace periods, as Oleg Nesterov
+<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
+
+<p>
+If you need a preempt-disable region to block grace periods, you need to add
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
+as follows:
+
+<blockquote>
+<pre>
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&amp;mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&amp;mylock);
+</pre>
+</blockquote>
+
+<p>
+In theory, you could enter the RCU read-side critical section first,
+but it is more efficient to keep the entire RCU read-side critical
+section contained in the preempt-disable region as shown above.
+Of course, RCU read-side critical sections that extend outside of
+preempt-disable regions will work correctly, but such critical sections
+can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
+more work.
+And no, this is <i>not</i> an invitation to enclose all of your RCU
+read-side critical sections within preempt-disable regions, because
+doing so would degrade real-time response.
+
+<p>
+This non-requirement appeared with preemptible RCU.
+If you need a grace period that waits on non-preemptible code regions, use
+<a href="#Sched Flavor">RCU-sched</a>.
+
+<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
+
+<p>
+These parallelism facts of life are by no means specific to RCU, but
+the RCU implementation must abide by them.
+They therefore bear repeating:
+
+<ol>
+<li> Any CPU or task may be delayed at any time,
+ and any attempts to avoid these delays by disabling
+ preemption, interrupts, or whatever are completely futile.
+ This is most obvious in preemptible user-level
+ environments and in virtualized environments (where
+ a given guest OS's VCPUs can be preempted at any time by
+ the underlying hypervisor), but can also happen in bare-metal
+ environments due to ECC errors, NMIs, and other hardware
+ events.
+ Although a delay of more than about 20 seconds can result
+ in splats, the RCU implementation is obligated to use
+ algorithms that can tolerate extremely long delays, but where
+ &ldquo;extremely long&rdquo; is not long enough to allow
+ wrap-around when incrementing a 64-bit counter.
+<li> Both the compiler and the CPU can reorder memory accesses.
+ Where it matters, RCU must use compiler directives and
+ memory-barrier instructions to preserve ordering.
+<li> Conflicting writes to memory locations in any given cache line
+ will result in expensive cache misses.
+ Greater numbers of concurrent writes and more-frequent
+ concurrent writes will result in more dramatic slowdowns.
+ RCU is therefore obligated to use algorithms that have
+ sufficient locality to avoid significant performance and
+ scalability problems.
+<li> As a rough rule of thumb, only one CPU's worth of processing
+ may be carried out under the protection of any given exclusive
+ lock.
+ RCU must therefore use scalable locking designs.
+<li> Counters are finite, especially on 32-bit systems.
+ RCU's use of counters must therefore tolerate counter wrap,
+ or be designed such that counter wrap would take way more
+ time than a single system is likely to run.
+ An uptime of ten years is quite possible, a runtime
+ of a century much less so.
+ As an example of the latter, RCU's dyntick-idle nesting counter
+ allows 54 bits for interrupt nesting level (this counter
+ is 64 bits even on a 32-bit system).
+ Overflowing this counter requires 2<sup>54</sup>
+ half-interrupts on a given CPU without that CPU ever going idle.
+ If a half-interrupt happened every microsecond, it would take
+ 570 years of runtime to overflow this counter, which is currently
+ believed to be an acceptably long time.
+<li> Linux systems can have thousands of CPUs running a single
+ Linux kernel in a single shared-memory environment.
+ RCU must therefore pay close attention to high-end scalability.
+</ol>
+
+<p>
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life.
+The idea that Linux might scale to systems with thousands of CPUs would
+have been met with some skepticism in the 1990s, but these requirements
+would have otherwise have been unsurprising, even in the early 1990s.
+
+<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
+
+<p>
+These sections list quality-of-implementation requirements.
+Although an RCU implementation that ignores these requirements could
+still be used, it would likely be subject to limitations that would
+make it inappropriate for industrial-strength production use.
+Classes of quality-of-implementation requirements are as follows:
+
+<ol>
+<li> <a href="#Specialization">Specialization</a>
+<li> <a href="#Performance and Scalability">Performance and Scalability</a>
+<li> <a href="#Composability">Composability</a>
+<li> <a href="#Corner Cases">Corner Cases</a>
+</ol>
+
+<p>
+These classes is covered in the following sections.
+
+<h3><a name="Specialization">Specialization</a></h3>
+
+<p>
+RCU is and always has been intended primarily for read-mostly situations, as
+illustrated by the following figure.
+This means that RCU's read-side primitives are optimized, often at the
+expense of its update-side primitives.
+
+<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+
+<p>
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives.
+For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
+examples discussed earlier use RCU to protect readers and locking to
+coordinate updaters.
+However, the need extends much farther, requiring that a variety of
+synchronization primitives be legal within RCU read-side critical sections,
+including spinlocks, sequence locks, atomic operations, reference
+counters, and memory barriers.
+
+<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
+What about sleeping locks?
+<br><a href="#qq11answer">Answer</a>
+
+<p>
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, but many can function in that mode,
+with network routing being the poster child.
+Internet routing algorithms take significant time to propagate
+updates, so that by the time an update arrives at a given system,
+that system has been sending network traffic the wrong way for
+a considerable length of time.
+Having a few threads continue to send traffic the wrong way for a
+few more milliseconds is clearly not a problem: In the worst case,
+TCP retransmissions will eventually get the data where it needs to go.
+In general, when tracking the state of the universe outside of the
+computer, some level of inconsistency must be tolerated due to
+speed-of-light delays if nothing else.
+
+<p>
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veternarians might use heartbeat to determine
+whether or not a given cat was alive.
+But how long should they wait after the last heartbeat to decide that
+the cat is in fact dead?
+Waiting less than 400 milliseconds makes no sense because this would
+mean that a relaxed cat would be considered to cycle between death
+and life more than 100 times per minute.
+Moreover, just as with human beings, a cat's heart might stop for
+some period of time, so the exact wait period is a judgment call.
+One of our pair of veternarians might wait 30 seconds before pronouncing
+the cat dead, while the other might insist on waiting a full minute.
+The two veternarians would then disagree on the state of the cat during
+the final 30 seconds of the minute following the last heartbeat, as
+fancifully illustrated below:
+
+<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+
+<p>
+Interestingly enough, this same situation applies to hardware.
+When push comes to shove, how do we tell whether or not some
+external server has failed?
+We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time.
+Policy decisions can usually tolerate short
+periods of inconsistency.
+The policy was decided some time ago, and is only now being put into
+effect, so a few milliseconds of delay is normally inconsequential.
+
+<p>
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation between a user-level SystemV semaphore
+ID to the corresponding in-kernel data structure is protected by RCU,
+but it is absolutely forbidden to update a semaphore that has just been
+removed.
+In the Linux kernel, this need for consistency is accommodated by acquiring
+spinlocks located in the in-kernel data structure from within
+the RCU read-side critical section, and this is indicated by the
+green box in the figure above.
+Many other techniques may be used, and are in fact used within the
+Linux kernel.
+
+<p>
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows
+the right mix of synchronization tools to be used for a given job.
+
+<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
+
+<p>
+Energy efficiency is a critical component of performance today,
+and Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs.
+I cannot claim that this requirement was premeditated.
+In fact, I learned of it during a telephone conversation in which I
+was given &ldquo;frank and open&rdquo; feedback on the importance
+of energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts.
+So much so that mere Linux-kernel-mailing-list posts are
+insufficient to vent their ire.
+
+<p>
+Memory consumption is not particularly important for in most
+situations, and has become decreasingly
+so as memory sizes have expanded and memory
+costs have plummeted.
+However, as I learned from Matt Mackall's
+<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
+efforts, memory footprint is critically important on single-CPU systems with
+non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
+<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
+was born.
+Josh Triplett has since taken over the small-memory banner with his
+<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
+project, which resulted in
+<a href="#Sleepable RCU">SRCU</a>
+becoming optional for those kernels not needing it.
+
+<p>
+The remaining performance requirements are, for the most part,
+unsurprising.
+For example, in keeping with RCU's read-side specialization,
+<tt>rcu_dereference()</tt> should have negligible overhead (for
+example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
+
+<p>
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have minimal overhead.
+In particular, they should not contain atomic read-modify-write
+operations, memory-barrier instructions, preemption disabling,
+interrupt disabling, or backwards branches.
+However, in the case where the RCU read-side critical section was preempted,
+<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
+This is why it is better to nest an RCU read-side critical section
+within a preempt-disable region than vice versa, at least in cases
+where that critical section is short enough to avoid unduly degrading
+real-time latencies.
+
+<p>
+The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
+optimized for throughput.
+It may therefore incur several milliseconds of latency in addition to
+the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+<tt>synchronize_rcu()</tt> are required to use batching optimizations
+so that they can be satisfied by a single underlying grace-period-wait
+operation.
+For example, in the Linux kernel, it is not unusual for a single
+grace-period-wait operation to serve more than
+<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
+of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
+overhead down to nearly zero.
+However, the grace-period optimization is also required to avoid
+measurable degradation of real-time scheduling and interrupt latencies.
+
+<p>
+In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
+latencies are unacceptable.
+In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
+instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short.
+There are currently no special latency requirements for
+<tt>synchronize_rcu_expedited()</tt> on large systems, but,
+consistent with the empirical nature of the RCU specification,
+that is subject to change.
+However, there most definitely are scalability requirements:
+A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
+CPUs should at least make reasonable forward progress.
+In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
+is permitted to impose modest degradation of real-time latency
+on non-idle online CPUs.
+That said, it will likely be necessary to take further steps to reduce this
+degradation, hopefully to roughly that of a scheduling-clock interrupt.
+
+<p>
+There are a number of situations where even
+<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
+latency is unacceptable.
+In these situations, the asynchronous <tt>call_rcu()</tt> can be
+used in place of <tt>synchronize_rcu()</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9 struct foo *p = container_of(rhp, struct foo, rh);
+10
+11 kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16 struct foo *p;
+17
+18 spin_lock(&amp;gp_lock);
+19 p = rcu_dereference(gp);
+20 if (!p) {
+21 spin_unlock(&amp;gp_lock);
+22 return false;
+23 }
+24 rcu_assign_pointer(gp, NULL);
+25 call_rcu(&amp;p-&gt;rh, remove_gp_cb);
+26 spin_unlock(&amp;gp_lock);
+27 return true;
+28 }
+</pre>
+</blockquote>
+
+<p>
+A definition of <tt>struct foo</tt> is finally needed, and appears
+on lines&nbsp;1-5.
+The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
+on line&nbsp;25, and will be invoked after the end of a subsequent
+grace period.
+This gets the same effect as <tt>remove_gp_synchronous()</tt>,
+but without forcing the updater to wait for a grace period to elapse.
+The <tt>call_rcu()</tt> function may be used in a number of
+situations where neither <tt>synchronize_rcu()</tt> nor
+<tt>synchronize_rcu_expedited()</tt> would be legal,
+including within preempt-disable code, <tt>local_bh_disable()</tt> code,
+interrupt-disable code, and interrupt handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+The callback function (<tt>remove_gp_cb()</tt> in this case) will be
+executed within softirq (software interrupt) environment within the
+Linux kernel,
+either within a real softirq handler or under the protection
+of <tt>local_bh_disable()</tt>.
+In both the Linux kernel and in userspace, it is bad practice to
+write an RCU callback function that takes too long.
+Long-running operations should be relegated to separate threads or
+(in the Linux kernel) workqueues.
+
+<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+<br><a href="#qq12answer">Answer</a>
+
+<p>
+However, all that <tt>remove_gp_cb()</tt> is doing is
+invoking <tt>kfree()</tt> on the data element.
+This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
+which allows &ldquo;fire and forget&rdquo; operation as shown below:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9 struct foo *p;
+10
+11 spin_lock(&amp;gp_lock);
+12 p = rcu_dereference(gp);
+13 if (!p) {
+14 spin_unlock(&amp;gp_lock);
+15 return false;
+16 }
+17 rcu_assign_pointer(gp, NULL);
+18 kfree_rcu(p, rh);
+19 spin_unlock(&amp;gp_lock);
+20 return true;
+21 }
+</pre>
+</blockquote>
+
+<p>
+Note that <tt>remove_gp_faf()</tt> simply invokes
+<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
+further attention to the subsequent grace period and <tt>kfree()</tt>.
+It is permissible to invoke <tt>kfree_rcu()</tt> from the same
+environments as for <tt>call_rcu()</tt>.
+Interestingly enough, DYNIX/ptx had the equivalents of
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
+<tt>synchronize_rcu()</tt>.
+This was due to the fact that RCU was not heavily used within DYNIX/ptx,
+so the very few places that needed something like
+<tt>synchronize_rcu()</tt> simply open-coded it.
+
+<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+<br><a href="#qq13answer">Answer</a>
+
+<p>
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks
+that can be carried out in the meantime?
+The polling-style <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> functions may be used for this
+purpose, as shown below:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3 struct foo *p;
+ 4 unsigned long s;
+ 5
+ 6 spin_lock(&amp;gp_lock);
+ 7 p = rcu_access_pointer(gp);
+ 8 if (!p) {
+ 9 spin_unlock(&amp;gp_lock);
+10 return false;
+11 }
+12 rcu_assign_pointer(gp, NULL);
+13 spin_unlock(&amp;gp_lock);
+14 s = get_state_synchronize_rcu();
+15 do_something_while_waiting();
+16 cond_synchronize_rcu(s);
+17 kfree(p);
+18 return true;
+19 }
+</pre>
+</blockquote>
+
+<p>
+On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
+&ldquo;cookie&rdquo; from RCU,
+then line&nbsp;15 carries out other tasks,
+and finally, line&nbsp;16 returns immediately if a grace period has
+elapsed in the meantime, but otherwise waits as required.
+The need for <tt>get_state_synchronize_rcu</tt> and
+<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
+so it is too early to tell whether they will stand the test of time.
+
+<p>
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility and CPU overhead.
+
+<h3><a name="Composability">Composability</a></h3>
+
+<p>
+Composability has received much attention in recent years, perhaps in part
+due to the collision of multicore hardware with object-oriented techniques
+designed in single-threaded environments for single-threaded use.
+And in theory, RCU read-side critical sections may be composed, and in
+fact may be nested arbitrarily deeply.
+In practice, as with all real-world implementations of composable
+constructs, there are limitations.
+
+<p>
+Implementations of RCU for which <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> generate no code, such as
+Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
+nested arbitrarily deeply.
+After all, there is no overhead.
+Except that if all these instances of <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> are visible to the compiler,
+compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first.
+If the nesting is not visible to the compiler, as is the case with
+mutually recursive functions each in its own translation unit,
+stack overflow will result.
+If the nesting takes the form of loops, either the control variable
+will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
+Nevertheless, this class of RCU implementations is one
+of the most composable constructs in existence.
+
+<p>
+RCU implementations that explicitly track nesting depth
+are limited by the nesting-depth counter.
+For example, the Linux kernel's preemptible RCU limits nesting to
+<tt>INT_MAX</tt>.
+This should suffice for almost all practical purposes.
+That said, a consecutive pair of RCU read-side critical sections
+between which there is an operation that waits for a grace period
+cannot be enclosed in another RCU read-side critical section.
+This is because it is not legal to wait for a grace period within
+an RCU read-side critical section: To do so would result either
+in deadlock or
+in RCU implicitly splitting the enclosing RCU read-side critical
+section, neither of which is conducive to a long-lived and prosperous
+kernel.
+
+<p>
+It is worth noting that RCU is not alone in limiting composability.
+For example, many transactional-memory implementations prohibit
+composing a pair of transactions separated by an irrevocable
+operation (for example, a network receive operation).
+For another example, lock-based critical sections can be composed
+surprisingly freely, but only if deadlock is avoided.
+
+<p>
+In short, although RCU read-side critical sections are highly composable,
+care is required in some situations, just as is the case for any other
+composable synchronization mechanism.
+
+<h3><a name="Corner Cases">Corner Cases</a></h3>
+
+<p>
+A given RCU workload might have an endless and intense stream of
+RCU read-side critical sections, perhaps even so intense that there
+was never a point in time during which there was not at least one
+RCU read-side critical section in flight.
+RCU cannot allow this situation to block grace periods: As long as
+all the RCU read-side critical sections are finite, grace periods
+must also be finite.
+
+<p>
+That said, preemptible RCU implementations could potentially result
+in RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side
+critical section.
+This situation can arise only in heavily loaded systems, but systems using
+real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case.
+That said, the exact requirements on RCU priority boosting will likely
+evolve as more experience accumulates.
+
+<p>
+Other workloads might have very high update rates.
+Although one can argue that such workloads should instead use
+something other than RCU, the fact remains that RCU must
+handle such workloads gracefully.
+This requirement is another factor driving batching of grace periods,
+but it is also the driving force behind the checks for large numbers
+of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
+Finally, high update rates should not delay RCU read-side critical
+sections, although some read-side delays can occur when using
+<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
+of <tt>try_stop_cpus()</tt>.
+(In the future, <tt>synchronize_rcu_expedited()</tt> will be
+converted to use lighter-weight inter-processor interrupts (IPIs),
+but this will still disturb readers, though to a much smaller degree.)
+
+<p>
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
+in a tight loop
+in the early 2000s suddenly provided a much deeper appreciation of the
+high-update-rate corner case.
+This test also motivated addition of some RCU code to react to high update
+rates, for example, if a given CPU finds itself with more than 10,000
+RCU callbacks queued, it will cause RCU to take evasive action by
+more aggressively starting grace periods and more aggressively forcing
+completion of grace-period processing.
+This evasive action causes the grace period to complete more quickly,
+but at the cost of restricting RCU's batching optimizations, thus
+increasing the CPU overhead incurred by that grace period.
+
+<h2><a name="Software-Engineering Requirements">
+Software-Engineering Requirements</a></h2>
+
+<p>
+Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
+guard against mishaps and misuse:
+
+<ol>
+<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
+ everywhere that it is needed, so kernels built with
+ <tt>CONFIG_PROVE_RCU=y</tt> will spat if
+ <tt>rcu_dereference()</tt> is used outside of an
+ RCU read-side critical section.
+ Update-side code can use <tt>rcu_dereference_protected()</tt>,
+ which takes a
+ <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
+ to indicate what is providing the protection.
+ If the indicated protection is not provided, a lockdep splat
+ is emitted.
+
+ <p>
+ Code shared between readers and updaters can use
+ <tt>rcu_dereference_check()</tt>, which also takes a
+ lockdep expression, and emits a lockdep splat if neither
+ <tt>rcu_read_lock()</tt> nor the indicated protection
+ is in place.
+ In addition, <tt>rcu_dereference_raw()</tt> is used in those
+ (hopefully rare) cases where the required protection cannot
+ be easily described.
+ Finally, <tt>rcu_read_lock_held()</tt> is provided to
+ allow a function to verify that it has been invoked within
+ an RCU read-side critical section.
+ I was made aware of this set of requirements shortly after Thomas
+ Gleixner audited a number of RCU uses.
+<li> A given function might wish to check for RCU-related preconditions
+ upon entry, before using any other RCU API.
+ The <tt>rcu_lockdep_assert()</tt> does this job,
+ asserting the expression in kernels having lockdep enabled
+ and doing nothing otherwise.
+<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
+ and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
+ substituting a simple assignment.
+ To catch this sort of error, a given RCU-protected pointer may be
+ tagged with <tt>__rcu</tt>, after which running sparse
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
+ about simple-assignment accesses to that pointer.
+ Arnd Bergmann made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lwn.net/Articles/376011/">patch series</a>.
+<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
+ will splat if a data element is passed to <tt>call_rcu()</tt>
+ twice in a row, without a grace period in between.
+ (This error is similar to a double free.)
+ The corresponding <tt>rcu_head</tt> structures that are
+ dynamically allocated are automatically tracked, but
+ <tt>rcu_head</tt> structures allocated on the stack
+ must be initialized with <tt>init_rcu_head_on_stack()</tt>
+ and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
+ Similarly, statically allocated non-stack <tt>rcu_head</tt>
+ structures must be initialized with <tt>init_rcu_head()</tt>
+ and cleaned up with <tt>destroy_rcu_head()</tt>.
+ Mathieu Desnoyers made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
+<li> An infinite loop in an RCU read-side critical section will
+ eventually trigger an RCU CPU stall warning splat, with
+ the duration of &ldquo;eventually&rdquo; being controlled by the
+ <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
+ alternatively, by the
+ <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
+ parameter.
+ However, RCU is not obligated to produce this splat
+ unless there is a grace period waiting on that particular
+ RCU read-side critical section.
+ <p>
+ Some extreme workloads might intentionally delay
+ RCU grace periods, and systems running those workloads can
+ be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
+ to suppress the splats.
+ This kernel parameter may also be set via <tt>sysfs</tt>.
+ Furthermore, RCU CPU stall warnings are counter-productive
+ during sysrq dumps and during panics.
+ RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
+ <tt>rcu_sysrq_end()</tt> API members to be called before
+ and after long sysrq dumps.
+ RCU also supplies the <tt>rcu_panic()</tt> notifier that is
+ automatically invoked at the beginning of a panic to suppress
+ further RCU CPU stall warnings.
+
+ <p>
+ This requirement made itself known in the early 1990s, pretty
+ much the first time that it was necessary to debug a CPU stall.
+ That said, the initial implementation in DYNIX/ptx was quite
+ generic in comparison with that of Linux.
+<li> Although it would be very good to detect pointers leaking out
+ of RCU read-side critical sections, there is currently no
+ good way of doing this.
+ One complication is the need to distinguish between pointers
+ leaking and pointers that have been handed off from RCU to
+ some other synchronization mechanism, for example, reference
+ counting.
+<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
+ information is provided via both debugfs and event tracing.
+<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
+ <tt>rcu_dereference()</tt> to create typical linked
+ data structures can be surprisingly error-prone.
+ Therefore, RCU-protected
+ <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
+ and, more recently, RCU-protected
+ <a href="https://lwn.net/Articles/612100/">hash tables</a>
+ are available.
+ Many other special-purpose RCU-protected data structures are
+ available in the Linux kernel and the userspace RCU library.
+<li> Some linked structures are created at compile time, but still
+ require <tt>__rcu</tt> checking.
+ The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
+ purpose.
+<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
+ when creating linked structures that are to be published via
+ a single external pointer.
+ The <tt>RCU_INIT_POINTER()</tt> macro is provided for
+ this task and also for assigning <tt>NULL</tt> pointers
+ at runtime.
+</ol>
+
+<p>
+This not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found
+in real-world RCU usage.
+
+<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
+
+<p>
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU.
+Some of the relevant points of interest are as follows:
+
+<ol>
+<li> <a href="#Configuration">Configuration</a>.
+<li> <a href="#Firmware Interface">Firmware Interface</a>.
+<li> <a href="#Early Boot">Early Boot</a>.
+<li> <a href="#Interrupts and NMIs">
+ Interrupts and non-maskable interrupts (NMIs)</a>.
+<li> <a href="#Loadable Modules">Loadable Modules</a>.
+<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
+<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
+<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
+<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
+<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
+<li> <a href="#Performance, Scalability, Response Time, and Reliability">
+ Performance, Scalability, Response Time, and Reliability</a>.
+</ol>
+
+<p>
+This list is probably incomplete, but it does give a feel for the
+most notable Linux-kernel complications.
+Each of the following sections covers one of the above topics.
+
+<h3><a name="Configuration">Configuration</a></h3>
+
+<p>
+RCU's goal is automatic configuration, so that almost nobody
+needs to worry about RCU's <tt>Kconfig</tt> options.
+And for almost all users, RCU does in fact work well
+&ldquo;out of the box.&rdquo;
+
+<p>
+However, there are specialized use cases that are handled by
+kernel boot parameters and <tt>Kconfig</tt> options.
+Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
+about new <tt>Kconfig</tt> options, which requires almost all of them
+be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
+
+<p>
+This all should be quite obvious, but the fact remains that
+Linus Torvalds recently had to
+<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
+me of this requirement.
+
+<h3><a name="Firmware Interface">Firmware Interface</a></h3>
+
+<p>
+In many cases, kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation.
+Or the translation is accurate, but the original message is bogus.
+
+<p>
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor.
+If RCU naively believed the firmware, as it used to do,
+it would create too many per-CPU kthreads.
+Although the resulting system will still run correctly, the extra
+kthreads needlessly consume memory and can cause confusion
+when they show up in <tt>ps</tt> listings.
+
+<p>
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists.
+The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
+come online) cause a number of
+<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
+
+<h3><a name="Early Boot">Early Boot</a></h3>
+
+<p>
+The Linux kernel's boot sequence is an interesting process,
+and RCU is used early, even before <tt>rcu_init()</tt>
+is invoked.
+In fact, a number of RCU's primitives can be used as soon as the
+initial task's <tt>task_struct</tt> is available and the
+boot CPU's per-CPU variables are set up.
+The read-side primitives (<tt>rcu_read_lock()</tt>,
+<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
+and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
+as will <tt>rcu_assign_pointer()</tt>.
+
+<p>
+Although <tt>call_rcu()</tt> may be invoked at any
+time during boot, callbacks are not guaranteed to be invoked until after
+the scheduler is fully up and running.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full initialization
+cannot occur until after the scheduler has initialized itself to the
+point where RCU can spawn and run its kthreads.
+In theory, it would be possible to invoke callbacks earlier,
+however, this is not a panacea because there would be severe restrictions
+on what operations those callbacks could invoke.
+
+<p>
+Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
+<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
+(<a href="#Bottom-Half Flavor">discussed below</a>),
+and
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+will all operate normally
+during very early boot, the reason being that there is only one CPU
+and preemption is disabled.
+This means that the call <tt>synchronize_rcu()</tt> (or friends)
+itself is a quiescent
+state and thus a grace period, so the early-boot implementation can
+be a no-op.
+
+<p>
+Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
+continue to operate normally through the remainder of boot, courtesy
+of the fact that preemption is disabled across their RCU read-side
+critical sections and also courtesy of the fact that there is still
+only one CPU.
+However, once the scheduler starts initializing, preemption is enabled.
+There is still only a single CPU, but the fact that preemption is enabled
+means that the no-op implementation of <tt>synchronize_rcu()</tt> no
+longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
+Therefore, as soon as the scheduler starts initializing, the early-boot
+fastpath is disabled.
+This means that <tt>synchronize_rcu()</tt> switches to its runtime
+mode of operation where it posts callbacks, which in turn means that
+any call to <tt>synchronize_rcu()</tt> will block until the corresponding
+callback is invoked.
+Unfortunately, the callback cannot be invoked until RCU's runtime
+grace-period machinery is up and running, which cannot happen until
+the scheduler has initialized itself sufficiently to allow RCU's
+kthreads to be spawned.
+Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
+initialization can result in deadlock.
+
+<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+<br><a href="#qq14answer">Answer</a>
+
+<p>
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
+
+<p>
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions
+of code, as are invocations of <tt>call_rcu()</tt>.
+
+<p>
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead stealthily
+transitioning back to process context.
+This trick is sometimes used to invoke system calls from inside the kernel.
+These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
+about how it counts interrupt nesting levels.
+I learned of this requirement the hard way during a rewrite
+of RCU's dyntick-idle code.
+
+<p>
+The Linux kernel has non-maskable interrupts (NMIs), and
+RCU read-side critical sections are legal within NMI handlers.
+Thankfully, RCU update-side primitives, including
+<tt>call_rcu()</tt>, are prohibited within NMI handlers.
+
+<p>
+The name notwithstanding, some Linux-kernel architectures
+can have nested NMIs, which RCU must handle correctly.
+Andy Lutomirski
+<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
+with this requirement;
+he also kindly surprised me with
+<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
+that meets this requirement.
+
+<h3><a name="Loadable Modules">Loadable Modules</a></h3>
+
+<p>
+The Linux kernel has loadable modules, and these modules can
+also be unloaded.
+After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault.
+The module-unload functions must therefore cancel any
+delayed calls to loadable-module functions, for example,
+any outstanding <tt>mod_timer()</tt> must be dealt with
+via <tt>del_timer_sync()</tt> or similar.
+
+<p>
+Unfortunately, there is no way to cancel an RCU callback;
+once you invoke <tt>call_rcu()</tt>, the callback function is
+going to eventually be invoked, unless the system goes down first.
+Because it is normally considered socially irresponsible to crash the system
+in response to a module unload request, we need some other way
+to deal with in-flight RCU callbacks.
+
+<p>
+RCU therefore provides
+<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
+which waits until all in-flight RCU callbacks have been invoked.
+If a module uses <tt>call_rcu()</tt>, its exit function should therefore
+prevent any future invocation of <tt>call_rcu()</tt>, then invoke
+<tt>rcu_barrier()</tt>.
+In theory, the underlying module-unload code could invoke
+<tt>rcu_barrier()</tt> unconditionally, but in practice this would
+incur unacceptable latencies.
+
+<p>
+Nikita Danilov noted this requirement for an analogous filesystem-unmount
+situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
+The need for <tt>rcu_barrier()</tt> for module unloading became
+apparent later.
+
+<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
+
+<p>
+The Linux kernel supports CPU hotplug, which means that CPUs
+can come and go.
+It is of course illegal to use any RCU API member from an offline CPU.
+This requirement was present from day one in DYNIX/ptx, but
+on the other hand, the Linux kernel's CPU-hotplug implementation
+is &ldquo;interesting.&rdquo;
+
+<p>
+The Linux-kernel CPU-hotplug implementation has notifiers that
+are used to allow the various kernel subsystems (including RCU)
+to respond appropriately to a given CPU-hotplug operation.
+Most RCU operations may be invoked from CPU-hotplug notifiers,
+including even normal synchronous grace-period operations
+such as <tt>synchronize_rcu()</tt>.
+However, expedited grace-period operations such as
+<tt>synchronize_rcu_expedited()</tt> are not supported,
+due to the fact that current implementations block CPU-hotplug
+operations, which could result in deadlock.
+
+<p>
+In addition, all-callback-wait operations such as
+<tt>rcu_barrier()</tt> are also not supported, due to the
+fact that there are phases of CPU-hotplug operations where
+the outgoing CPU's callbacks will not be invoked until after
+the CPU-hotplug operation ends, which could also result in deadlock.
+
+<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
+
+<p>
+RCU depends on the scheduler, and the scheduler uses RCU to
+protect some of its data structures.
+This means the scheduler is forbidden from acquiring
+the runqueue locks and the priority-inheritance locks
+in the middle of an outermost RCU read-side critical section unless either
+(1)&nbsp;it releases them before exiting that same
+RCU read-side critical section, or
+(2)&nbsp;interrupts are disabled across
+that entire RCU read-side critical section.
+This same prohibition also applies (recursively!) to any lock that is acquired
+while holding any lock to which this prohibition applies.
+Adhering to this rule prevents preemptible RCU from invoking
+<tt>rcu_read_unlock_special()</tt> while either runqueue or
+priority-inheritance locks are held, thus avoiding deadlock.
+
+<p>
+Prior to v4.4, it was only necessary to disable preemption across
+RCU read-side critical sections that acquired scheduler locks.
+In v4.4, expedited grace periods started using IPIs, and these
+IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
+Therefore, this expedited-grace-period change required disabling of
+interrupts, not just preemption.
+
+<p>
+For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must be written carefully to avoid similar deadlocks.
+In particular, <tt>rcu_read_unlock()</tt> must tolerate an
+interrupt where the interrupt handler invokes both
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+This possibility requires <tt>rcu_read_unlock()</tt> to use
+negative nesting levels to avoid destructive recursion via
+interrupt handler's use of RCU.
+
+<p>
+This pair of mutual scheduler-RCU requirements came as a
+<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
+
+<p>
+As noted above, RCU makes use of kthreads, and it is necessary to
+avoid excessive CPU-time accumulation by these kthreads.
+This requirement was no surprise, but RCU's violation of it
+when running context-switch-heavy workloads when built with
+<tt>CONFIG_NO_HZ_FULL=y</tt>
+<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
+RCU has made good progress towards meeting this requirement, even
+for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+but there is room for further improvement.
+
+<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
+
+<p>
+It is possible to use tracing on RCU code, but tracing itself
+uses RCU.
+For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+is provided for use by tracing, which avoids the destructive
+recursion that could otherwise ensue.
+This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing
+cannot be used.
+The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
+
+<p>
+Interrupting idle CPUs is considered socially unacceptable,
+especially by people with battery-powered embedded systems.
+RCU therefore conserves energy by detecting which CPUs are
+idle, including tracking CPUs that have been interrupted from idle.
+This is a large part of the energy-efficiency requirement,
+so I learned of this via an irate phone call.
+
+<p>
+Because RCU avoids interrupting idle CPUs, it is illegal to
+execute an RCU read-side critical section on an idle CPU.
+(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
+if you try it.)
+The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
+event tracing is provided to work around this restriction.
+In addition, <tt>rcu_is_watching()</tt> may be used to
+test whether or not it is currently legal to run RCU read-side
+critical sections on this CPU.
+I learned of the need for diagnostics on the one hand
+and <tt>RCU_NONIDLE()</tt> on the other while inspecting
+idle-loop code.
+Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
+which is used quite heavily in the idle loop.
+
+<p>
+It is similarly socially unacceptable to interrupt an
+<tt>nohz_full</tt> CPU running in userspace.
+RCU must therefore track <tt>nohz_full</tt> userspace
+execution.
+And in
+<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
+kernels, RCU must separately track idle CPUs on the one hand and
+CPUs that are either idle or executing in userspace on the other.
+In both cases, RCU must be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent
+any time idle and/or executing in userspace.
+
+<p>
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet, for example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of
+which was finally able to demonstrate
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
+As noted earlier,
+I learned of many of these requirements via angry phone calls:
+Flaming me on the Linux-kernel mailing list was apparently not
+sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
+
+<p>
+Although small-memory non-realtime systems can simply use Tiny RCU,
+code size is only one aspect of memory efficiency.
+Another aspect is the size of the <tt>rcu_head</tt> structure
+used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
+Although this structure contains nothing more than a pair of pointers,
+it does appear in many RCU-protected data structures, including
+some that are size critical.
+The <tt>page</tt> structure is a case in point, as evidenced by
+the many occurrences of the <tt>union</tt> keyword within that structure.
+
+<p>
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the <tt>rcu_head</tt> structures that
+are waiting for a grace period to elapse.
+It is also the reason why <tt>rcu_head</tt> structures do not contain
+debug information, such as fields tracking the file and line of the
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
+Although this information might appear in debug-only kernel builds at some
+point, in the meantime, the <tt>-&gt;func</tt> field will often provide
+the needed debug information.
+
+<p>
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures.
+Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
+shares storage with a great many other structures that are used at
+various points in the corresponding page's lifetime.
+In order to correctly resolve certain
+<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
+the Linux kernel's memory-management subsystem needs a particular bit
+to remain zero during all phases of grace-period processing,
+and that bit happens to map to the bottom bit of the
+<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
+RCU makes this guarantee as long as <tt>call_rcu()</tt>
+is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
+or some future &ldquo;lazy&rdquo;
+variant of <tt>call_rcu()</tt> that might one day be created for
+energy-efficiency purposes.
+
+<h3><a name="Performance, Scalability, Response Time, and Reliability">
+Performance, Scalability, Response Time, and Reliability</a></h3>
+
+<p>
+Expanding on the
+<a href="#Performance and Scalability">earlier discussion</a>,
+RCU is used heavily by hot code paths in performance-critical
+portions of the Linux kernel's networking, security, virtualization,
+and scheduling code paths.
+RCU must therefore use efficient implementations, especially in its
+read-side primitives.
+To that end, it would be good if preemptible RCU's implementation
+of <tt>rcu_read_lock()</tt> could be inlined, however, doing
+this requires resolving <tt>#include</tt> issues with the
+<tt>task_struct</tt> structure.
+
+<p>
+The Linux kernel supports hardware configurations with up to
+4096 CPUs, which means that RCU must be extremely scalable.
+Algorithms that involve frequent acquisitions of global locks or
+frequent atomic operations on global variables simply cannot be
+tolerated within the RCU implementation.
+RCU therefore makes heavy use of a combining tree based on the
+<tt>rcu_node</tt> structure.
+RCU is required to tolerate all CPUs continuously invoking any
+combination of RCU's runtime primitives with minimal per-operation
+overhead.
+In fact, in many cases, increasing load must <i>decrease</i> the
+per-operation overhead, witness the batching optimizations for
+<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
+<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
+As a general rule, RCU must cheerfully accept whatever the
+rest of the Linux kernel decides to throw at it.
+
+<p>
+The Linux kernel is used for real-time workloads, especially
+in conjunction with the
+<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
+The real-time-latency response requirements are such that the
+traditional approach of disabling preemption across RCU
+read-side critical sections is inappropriate.
+Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
+use an RCU implementation that allows RCU read-side critical
+sections to be preempted.
+This requirement made its presence known after users made it
+clear that an earlier
+<a href="https://lwn.net/Articles/107930/">real-time patch</a>
+did not meet their needs, in conjunction with some
+<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
+encountered by a very early version of the -rt patchset.
+
+<p>
+In addition, RCU must make do with a sub-100-microsecond real-time latency
+budget.
+In fact, on smaller systems with the -rt patchset, the Linux kernel
+provides sub-20-microsecond real-time latencies for the whole kernel,
+including RCU.
+RCU's scalability and latency must therefore be sufficient for
+these sorts of configurations.
+To my surprise, the sub-100-microsecond real-time latency budget
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
+applies to even the largest systems [PDF]</a>,
+up to and including systems with 4096 CPUs.
+This real-time requirement motivated the grace-period kthread, which
+also simplified handling of a number of race conditions.
+
+<p>
+Finally, RCU's status as a synchronization primitive means that
+any RCU failure can result in arbitrary memory corruption that can be
+extremely difficult to debug.
+This means that RCU must be extremely reliable, which in
+practice also means that RCU must have an aggressive stress-test
+suite.
+This stress-test suite is called <tt>rcutorture</tt>.
+
+<p>
+Although the need for <tt>rcutorture</tt> was no surprise,
+the current immense popularity of the Linux kernel is posing
+interesting&mdash;and perhaps unprecedented&mdash;validation
+challenges.
+To see this, keep in mind that there are well over one billion
+instances of the Linux kernel running today, given Android
+smartphones, Linux-powered televisions, and servers.
+This number can be expected to increase sharply with the advent of
+the celebrated Internet of Things.
+
+<p>
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime.
+This bug will be occurring about three times per <i>day</i> across
+the installed base.
+RCU could simply hide behind hardware error rates, given that no one
+should really expect their smartphone to last for a million years.
+However, anyone taking too much comfort from this thought should
+consider the fact that in most jurisdictions, a successful multi-year
+test of a given mechanism, which might include a Linux kernel,
+suffices for a number of types of safety-critical certifications.
+In fact, rumor has it that the Linux kernel is already being used
+in production for safety-critical applications.
+I don't know about you, but I would feel quite bad if a bug in RCU
+killed someone.
+Which might explain my recent focus on validation and verification.
+
+<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
+
+<p>
+One of the more surprising things about RCU is that there are now
+no fewer than five <i>flavors</i>, or API families.
+In addition, the primary flavor that has been the sole focus up to
+this point has two different implementations, non-preemptible and
+preemptible.
+The other four flavors are listed below, with requirements for each
+described in a separate section.
+
+<ol>
+<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
+<li> <a href="#Sched Flavor">Sched Flavor</a>
+<li> <a href="#Sleepable RCU">Sleepable RCU</a>
+<li> <a href="#Tasks RCU">Tasks RCU</a>
+</ol>
+
+<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
+
+<p>
+The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
+hence the &ldquo;_bh&rdquo; abbreviations)
+flavor of RCU, or <i>RCU-bh</i>, was developed by
+Dipankar Sarma to provide a flavor of RCU that could withstand the
+network-based denial-of-service attacks researched by Robert
+Olsson.
+These attacks placed so much networking load on the system
+that some of the CPUs never exited softirq execution,
+which in turn prevented those CPUs from ever executing a context switch,
+which, in the RCU implementation of that time, prevented grace periods
+from ever ending.
+The result was an out-of-memory condition and a system hang.
+
+<p>
+The solution was the creation of RCU-bh, which does
+<tt>local_bh_disable()</tt>
+across its read-side critical sections, and which uses the transition
+from one type of softirq processing to another as a quiescent state
+in addition to context switch, idle, user mode, and offline.
+This means that RCU-bh grace periods can complete even when some of
+the CPUs execute in softirq indefinitely, thus allowing algorithms
+based on RCU-bh to withstand network-based denial-of-service attacks.
+
+<p>
+Because
+<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
+disable and re-enable softirq handlers, any attempt to start a softirq
+handlers during the
+RCU-bh read-side critical section will be deferred.
+In this case, <tt>rcu_read_unlock_bh()</tt>
+will invoke softirq processing, which can take considerable time.
+One can of course argue that this softirq overhead should be associated
+with the code following the RCU-bh read-side critical section rather
+than <tt>rcu_read_unlock_bh()</tt>, but the fact
+is that most profiling tools cannot be expected to make this sort
+of fine distinction.
+For example, suppose that a three-millisecond-long RCU-bh read-side
+critical section executes during a time of heavy networking load.
+There will very likely be an attempt to invoke at least one softirq
+handler during that three milliseconds, but any such invocation will
+be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
+This can of course make it appear at first glance as if
+<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
+includes
+<tt>rcu_read_lock_bh()</tt>,
+<tt>rcu_read_unlock_bh()</tt>,
+<tt>rcu_dereference_bh()</tt>,
+<tt>rcu_dereference_bh_check()</tt>,
+<tt>synchronize_rcu_bh()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>,
+<tt>call_rcu_bh()</tt>,
+<tt>rcu_barrier_bh()</tt>, and
+<tt>rcu_read_lock_bh_held()</tt>.
+
+<h3><a name="Sched Flavor">Sched Flavor</a></h3>
+
+<p>
+Before preemptible RCU, waiting for an RCU grace period had the
+side effect of also waiting for all pre-existing interrupt
+and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that
+do not have this property, given that any point in the code outside
+of an RCU read-side critical section can be a quiescent state.
+Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
+RCU in that an RCU-sched grace period waits for for pre-existing
+interrupt and NMI handlers.
+In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
+APIs have identical implementations, while kernels built with
+<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
+
+<p>
+Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
+<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
+disable and re-enable preemption, respectively.
+This means that if there was a preemption attempt during the
+RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
+will enter the scheduler, with all the latency and overhead entailed.
+Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
+as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
+However, the highest-priority task won't be preempted, so that task
+will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
+includes
+<tt>rcu_read_lock_sched()</tt>,
+<tt>rcu_read_unlock_sched()</tt>,
+<tt>rcu_read_lock_sched_notrace()</tt>,
+<tt>rcu_read_unlock_sched_notrace()</tt>,
+<tt>rcu_dereference_sched()</tt>,
+<tt>rcu_dereference_sched_check()</tt>,
+<tt>synchronize_sched()</tt>,
+<tt>synchronize_rcu_sched_expedited()</tt>,
+<tt>call_rcu_sched()</tt>,
+<tt>rcu_barrier_sched()</tt>, and
+<tt>rcu_read_lock_sched_held()</tt>.
+However, anything that disables preemption also marks an RCU-sched
+read-side critical section, including
+<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
+<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
+and so on.
+
+<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
+
+<p>
+For well over a decade, someone saying &ldquo;I need to block within
+an RCU read-side critical section&rdquo; was a reliable indication
+that this someone did not understand RCU.
+After all, if you are always blocking in an RCU read-side critical
+section, you can probably afford to use a higher-overhead synchronization
+mechanism.
+However, that changed with the advent of the Linux kernel's notifiers,
+whose RCU read-side critical
+sections almost never sleep, but sometimes need to.
+This resulted in the introduction of
+<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
+or <i>SRCU</i>.
+
+<p>
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an <tt>srcu_struct</tt> structure.
+A pointer to this structure must be passed in to each SRCU function,
+for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
+<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
+The key benefit of these domains is that a slow SRCU reader in one
+domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code
+must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
+to <tt>srcu_read_unlock()</tt>, for example, as follows:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+As noted above, it is legal to block within SRCU read-side critical sections,
+however, with great power comes great responsibility.
+If you block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can block waiting, either directly or indirectly, for that domain's
+grace period to elapse.
+For example, this results in a self-deadlock:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 synchronize_srcu(&amp;ss);
+ 6 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+However, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
+deadlock would still be possible.
+Furthermore, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
+and if an <tt>ss1</tt>-domain SRCU read-side critical section
+acquired another mutex that was held across as <tt>ss</tt>-domain
+<tt>synchronize_srcu()</tt>,
+deadlock would again be possible.
+Such a deadlock cycle could extend across an arbitrarily large number
+of different SRCU domains.
+Again, with great power comes great responsibility.
+
+<p>
+Unlike the other RCU flavors, SRCU read-side critical sections can
+run on idle and even offline CPUs.
+This ability requires that <tt>srcu_read_lock()</tt> and
+<tt>srcu_read_unlock()</tt> contain memory barriers, which means
+that SRCU readers will run a bit slower than would RCU readers.
+It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
+API, which, in combination with <tt>srcu_read_unlock()</tt>,
+guarantees a full memory barrier.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
+includes
+<tt>srcu_read_lock()</tt>,
+<tt>srcu_read_unlock()</tt>,
+<tt>srcu_dereference()</tt>,
+<tt>srcu_dereference_check()</tt>,
+<tt>synchronize_srcu()</tt>,
+<tt>synchronize_srcu_expedited()</tt>,
+<tt>call_srcu()</tt>,
+<tt>srcu_barrier()</tt>, and
+<tt>srcu_read_lock_held()</tt>.
+It also includes
+<tt>DEFINE_SRCU()</tt>,
+<tt>DEFINE_STATIC_SRCU()</tt>, and
+<tt>init_srcu_struct()</tt>
+APIs for defining and initializing <tt>srcu_struct</tt> structures.
+
+<h3><a name="Tasks RCU">Tasks RCU</a></h3>
+
+<p>
+Some forms of tracing use &ldquo;tramopolines&rdquo; to handle the
+binary rewriting required to install different types of probes.
+It would be good to be able to free old trampolines, which sounds
+like a job for some form of RCU.
+However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers
+such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+In addition, it does not work to have these markers in the trampoline
+itself, because there would need to be instructions following
+<tt>rcu_read_unlock()</tt>.
+Although <tt>synchronize_rcu()</tt> would guarantee that execution
+reached the <tt>rcu_read_unlock()</tt>, it would not be able to
+guarantee that execution had completely left the trampoline.
+
+<p>
+The solution, in the form of
+<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
+is to have implicit
+read-side critical sections that are delimited by voluntary context
+switches, that is, calls to <tt>schedule()</tt>,
+<tt>cond_resched_rcu_qs()</tt>, and
+<tt>synchronize_rcu_tasks()</tt>.
+In addition, transitions to and from userspace execution also delimit
+tasks-RCU read-side critical sections.
+
+<p>
+The tasks-RCU API is quite compact, consisting only of
+<tt>call_rcu_tasks()</tt>,
+<tt>synchronize_rcu_tasks()</tt>, and
+<tt>rcu_barrier_tasks()</tt>.
+
+<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
+
+<p>
+One of the tricks that RCU uses to attain update-side scalability is
+to increase grace-period latency with increasing numbers of CPUs.
+If this becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+<p>
+Expedited grace periods scan the CPUs, so their latency and overhead
+increases with increasing numbers of CPUs.
+If this becomes a serious problem on large systems, it will be necessary
+to do some redesign to avoid this scalability problem.
+
+<p>
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+expedited grace-period and <tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use expedited grace periods in CPU-hotplug
+notifiers, it will be necessary to avoid disabling CPU hotplug.
+This would introduce some complexity, so there had better be a <i>very</i>
+good reason.
+
+<p>
+The tradeoff between grace-period latency on the one hand and interruptions
+of other CPUs on the other hand may need to be re-examined.
+The desire is of course for zero grace-period latency as well as zero
+interprocessor interrupts undertaken during an expedited grace period
+operation.
+While this ideal is unlikely to be achievable, it is quite possible that
+further improvements can be made.
+
+<p>
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such
+as sockets or cores.
+Such spreading and alignment is currently believed to be unnecessary
+because the hotpath read-side primitives do not access the combining
+tree, nor does <tt>call_rcu()</tt> in the common case.
+If you believe that your architecture needs such spreading and alignment,
+then your architecture should also benefit from the
+<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
+to the number of CPUs in a socket, NUMA node, or whatever.
+If the number of CPUs is too large, use a fraction of the number of
+CPUs.
+If the number of CPUs is a large prime number, well, that certainly
+is an &ldquo;interesting&rdquo; architectural choice!
+More flexible arrangements might be considered, but only if
+<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
+if the inadequacy has been demonstrated by a carefully run and
+realistic system-level workload.
+
+<p>
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+<p>
+There is an embarrassingly large number of flavors of RCU, and this
+number has been increasing over time.
+Perhaps it will be possible to combine some at some future date.
+
+<p>
+RCU's various kthreads are reasonably recent additions.
+It is quite likely that adjustments will be required to more gracefully
+handle extreme loads.
+It might also be necessary to be able to relate CPU utilization by
+RCU's kthreads and softirq handlers to the code that instigated this
+CPU utilization.
+For example, RCU callback overhead might be charged back to the
+originating <tt>call_rcu()</tt> instance, though probably not
+in production kernels.
+
+<h2><a name="Summary">Summary</a></h2>
+
+<p>
+This document has presented more than two decade's worth of RCU
+requirements.
+Given that the requirements keep changing, this will not be the last
+word on this subject, but at least it serves to get an important
+subset of the requirements set forth.
+
+<h2><a name="Acknowledgments">Acknowledgments</a></h2>
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
+Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
+Andy Lutomirski for their help in rendering
+this article human readable, and to Michelle Rankin for her support
+of this effort.
+Other contributions are acknowledged in the Linux kernel's git archive.
+The cartoon is copyright (c) 2013 by Melissa Broussard,
+and is provided
+under the terms of the Creative Commons Attribution-Share Alike 3.0
+United States license.
+
+<h3><a name="Answers to Quick Quizzes">
+Answers to Quick Quizzes</a></h3>
+
+<a name="qq1answer"></a>
+<p><b>Quick Quiz 1</b>:
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+
+
+</p><p><b>Answer</b>:
+First, if updaters do not wish to be blocked by readers, they can use
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+be discussed later.
+Second, even when using <tt>synchronize_rcu()</tt>, the other
+update-side code does run concurrently with readers, whether pre-existing
+or not.
+
+
+</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
+
+<a name="qq2answer"></a>
+<p><b>Quick Quiz 2</b>:
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+
+
+</p><p><b>Answer</b>:
+Without that extra grace period, memory reordering could result in
+<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+concurrently with the last bits of <tt>recovery()</tt>.
+
+
+</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
+
+<a name="qq3answer"></a>
+<p><b>Quick Quiz 3</b>:
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+
+
+</p><p><b>Answer</b>:
+No, it cannot.
+The readers cannot see either of these two fields until
+the assignment to <tt>gp</tt>, by which time both fields are
+fully initialized.
+So reordering the assignments
+to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+cause any problems.
+
+
+</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
+
+<a name="qq4answer"></a>
+<p><b>Quick Quiz 4</b>:
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+
+
+</p><p><b>Answer</b>:
+Let's start with what happens to <tt>do_something_gp()</tt>
+if it fails to use <tt>rcu_dereference()</tt>.
+It could reuse a value formerly fetched from this same pointer.
+It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
+mash-up of two distince pointer values.
+It might even use value-speculation optimizations, where it makes a wrong
+guess, but by the time it gets around to checking the value, an update
+has changed the pointer to match the wrong guess.
+Too bad about any dereferences that returned pre-initialization garbage
+in the meantime!
+
+<p>
+For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+the above optimizations are harmless.
+However,
+with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+<tt>sparse</tt> will complain if you
+define <tt>gp</tt> with <tt>__rcu</tt> and then
+access it without using
+either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+
+
+</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
+
+<a name="qq5answer"></a>
+<p><b>Quick Quiz 5</b>:
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+
+
+</p><p><b>Answer</b>:
+If RCU cannot tell whether or not a given
+RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>,
+then it must assume that the RCU read-side critical section
+started first.
+In other words, a given instance of <tt>synchronize_rcu()</tt>
+can avoid waiting on a given RCU read-side critical section only
+if it can prove that <tt>synchronize_rcu()</tt> started first.
+
+
+</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
+
+<a name="qq6answer"></a>
+<p><b>Quick Quiz 6</b>:
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i> really</i> required?
+
+
+</p><p><b>Answer</b>:
+Yes, they really are required.
+To see why the first guarantee is required, consider the following
+sequence of events:
+
+<ol>
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>do_something_with(q-&gt;a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+</ol>
+
+<p>
+Therefore, there absolutely must be a full memory barrier between the
+end of the RCU read-side critical section and the end of the
+grace period.
+
+<p>
+The sequence of events demonstrating the necessity of the second rule
+is roughly similar:
+
+<ol>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+</ol>
+
+<p>
+And similarly, without a memory barrier between the beginning of the
+grace period and the beginning of the RCU read-side critical section,
+CPU&nbsp;1 might end up accessing the freelist.
+
+<p>
+The &ldquo;as if&rdquo; rule of course applies, so that any implementation
+that acts as if the appropriate memory barriers were in place is a
+correct implementation.
+That said, it is much easier to fool yourself into believing that you have
+adhered to the as-if rule than it is to actually adhere to it!
+
+
+</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
+
+<a name="qq7answer"></a>
+<p><b>Quick Quiz 7</b>:
+But how does the upgrade-to-write operation exclude other readers?
+
+
+</p><p><b>Answer</b>:
+It doesn't, just like normal RCU updates, which also do not exclude
+RCU readers.
+
+
+</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
+
+<a name="qq8answer"></a>
+<p><b>Quick Quiz 8</b>:
+Can't the compiler also reorder this code?
+
+
+</p><p><b>Answer</b>:
+No, the volatile casts in <tt>READ_ONCE()</tt> and
+<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+this particular case.
+
+
+</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
+
+<a name="qq9answer"></a>
+<p><b>Quick Quiz 9</b>:
+Suppose that synchronize_rcu() did wait until all readers had completed.
+Would the updater be able to rely on this?
+
+
+</p><p><b>Answer</b>:
+No.
+Even if <tt>synchronize_rcu()</tt> were to wait until
+all readers had completed, a new reader might start immediately after
+<tt>synchronize_rcu()</tt> completed.
+Therefore, the code following
+<tt>synchronize_rcu()</tt> cannot rely on there being no readers
+in any case.
+
+
+</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
+
+<a name="qq10answer"></a>
+<p><b>Quick Quiz 10</b>:
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+
+
+</p><p><b>Answer</b>:
+In theory, an infinite number.
+In practice, an unknown number that is sensitive to both implementation
+details and timing considerations.
+Therefore, even in practice, RCU users must abide by the theoretical rather
+than the practical answer.
+
+
+</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
+
+<a name="qq11answer"></a>
+<p><b>Quick Quiz 11</b>:
+What about sleeping locks?
+
+
+</p><p><b>Answer</b>:
+These are forbidden within Linux-kernel RCU read-side critical sections
+because it is not legal to place a quiescent state (in this case,
+voluntary context switch) within an RCU read-side critical section.
+However, sleeping locks may be used within userspace RCU read-side critical
+sections, and also within Linux-kernel sleepable RCU
+<a href="#Sleepable RCU">(SRCU)</a>
+read-side critical sections.
+In addition, the -rt patchset turns spinlocks into a sleeping locks so
+that the corresponding critical sections can be preempted, which
+also means that these sleeplockified spinlocks (but not other sleeping locks!)
+may be acquire within -rt-Linux-kernel RCU read-side critical sections.
+
+<p>
+Note that it <i>is</i> legal for a normal RCU read-side critical section
+to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
+but only as long as it does not loop indefinitely attempting to
+conditionally acquire that sleeping locks.
+The key point is that things like <tt>mutex_trylock()</tt>
+either return with the mutex held, or return an error indication if
+the mutex was not immediately available.
+Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
+
+
+</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
+
+<a name="qq12answer"></a>
+<p><b>Quick Quiz 12</b>:
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+
+
+</p><p><b>Answer</b>:
+Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+any changes, including any insertions that <tt>rcu_dereference()</tt>
+would protect against.
+Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
+is released on line&nbsp;25, which in turn means that
+<tt>rcu_access_pointer()</tt> suffices.
+
+
+</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
+
+<a name="qq13answer"></a>
+<p><b>Quick Quiz 13</b>:
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+
+
+</p><p><b>Answer</b>:
+We could define things this way, but keep in mind that this sort of
+definition would say that updates in garbage-collected languages
+cannot complete until the next time the garbage collector runs,
+which does not seem at all reasonable.
+The key point is that in most cases, an updater using either
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+next update as soon as it has invoked <tt>call_rcu()</tt> or
+<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+grace period.
+
+
+</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
+
+<a name="qq14answer"></a>
+<p><b>Quick Quiz 14</b>:
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+
+
+</p><p><b>Answer</b>:
+In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
+maps directly to <tt>synchronize_sched()</tt>.
+Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+during scheduler initialization.
+
+
+</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
+
+
+</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
new file mode 100644
index 000000000000..3a97ba490c42
--- /dev/null
+++ b/Documentation/RCU/Design/Requirements/Requirements.htmlx
@@ -0,0 +1,2741 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
+
+<h1>A Tour Through RCU's Requirements</h1>
+
+<p>Copyright IBM Corporation, 2015</p>
+<p>Author: Paul E.&nbsp;McKenney</p>
+<p><i>The initial version of this document appeared in the
+<a href="https://lwn.net/">LWN</a> articles
+<a href="https://lwn.net/Articles/652156/">here</a>,
+<a href="https://lwn.net/Articles/652677/">here</a>, and
+<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
+
+<h2>Introduction</h2>
+
+<p>
+Read-copy update (RCU) is a synchronization mechanism that is often
+used as a replacement for reader-writer locking.
+RCU is unusual in that updaters do not block readers,
+which means that RCU's read-side primitives can be exceedingly fast
+and scalable.
+In addition, updaters can make useful forward progress concurrently
+with readers.
+However, all this concurrency between RCU readers and updaters does raise
+the question of exactly what RCU readers are doing, which in turn
+raises the question of exactly what RCU's requirements are.
+
+<p>
+This document therefore summarizes RCU's requirements, and can be thought
+of as an informal, high-level specification for RCU.
+It is important to understand that RCU's specification is primarily
+empirical in nature;
+in fact, I learned about many of these requirements the hard way.
+This situation might cause some consternation, however, not only
+has this learning process been a lot of fun, but it has also been
+a great privilege to work with so many people willing to apply
+technologies in interesting new ways.
+
+<p>
+All that aside, here are the categories of currently known RCU requirements:
+</p>
+
+<ol>
+<li> <a href="#Fundamental Requirements">
+ Fundamental Requirements</a>
+<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
+<li> <a href="#Parallelism Facts of Life">
+ Parallelism Facts of Life</a>
+<li> <a href="#Quality-of-Implementation Requirements">
+ Quality-of-Implementation Requirements</a>
+<li> <a href="#Linux Kernel Complications">
+ Linux Kernel Complications</a>
+<li> <a href="#Software-Engineering Requirements">
+ Software-Engineering Requirements</a>
+<li> <a href="#Other RCU Flavors">
+ Other RCU Flavors</a>
+<li> <a href="#Possible Future Changes">
+ Possible Future Changes</a>
+</ol>
+
+<p>
+This is followed by a <a href="#Summary">summary</a>,
+which is in turn followed by the inevitable
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
+
+<p>
+RCU's fundamental requirements are the closest thing RCU has to hard
+mathematical requirements.
+These are:
+
+<ol>
+<li> <a href="#Grace-Period Guarantee">
+ Grace-Period Guarantee</a>
+<li> <a href="#Publish-Subscribe Guarantee">
+ Publish-Subscribe Guarantee</a>
+<li> <a href="#Memory-Barrier Guarantees">
+ Memory-Barrier Guarantees</a>
+<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
+ RCU Primitives Guaranteed to Execute Unconditionally</a>
+<li> <a href="#Guaranteed Read-to-Write Upgrade">
+ Guaranteed Read-to-Write Upgrade</a>
+</ol>
+
+<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
+
+<p>
+RCU's grace-period guarantee is unusual in being premeditated:
+Jack Slingwine and I had this guarantee firmly in mind when we started
+work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
+That said, the past two decades of experience with RCU have produced
+a much more detailed understanding of this guarantee.
+
+<p>
+RCU's grace-period guarantee allows updaters to wait for the completion
+of all pre-existing RCU read-side critical sections.
+An RCU read-side critical section
+begins with the marker <tt>rcu_read_lock()</tt> and ends with
+the marker <tt>rcu_read_unlock()</tt>.
+These markers may be nested, and RCU treats a nested set as one
+big RCU read-side critical section.
+Production-quality implementations of <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
+fact have exactly zero overhead in Linux kernels built for production
+use with <tt>CONFIG_PREEMPT=n</tt>.
+
+<p>
+This guarantee allows ordering to be enforced with extremely low
+overhead to readers, for example:
+
+<blockquote>
+<pre>
+ 1 int x, y;
+ 2
+ 3 void thread0(void)
+ 4 {
+ 5 rcu_read_lock();
+ 6 r1 = READ_ONCE(x);
+ 7 r2 = READ_ONCE(y);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 WRITE_ONCE(x, 1);
+14 synchronize_rcu();
+15 WRITE_ONCE(y, 1);
+16 }
+</pre>
+</blockquote>
+
+<p>
+Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
+all pre-existing readers, any instance of <tt>thread0()</tt> that
+loads a value of zero from <tt>x</tt> must complete before
+<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
+also load a value of zero from <tt>y</tt>.
+Similarly, any instance of <tt>thread0()</tt> that loads a value of
+one from <tt>y</tt> must have started after the
+<tt>synchronize_rcu()</tt> started, and must therefore also load
+a value of one from <tt>x</tt>.
+Therefore, the outcome:
+<blockquote>
+<pre>
+(r1 == 0 &amp;&amp; r2 == 1)
+</pre>
+</blockquote>
+cannot happen.
+
+<p>@@QQ@@
+Wait a minute!
+You said that updaters can make useful forward progress concurrently
+with readers, but pre-existing readers will block
+<tt>synchronize_rcu()</tt>!!!
+Just who are you trying to fool???
+<p>@@QQA@@
+First, if updaters do not wish to be blocked by readers, they can use
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+be discussed later.
+Second, even when using <tt>synchronize_rcu()</tt>, the other
+update-side code does run concurrently with readers, whether pre-existing
+or not.
+<p>@@QQE@@
+
+<p>
+This scenario resembles one of the first uses of RCU in
+<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
+which managed a distributed lock manager's transition into
+a state suitable for handling recovery from node failure,
+more or less as follows:
+
+<blockquote>
+<pre>
+ 1 #define STATE_NORMAL 0
+ 2 #define STATE_WANT_RECOVERY 1
+ 3 #define STATE_RECOVERING 2
+ 4 #define STATE_WANT_NORMAL 3
+ 5
+ 6 int state = STATE_NORMAL;
+ 7
+ 8 void do_something_dlm(void)
+ 9 {
+10 int state_snap;
+11
+12 rcu_read_lock();
+13 state_snap = READ_ONCE(state);
+14 if (state_snap == STATE_NORMAL)
+15 do_something();
+16 else
+17 do_something_carefully();
+18 rcu_read_unlock();
+19 }
+20
+21 void start_recovery(void)
+22 {
+23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
+24 synchronize_rcu();
+25 WRITE_ONCE(state, STATE_RECOVERING);
+26 recovery();
+27 WRITE_ONCE(state, STATE_WANT_NORMAL);
+28 synchronize_rcu();
+29 WRITE_ONCE(state, STATE_NORMAL);
+30 }
+</pre>
+</blockquote>
+
+<p>
+The RCU read-side critical section in <tt>do_something_dlm()</tt>
+works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
+to guarantee that <tt>do_something()</tt> never runs concurrently
+with <tt>recovery()</tt>, but with little or no synchronization
+overhead in <tt>do_something_dlm()</tt>.
+
+<p>@@QQ@@
+Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+<p>@@QQA@@
+Without that extra grace period, memory reordering could result in
+<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+concurrently with the last bits of <tt>recovery()</tt>.
+<p>@@QQE@@
+
+<p>
+In order to avoid fatal problems such as deadlocks,
+an RCU read-side critical section must not contain calls to
+<tt>synchronize_rcu()</tt>.
+Similarly, an RCU read-side critical section must not
+contain anything that waits, directly or indirectly, on completion of
+an invocation of <tt>synchronize_rcu()</tt>.
+
+<p>
+Although RCU's grace-period guarantee is useful in and of itself, with
+<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
+it would be good to be able to use RCU to coordinate read-side
+access to linked data structures.
+For this, the grace-period guarantee is not sufficient, as can
+be seen in function <tt>add_gp_buggy()</tt> below.
+We will look at the reader's code later, but in the meantime, just think of
+the reader as locklessly picking up the <tt>gp</tt> pointer,
+and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
+<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = a;
+13 gp = p; /* ORDERING BUG */
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The problem is that both the compiler and weakly ordered CPUs are within
+their rights to reorder this code as follows:
+
+<blockquote>
+<pre>
+ 1 bool add_gp_buggy_optimized(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+<b>11 gp = p; /* ORDERING BUG */
+12 p-&gt;a = a;
+13 p-&gt;b = a;</b>
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+If an RCU reader fetches <tt>gp</tt> just after
+<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
+it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
+fields.
+And this is but one of many ways in which compiler and hardware optimizations
+could cause trouble.
+Therefore, we clearly need some way to prevent the compiler and the CPU from
+reordering in this manner, which brings us to the publish-subscribe
+guarantee discussed in the next section.
+
+<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
+
+<p>
+RCU's publish-subscribe guarantee allows data to be inserted
+into a linked data structure without disrupting RCU readers.
+The updater uses <tt>rcu_assign_pointer()</tt> to insert the
+new data, and readers use <tt>rcu_dereference()</tt> to
+access data, whether new or old.
+The following shows an example of insertion:
+
+<blockquote>
+<pre>
+ 1 bool add_gp(int a, int b)
+ 2 {
+ 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
+ 4 if (!p)
+ 5 return -ENOMEM;
+ 6 spin_lock(&amp;gp_lock);
+ 7 if (rcu_access_pointer(gp)) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 p-&gt;a = a;
+12 p-&gt;b = a;
+13 rcu_assign_pointer(gp, p);
+14 spin_unlock(&amp;gp_lock);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
+equivalent to a simple assignment statement, but also guarantees
+that its assignment will
+happen after the two assignments in lines&nbsp;11 and&nbsp;12,
+similar to the C11 <tt>memory_order_release</tt> store operation.
+It also prevents any number of &ldquo;interesting&rdquo; compiler
+optimizations, for example, the use of <tt>gp</tt> as a scratch
+location immediately preceding the assignment.
+
+<p>@@QQ@@
+But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+from being reordered.
+Can't that also cause problems?
+<p>@@QQA@@
+No, it cannot.
+The readers cannot see either of these two fields until
+the assignment to <tt>gp</tt>, by which time both fields are
+fully initialized.
+So reordering the assignments
+to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+cause any problems.
+<p>@@QQE@@
+
+<p>
+It is tempting to assume that the reader need not do anything special
+to control its accesses to the RCU-protected data,
+as shown in <tt>do_something_gp_buggy()</tt> below:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+However, this temptation must be resisted because there are a
+surprisingly large number of ways that the compiler
+(to say nothing of
+<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
+can trip this code up.
+For but one example, if the compiler were short of registers, it
+might choose to refetch from <tt>gp</tt> rather than keeping
+a separate copy in <tt>p</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp_buggy_optimized(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
+<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
+ 6 rcu_read_unlock();
+ 7 return true;
+ 8 }
+ 9 rcu_read_unlock();
+10 return false;
+11 }
+</pre>
+</blockquote>
+
+<p>
+If this function ran concurrently with a series of updates that
+replaced the current structure with a new one,
+the fetches of <tt>gp-&gt;a</tt>
+and <tt>gp-&gt;b</tt> might well come from two different structures,
+which could cause serious confusion.
+To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
+<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
+
+<blockquote>
+<pre>
+ 1 bool do_something_gp(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 p = rcu_dereference(gp);
+ 5 if (p) {
+ 6 do_something(p-&gt;a, p-&gt;b);
+ 7 rcu_read_unlock();
+ 8 return true;
+ 9 }
+10 rcu_read_unlock();
+11 return false;
+12 }
+</pre>
+</blockquote>
+
+<p>
+The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
+memory barriers in the Linux kernel.
+Should a
+<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
+ever appear, then <tt>rcu_dereference()</tt> could be implemented
+as a <tt>memory_order_consume</tt> load.
+Regardless of the exact implementation, a pointer fetched by
+<tt>rcu_dereference()</tt> may not be used outside of the
+outermost RCU read-side critical section containing that
+<tt>rcu_dereference()</tt>, unless protection of
+the corresponding data element has been passed from RCU to some
+other synchronization mechanism, most commonly locking or
+<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
+
+<p>
+In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
+use <tt>rcu_dereference()</tt>, and these two RCU API elements
+work together to ensure that readers have a consistent view of
+newly added data elements.
+
+<p>
+Of course, it is also necessary to remove elements from RCU-protected
+data structures, for example, using the following process:
+
+<ol>
+<li> Remove the data element from the enclosing structure.
+<li> Wait for all pre-existing RCU read-side critical sections
+ to complete (because only pre-existing readers can possibly have
+ a reference to the newly removed data element).
+<li> At this point, only the updater has a reference to the
+ newly removed data element, so it can safely reclaim
+ the data element, for example, by passing it to <tt>kfree()</tt>.
+</ol>
+
+This process is implemented by <tt>remove_gp_synchronous()</tt>:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_synchronous(void)
+ 2 {
+ 3 struct foo *p;
+ 4
+ 5 spin_lock(&amp;gp_lock);
+ 6 p = rcu_access_pointer(gp);
+ 7 if (!p) {
+ 8 spin_unlock(&amp;gp_lock);
+ 9 return false;
+10 }
+11 rcu_assign_pointer(gp, NULL);
+12 spin_unlock(&amp;gp_lock);
+13 synchronize_rcu();
+14 kfree(p);
+15 return true;
+16 }
+</pre>
+</blockquote>
+
+<p>
+This function is straightforward, with line&nbsp;13 waiting for a grace
+period before line&nbsp;14 frees the old data element.
+This waiting ensures that readers will reach line&nbsp;7 of
+<tt>do_something_gp()</tt> before the data element referenced by
+<tt>p</tt> is freed.
+The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
+<tt>rcu_dereference()</tt>, except that:
+
+<ol>
+<li> The value returned by <tt>rcu_access_pointer()</tt>
+ cannot be dereferenced.
+ If you want to access the value pointed to as well as
+ the pointer itself, use <tt>rcu_dereference()</tt>
+ instead of <tt>rcu_access_pointer()</tt>.
+<li> The call to <tt>rcu_access_pointer()</tt> need not be
+ protected.
+ In contrast, <tt>rcu_dereference()</tt> must either be
+ within an RCU read-side critical section or in a code
+ segment where the pointer cannot change, for example, in
+ code protected by the corresponding update-side lock.
+</ol>
+
+<p>@@QQ@@
+Without the <tt>rcu_dereference()</tt> or the
+<tt>rcu_access_pointer()</tt>, what destructive optimizations
+might the compiler make use of?
+<p>@@QQA@@
+Let's start with what happens to <tt>do_something_gp()</tt>
+if it fails to use <tt>rcu_dereference()</tt>.
+It could reuse a value formerly fetched from this same pointer.
+It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
+mash-up of two distince pointer values.
+It might even use value-speculation optimizations, where it makes a wrong
+guess, but by the time it gets around to checking the value, an update
+has changed the pointer to match the wrong guess.
+Too bad about any dereferences that returned pre-initialization garbage
+in the meantime!
+
+<p>
+For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+the above optimizations are harmless.
+However,
+with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+<tt>sparse</tt> will complain if you
+define <tt>gp</tt> with <tt>__rcu</tt> and then
+access it without using
+either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+<p>@@QQE@@
+
+<p>
+In short, RCU's publish-subscribe guarantee is provided by the combination
+of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
+This guarantee allows data elements to be safely added to RCU-protected
+linked data structures without disrupting RCU readers.
+This guarantee can be used in combination with the grace-period
+guarantee to also allow data elements to be removed from RCU-protected
+linked data structures, again without disrupting RCU readers.
+
+<p>
+This guarantee was only partially premeditated.
+DYNIX/ptx used an explicit memory barrier for publication, but had nothing
+resembling <tt>rcu_dereference()</tt> for subscription, nor did it
+have anything resembling the <tt>smp_read_barrier_depends()</tt>
+that was later subsumed into <tt>rcu_dereference()</tt>.
+The need for these operations made itself known quite suddenly at a
+late-1990s meeting with the DEC Alpha architects, back in the days when
+DEC was still a free-standing company.
+It took the Alpha architects a good hour to convince me that any sort
+of barrier would ever be needed, and it then took me a good <i>two</i> hours
+to convince them that their documentation did not make this point clear.
+More recent work with the C and C++ standards committees have provided
+much education on tricks and traps from the compiler.
+In short, compilers were much less tricky in the early 1990s, but in
+2015, don't even think about omitting <tt>rcu_dereference()</tt>!
+
+<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
+
+<p>
+The previous section's simple linked-data-structure scenario clearly
+demonstrates the need for RCU's stringent memory-ordering guarantees on
+systems with more than one CPU:
+
+<ol>
+<li> Each CPU that has an RCU read-side critical section that
+ begins before <tt>synchronize_rcu()</tt> starts is
+ guaranteed to execute a full memory barrier between the time
+ that the RCU read-side critical section ends and the time that
+ <tt>synchronize_rcu()</tt> returns.
+ Without this guarantee, a pre-existing RCU read-side critical section
+ might hold a reference to the newly removed <tt>struct foo</tt>
+ after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt>.
+<li> Each CPU that has an RCU read-side critical section that ends
+ after <tt>synchronize_rcu()</tt> returns is guaranteed
+ to execute a full memory barrier between the time that
+ <tt>synchronize_rcu()</tt> begins and the time that the RCU
+ read-side critical section begins.
+ Without this guarantee, a later RCU read-side critical section
+ running after the <tt>kfree()</tt> on line&nbsp;14 of
+ <tt>remove_gp_synchronous()</tt> might
+ later run <tt>do_something_gp()</tt> and find the
+ newly deleted <tt>struct foo</tt>.
+<li> If the task invoking <tt>synchronize_rcu()</tt> remains
+ on a given CPU, then that CPU is guaranteed to execute a full
+ memory barrier sometime during the execution of
+ <tt>synchronize_rcu()</tt>.
+ This guarantee ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on line&nbsp;11.
+<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
+ among a group of CPUs during that invocation, then each of the
+ CPUs in that group is guaranteed to execute a full memory barrier
+ sometime during the execution of <tt>synchronize_rcu()</tt>.
+ This guarantee also ensures that the <tt>kfree()</tt> on
+ line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
+ execute after the removal on
+ line&nbsp;11, but also in the case where the thread executing the
+ <tt>synchronize_rcu()</tt> migrates in the meantime.
+</ol>
+
+<p>@@QQ@@
+Given that multiple CPUs can start RCU read-side critical sections
+at any time without any ordering whatsoever, how can RCU possibly tell whether
+or not a given RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>?
+<p>@@QQA@@
+If RCU cannot tell whether or not a given
+RCU read-side critical section starts before a
+given instance of <tt>synchronize_rcu()</tt>,
+then it must assume that the RCU read-side critical section
+started first.
+In other words, a given instance of <tt>synchronize_rcu()</tt>
+can avoid waiting on a given RCU read-side critical section only
+if it can prove that <tt>synchronize_rcu()</tt> started first.
+<p>@@QQE@@
+
+<p>@@QQ@@
+The first and second guarantees require unbelievably strict ordering!
+Are all these memory barriers <i> really</i> required?
+<p>@@QQA@@
+Yes, they really are required.
+To see why the first guarantee is required, consider the following
+sequence of events:
+
+<ol>
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>do_something_with(q-&gt;a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+</ol>
+
+<p>
+Therefore, there absolutely must be a full memory barrier between the
+end of the RCU read-side critical section and the end of the
+grace period.
+
+<p>
+The sequence of events demonstrating the necessity of the second rule
+is roughly similar:
+
+<ol>
+<li> CPU 0: <tt>list_del_rcu(p);</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
+<li> CPU 1: <tt>rcu_read_lock()</tt>
+<li> CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
+<li> CPU 0: <tt>kfree(p);</tt>
+<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+<li> CPU 1: <tt>rcu_read_unlock()</tt>
+</ol>
+
+<p>
+And similarly, without a memory barrier between the beginning of the
+grace period and the beginning of the RCU read-side critical section,
+CPU&nbsp;1 might end up accessing the freelist.
+
+<p>
+The &ldquo;as if&rdquo; rule of course applies, so that any implementation
+that acts as if the appropriate memory barriers were in place is a
+correct implementation.
+That said, it is much easier to fool yourself into believing that you have
+adhered to the as-if rule than it is to actually adhere to it!
+<p>@@QQE@@
+
+<p>
+Note that these memory-barrier requirements do not replace the fundamental
+RCU requirement that a grace period wait for all pre-existing readers.
+On the contrary, the memory barriers called out in this section must operate in
+such a way as to <i>enforce</i> this fundamental requirement.
+Of course, different implementations enforce this requirement in different
+ways, but enforce it they must.
+
+<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
+
+<p>
+The common-case RCU primitives are unconditional.
+They are invoked, they do their job, and they return, with no possibility
+of error, and no need to retry.
+This is a key RCU design philosophy.
+
+<p>
+However, this philosophy is pragmatic rather than pigheaded.
+If someone comes up with a good justification for a particular conditional
+RCU primitive, it might well be implemented and added.
+After all, this guarantee was reverse-engineered, not premeditated.
+The unconditional nature of the RCU primitives was initially an
+accident of implementation, and later experience with synchronization
+primitives with conditional primitives caused me to elevate this
+accident to a guarantee.
+Therefore, the justification for adding a conditional primitive to
+RCU would need to be based on detailed and compelling use cases.
+
+<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
+
+<p>
+As far as RCU is concerned, it is always possible to carry out an
+update within an RCU read-side critical section.
+For example, that RCU read-side critical section might search for
+a given data element, and then might acquire the update-side
+spinlock in order to update that element, all while remaining
+in that RCU read-side critical section.
+Of course, it is necessary to exit the RCU read-side critical section
+before invoking <tt>synchronize_rcu()</tt>, however, this
+inconvenience can be avoided through use of the
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
+described later in this document.
+
+<p>@@QQ@@
+But how does the upgrade-to-write operation exclude other readers?
+<p>@@QQA@@
+It doesn't, just like normal RCU updates, which also do not exclude
+RCU readers.
+<p>@@QQE@@
+
+<p>
+This guarantee allows lookup code to be shared between read-side
+and update-side code, and was premeditated, appearing in the earliest
+DYNIX/ptx RCU documentation.
+
+<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
+
+<p>
+RCU provides extremely lightweight readers, and its read-side guarantees,
+though quite useful, are correspondingly lightweight.
+It is therefore all too easy to assume that RCU is guaranteeing more
+than it really is.
+Of course, the list of things that RCU does not guarantee is infinitely
+long, however, the following sections list a few non-guarantees that
+have caused confusion.
+Except where otherwise noted, these non-guarantees were premeditated.
+
+<ol>
+<li> <a href="#Readers Impose Minimal Ordering">
+ Readers Impose Minimal Ordering</a>
+<li> <a href="#Readers Do Not Exclude Updaters">
+ Readers Do Not Exclude Updaters</a>
+<li> <a href="#Updaters Only Wait For Old Readers">
+ Updaters Only Wait For Old Readers</a>
+<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
+ Grace Periods Don't Partition Read-Side Critical Sections</a>
+<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
+ Read-Side Critical Sections Don't Partition Grace Periods</a>
+<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
+ Disabling Preemption Does Not Block Grace Periods</a>
+</ol>
+
+<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
+
+<p>
+Reader-side markers such as <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
+except through their interaction with the grace-period APIs such as
+<tt>synchronize_rcu()</tt>.
+To see this, consider the following pair of threads:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(x, 1);
+ 5 rcu_read_unlock();
+ 6 rcu_read_lock();
+ 7 WRITE_ONCE(y, 1);
+ 8 rcu_read_unlock();
+ 9 }
+10
+11 void thread1(void)
+12 {
+13 rcu_read_lock();
+14 r1 = READ_ONCE(y);
+15 rcu_read_unlock();
+16 rcu_read_lock();
+17 r2 = READ_ONCE(x);
+18 rcu_read_unlock();
+19 }
+</pre>
+</blockquote>
+
+<p>
+After <tt>thread0()</tt> and <tt>thread1()</tt> execute
+concurrently, it is quite possible to have
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0)
+</pre>
+</blockquote>
+
+(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
+which would not be possible if <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> had much in the way of ordering
+properties.
+But they do not, so the CPU is within its rights
+to do significant reordering.
+This is by design: Any significant ordering constraints would slow down
+these fast-path APIs.
+
+<p>@@QQ@@
+Can't the compiler also reorder this code?
+<p>@@QQA@@
+No, the volatile casts in <tt>READ_ONCE()</tt> and
+<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+this particular case.
+<p>@@QQE@@
+
+<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
+
+<p>
+Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
+exclude updates.
+All they do is to prevent grace periods from ending.
+The following example illustrates this:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 r1 = READ_ONCE(y);
+ 5 if (r1) {
+ 6 do_something_with_nonzero_x();
+ 7 r2 = READ_ONCE(x);
+ 8 WARN_ON(!r2); /* BUG!!! */
+ 9 }
+10 rcu_read_unlock();
+11 }
+12
+13 void thread1(void)
+14 {
+15 spin_lock(&amp;my_lock);
+16 WRITE_ONCE(x, 1);
+17 WRITE_ONCE(y, 1);
+18 spin_unlock(&amp;my_lock);
+19 }
+</pre>
+</blockquote>
+
+<p>
+If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
+excluded the <tt>thread1()</tt> function's update,
+the <tt>WARN_ON()</tt> could never fire.
+But the fact is that <tt>rcu_read_lock()</tt> does not exclude
+much of anything aside from subsequent grace periods, of which
+<tt>thread1()</tt> has none, so the
+<tt>WARN_ON()</tt> can and does fire.
+
+<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
+
+<p>
+It might be tempting to assume that after <tt>synchronize_rcu()</tt>
+completes, there are no readers executing.
+This temptation must be avoided because
+new readers can start immediately after <tt>synchronize_rcu()</tt>
+starts, and <tt>synchronize_rcu()</tt> is under no
+obligation to wait for these new readers.
+
+<p>@@QQ@@
+Suppose that synchronize_rcu() did wait until all readers had completed.
+Would the updater be able to rely on this?
+<p>@@QQA@@
+No.
+Even if <tt>synchronize_rcu()</tt> were to wait until
+all readers had completed, a new reader might start immediately after
+<tt>synchronize_rcu()</tt> completed.
+Therefore, the code following
+<tt>synchronize_rcu()</tt> cannot rely on there being no readers
+in any case.
+<p>@@QQE@@
+
+<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
+Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
+
+<p>
+It is tempting to assume that if any part of one RCU read-side critical
+section precedes a given grace period, and if any part of another RCU
+read-side critical section follows that same grace period, then all of
+the first RCU read-side critical section must precede all of the second.
+However, this just isn't the case: A single grace period does not
+partition the set of RCU read-side critical sections.
+An example of this situation can be illustrated as follows, where
+<tt>x</tt>, <tt>y</tt>, and <tt>z</tt> are initially all zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 r2 = READ_ONCE(b);
+20 r3 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+</pre>
+</blockquote>
+
+<p>
+It turns out that the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
+</pre>
+</blockquote>
+
+is entirely possible.
+The following figure show how this can happen, with each circled
+<tt>QS</tt> indicating the point at which RCU recorded a
+<i>quiescent state</i> for each thread, that is, a state in which
+RCU knows that the thread cannot be in the midst of an RCU read-side
+critical section that started before the current grace period:
+
+<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
+
+<p>
+If it is necessary to partition RCU read-side critical sections in this
+manner, it is necessary to use two grace periods, where the first
+grace period is known to end before the second grace period starts:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 r2 = READ_ONCE(c);
+19 synchronize_rcu();
+20 WRITE_ONCE(d, 1);
+21 }
+22
+23 void thread3(void)
+24 {
+25 rcu_read_lock();
+26 r3 = READ_ONCE(b);
+27 r4 = READ_ONCE(d);
+28 rcu_read_unlock();
+29 }
+</pre>
+</blockquote>
+
+<p>
+Here, if <tt>(r1 == 1)</tt>, then
+<tt>thread0()</tt>'s write to <tt>b</tt> must happen
+before the end of <tt>thread1()</tt>'s grace period.
+If in addition <tt>(r4 == 1)</tt>, then
+<tt>thread3()</tt>'s read from <tt>b</tt> must happen
+after the beginning of <tt>thread2()</tt>'s grace period.
+If it is also the case that <tt>(r2 == 1)</tt>, then the
+end of <tt>thread1()</tt>'s grace period must precede the
+beginning of <tt>thread2()</tt>'s grace period.
+This mean that the two RCU read-side critical sections cannot overlap,
+guaranteeing that <tt>(r3 == 1)</tt>.
+As a result, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
+</pre>
+</blockquote>
+
+cannot happen.
+
+<p>
+This non-requirement was also non-premeditated, but became apparent
+when studying RCU's interaction with memory ordering.
+
+<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
+Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
+
+<p>
+It is also tempting to assume that if an RCU read-side critical section
+happens between a pair of grace periods, then those grace periods cannot
+overlap.
+However, this temptation leads nowhere good, as can be illustrated by
+the following, with all variables initially zero:
+
+<blockquote>
+<pre>
+ 1 void thread0(void)
+ 2 {
+ 3 rcu_read_lock();
+ 4 WRITE_ONCE(a, 1);
+ 5 WRITE_ONCE(b, 1);
+ 6 rcu_read_unlock();
+ 7 }
+ 8
+ 9 void thread1(void)
+10 {
+11 r1 = READ_ONCE(a);
+12 synchronize_rcu();
+13 WRITE_ONCE(c, 1);
+14 }
+15
+16 void thread2(void)
+17 {
+18 rcu_read_lock();
+19 WRITE_ONCE(d, 1);
+20 r2 = READ_ONCE(c);
+21 rcu_read_unlock();
+22 }
+23
+24 void thread3(void)
+25 {
+26 r3 = READ_ONCE(d);
+27 synchronize_rcu();
+28 WRITE_ONCE(e, 1);
+29 }
+30
+31 void thread4(void)
+32 {
+33 rcu_read_lock();
+34 r4 = READ_ONCE(b);
+35 r5 = READ_ONCE(e);
+36 rcu_read_unlock();
+37 }
+</pre>
+</blockquote>
+
+<p>
+In this case, the outcome:
+
+<blockquote>
+<pre>
+(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp&amp; r5 == 1)
+</pre>
+</blockquote>
+
+is entirely possible, as illustrated below:
+
+<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
+
+<p>
+Again, an RCU read-side critical section can overlap almost all of a
+given grace period, just so long as it does not overlap the entire
+grace period.
+As a result, an RCU read-side critical section cannot partition a pair
+of RCU grace periods.
+
+<p>@@QQ@@
+How long a sequence of grace periods, each separated by an RCU read-side
+critical section, would be required to partition the RCU read-side
+critical sections at the beginning and end of the chain?
+<p>@@QQA@@
+In theory, an infinite number.
+In practice, an unknown number that is sensitive to both implementation
+details and timing considerations.
+Therefore, even in practice, RCU users must abide by the theoretical rather
+than the practical answer.
+<p>@@QQE@@
+
+<h3><a name="Disabling Preemption Does Not Block Grace Periods">
+Disabling Preemption Does Not Block Grace Periods</a></h3>
+
+<p>
+There was a time when disabling preemption on any given CPU would block
+subsequent grace periods.
+However, this was an accident of implementation and is not a requirement.
+And in the current Linux-kernel implementation, disabling preemption
+on a given CPU in fact does not block grace periods, as Oleg Nesterov
+<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
+
+<p>
+If you need a preempt-disable region to block grace periods, you need to add
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
+as follows:
+
+<blockquote>
+<pre>
+ 1 preempt_disable();
+ 2 rcu_read_lock();
+ 3 do_something();
+ 4 rcu_read_unlock();
+ 5 preempt_enable();
+ 6
+ 7 /* Spinlocks implicitly disable preemption. */
+ 8 spin_lock(&amp;mylock);
+ 9 rcu_read_lock();
+10 do_something();
+11 rcu_read_unlock();
+12 spin_unlock(&amp;mylock);
+</pre>
+</blockquote>
+
+<p>
+In theory, you could enter the RCU read-side critical section first,
+but it is more efficient to keep the entire RCU read-side critical
+section contained in the preempt-disable region as shown above.
+Of course, RCU read-side critical sections that extend outside of
+preempt-disable regions will work correctly, but such critical sections
+can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
+more work.
+And no, this is <i>not</i> an invitation to enclose all of your RCU
+read-side critical sections within preempt-disable regions, because
+doing so would degrade real-time response.
+
+<p>
+This non-requirement appeared with preemptible RCU.
+If you need a grace period that waits on non-preemptible code regions, use
+<a href="#Sched Flavor">RCU-sched</a>.
+
+<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
+
+<p>
+These parallelism facts of life are by no means specific to RCU, but
+the RCU implementation must abide by them.
+They therefore bear repeating:
+
+<ol>
+<li> Any CPU or task may be delayed at any time,
+ and any attempts to avoid these delays by disabling
+ preemption, interrupts, or whatever are completely futile.
+ This is most obvious in preemptible user-level
+ environments and in virtualized environments (where
+ a given guest OS's VCPUs can be preempted at any time by
+ the underlying hypervisor), but can also happen in bare-metal
+ environments due to ECC errors, NMIs, and other hardware
+ events.
+ Although a delay of more than about 20 seconds can result
+ in splats, the RCU implementation is obligated to use
+ algorithms that can tolerate extremely long delays, but where
+ &ldquo;extremely long&rdquo; is not long enough to allow
+ wrap-around when incrementing a 64-bit counter.
+<li> Both the compiler and the CPU can reorder memory accesses.
+ Where it matters, RCU must use compiler directives and
+ memory-barrier instructions to preserve ordering.
+<li> Conflicting writes to memory locations in any given cache line
+ will result in expensive cache misses.
+ Greater numbers of concurrent writes and more-frequent
+ concurrent writes will result in more dramatic slowdowns.
+ RCU is therefore obligated to use algorithms that have
+ sufficient locality to avoid significant performance and
+ scalability problems.
+<li> As a rough rule of thumb, only one CPU's worth of processing
+ may be carried out under the protection of any given exclusive
+ lock.
+ RCU must therefore use scalable locking designs.
+<li> Counters are finite, especially on 32-bit systems.
+ RCU's use of counters must therefore tolerate counter wrap,
+ or be designed such that counter wrap would take way more
+ time than a single system is likely to run.
+ An uptime of ten years is quite possible, a runtime
+ of a century much less so.
+ As an example of the latter, RCU's dyntick-idle nesting counter
+ allows 54 bits for interrupt nesting level (this counter
+ is 64 bits even on a 32-bit system).
+ Overflowing this counter requires 2<sup>54</sup>
+ half-interrupts on a given CPU without that CPU ever going idle.
+ If a half-interrupt happened every microsecond, it would take
+ 570 years of runtime to overflow this counter, which is currently
+ believed to be an acceptably long time.
+<li> Linux systems can have thousands of CPUs running a single
+ Linux kernel in a single shared-memory environment.
+ RCU must therefore pay close attention to high-end scalability.
+</ol>
+
+<p>
+This last parallelism fact of life means that RCU must pay special
+attention to the preceding facts of life.
+The idea that Linux might scale to systems with thousands of CPUs would
+have been met with some skepticism in the 1990s, but these requirements
+would have otherwise have been unsurprising, even in the early 1990s.
+
+<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
+
+<p>
+These sections list quality-of-implementation requirements.
+Although an RCU implementation that ignores these requirements could
+still be used, it would likely be subject to limitations that would
+make it inappropriate for industrial-strength production use.
+Classes of quality-of-implementation requirements are as follows:
+
+<ol>
+<li> <a href="#Specialization">Specialization</a>
+<li> <a href="#Performance and Scalability">Performance and Scalability</a>
+<li> <a href="#Composability">Composability</a>
+<li> <a href="#Corner Cases">Corner Cases</a>
+</ol>
+
+<p>
+These classes is covered in the following sections.
+
+<h3><a name="Specialization">Specialization</a></h3>
+
+<p>
+RCU is and always has been intended primarily for read-mostly situations, as
+illustrated by the following figure.
+This means that RCU's read-side primitives are optimized, often at the
+expense of its update-side primitives.
+
+<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+
+<p>
+This focus on read-mostly situations means that RCU must interoperate
+with other synchronization primitives.
+For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
+examples discussed earlier use RCU to protect readers and locking to
+coordinate updaters.
+However, the need extends much farther, requiring that a variety of
+synchronization primitives be legal within RCU read-side critical sections,
+including spinlocks, sequence locks, atomic operations, reference
+counters, and memory barriers.
+
+<p>@@QQ@@
+What about sleeping locks?
+<p>@@QQA@@
+These are forbidden within Linux-kernel RCU read-side critical sections
+because it is not legal to place a quiescent state (in this case,
+voluntary context switch) within an RCU read-side critical section.
+However, sleeping locks may be used within userspace RCU read-side critical
+sections, and also within Linux-kernel sleepable RCU
+<a href="#Sleepable RCU">(SRCU)</a>
+read-side critical sections.
+In addition, the -rt patchset turns spinlocks into a sleeping locks so
+that the corresponding critical sections can be preempted, which
+also means that these sleeplockified spinlocks (but not other sleeping locks!)
+may be acquire within -rt-Linux-kernel RCU read-side critical sections.
+
+<p>
+Note that it <i>is</i> legal for a normal RCU read-side critical section
+to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
+but only as long as it does not loop indefinitely attempting to
+conditionally acquire that sleeping locks.
+The key point is that things like <tt>mutex_trylock()</tt>
+either return with the mutex held, or return an error indication if
+the mutex was not immediately available.
+Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
+<p>@@QQE@@
+
+<p>
+It often comes as a surprise that many algorithms do not require a
+consistent view of data, but many can function in that mode,
+with network routing being the poster child.
+Internet routing algorithms take significant time to propagate
+updates, so that by the time an update arrives at a given system,
+that system has been sending network traffic the wrong way for
+a considerable length of time.
+Having a few threads continue to send traffic the wrong way for a
+few more milliseconds is clearly not a problem: In the worst case,
+TCP retransmissions will eventually get the data where it needs to go.
+In general, when tracking the state of the universe outside of the
+computer, some level of inconsistency must be tolerated due to
+speed-of-light delays if nothing else.
+
+<p>
+Furthermore, uncertainty about external state is inherent in many cases.
+For example, a pair of veternarians might use heartbeat to determine
+whether or not a given cat was alive.
+But how long should they wait after the last heartbeat to decide that
+the cat is in fact dead?
+Waiting less than 400 milliseconds makes no sense because this would
+mean that a relaxed cat would be considered to cycle between death
+and life more than 100 times per minute.
+Moreover, just as with human beings, a cat's heart might stop for
+some period of time, so the exact wait period is a judgment call.
+One of our pair of veternarians might wait 30 seconds before pronouncing
+the cat dead, while the other might insist on waiting a full minute.
+The two veternarians would then disagree on the state of the cat during
+the final 30 seconds of the minute following the last heartbeat, as
+fancifully illustrated below:
+
+<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+
+<p>
+Interestingly enough, this same situation applies to hardware.
+When push comes to shove, how do we tell whether or not some
+external server has failed?
+We send messages to it periodically, and declare it failed if we
+don't receive a response within a given period of time.
+Policy decisions can usually tolerate short
+periods of inconsistency.
+The policy was decided some time ago, and is only now being put into
+effect, so a few milliseconds of delay is normally inconsequential.
+
+<p>
+However, there are algorithms that absolutely must see consistent data.
+For example, the translation between a user-level SystemV semaphore
+ID to the corresponding in-kernel data structure is protected by RCU,
+but it is absolutely forbidden to update a semaphore that has just been
+removed.
+In the Linux kernel, this need for consistency is accommodated by acquiring
+spinlocks located in the in-kernel data structure from within
+the RCU read-side critical section, and this is indicated by the
+green box in the figure above.
+Many other techniques may be used, and are in fact used within the
+Linux kernel.
+
+<p>
+In short, RCU is not required to maintain consistency, and other
+mechanisms may be used in concert with RCU when consistency is required.
+RCU's specialization allows it to do its job extremely well, and its
+ability to interoperate with other synchronization mechanisms allows
+the right mix of synchronization tools to be used for a given job.
+
+<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
+
+<p>
+Energy efficiency is a critical component of performance today,
+and Linux-kernel RCU implementations must therefore avoid unnecessarily
+awakening idle CPUs.
+I cannot claim that this requirement was premeditated.
+In fact, I learned of it during a telephone conversation in which I
+was given &ldquo;frank and open&rdquo; feedback on the importance
+of energy efficiency in battery-powered systems and on specific
+energy-efficiency shortcomings of the Linux-kernel RCU implementation.
+In my experience, the battery-powered embedded community will consider
+any unnecessary wakeups to be extremely unfriendly acts.
+So much so that mere Linux-kernel-mailing-list posts are
+insufficient to vent their ire.
+
+<p>
+Memory consumption is not particularly important for in most
+situations, and has become decreasingly
+so as memory sizes have expanded and memory
+costs have plummeted.
+However, as I learned from Matt Mackall's
+<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
+efforts, memory footprint is critically important on single-CPU systems with
+non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
+<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
+was born.
+Josh Triplett has since taken over the small-memory banner with his
+<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
+project, which resulted in
+<a href="#Sleepable RCU">SRCU</a>
+becoming optional for those kernels not needing it.
+
+<p>
+The remaining performance requirements are, for the most part,
+unsurprising.
+For example, in keeping with RCU's read-side specialization,
+<tt>rcu_dereference()</tt> should have negligible overhead (for
+example, suppression of a few minor compiler optimizations).
+Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
+
+<p>
+In preemptible environments, in the case where the RCU read-side
+critical section was not preempted (as will be the case for the
+highest-priority real-time process), <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> should have minimal overhead.
+In particular, they should not contain atomic read-modify-write
+operations, memory-barrier instructions, preemption disabling,
+interrupt disabling, or backwards branches.
+However, in the case where the RCU read-side critical section was preempted,
+<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
+This is why it is better to nest an RCU read-side critical section
+within a preempt-disable region than vice versa, at least in cases
+where that critical section is short enough to avoid unduly degrading
+real-time latencies.
+
+<p>
+The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
+optimized for throughput.
+It may therefore incur several milliseconds of latency in addition to
+the duration of the longest RCU read-side critical section.
+On the other hand, multiple concurrent invocations of
+<tt>synchronize_rcu()</tt> are required to use batching optimizations
+so that they can be satisfied by a single underlying grace-period-wait
+operation.
+For example, in the Linux kernel, it is not unusual for a single
+grace-period-wait operation to serve more than
+<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
+of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
+overhead down to nearly zero.
+However, the grace-period optimization is also required to avoid
+measurable degradation of real-time scheduling and interrupt latencies.
+
+<p>
+In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
+latencies are unacceptable.
+In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
+instead, reducing the grace-period latency down to a few tens of
+microseconds on small systems, at least in cases where the RCU read-side
+critical sections are short.
+There are currently no special latency requirements for
+<tt>synchronize_rcu_expedited()</tt> on large systems, but,
+consistent with the empirical nature of the RCU specification,
+that is subject to change.
+However, there most definitely are scalability requirements:
+A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
+CPUs should at least make reasonable forward progress.
+In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
+is permitted to impose modest degradation of real-time latency
+on non-idle online CPUs.
+That said, it will likely be necessary to take further steps to reduce this
+degradation, hopefully to roughly that of a scheduling-clock interrupt.
+
+<p>
+There are a number of situations where even
+<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
+latency is unacceptable.
+In these situations, the asynchronous <tt>call_rcu()</tt> can be
+used in place of <tt>synchronize_rcu()</tt> as follows:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 static void remove_gp_cb(struct rcu_head *rhp)
+ 8 {
+ 9 struct foo *p = container_of(rhp, struct foo, rh);
+10
+11 kfree(p);
+12 }
+13
+14 bool remove_gp_asynchronous(void)
+15 {
+16 struct foo *p;
+17
+18 spin_lock(&amp;gp_lock);
+19 p = rcu_dereference(gp);
+20 if (!p) {
+21 spin_unlock(&amp;gp_lock);
+22 return false;
+23 }
+24 rcu_assign_pointer(gp, NULL);
+25 call_rcu(&amp;p-&gt;rh, remove_gp_cb);
+26 spin_unlock(&amp;gp_lock);
+27 return true;
+28 }
+</pre>
+</blockquote>
+
+<p>
+A definition of <tt>struct foo</tt> is finally needed, and appears
+on lines&nbsp;1-5.
+The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
+on line&nbsp;25, and will be invoked after the end of a subsequent
+grace period.
+This gets the same effect as <tt>remove_gp_synchronous()</tt>,
+but without forcing the updater to wait for a grace period to elapse.
+The <tt>call_rcu()</tt> function may be used in a number of
+situations where neither <tt>synchronize_rcu()</tt> nor
+<tt>synchronize_rcu_expedited()</tt> would be legal,
+including within preempt-disable code, <tt>local_bh_disable()</tt> code,
+interrupt-disable code, and interrupt handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+The callback function (<tt>remove_gp_cb()</tt> in this case) will be
+executed within softirq (software interrupt) environment within the
+Linux kernel,
+either within a real softirq handler or under the protection
+of <tt>local_bh_disable()</tt>.
+In both the Linux kernel and in userspace, it is bad practice to
+write an RCU callback function that takes too long.
+Long-running operations should be relegated to separate threads or
+(in the Linux kernel) workqueues.
+
+<p>@@QQ@@
+Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+structure, which would interact badly with concurrent insertions.
+Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+<p>@@QQA@@
+Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+any changes, including any insertions that <tt>rcu_dereference()</tt>
+would protect against.
+Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
+is released on line&nbsp;25, which in turn means that
+<tt>rcu_access_pointer()</tt> suffices.
+<p>@@QQE@@
+
+<p>
+However, all that <tt>remove_gp_cb()</tt> is doing is
+invoking <tt>kfree()</tt> on the data element.
+This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
+which allows &ldquo;fire and forget&rdquo; operation as shown below:
+
+<blockquote>
+<pre>
+ 1 struct foo {
+ 2 int a;
+ 3 int b;
+ 4 struct rcu_head rh;
+ 5 };
+ 6
+ 7 bool remove_gp_faf(void)
+ 8 {
+ 9 struct foo *p;
+10
+11 spin_lock(&amp;gp_lock);
+12 p = rcu_dereference(gp);
+13 if (!p) {
+14 spin_unlock(&amp;gp_lock);
+15 return false;
+16 }
+17 rcu_assign_pointer(gp, NULL);
+18 kfree_rcu(p, rh);
+19 spin_unlock(&amp;gp_lock);
+20 return true;
+21 }
+</pre>
+</blockquote>
+
+<p>
+Note that <tt>remove_gp_faf()</tt> simply invokes
+<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
+further attention to the subsequent grace period and <tt>kfree()</tt>.
+It is permissible to invoke <tt>kfree_rcu()</tt> from the same
+environments as for <tt>call_rcu()</tt>.
+Interestingly enough, DYNIX/ptx had the equivalents of
+<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
+<tt>synchronize_rcu()</tt>.
+This was due to the fact that RCU was not heavily used within DYNIX/ptx,
+so the very few places that needed something like
+<tt>synchronize_rcu()</tt> simply open-coded it.
+
+<p>@@QQ@@
+Earlier it was claimed that <tt>call_rcu()</tt> and
+<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+by readers.
+But how can that be correct, given that the invocation of the callback
+and the freeing of the memory (respectively) must still wait for
+a grace period to elapse?
+<p>@@QQA@@
+We could define things this way, but keep in mind that this sort of
+definition would say that updates in garbage-collected languages
+cannot complete until the next time the garbage collector runs,
+which does not seem at all reasonable.
+The key point is that in most cases, an updater using either
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+next update as soon as it has invoked <tt>call_rcu()</tt> or
+<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+grace period.
+<p>@@QQE@@
+
+<p>
+But what if the updater must wait for the completion of code to be
+executed after the end of the grace period, but has other tasks
+that can be carried out in the meantime?
+The polling-style <tt>get_state_synchronize_rcu()</tt> and
+<tt>cond_synchronize_rcu()</tt> functions may be used for this
+purpose, as shown below:
+
+<blockquote>
+<pre>
+ 1 bool remove_gp_poll(void)
+ 2 {
+ 3 struct foo *p;
+ 4 unsigned long s;
+ 5
+ 6 spin_lock(&amp;gp_lock);
+ 7 p = rcu_access_pointer(gp);
+ 8 if (!p) {
+ 9 spin_unlock(&amp;gp_lock);
+10 return false;
+11 }
+12 rcu_assign_pointer(gp, NULL);
+13 spin_unlock(&amp;gp_lock);
+14 s = get_state_synchronize_rcu();
+15 do_something_while_waiting();
+16 cond_synchronize_rcu(s);
+17 kfree(p);
+18 return true;
+19 }
+</pre>
+</blockquote>
+
+<p>
+On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
+&ldquo;cookie&rdquo; from RCU,
+then line&nbsp;15 carries out other tasks,
+and finally, line&nbsp;16 returns immediately if a grace period has
+elapsed in the meantime, but otherwise waits as required.
+The need for <tt>get_state_synchronize_rcu</tt> and
+<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
+so it is too early to tell whether they will stand the test of time.
+
+<p>
+RCU thus provides a range of tools to allow updaters to strike the
+required tradeoff between latency, flexibility and CPU overhead.
+
+<h3><a name="Composability">Composability</a></h3>
+
+<p>
+Composability has received much attention in recent years, perhaps in part
+due to the collision of multicore hardware with object-oriented techniques
+designed in single-threaded environments for single-threaded use.
+And in theory, RCU read-side critical sections may be composed, and in
+fact may be nested arbitrarily deeply.
+In practice, as with all real-world implementations of composable
+constructs, there are limitations.
+
+<p>
+Implementations of RCU for which <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> generate no code, such as
+Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
+nested arbitrarily deeply.
+After all, there is no overhead.
+Except that if all these instances of <tt>rcu_read_lock()</tt>
+and <tt>rcu_read_unlock()</tt> are visible to the compiler,
+compilation will eventually fail due to exhausting memory,
+mass storage, or user patience, whichever comes first.
+If the nesting is not visible to the compiler, as is the case with
+mutually recursive functions each in its own translation unit,
+stack overflow will result.
+If the nesting takes the form of loops, either the control variable
+will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
+Nevertheless, this class of RCU implementations is one
+of the most composable constructs in existence.
+
+<p>
+RCU implementations that explicitly track nesting depth
+are limited by the nesting-depth counter.
+For example, the Linux kernel's preemptible RCU limits nesting to
+<tt>INT_MAX</tt>.
+This should suffice for almost all practical purposes.
+That said, a consecutive pair of RCU read-side critical sections
+between which there is an operation that waits for a grace period
+cannot be enclosed in another RCU read-side critical section.
+This is because it is not legal to wait for a grace period within
+an RCU read-side critical section: To do so would result either
+in deadlock or
+in RCU implicitly splitting the enclosing RCU read-side critical
+section, neither of which is conducive to a long-lived and prosperous
+kernel.
+
+<p>
+It is worth noting that RCU is not alone in limiting composability.
+For example, many transactional-memory implementations prohibit
+composing a pair of transactions separated by an irrevocable
+operation (for example, a network receive operation).
+For another example, lock-based critical sections can be composed
+surprisingly freely, but only if deadlock is avoided.
+
+<p>
+In short, although RCU read-side critical sections are highly composable,
+care is required in some situations, just as is the case for any other
+composable synchronization mechanism.
+
+<h3><a name="Corner Cases">Corner Cases</a></h3>
+
+<p>
+A given RCU workload might have an endless and intense stream of
+RCU read-side critical sections, perhaps even so intense that there
+was never a point in time during which there was not at least one
+RCU read-side critical section in flight.
+RCU cannot allow this situation to block grace periods: As long as
+all the RCU read-side critical sections are finite, grace periods
+must also be finite.
+
+<p>
+That said, preemptible RCU implementations could potentially result
+in RCU read-side critical sections being preempted for long durations,
+which has the effect of creating a long-duration RCU read-side
+critical section.
+This situation can arise only in heavily loaded systems, but systems using
+real-time priorities are of course more vulnerable.
+Therefore, RCU priority boosting is provided to help deal with this
+case.
+That said, the exact requirements on RCU priority boosting will likely
+evolve as more experience accumulates.
+
+<p>
+Other workloads might have very high update rates.
+Although one can argue that such workloads should instead use
+something other than RCU, the fact remains that RCU must
+handle such workloads gracefully.
+This requirement is another factor driving batching of grace periods,
+but it is also the driving force behind the checks for large numbers
+of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
+Finally, high update rates should not delay RCU read-side critical
+sections, although some read-side delays can occur when using
+<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
+of <tt>try_stop_cpus()</tt>.
+(In the future, <tt>synchronize_rcu_expedited()</tt> will be
+converted to use lighter-weight inter-processor interrupts (IPIs),
+but this will still disturb readers, though to a much smaller degree.)
+
+<p>
+Although all three of these corner cases were understood in the early
+1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
+in a tight loop
+in the early 2000s suddenly provided a much deeper appreciation of the
+high-update-rate corner case.
+This test also motivated addition of some RCU code to react to high update
+rates, for example, if a given CPU finds itself with more than 10,000
+RCU callbacks queued, it will cause RCU to take evasive action by
+more aggressively starting grace periods and more aggressively forcing
+completion of grace-period processing.
+This evasive action causes the grace period to complete more quickly,
+but at the cost of restricting RCU's batching optimizations, thus
+increasing the CPU overhead incurred by that grace period.
+
+<h2><a name="Software-Engineering Requirements">
+Software-Engineering Requirements</a></h2>
+
+<p>
+Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
+guard against mishaps and misuse:
+
+<ol>
+<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
+ everywhere that it is needed, so kernels built with
+ <tt>CONFIG_PROVE_RCU=y</tt> will spat if
+ <tt>rcu_dereference()</tt> is used outside of an
+ RCU read-side critical section.
+ Update-side code can use <tt>rcu_dereference_protected()</tt>,
+ which takes a
+ <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
+ to indicate what is providing the protection.
+ If the indicated protection is not provided, a lockdep splat
+ is emitted.
+
+ <p>
+ Code shared between readers and updaters can use
+ <tt>rcu_dereference_check()</tt>, which also takes a
+ lockdep expression, and emits a lockdep splat if neither
+ <tt>rcu_read_lock()</tt> nor the indicated protection
+ is in place.
+ In addition, <tt>rcu_dereference_raw()</tt> is used in those
+ (hopefully rare) cases where the required protection cannot
+ be easily described.
+ Finally, <tt>rcu_read_lock_held()</tt> is provided to
+ allow a function to verify that it has been invoked within
+ an RCU read-side critical section.
+ I was made aware of this set of requirements shortly after Thomas
+ Gleixner audited a number of RCU uses.
+<li> A given function might wish to check for RCU-related preconditions
+ upon entry, before using any other RCU API.
+ The <tt>rcu_lockdep_assert()</tt> does this job,
+ asserting the expression in kernels having lockdep enabled
+ and doing nothing otherwise.
+<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
+ and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
+ substituting a simple assignment.
+ To catch this sort of error, a given RCU-protected pointer may be
+ tagged with <tt>__rcu</tt>, after which running sparse
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
+ about simple-assignment accesses to that pointer.
+ Arnd Bergmann made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lwn.net/Articles/376011/">patch series</a>.
+<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
+ will splat if a data element is passed to <tt>call_rcu()</tt>
+ twice in a row, without a grace period in between.
+ (This error is similar to a double free.)
+ The corresponding <tt>rcu_head</tt> structures that are
+ dynamically allocated are automatically tracked, but
+ <tt>rcu_head</tt> structures allocated on the stack
+ must be initialized with <tt>init_rcu_head_on_stack()</tt>
+ and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
+ Similarly, statically allocated non-stack <tt>rcu_head</tt>
+ structures must be initialized with <tt>init_rcu_head()</tt>
+ and cleaned up with <tt>destroy_rcu_head()</tt>.
+ Mathieu Desnoyers made me aware of this requirement, and also
+ supplied the needed
+ <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
+<li> An infinite loop in an RCU read-side critical section will
+ eventually trigger an RCU CPU stall warning splat, with
+ the duration of &ldquo;eventually&rdquo; being controlled by the
+ <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
+ alternatively, by the
+ <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
+ parameter.
+ However, RCU is not obligated to produce this splat
+ unless there is a grace period waiting on that particular
+ RCU read-side critical section.
+ <p>
+ Some extreme workloads might intentionally delay
+ RCU grace periods, and systems running those workloads can
+ be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
+ to suppress the splats.
+ This kernel parameter may also be set via <tt>sysfs</tt>.
+ Furthermore, RCU CPU stall warnings are counter-productive
+ during sysrq dumps and during panics.
+ RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
+ <tt>rcu_sysrq_end()</tt> API members to be called before
+ and after long sysrq dumps.
+ RCU also supplies the <tt>rcu_panic()</tt> notifier that is
+ automatically invoked at the beginning of a panic to suppress
+ further RCU CPU stall warnings.
+
+ <p>
+ This requirement made itself known in the early 1990s, pretty
+ much the first time that it was necessary to debug a CPU stall.
+ That said, the initial implementation in DYNIX/ptx was quite
+ generic in comparison with that of Linux.
+<li> Although it would be very good to detect pointers leaking out
+ of RCU read-side critical sections, there is currently no
+ good way of doing this.
+ One complication is the need to distinguish between pointers
+ leaking and pointers that have been handed off from RCU to
+ some other synchronization mechanism, for example, reference
+ counting.
+<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
+ information is provided via both debugfs and event tracing.
+<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
+ <tt>rcu_dereference()</tt> to create typical linked
+ data structures can be surprisingly error-prone.
+ Therefore, RCU-protected
+ <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
+ and, more recently, RCU-protected
+ <a href="https://lwn.net/Articles/612100/">hash tables</a>
+ are available.
+ Many other special-purpose RCU-protected data structures are
+ available in the Linux kernel and the userspace RCU library.
+<li> Some linked structures are created at compile time, but still
+ require <tt>__rcu</tt> checking.
+ The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
+ purpose.
+<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
+ when creating linked structures that are to be published via
+ a single external pointer.
+ The <tt>RCU_INIT_POINTER()</tt> macro is provided for
+ this task and also for assigning <tt>NULL</tt> pointers
+ at runtime.
+</ol>
+
+<p>
+This not a hard-and-fast list: RCU's diagnostic capabilities will
+continue to be guided by the number and type of usage bugs found
+in real-world RCU usage.
+
+<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
+
+<p>
+The Linux kernel provides an interesting environment for all kinds of
+software, including RCU.
+Some of the relevant points of interest are as follows:
+
+<ol>
+<li> <a href="#Configuration">Configuration</a>.
+<li> <a href="#Firmware Interface">Firmware Interface</a>.
+<li> <a href="#Early Boot">Early Boot</a>.
+<li> <a href="#Interrupts and NMIs">
+ Interrupts and non-maskable interrupts (NMIs)</a>.
+<li> <a href="#Loadable Modules">Loadable Modules</a>.
+<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
+<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
+<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
+<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
+<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
+<li> <a href="#Performance, Scalability, Response Time, and Reliability">
+ Performance, Scalability, Response Time, and Reliability</a>.
+</ol>
+
+<p>
+This list is probably incomplete, but it does give a feel for the
+most notable Linux-kernel complications.
+Each of the following sections covers one of the above topics.
+
+<h3><a name="Configuration">Configuration</a></h3>
+
+<p>
+RCU's goal is automatic configuration, so that almost nobody
+needs to worry about RCU's <tt>Kconfig</tt> options.
+And for almost all users, RCU does in fact work well
+&ldquo;out of the box.&rdquo;
+
+<p>
+However, there are specialized use cases that are handled by
+kernel boot parameters and <tt>Kconfig</tt> options.
+Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
+about new <tt>Kconfig</tt> options, which requires almost all of them
+be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
+
+<p>
+This all should be quite obvious, but the fact remains that
+Linus Torvalds recently had to
+<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
+me of this requirement.
+
+<h3><a name="Firmware Interface">Firmware Interface</a></h3>
+
+<p>
+In many cases, kernel obtains information about the system from the
+firmware, and sometimes things are lost in translation.
+Or the translation is accurate, but the original message is bogus.
+
+<p>
+For example, some systems' firmware overreports the number of CPUs,
+sometimes by a large factor.
+If RCU naively believed the firmware, as it used to do,
+it would create too many per-CPU kthreads.
+Although the resulting system will still run correctly, the extra
+kthreads needlessly consume memory and can cause confusion
+when they show up in <tt>ps</tt> listings.
+
+<p>
+RCU must therefore wait for a given CPU to actually come online before
+it can allow itself to believe that the CPU actually exists.
+The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
+come online) cause a number of
+<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
+
+<h3><a name="Early Boot">Early Boot</a></h3>
+
+<p>
+The Linux kernel's boot sequence is an interesting process,
+and RCU is used early, even before <tt>rcu_init()</tt>
+is invoked.
+In fact, a number of RCU's primitives can be used as soon as the
+initial task's <tt>task_struct</tt> is available and the
+boot CPU's per-CPU variables are set up.
+The read-side primitives (<tt>rcu_read_lock()</tt>,
+<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
+and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
+as will <tt>rcu_assign_pointer()</tt>.
+
+<p>
+Although <tt>call_rcu()</tt> may be invoked at any
+time during boot, callbacks are not guaranteed to be invoked until after
+the scheduler is fully up and running.
+This delay in callback invocation is due to the fact that RCU does not
+invoke callbacks until it is fully initialized, and this full initialization
+cannot occur until after the scheduler has initialized itself to the
+point where RCU can spawn and run its kthreads.
+In theory, it would be possible to invoke callbacks earlier,
+however, this is not a panacea because there would be severe restrictions
+on what operations those callbacks could invoke.
+
+<p>
+Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
+<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
+(<a href="#Bottom-Half Flavor">discussed below</a>),
+and
+<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
+will all operate normally
+during very early boot, the reason being that there is only one CPU
+and preemption is disabled.
+This means that the call <tt>synchronize_rcu()</tt> (or friends)
+itself is a quiescent
+state and thus a grace period, so the early-boot implementation can
+be a no-op.
+
+<p>
+Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
+continue to operate normally through the remainder of boot, courtesy
+of the fact that preemption is disabled across their RCU read-side
+critical sections and also courtesy of the fact that there is still
+only one CPU.
+However, once the scheduler starts initializing, preemption is enabled.
+There is still only a single CPU, but the fact that preemption is enabled
+means that the no-op implementation of <tt>synchronize_rcu()</tt> no
+longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
+Therefore, as soon as the scheduler starts initializing, the early-boot
+fastpath is disabled.
+This means that <tt>synchronize_rcu()</tt> switches to its runtime
+mode of operation where it posts callbacks, which in turn means that
+any call to <tt>synchronize_rcu()</tt> will block until the corresponding
+callback is invoked.
+Unfortunately, the callback cannot be invoked until RCU's runtime
+grace-period machinery is up and running, which cannot happen until
+the scheduler has initialized itself sufficiently to allow RCU's
+kthreads to be spawned.
+Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
+initialization can result in deadlock.
+
+<p>@@QQ@@
+So what happens with <tt>synchronize_rcu()</tt> during
+scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+kernels?
+<p>@@QQA@@
+In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
+maps directly to <tt>synchronize_sched()</tt>.
+Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+during scheduler initialization.
+<p>@@QQE@@
+
+<p>
+I learned of these boot-time requirements as a result of a series of
+system hangs.
+
+<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
+
+<p>
+The Linux kernel has interrupts, and RCU read-side critical sections are
+legal within interrupt handlers and within interrupt-disabled regions
+of code, as are invocations of <tt>call_rcu()</tt>.
+
+<p>
+Some Linux-kernel architectures can enter an interrupt handler from
+non-idle process context, and then just never leave it, instead stealthily
+transitioning back to process context.
+This trick is sometimes used to invoke system calls from inside the kernel.
+These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
+about how it counts interrupt nesting levels.
+I learned of this requirement the hard way during a rewrite
+of RCU's dyntick-idle code.
+
+<p>
+The Linux kernel has non-maskable interrupts (NMIs), and
+RCU read-side critical sections are legal within NMI handlers.
+Thankfully, RCU update-side primitives, including
+<tt>call_rcu()</tt>, are prohibited within NMI handlers.
+
+<p>
+The name notwithstanding, some Linux-kernel architectures
+can have nested NMIs, which RCU must handle correctly.
+Andy Lutomirski
+<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
+with this requirement;
+he also kindly surprised me with
+<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
+that meets this requirement.
+
+<h3><a name="Loadable Modules">Loadable Modules</a></h3>
+
+<p>
+The Linux kernel has loadable modules, and these modules can
+also be unloaded.
+After a given module has been unloaded, any attempt to call
+one of its functions results in a segmentation fault.
+The module-unload functions must therefore cancel any
+delayed calls to loadable-module functions, for example,
+any outstanding <tt>mod_timer()</tt> must be dealt with
+via <tt>del_timer_sync()</tt> or similar.
+
+<p>
+Unfortunately, there is no way to cancel an RCU callback;
+once you invoke <tt>call_rcu()</tt>, the callback function is
+going to eventually be invoked, unless the system goes down first.
+Because it is normally considered socially irresponsible to crash the system
+in response to a module unload request, we need some other way
+to deal with in-flight RCU callbacks.
+
+<p>
+RCU therefore provides
+<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
+which waits until all in-flight RCU callbacks have been invoked.
+If a module uses <tt>call_rcu()</tt>, its exit function should therefore
+prevent any future invocation of <tt>call_rcu()</tt>, then invoke
+<tt>rcu_barrier()</tt>.
+In theory, the underlying module-unload code could invoke
+<tt>rcu_barrier()</tt> unconditionally, but in practice this would
+incur unacceptable latencies.
+
+<p>
+Nikita Danilov noted this requirement for an analogous filesystem-unmount
+situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
+The need for <tt>rcu_barrier()</tt> for module unloading became
+apparent later.
+
+<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
+
+<p>
+The Linux kernel supports CPU hotplug, which means that CPUs
+can come and go.
+It is of course illegal to use any RCU API member from an offline CPU.
+This requirement was present from day one in DYNIX/ptx, but
+on the other hand, the Linux kernel's CPU-hotplug implementation
+is &ldquo;interesting.&rdquo;
+
+<p>
+The Linux-kernel CPU-hotplug implementation has notifiers that
+are used to allow the various kernel subsystems (including RCU)
+to respond appropriately to a given CPU-hotplug operation.
+Most RCU operations may be invoked from CPU-hotplug notifiers,
+including even normal synchronous grace-period operations
+such as <tt>synchronize_rcu()</tt>.
+However, expedited grace-period operations such as
+<tt>synchronize_rcu_expedited()</tt> are not supported,
+due to the fact that current implementations block CPU-hotplug
+operations, which could result in deadlock.
+
+<p>
+In addition, all-callback-wait operations such as
+<tt>rcu_barrier()</tt> are also not supported, due to the
+fact that there are phases of CPU-hotplug operations where
+the outgoing CPU's callbacks will not be invoked until after
+the CPU-hotplug operation ends, which could also result in deadlock.
+
+<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
+
+<p>
+RCU depends on the scheduler, and the scheduler uses RCU to
+protect some of its data structures.
+This means the scheduler is forbidden from acquiring
+the runqueue locks and the priority-inheritance locks
+in the middle of an outermost RCU read-side critical section unless either
+(1)&nbsp;it releases them before exiting that same
+RCU read-side critical section, or
+(2)&nbsp;interrupts are disabled across
+that entire RCU read-side critical section.
+This same prohibition also applies (recursively!) to any lock that is acquired
+while holding any lock to which this prohibition applies.
+Adhering to this rule prevents preemptible RCU from invoking
+<tt>rcu_read_unlock_special()</tt> while either runqueue or
+priority-inheritance locks are held, thus avoiding deadlock.
+
+<p>
+Prior to v4.4, it was only necessary to disable preemption across
+RCU read-side critical sections that acquired scheduler locks.
+In v4.4, expedited grace periods started using IPIs, and these
+IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
+Therefore, this expedited-grace-period change required disabling of
+interrupts, not just preemption.
+
+<p>
+For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must be written carefully to avoid similar deadlocks.
+In particular, <tt>rcu_read_unlock()</tt> must tolerate an
+interrupt where the interrupt handler invokes both
+<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+This possibility requires <tt>rcu_read_unlock()</tt> to use
+negative nesting levels to avoid destructive recursion via
+interrupt handler's use of RCU.
+
+<p>
+This pair of mutual scheduler-RCU requirements came as a
+<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
+
+<p>
+As noted above, RCU makes use of kthreads, and it is necessary to
+avoid excessive CPU-time accumulation by these kthreads.
+This requirement was no surprise, but RCU's violation of it
+when running context-switch-heavy workloads when built with
+<tt>CONFIG_NO_HZ_FULL=y</tt>
+<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
+RCU has made good progress towards meeting this requirement, even
+for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+but there is room for further improvement.
+
+<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
+
+<p>
+It is possible to use tracing on RCU code, but tracing itself
+uses RCU.
+For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+is provided for use by tracing, which avoids the destructive
+recursion that could otherwise ensue.
+This API is also used by virtualization in some architectures,
+where RCU readers execute in environments in which tracing
+cannot be used.
+The tracing folks both located the requirement and provided the
+needed fix, so this surprise requirement was relatively painless.
+
+<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
+
+<p>
+Interrupting idle CPUs is considered socially unacceptable,
+especially by people with battery-powered embedded systems.
+RCU therefore conserves energy by detecting which CPUs are
+idle, including tracking CPUs that have been interrupted from idle.
+This is a large part of the energy-efficiency requirement,
+so I learned of this via an irate phone call.
+
+<p>
+Because RCU avoids interrupting idle CPUs, it is illegal to
+execute an RCU read-side critical section on an idle CPU.
+(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
+if you try it.)
+The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
+event tracing is provided to work around this restriction.
+In addition, <tt>rcu_is_watching()</tt> may be used to
+test whether or not it is currently legal to run RCU read-side
+critical sections on this CPU.
+I learned of the need for diagnostics on the one hand
+and <tt>RCU_NONIDLE()</tt> on the other while inspecting
+idle-loop code.
+Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
+which is used quite heavily in the idle loop.
+
+<p>
+It is similarly socially unacceptable to interrupt an
+<tt>nohz_full</tt> CPU running in userspace.
+RCU must therefore track <tt>nohz_full</tt> userspace
+execution.
+And in
+<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
+kernels, RCU must separately track idle CPUs on the one hand and
+CPUs that are either idle or executing in userspace on the other.
+In both cases, RCU must be able to sample state at two points in
+time, and be able to determine whether or not some other CPU spent
+any time idle and/or executing in userspace.
+
+<p>
+These energy-efficiency requirements have proven quite difficult to
+understand and to meet, for example, there have been more than five
+clean-sheet rewrites of RCU's energy-efficiency code, the last of
+which was finally able to demonstrate
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
+As noted earlier,
+I learned of many of these requirements via angry phone calls:
+Flaming me on the Linux-kernel mailing list was apparently not
+sufficient to fully vent their ire at RCU's energy-efficiency bugs!
+
+<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
+
+<p>
+Although small-memory non-realtime systems can simply use Tiny RCU,
+code size is only one aspect of memory efficiency.
+Another aspect is the size of the <tt>rcu_head</tt> structure
+used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
+Although this structure contains nothing more than a pair of pointers,
+it does appear in many RCU-protected data structures, including
+some that are size critical.
+The <tt>page</tt> structure is a case in point, as evidenced by
+the many occurrences of the <tt>union</tt> keyword within that structure.
+
+<p>
+This need for memory efficiency is one reason that RCU uses hand-crafted
+singly linked lists to track the <tt>rcu_head</tt> structures that
+are waiting for a grace period to elapse.
+It is also the reason why <tt>rcu_head</tt> structures do not contain
+debug information, such as fields tracking the file and line of the
+<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
+Although this information might appear in debug-only kernel builds at some
+point, in the meantime, the <tt>-&gt;func</tt> field will often provide
+the needed debug information.
+
+<p>
+However, in some cases, the need for memory efficiency leads to even
+more extreme measures.
+Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
+shares storage with a great many other structures that are used at
+various points in the corresponding page's lifetime.
+In order to correctly resolve certain
+<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
+the Linux kernel's memory-management subsystem needs a particular bit
+to remain zero during all phases of grace-period processing,
+and that bit happens to map to the bottom bit of the
+<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
+RCU makes this guarantee as long as <tt>call_rcu()</tt>
+is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
+or some future &ldquo;lazy&rdquo;
+variant of <tt>call_rcu()</tt> that might one day be created for
+energy-efficiency purposes.
+
+<h3><a name="Performance, Scalability, Response Time, and Reliability">
+Performance, Scalability, Response Time, and Reliability</a></h3>
+
+<p>
+Expanding on the
+<a href="#Performance and Scalability">earlier discussion</a>,
+RCU is used heavily by hot code paths in performance-critical
+portions of the Linux kernel's networking, security, virtualization,
+and scheduling code paths.
+RCU must therefore use efficient implementations, especially in its
+read-side primitives.
+To that end, it would be good if preemptible RCU's implementation
+of <tt>rcu_read_lock()</tt> could be inlined, however, doing
+this requires resolving <tt>#include</tt> issues with the
+<tt>task_struct</tt> structure.
+
+<p>
+The Linux kernel supports hardware configurations with up to
+4096 CPUs, which means that RCU must be extremely scalable.
+Algorithms that involve frequent acquisitions of global locks or
+frequent atomic operations on global variables simply cannot be
+tolerated within the RCU implementation.
+RCU therefore makes heavy use of a combining tree based on the
+<tt>rcu_node</tt> structure.
+RCU is required to tolerate all CPUs continuously invoking any
+combination of RCU's runtime primitives with minimal per-operation
+overhead.
+In fact, in many cases, increasing load must <i>decrease</i> the
+per-operation overhead, witness the batching optimizations for
+<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
+<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
+As a general rule, RCU must cheerfully accept whatever the
+rest of the Linux kernel decides to throw at it.
+
+<p>
+The Linux kernel is used for real-time workloads, especially
+in conjunction with the
+<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
+The real-time-latency response requirements are such that the
+traditional approach of disabling preemption across RCU
+read-side critical sections is inappropriate.
+Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
+use an RCU implementation that allows RCU read-side critical
+sections to be preempted.
+This requirement made its presence known after users made it
+clear that an earlier
+<a href="https://lwn.net/Articles/107930/">real-time patch</a>
+did not meet their needs, in conjunction with some
+<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
+encountered by a very early version of the -rt patchset.
+
+<p>
+In addition, RCU must make do with a sub-100-microsecond real-time latency
+budget.
+In fact, on smaller systems with the -rt patchset, the Linux kernel
+provides sub-20-microsecond real-time latencies for the whole kernel,
+including RCU.
+RCU's scalability and latency must therefore be sufficient for
+these sorts of configurations.
+To my surprise, the sub-100-microsecond real-time latency budget
+<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
+applies to even the largest systems [PDF]</a>,
+up to and including systems with 4096 CPUs.
+This real-time requirement motivated the grace-period kthread, which
+also simplified handling of a number of race conditions.
+
+<p>
+Finally, RCU's status as a synchronization primitive means that
+any RCU failure can result in arbitrary memory corruption that can be
+extremely difficult to debug.
+This means that RCU must be extremely reliable, which in
+practice also means that RCU must have an aggressive stress-test
+suite.
+This stress-test suite is called <tt>rcutorture</tt>.
+
+<p>
+Although the need for <tt>rcutorture</tt> was no surprise,
+the current immense popularity of the Linux kernel is posing
+interesting&mdash;and perhaps unprecedented&mdash;validation
+challenges.
+To see this, keep in mind that there are well over one billion
+instances of the Linux kernel running today, given Android
+smartphones, Linux-powered televisions, and servers.
+This number can be expected to increase sharply with the advent of
+the celebrated Internet of Things.
+
+<p>
+Suppose that RCU contains a race condition that manifests on average
+once per million years of runtime.
+This bug will be occurring about three times per <i>day</i> across
+the installed base.
+RCU could simply hide behind hardware error rates, given that no one
+should really expect their smartphone to last for a million years.
+However, anyone taking too much comfort from this thought should
+consider the fact that in most jurisdictions, a successful multi-year
+test of a given mechanism, which might include a Linux kernel,
+suffices for a number of types of safety-critical certifications.
+In fact, rumor has it that the Linux kernel is already being used
+in production for safety-critical applications.
+I don't know about you, but I would feel quite bad if a bug in RCU
+killed someone.
+Which might explain my recent focus on validation and verification.
+
+<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
+
+<p>
+One of the more surprising things about RCU is that there are now
+no fewer than five <i>flavors</i>, or API families.
+In addition, the primary flavor that has been the sole focus up to
+this point has two different implementations, non-preemptible and
+preemptible.
+The other four flavors are listed below, with requirements for each
+described in a separate section.
+
+<ol>
+<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
+<li> <a href="#Sched Flavor">Sched Flavor</a>
+<li> <a href="#Sleepable RCU">Sleepable RCU</a>
+<li> <a href="#Tasks RCU">Tasks RCU</a>
+</ol>
+
+<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
+
+<p>
+The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
+hence the &ldquo;_bh&rdquo; abbreviations)
+flavor of RCU, or <i>RCU-bh</i>, was developed by
+Dipankar Sarma to provide a flavor of RCU that could withstand the
+network-based denial-of-service attacks researched by Robert
+Olsson.
+These attacks placed so much networking load on the system
+that some of the CPUs never exited softirq execution,
+which in turn prevented those CPUs from ever executing a context switch,
+which, in the RCU implementation of that time, prevented grace periods
+from ever ending.
+The result was an out-of-memory condition and a system hang.
+
+<p>
+The solution was the creation of RCU-bh, which does
+<tt>local_bh_disable()</tt>
+across its read-side critical sections, and which uses the transition
+from one type of softirq processing to another as a quiescent state
+in addition to context switch, idle, user mode, and offline.
+This means that RCU-bh grace periods can complete even when some of
+the CPUs execute in softirq indefinitely, thus allowing algorithms
+based on RCU-bh to withstand network-based denial-of-service attacks.
+
+<p>
+Because
+<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
+disable and re-enable softirq handlers, any attempt to start a softirq
+handlers during the
+RCU-bh read-side critical section will be deferred.
+In this case, <tt>rcu_read_unlock_bh()</tt>
+will invoke softirq processing, which can take considerable time.
+One can of course argue that this softirq overhead should be associated
+with the code following the RCU-bh read-side critical section rather
+than <tt>rcu_read_unlock_bh()</tt>, but the fact
+is that most profiling tools cannot be expected to make this sort
+of fine distinction.
+For example, suppose that a three-millisecond-long RCU-bh read-side
+critical section executes during a time of heavy networking load.
+There will very likely be an attempt to invoke at least one softirq
+handler during that three milliseconds, but any such invocation will
+be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
+This can of course make it appear at first glance as if
+<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
+includes
+<tt>rcu_read_lock_bh()</tt>,
+<tt>rcu_read_unlock_bh()</tt>,
+<tt>rcu_dereference_bh()</tt>,
+<tt>rcu_dereference_bh_check()</tt>,
+<tt>synchronize_rcu_bh()</tt>,
+<tt>synchronize_rcu_bh_expedited()</tt>,
+<tt>call_rcu_bh()</tt>,
+<tt>rcu_barrier_bh()</tt>, and
+<tt>rcu_read_lock_bh_held()</tt>.
+
+<h3><a name="Sched Flavor">Sched Flavor</a></h3>
+
+<p>
+Before preemptible RCU, waiting for an RCU grace period had the
+side effect of also waiting for all pre-existing interrupt
+and NMI handlers.
+However, there are legitimate preemptible-RCU implementations that
+do not have this property, given that any point in the code outside
+of an RCU read-side critical section can be a quiescent state.
+Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
+RCU in that an RCU-sched grace period waits for for pre-existing
+interrupt and NMI handlers.
+In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
+APIs have identical implementations, while kernels built with
+<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
+
+<p>
+Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
+<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
+disable and re-enable preemption, respectively.
+This means that if there was a preemption attempt during the
+RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
+will enter the scheduler, with all the latency and overhead entailed.
+Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
+as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
+However, the highest-priority task won't be preempted, so that task
+will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
+includes
+<tt>rcu_read_lock_sched()</tt>,
+<tt>rcu_read_unlock_sched()</tt>,
+<tt>rcu_read_lock_sched_notrace()</tt>,
+<tt>rcu_read_unlock_sched_notrace()</tt>,
+<tt>rcu_dereference_sched()</tt>,
+<tt>rcu_dereference_sched_check()</tt>,
+<tt>synchronize_sched()</tt>,
+<tt>synchronize_rcu_sched_expedited()</tt>,
+<tt>call_rcu_sched()</tt>,
+<tt>rcu_barrier_sched()</tt>, and
+<tt>rcu_read_lock_sched_held()</tt>.
+However, anything that disables preemption also marks an RCU-sched
+read-side critical section, including
+<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
+<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
+and so on.
+
+<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
+
+<p>
+For well over a decade, someone saying &ldquo;I need to block within
+an RCU read-side critical section&rdquo; was a reliable indication
+that this someone did not understand RCU.
+After all, if you are always blocking in an RCU read-side critical
+section, you can probably afford to use a higher-overhead synchronization
+mechanism.
+However, that changed with the advent of the Linux kernel's notifiers,
+whose RCU read-side critical
+sections almost never sleep, but sometimes need to.
+This resulted in the introduction of
+<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
+or <i>SRCU</i>.
+
+<p>
+SRCU allows different domains to be defined, with each such domain
+defined by an instance of an <tt>srcu_struct</tt> structure.
+A pointer to this structure must be passed in to each SRCU function,
+for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
+<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
+The key benefit of these domains is that a slow SRCU reader in one
+domain does not delay an SRCU grace period in some other domain.
+That said, one consequence of these domains is that read-side code
+must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
+to <tt>srcu_read_unlock()</tt>, for example, as follows:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+As noted above, it is legal to block within SRCU read-side critical sections,
+however, with great power comes great responsibility.
+If you block forever in one of a given domain's SRCU read-side critical
+sections, then that domain's grace periods will also be blocked forever.
+Of course, one good way to block forever is to deadlock, which can
+happen if any operation in a given domain's SRCU read-side critical
+section can block waiting, either directly or indirectly, for that domain's
+grace period to elapse.
+For example, this results in a self-deadlock:
+
+<blockquote>
+<pre>
+ 1 int idx;
+ 2
+ 3 idx = srcu_read_lock(&amp;ss);
+ 4 do_something();
+ 5 synchronize_srcu(&amp;ss);
+ 6 srcu_read_unlock(&amp;ss, idx);
+</pre>
+</blockquote>
+
+<p>
+However, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
+deadlock would still be possible.
+Furthermore, if line&nbsp;5 acquired a mutex that was held across
+a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
+and if an <tt>ss1</tt>-domain SRCU read-side critical section
+acquired another mutex that was held across as <tt>ss</tt>-domain
+<tt>synchronize_srcu()</tt>,
+deadlock would again be possible.
+Such a deadlock cycle could extend across an arbitrarily large number
+of different SRCU domains.
+Again, with great power comes great responsibility.
+
+<p>
+Unlike the other RCU flavors, SRCU read-side critical sections can
+run on idle and even offline CPUs.
+This ability requires that <tt>srcu_read_lock()</tt> and
+<tt>srcu_read_unlock()</tt> contain memory barriers, which means
+that SRCU readers will run a bit slower than would RCU readers.
+It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
+API, which, in combination with <tt>srcu_read_unlock()</tt>,
+guarantees a full memory barrier.
+
+<p>
+The
+<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
+includes
+<tt>srcu_read_lock()</tt>,
+<tt>srcu_read_unlock()</tt>,
+<tt>srcu_dereference()</tt>,
+<tt>srcu_dereference_check()</tt>,
+<tt>synchronize_srcu()</tt>,
+<tt>synchronize_srcu_expedited()</tt>,
+<tt>call_srcu()</tt>,
+<tt>srcu_barrier()</tt>, and
+<tt>srcu_read_lock_held()</tt>.
+It also includes
+<tt>DEFINE_SRCU()</tt>,
+<tt>DEFINE_STATIC_SRCU()</tt>, and
+<tt>init_srcu_struct()</tt>
+APIs for defining and initializing <tt>srcu_struct</tt> structures.
+
+<h3><a name="Tasks RCU">Tasks RCU</a></h3>
+
+<p>
+Some forms of tracing use &ldquo;tramopolines&rdquo; to handle the
+binary rewriting required to install different types of probes.
+It would be good to be able to free old trampolines, which sounds
+like a job for some form of RCU.
+However, because it is necessary to be able to install a trace
+anywhere in the code, it is not possible to use read-side markers
+such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
+In addition, it does not work to have these markers in the trampoline
+itself, because there would need to be instructions following
+<tt>rcu_read_unlock()</tt>.
+Although <tt>synchronize_rcu()</tt> would guarantee that execution
+reached the <tt>rcu_read_unlock()</tt>, it would not be able to
+guarantee that execution had completely left the trampoline.
+
+<p>
+The solution, in the form of
+<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
+is to have implicit
+read-side critical sections that are delimited by voluntary context
+switches, that is, calls to <tt>schedule()</tt>,
+<tt>cond_resched_rcu_qs()</tt>, and
+<tt>synchronize_rcu_tasks()</tt>.
+In addition, transitions to and from userspace execution also delimit
+tasks-RCU read-side critical sections.
+
+<p>
+The tasks-RCU API is quite compact, consisting only of
+<tt>call_rcu_tasks()</tt>,
+<tt>synchronize_rcu_tasks()</tt>, and
+<tt>rcu_barrier_tasks()</tt>.
+
+<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
+
+<p>
+One of the tricks that RCU uses to attain update-side scalability is
+to increase grace-period latency with increasing numbers of CPUs.
+If this becomes a serious problem, it will be necessary to rework the
+grace-period state machine so as to avoid the need for the additional
+latency.
+
+<p>
+Expedited grace periods scan the CPUs, so their latency and overhead
+increases with increasing numbers of CPUs.
+If this becomes a serious problem on large systems, it will be necessary
+to do some redesign to avoid this scalability problem.
+
+<p>
+RCU disables CPU hotplug in a few places, perhaps most notably in the
+expedited grace-period and <tt>rcu_barrier()</tt> operations.
+If there is a strong reason to use expedited grace periods in CPU-hotplug
+notifiers, it will be necessary to avoid disabling CPU hotplug.
+This would introduce some complexity, so there had better be a <i>very</i>
+good reason.
+
+<p>
+The tradeoff between grace-period latency on the one hand and interruptions
+of other CPUs on the other hand may need to be re-examined.
+The desire is of course for zero grace-period latency as well as zero
+interprocessor interrupts undertaken during an expedited grace period
+operation.
+While this ideal is unlikely to be achievable, it is quite possible that
+further improvements can be made.
+
+<p>
+The multiprocessor implementations of RCU use a combining tree that
+groups CPUs so as to reduce lock contention and increase cache locality.
+However, this combining tree does not spread its memory across NUMA
+nodes nor does it align the CPU groups with hardware features such
+as sockets or cores.
+Such spreading and alignment is currently believed to be unnecessary
+because the hotpath read-side primitives do not access the combining
+tree, nor does <tt>call_rcu()</tt> in the common case.
+If you believe that your architecture needs such spreading and alignment,
+then your architecture should also benefit from the
+<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
+to the number of CPUs in a socket, NUMA node, or whatever.
+If the number of CPUs is too large, use a fraction of the number of
+CPUs.
+If the number of CPUs is a large prime number, well, that certainly
+is an &ldquo;interesting&rdquo; architectural choice!
+More flexible arrangements might be considered, but only if
+<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
+if the inadequacy has been demonstrated by a carefully run and
+realistic system-level workload.
+
+<p>
+Please note that arrangements that require RCU to remap CPU numbers will
+require extremely good demonstration of need and full exploration of
+alternatives.
+
+<p>
+There is an embarrassingly large number of flavors of RCU, and this
+number has been increasing over time.
+Perhaps it will be possible to combine some at some future date.
+
+<p>
+RCU's various kthreads are reasonably recent additions.
+It is quite likely that adjustments will be required to more gracefully
+handle extreme loads.
+It might also be necessary to be able to relate CPU utilization by
+RCU's kthreads and softirq handlers to the code that instigated this
+CPU utilization.
+For example, RCU callback overhead might be charged back to the
+originating <tt>call_rcu()</tt> instance, though probably not
+in production kernels.
+
+<h2><a name="Summary">Summary</a></h2>
+
+<p>
+This document has presented more than two decade's worth of RCU
+requirements.
+Given that the requirements keep changing, this will not be the last
+word on this subject, but at least it serves to get an important
+subset of the requirements set forth.
+
+<h2><a name="Acknowledgments">Acknowledgments</a></h2>
+
+I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
+Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
+Andy Lutomirski for their help in rendering
+this article human readable, and to Michelle Rankin for her support
+of this effort.
+Other contributions are acknowledged in the Linux kernel's git archive.
+The cartoon is copyright (c) 2013 by Melissa Broussard,
+and is provided
+under the terms of the Creative Commons Attribution-Share Alike 3.0
+United States license.
+
+<p>@@QQAL@@
+
+</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
new file mode 100755
index 000000000000..d354f069559b
--- /dev/null
+++ b/Documentation/RCU/Design/htmlqqz.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+#
+# Usage: sh htmlqqz.sh file
+#
+# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
+# Commands, all of which must be on a line by themselves:
+#
+# "<p>@@QQ@@": Start of a quick quiz.
+# "<p>@@QQA@@": Start of a quick-quiz answer.
+# "<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
+# "<p>@@QQAL@@": Place to put quick-quiz answer list.
+#
+# Places the result in file.html.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
+
+fn=$1
+if test ! -r $fn.htmlx
+then
+ echo "Error: $fn.htmlx unreadable."
+ exit 1
+fi
+
+echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
+echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
+awk < $fn.htmlx >> $fn.html '
+
+state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
+ print $0;
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
+ next;
+}
+
+state == "" && $1 == "<p>@@QQ@@" {
+ qqn++;
+ qqlineno = NR;
+ haveqq = 1;
+ state = "qq";
+ print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
+ next;
+}
+
+state == "qq" && $1 != "<p>@@QQA@@" {
+ qq[qqn] = qq[qqn] $0 "\n";
+ print $0
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
+ next;
+}
+
+state == "qq" && $1 == "<p>@@QQA@@" {
+ state = "qqa";
+ print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
+ next;
+}
+
+state == "qqa" && $1 != "<p>@@QQE@@" {
+ qqa[qqn] = qqa[qqn] $0 "\n";
+ if ($0 ~ /^<p>@@QQ/)
+ print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
+ next;
+}
+
+state == "qqa" && $1 == "<p>@@QQE@@" {
+ state = "";
+ next;
+}
+
+state == "" && $1 == "<p>@@QQAL@@" {
+ haveqq = "";
+ print "<h3><a name=\"Answers to Quick Quizzes\">"
+ print "Answers to Quick Quizzes</a></h3>"
+ print "";
+ for (i = 1; i <= qqn; i++) {
+ print "<a name=\"qq" i "answer\"></a>"
+ print "<p><b>Quick Quiz " i "</b>:"
+ print qq[i];
+ print "";
+ print "</p><p><b>Answer</b>:"
+ print qqa[i];
+ print "";
+ print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
+ print "";
+ }
+ next;
+}
+
+END {
+ if (state != "")
+ print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
+ else if (haveqq)
+ print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
+}'
diff --git a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt b/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt
index 20ac9bbfa1fd..60872838f1ad 100644
--- a/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt
+++ b/Documentation/devicetree/bindings/ata/brcm,sata-brcmstb.txt
@@ -4,7 +4,9 @@ SATA nodes are defined to describe on-chip Serial ATA controllers.
Each SATA controller should have its own node.
Required properties:
-- compatible : compatible list, may contain "brcm,bcm7445-ahci" and/or
+- compatible : should be one or more of
+ "brcm,bcm7425-ahci"
+ "brcm,bcm7445-ahci"
"brcm,sata3-ahci"
- reg : register mappings for AHCI and SATA_TOP_CTRL
- reg-names : "ahci" and "top-ctrl"
diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt
index 2493a5a31655..0764f9ab63dc 100644
--- a/Documentation/devicetree/bindings/ata/sata_rcar.txt
+++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt
@@ -8,6 +8,7 @@ Required properties:
- "renesas,sata-r8a7790" for R-Car H2 other than ES1
- "renesas,sata-r8a7791" for R-Car M2-W
- "renesas,sata-r8a7793" for R-Car M2-N
+ - "renesas,sata-r8a7795" for R-Car H3
- reg : address and length of the SATA registers;
- interrupts : must consist of one interrupt specifier.
- clocks : must contain a reference to the functional clock.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt
index d1c5cdabc3e0..81cd3692405e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt
@@ -4,7 +4,7 @@ Allwinner Sunxi NMI Controller
Required properties:
- compatible : should be "allwinner,sun7i-a20-sc-nmi" or
- "allwinner,sun6i-a31-sc-nmi"
+ "allwinner,sun6i-a31-sc-nmi" or "allwinner,sun9i-a80-nmi"
- reg : Specifies base physical address and size of the registers.
- interrupt-controller : Identifies the node as an interrupt controller
- #interrupt-cells : Specifies the number of cells needed to encode an
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
index cc56021eb60b..5a1cb4bc3dfe 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt
@@ -18,6 +18,7 @@ Main node required properties:
"arm,cortex-a9-gic"
"arm,gic-400"
"arm,pl390"
+ "arm,tc11mp-gic"
"brcm,brahma-b15-gic"
"qcom,msm-8660-qgic"
"qcom,msm-qgic2"
diff --git a/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
new file mode 100644
index 000000000000..720f7c92e9a1
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/hisilicon,mbigen-v2.txt
@@ -0,0 +1,74 @@
+Hisilicon mbigen device tree bindings.
+=======================================
+
+Mbigen means: message based interrupt generator.
+
+MBI is kind of msi interrupt only used on Non-PCI devices.
+
+To reduce the wired interrupt number connected to GIC,
+Hisilicon designed mbigen to collect and generate interrupt.
+
+
+Non-pci devices can connect to mbigen and generate the
+interrupt by writing ITS register.
+
+The mbigen chip and devices connect to mbigen have the following properties:
+
+Mbigen main node required properties:
+-------------------------------------------
+- compatible: Should be "hisilicon,mbigen-v2"
+
+- reg: Specifies the base physical address and size of the Mbigen
+ registers.
+
+- interrupt controller: Identifies the node as an interrupt controller
+
+- msi-parent: Specifies the MSI controller this mbigen use.
+ For more detail information,please refer to the generic msi-parent binding in
+ Documentation/devicetree/bindings/interrupt-controller/msi.txt.
+
+- num-pins: the total number of pins implemented in this Mbigen
+ instance.
+
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value must be 2.
+
+ The 1st cell is hardware pin number of the interrupt.This number is local to
+ each mbigen chip and in the range from 0 to the maximum interrupts number
+ of the mbigen.
+
+ The 2nd cell is the interrupt trigger type.
+ The value of this cell should be:
+ 1: rising edge triggered
+ or
+ 4: high level triggered
+
+Examples:
+
+ mbigen_device_gmac:intc {
+ compatible = "hisilicon,mbigen-v2";
+ reg = <0x0 0xc0080000 0x0 0x10000>;
+ interrupt-controller;
+ msi-parent = <&its_dsa 0x40b1c>;
+ num-pins = <9>;
+ #interrupt-cells = <2>;
+ };
+
+Devices connect to mbigen required properties:
+----------------------------------------------------
+-interrupt-parent: Specifies the mbigen device node which device connected.
+
+-interrupts:Specifies the interrupt source.
+ For the specific information of each cell in this property,please refer to
+ the "interrupt-cells" description mentioned above.
+
+Examples:
+ gmac0: ethernet@c2080000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0 0xc2080000 0 0x20000>,
+ <0 0xc0000000 0 0x1000>;
+ interrupt-parent = <&mbigen_device_gmac>;
+ interrupts = <656 1>,
+ <657 1>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt
new file mode 100644
index 000000000000..7f15f1b0325b
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/technologic,ts4800.txt
@@ -0,0 +1,16 @@
+TS-4800 FPGA interrupt controller
+
+TS-4800 FPGA has an internal interrupt controller. When one of the
+interrupts is triggered, the SoC is notified, usually using a GPIO as
+parent interrupt source.
+
+Required properties:
+- compatible: should be "technologic,ts4800-irqc"
+- interrupt-controller: identifies the node as an interrupt controller
+- reg: physical base address of the controller and length of memory mapped
+ region
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: phandle to the parent interrupt controller this one is
+ cascaded from
+- interrupts: specifies the interrupt line in the interrupt-parent controller
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
index cae29eb5733d..ff611fa66871 100644
--- a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
@@ -11,6 +11,7 @@ Required properties:
- "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
- "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs
- "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs
+ - "renesas,mmcif-r8a7793" for the MMCIF found in r8a7793 SoCs
- "renesas,mmcif-r8a7794" for the MMCIF found in r8a7794 SoCs
- clocks: reference to the functional clock
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
index b321b26780dc..9213b27e1036 100644
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
@@ -17,7 +17,10 @@ Required properties:
"allwinner,sun8i-a23-pinctrl"
"allwinner,sun8i-a23-r-pinctrl"
"allwinner,sun8i-a33-pinctrl"
+ "allwinner,sun9i-a80-pinctrl"
+ "allwinner,sun9i-a80-r-pinctrl"
"allwinner,sun8i-a83t-pinctrl"
+ "allwinner,sun8i-h3-pinctrl"
- reg: Should contain the register physical address and length for the
pin controller.
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt
index 16589fb6f420..e4277921f3e3 100644
--- a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,iproc-gpio.txt
@@ -1,4 +1,4 @@
-Broadcom Cygnus GPIO/PINCONF Controller
+Broadcom iProc GPIO/PINCONF Controller
Required properties:
@@ -7,9 +7,12 @@ Required properties:
"brcm,cygnus-crmu-gpio" or "brcm,iproc-gpio"
- reg:
- Define the base and range of the I/O address space that contains the Cygnus
+ Define the base and range of the I/O address space that contains SoC
GPIO/PINCONF controller registers
+- ngpios:
+ Total number of in-use slots in GPIO controller
+
- #gpio-cells:
Must be two. The first cell is the GPIO pin number (within the
controller's pin space) and the second cell is used for the following:
@@ -57,6 +60,7 @@ Example:
compatible = "brcm,cygnus-ccm-gpio";
reg = <0x1800a000 0x50>,
<0x0301d164 0x20>;
+ ngpios = <24>;
#gpio-cells = <2>;
gpio-controller;
interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
@@ -78,6 +82,7 @@ Example:
gpio_asiu: gpio@180a5000 {
compatible = "brcm,cygnus-asiu-gpio";
reg = <0x180a5000 0x668>;
+ ngpios = <146>;
#gpio-cells = <2>;
gpio-controller;
interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt
new file mode 100644
index 000000000000..0844168a6dd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,nsp-gpio.txt
@@ -0,0 +1,80 @@
+Broadcom Northstar plus (NSP) GPIO/PINCONF Controller
+
+Required properties:
+- compatible:
+ Must be "brcm,nsp-gpio-a"
+
+- reg:
+ Should contain the register physical address and length for each of
+ GPIO base, IO control registers
+
+- #gpio-cells:
+ Must be two. The first cell is the GPIO pin number (within the
+ controller's pin space) and the second cell is used for the following:
+ bit[0]: polarity (0 for active high and 1 for active low)
+
+- gpio-controller:
+ Specifies that the node is a GPIO controller
+
+- ngpios:
+ Number of gpios supported (58x25 supports 32 and 58x23 supports 24)
+
+Optional properties:
+- interrupts:
+ Interrupt ID
+
+- interrupt-controller:
+ Specifies that the node is an interrupt controller
+
+- gpio-ranges:
+ Specifies the mapping between gpio controller and pin-controllers pins.
+ This requires 4 fields in cells defined as -
+ 1. Phandle of pin-controller.
+ 2. GPIO base pin offset.
+ 3 Pin-control base pin offset.
+ 4. number of gpio pins which are linearly mapped from pin base.
+
+Supported generic PINCONF properties in child nodes:
+- pins:
+ The list of pins (within the controller's own pin space) that properties
+ in the node apply to. Pin names are "gpio-<pin>"
+
+- bias-disable:
+ Disable pin bias
+
+- bias-pull-up:
+ Enable internal pull up resistor
+
+- bias-pull-down:
+ Enable internal pull down resistor
+
+- drive-strength:
+ Valid drive strength values include 2, 4, 6, 8, 10, 12, 14, 16 (mA)
+
+Example:
+
+ gpioa: gpio@18000020 {
+ compatible = "brcm,nsp-gpio-a";
+ reg = <0x18000020 0x100>,
+ <0x1803f1c4 0x1c>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ ngpios = <32>;
+ gpio-ranges = <&pinctrl 0 0 31>;
+ interrupt-controller;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
+
+ /* Hog a few default settings */
+ pinctrl-names = "default";
+ pinctrl-0 = <&led>;
+ led: led {
+ pins = "gpio-1";
+ bias-pull-up;
+ };
+
+ pwr: pwr {
+ gpio-hog;
+ gpios = <3 1>;
+ output-high;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt
index e89b4677567d..8e5216bcd748 100644
--- a/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt
+++ b/Documentation/devicetree/bindings/pinctrl/lantiq,pinctrl-xway.txt
@@ -1,7 +1,16 @@
Lantiq XWAY pinmux controller
Required properties:
-- compatible: "lantiq,pinctrl-xway" or "lantiq,pinctrl-xr9"
+- compatible: "lantiq,pinctrl-xway", (DEPRECATED: Use "lantiq,pinctrl-danube")
+ "lantiq,pinctrl-xr9", (DEPRECATED: Use "lantiq,xrx100-pinctrl" or
+ "lantiq,xrx200-pinctrl")
+ "lantiq,pinctrl-ase", (DEPRECATED: Use "lantiq,ase-pinctrl")
+ "lantiq,<chip>-pinctrl", where <chip> is:
+ "ase" (XWAY AMAZON Family)
+ "danube" (XWAY DANUBE Family)
+ "xrx100" (XWAY xRX100 Family)
+ "xrx200" (XWAY xRX200 Family)
+ "xrx300" (XWAY xRX300 Family)
- reg: Should contain the physical address and length of the gpio/pinmux
register range
@@ -36,19 +45,87 @@ Required subnode-properties:
Valid values for group and function names:
+XWAY: (DEPRECATED: Use DANUBE)
mux groups:
exin0, exin1, exin2, jtag, ebu a23, ebu a24, ebu a25, ebu clk, ebu cs1,
ebu wait, nand ale, nand cs1, nand cle, spi, spi_cs1, spi_cs2, spi_cs3,
- spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi , gpt1, gpt2,
+ spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi, gpt1, gpt2,
gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, gnt2, gnt3, req1, req2,
req3
- additional mux groups (XR9 only):
- mdio, nand rdy, nand rd, exin3, exin4, gnt4, req4
+ functions:
+ spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu
+
+XR9: ( DEPRECATED: Use xRX100/xRX200)
+ mux groups:
+ exin0, exin1, exin2, exin3, exin4, jtag, ebu a23, ebu a24, ebu a25,
+ ebu clk, ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy,
+ nand rd, spi, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5, spi_cs6,
+ asc0, asc0 cts rts, stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1,
+ clkout2, clkout3, gnt1, gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio,
+ gphy0 led0, gphy0 led1, gphy0 led2, gphy1 led0, gphy1 led1, gphy1 led2
+
+ functions:
+ spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, mdio, gphy
+
+AMAZON:
+ mux groups:
+ exin0, exin1, exin2, jtag, spi_di, spi_do, spi_clk, spi_cs1, spi_cs2,
+ spi_cs3, spi_cs4, spi_cs5, spi_cs6, asc, stp, gpt1, gpt2, gpt3, clkout0,
+ clkout1, clkout2, mdio, dfe led0, dfe led1, ephy led0, ephy led1, ephy led2
+
+ functions:
+ spi, asc, cgu, jtag, exin, stp, gpt, mdio, ephy, dfe
+
+DANUBE:
+ mux groups:
+ exin0, exin1, exin2, jtag, ebu a23, ebu a24, ebu a25, ebu clk, ebu cs1,
+ ebu wait, nand ale, nand cs1, nand cle, spi_di, spi_do, spi_clk, spi_cs1,
+ spi_cs2, spi_cs3, spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi,
+ gpt1, gpt2, gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, gnt2, gnt3,
+ req1, req2, req3, dfe led0, dfe led1
functions:
- spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, mdio
+ spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, dfe
+xRX100:
+ mux groups:
+ exin0, exin1, exin2, exin3, exin4, ebu a23, ebu a24, ebu a25, ebu clk,
+ ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy, nand rd,
+ spi_di, spi_do, spi_clk, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5,
+ spi_cs6, asc0, asc0 cts rts, stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1,
+ clkout2, clkout3, gnt1, gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio,
+ dfe led0, dfe led1
+
+ functions:
+ spi, asc, cgu, exin, stp, gpt, nmi, pci, ebu, mdio, dfe
+
+xRX200:
+ mux groups:
+ exin0, exin1, exin2, exin3, exin4, ebu a23, ebu a24, ebu a25, ebu clk,
+ ebu cs1, ebu wait, nand ale, nand cs1, nand cle, nand rdy, nand rd,
+ spi_di, spi_do, spi_clk, spi_cs1, spi_cs2, spi_cs3, spi_cs4, spi_cs5,
+ spi_cs6, usif uart_rx, usif uart_tx, usif uart_rts, usif uart_cts,
+ usif uart_dtr, usif uart_dsr, usif uart_dcd, usif uart_ri, usif spi_di,
+ usif spi_do, usif spi_clk, usif spi_cs0, usif spi_cs1, usif spi_cs2,
+ stp, nmi, gpt1, gpt2, gpt3, clkout0, clkout1, clkout2, clkout3, gnt1,
+ gnt2, gnt3, gnt4, req1, req2, req3, req4, mdio, dfe led0, dfe led1,
+ gphy0 led0, gphy0 led1, gphy0 led2, gphy1 led0, gphy1 led1, gphy1 led2
+
+ functions:
+ spi, usif, cgu, exin, stp, gpt, nmi, pci, ebu, mdio, dfe, gphy
+
+xRX300:
+ mux groups:
+ exin0, exin1, exin2, exin4, nand ale, nand cs0, nand cs1, nand cle,
+ nand rdy, nand rd, nand_d0, nand_d1, nand_d2, nand_d3, nand_d4, nand_d5,
+ nand_d6, nand_d7, nand_d1, nand wr, nand wp, nand se, spi_di, spi_do,
+ spi_clk, spi_cs1, spi_cs4, spi_cs6, usif uart_rx, usif uart_tx,
+ usif spi_di, usif spi_do, usif spi_clk, usif spi_cs0, stp, clkout2,
+ mdio, dfe led0, dfe led1, ephy0 led0, ephy0 led1, ephy1 led0, ephy1 led1
+
+ functions:
+ spi, usif, cgu, exin, stp, ebu, mdio, dfe, ephy
Definition of pin configurations:
@@ -62,15 +139,32 @@ Optional subnode-properties:
0: none, 1: down, 2: up.
- lantiq,open-drain: Boolean, enables open-drain on the defined pin.
-Valid values for XWAY pin names:
+Valid values for XWAY pin names: (DEPRECATED: Use DANUBE)
Pinconf pins can be referenced via the names io0-io31.
-Valid values for XR9 pin names:
+Valid values for XR9 pin names: (DEPRECATED: Use xrX100/xRX200)
Pinconf pins can be referenced via the names io0-io55.
+Valid values for AMAZON pin names:
+ Pinconf pins can be referenced via the names io0-io31.
+
+Valid values for DANUBE pin names:
+ Pinconf pins can be referenced via the names io0-io31.
+
+Valid values for xRX100 pin names:
+ Pinconf pins can be referenced via the names io0-io55.
+
+Valid values for xRX200 pin names:
+ Pinconf pins can be referenced via the names io0-io49.
+
+Valid values for xRX300 pin names:
+ Pinconf pins can be referenced via the names io0-io1,io3-io6,io8-io11,
+ io13-io19,io23-io27,io34-io36,
+ io42-io43,io48-io61.
+
Example:
gpio: pinmux@E100B10 {
- compatible = "lantiq,pinctrl-xway";
+ compatible = "lantiq,danube-pinctrl";
pinctrl-names = "default";
pinctrl-0 = <&state_default>;
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
index 0480bc31bfd7..9ffb0b276bb4 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
@@ -4,10 +4,11 @@ The Mediatek's Pin controller is used to control SoC pins.
Required properties:
- compatible: value should be one of the following.
- (a) "mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl.
- (b) "mediatek,mt8173-pinctrl", compatible with mt8173 pinctrl.
- (c) "mediatek,mt6397-pinctrl", compatible with mt6397 pinctrl.
- (d) "mediatek,mt8127-pinctrl", compatible with mt8127 pinctrl.
+ "mediatek,mt2701-pinctrl", compatible with mt2701 pinctrl.
+ "mediatek,mt6397-pinctrl", compatible with mt6397 pinctrl.
+ "mediatek,mt8127-pinctrl", compatible with mt8127 pinctrl.
+ "mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl.
+ "mediatek,mt8173-pinctrl", compatible with mt8173 pinctrl.
- pins-are-numbered: Specify the subnodes are using numbered pinmux to
specify pins.
- gpio-controller : Marks the device node as a gpio controller.
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt
new file mode 100644
index 000000000000..e312a71b2f94
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8996-pinctrl.txt
@@ -0,0 +1,199 @@
+Qualcomm MSM8996 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+MSM8996 platform.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be "qcom,msm8996-pinctrl"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the TLMM register space.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/gpio/gpio.h>
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of gpio pins affected by the properties specified in
+ this subnode.
+
+ Valid pins are:
+ gpio0-gpio149
+ Supports mux, bias and drive-strength
+
+ sdc1_clk, sdc1_cmd, sdc1_data sdc2_clk, sdc2_cmd,
+ sdc2_data sdc1_rclk
+ Supports bias and drive-strength
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Functions are only valid for gpio pins.
+ Valid values are:
+
+ blsp_uart1, blsp_spi1, blsp_i2c1, blsp_uim1, atest_tsens,
+ bimc_dte1, dac_calib0, blsp_spi8, blsp_uart8, blsp_uim8,
+ qdss_cti_trig_out_b, bimc_dte0, dac_calib1, qdss_cti_trig_in_b,
+ dac_calib2, atest_tsens2, atest_usb1, blsp_spi10, blsp_uart10,
+ blsp_uim10, atest_bbrx1, atest_usb13, atest_bbrx0, atest_usb12,
+ mdp_vsync, edp_lcd, blsp_i2c10, atest_gpsadc1, atest_usb11,
+ atest_gpsadc0, edp_hot, atest_usb10, m_voc, dac_gpio, atest_char,
+ cam_mclk, pll_bypassnl, qdss_stm7, blsp_i2c8, qdss_tracedata_b,
+ pll_reset, qdss_stm6, qdss_stm5, qdss_stm4, atest_usb2, cci_i2c,
+ qdss_stm3, dac_calib3, atest_usb23, atest_char3, dac_calib4,
+ qdss_stm2, atest_usb22, atest_char2, qdss_stm1, dac_calib5,
+ atest_usb21, atest_char1, dbg_out, qdss_stm0, dac_calib6,
+ atest_usb20, atest_char0, dac_calib10, qdss_stm10,
+ qdss_cti_trig_in_a, cci_timer4, blsp_spi6, blsp_uart6, blsp_uim6,
+ blsp2_spi, qdss_stm9, qdss_cti_trig_out_a, dac_calib11,
+ qdss_stm8, cci_timer0, qdss_stm13, dac_calib7, cci_timer1,
+ qdss_stm12, dac_calib8, cci_timer2, blsp1_spi, qdss_stm11,
+ dac_calib9, cci_timer3, cci_async, dac_calib12, blsp_i2c6,
+ qdss_tracectl_a, dac_calib13, qdss_traceclk_a, dac_calib14,
+ dac_calib15, hdmi_rcv, dac_calib16, hdmi_cec, pwr_modem,
+ dac_calib17, hdmi_ddc, pwr_nav, dac_calib18, pwr_crypto,
+ dac_calib19, hdmi_hot, dac_calib20, dac_calib21, pci_e0,
+ dac_calib22, dac_calib23, dac_calib24, tsif1_sync, dac_calib25,
+ sd_write, tsif1_error, blsp_spi2, blsp_uart2, blsp_uim2,
+ qdss_cti, blsp_i2c2, blsp_spi3, blsp_uart3, blsp_uim3, blsp_i2c3,
+ uim3, blsp_spi9, blsp_uart9, blsp_uim9, blsp10_spi, blsp_i2c9,
+ blsp_spi7, blsp_uart7, blsp_uim7, qdss_tracedata_a, blsp_i2c7,
+ qua_mi2s, gcc_gp1_clk_a, ssc_irq, uim4, blsp_spi11, blsp_uart11,
+ blsp_uim11, gcc_gp2_clk_a, gcc_gp3_clk_a, blsp_i2c11, cri_trng0,
+ cri_trng1, cri_trng, qdss_stm18, pri_mi2s, qdss_stm17, blsp_spi4,
+ blsp_uart4, blsp_uim4, qdss_stm16, qdss_stm15, blsp_i2c4,
+ qdss_stm14, dac_calib26, spkr_i2s, audio_ref, lpass_slimbus,
+ isense_dbg, tsense_pwm1, tsense_pwm2, btfm_slimbus, ter_mi2s,
+ qdss_stm22, qdss_stm21, qdss_stm20, qdss_stm19, gcc_gp1_clk_b,
+ sec_mi2s, blsp_spi5, blsp_uart5, blsp_uim5, gcc_gp2_clk_b,
+ gcc_gp3_clk_b, blsp_i2c5, blsp_spi12, blsp_uart12, blsp_uim12,
+ qdss_stm25, qdss_stm31, blsp_i2c12, qdss_stm30, qdss_stm29,
+ tsif1_clk, qdss_stm28, tsif1_en, tsif1_data, sdc4_cmd, qdss_stm27,
+ qdss_traceclk_b, tsif2_error, sdc43, vfr_1, qdss_stm26, tsif2_clk,
+ sdc4_clk, qdss_stm24, tsif2_en, sdc42, qdss_stm23, qdss_tracectl_b,
+ sd_card, tsif2_data, sdc41, tsif2_sync, sdc40, mdp_vsync_p_b,
+ ldo_en, mdp_vsync_s_b, ldo_update, blsp11_uart_tx_b, blsp11_uart_rx_b,
+ blsp11_i2c_sda_b, prng_rosc, blsp11_i2c_scl_b, uim2, uim1, uim_batt,
+ pci_e2, pa_indicator, adsp_ext, ddr_bist, qdss_tracedata_11,
+ qdss_tracedata_12, modem_tsync, nav_dr, nav_pps, pci_e1, gsm_tx,
+ qspi_cs, ssbi2, ssbi1, mss_lte, qspi_clk, qspi0, qspi1, qspi2, qspi3,
+ gpio
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as no pull.
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull down.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull up.
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+ Not valid for sdc pins.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+ Not valid for sdc pins.
+
+- drive-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the drive strength for the specified pins, in mA.
+ Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+ tlmm: pinctrl@01010000 {
+ compatible = "qcom,msm8996-pinctrl";
+ reg = <0x01010000 0x300000>;
+ interrupts = <0 208 0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ uart_console_active: uart_console_active {
+ mux {
+ pins = "gpio4", "gpio5";
+ function = "blsp_uart8";
+ };
+
+ config {
+ pins = "gpio4", "gpio5";
+ drive-strength = <2>;
+ bias-disable;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
index 1ae63c0acd40..a90c812ad642 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
@@ -14,6 +14,7 @@ PMIC's from Qualcomm.
"qcom,pm8917-gpio"
"qcom,pm8921-gpio"
"qcom,pm8941-gpio"
+ "qcom,pm8994-gpio"
"qcom,pma8084-gpio"
- reg:
@@ -79,6 +80,7 @@ to specify in a pin configuration subnode:
gpio1-gpio38 for pm8917
gpio1-gpio44 for pm8921
gpio1-gpio36 for pm8941
+ gpio1-gpio22 for pm8994
gpio1-gpio22 for pma8084
- function:
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
index d7803a2a94e9..d74e631e10da 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
@@ -15,6 +15,7 @@ of PMIC's from Qualcomm.
"qcom,pm8917-mpp",
"qcom,pm8921-mpp",
"qcom,pm8941-mpp",
+ "qcom,pm8994-mpp",
"qcom,pma8084-mpp",
- reg:
diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
index 391ef4be8d50..0cd701b1947f 100644
--- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
@@ -21,7 +21,8 @@ defined as gpio sub-nodes of the pinmux controller.
Required properties for iomux controller:
- compatible: one of "rockchip,rk2928-pinctrl", "rockchip,rk3066a-pinctrl"
"rockchip,rk3066b-pinctrl", "rockchip,rk3188-pinctrl"
- "rockchip,rk3288-pinctrl", "rockchip,rk3368-pinctrl"
+ "rockchip,rk3228-pinctrl", "rockchip,rk3288-pinctrl"
+ "rockchip,rk3368-pinctrl"
- rockchip,grf: phandle referencing a syscon providing the
"general register files"
diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
index 9d2a995293e6..6db16b90873a 100644
--- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
@@ -17,6 +17,7 @@ Required Properties:
- "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller.
- "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller.
- "samsung,exynos5260-pinctrl": for Exynos5260 compatible pin-controller.
+ - "samsung,exynos5410-pinctrl": for Exynos5410 compatible pin-controller.
- "samsung,exynos5420-pinctrl": for Exynos5420 compatible pin-controller.
- "samsung,exynos7-pinctrl": for Exynos7 compatible pin-controller.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 06d443450f21..619af9bfdcb3 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,7 @@ prototypes:
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- const char *(*follow_link) (struct dentry *, void **);
- void (*put_link) (struct inode *, void *);
+ const char *(*get_link) (struct dentry *, struct inode *, void **);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, unsigned int);
int (*get_acl)(struct inode *, int);
@@ -83,8 +82,7 @@ rmdir: yes (both) (see below)
rename: yes (all) (see below)
rename2: yes (all) (see below)
readlink: no
-follow_link: no
-put_link: no
+get_link: no
setattr: yes
permission: no (may not block if called in rcu-walk mode)
get_acl: no
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f24d1b833957..0f88e6020487 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -504,3 +504,20 @@ in your dentry operations instead.
[mandatory]
__fd_install() & fd_install() can now sleep. Callers should not
hold a spinlock or other resources that do not allow a schedule.
+--
+[mandatory]
+ any symlink that might use page_follow_link_light/page_put_link() must
+ have inode_nohighmem(inode) called before anything might start playing with
+ its pagecache.
+--
+[mandatory]
+ ->follow_link() is replaced with ->get_link(); same API, except that
+ * ->get_link() gets inode as a separate argument
+ * ->get_link() may be called in RCU mode - in that case NULL
+ dentry is passed
+--
+[mandatory]
+ ->get_link() gets struct delayed_call *done now, and should do
+ set_delayed_call() where it used to set *cookie.
+ ->put_link() is gone - just give the destructor to set_delayed_call()
+ in ->get_link().
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 8c6f07ad373a..b02a7d598258 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- const char *(*follow_link) (struct dentry *, void **);
- void (*put_link) (struct inode *, void *);
+ const char *(*get_link) (struct dentry *, struct inode *,
+ struct delayed_call *);
int (*permission) (struct inode *, int);
int (*get_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
@@ -434,20 +434,19 @@ otherwise noted.
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
- follow_link: called by the VFS to follow a symbolic link to the
+ get_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
symbolic links. This method returns the symlink body
to traverse (and possibly resets the current position with
nd_jump_link()). If the body won't go away until the inode
is gone, nothing else is needed; if it needs to be otherwise
- pinned, the data needed to release whatever we'd grabbed
- is to be stored in void * variable passed by address to
- follow_link() instance.
-
- put_link: called by the VFS to release resources allocated by
- follow_link(). The cookie stored by follow_link() is passed
- to this method as the last parameter; only called when
- cookie isn't NULL.
+ pinned, arrange for its release by having get_link(..., ..., done)
+ do set_delayed_call(done, destructor, argument).
+ In that case destructor(argument) will be called once VFS is
+ done with the body you've returned.
+ May be called in RCU mode; that is indicated by NULL dentry
+ argument. If request can't be handled without leaving RCU mode,
+ have it return ERR_PTR(-ECHILD).
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 742f69d18fc8..1a8169ba29e6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -472,6 +472,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Change the amount of debugging information output
when initialising the APIC and IO-APIC components.
+ apic_extnmi= [APIC,X86] External NMI delivery setting
+ Format: { bsp (default) | all | none }
+ bsp: External NMI is delivered only to CPU 0
+ all: External NMIs are broadcast to all CPUs as a
+ backup of CPU 0
+ none: External NMI is masked for all CPUs. This is
+ useful so that a dump capture kernel won't be
+ shot down by NMI
+
autoconf= [IPV6]
See Documentation/networking/ipv6.txt.
@@ -3296,18 +3305,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
rcutorture.verbose= [KNL]
Enable additional printk() statements.
+ rcupdate.rcu_cpu_stall_suppress= [KNL]
+ Suppress RCU CPU stall warning messages.
+
+ rcupdate.rcu_cpu_stall_timeout= [KNL]
+ Set timeout for RCU CPU stall warning messages.
+
rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for
example, synchronize_rcu_expedited() instead
of synchronize_rcu(). This reduces latency,
but can increase CPU utilization, degrade
real-time latency, and degrade energy efficiency.
-
- rcupdate.rcu_cpu_stall_suppress= [KNL]
- Suppress RCU CPU stall warning messages.
-
- rcupdate.rcu_cpu_stall_timeout= [KNL]
- Set timeout for RCU CPU stall warning messages.
+ No effect on CONFIG_TINY_RCU kernels.
+
+ rcupdate.rcu_normal= [KNL]
+ Use only normal grace-period primitives,
+ for example, synchronize_rcu() instead of
+ synchronize_rcu_expedited(). This improves
+ real-time latency, CPU utilization, and
+ energy efficiency, but can expose users to
+ increased grace-period latency. This parameter
+ overrides rcupdate.rcu_expedited. No effect on
+ CONFIG_TINY_RCU kernels.
+
+ rcupdate.rcu_normal_after_boot= [KNL]
+ Once boot has completed (that is, after
+ rcu_end_inkernel_boot() has been invoked), use
+ only normal grace-period primitives. No effect
+ on CONFIG_TINY_RCU kernels.
rcupdate.rcu_task_stall_timeout= [KNL]
Set timeout in jiffies for RCU task stall warning
@@ -4114,6 +4140,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
or other driver-specific files in the
Documentation/watchdog/ directory.
+ workqueue.watchdog_thresh=
+ If CONFIG_WQ_WATCHDOG is configured, workqueue can
+ warn stall conditions and dump internal state to
+ help debugging. 0 disables workqueue stall
+ detection; otherwise, it's the stall threshold
+ duration in seconds. The default value is 30 and
+ it can be updated at runtime by writing to the
+ corresponding sysfs file.
+
workqueue.disable_numa
By default, all work items queued to unbound
workqueues are affine to the NUMA nodes they're
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index aef9487303d0..a61be39c7b51 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,7 +194,7 @@ There are some minimal guarantees that may be expected of a CPU:
(*) On any given CPU, dependent memory accesses will be issued in order, with
respect to itself. This means that for:
- WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+ Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
the CPU will issue the following memory operations:
@@ -202,9 +202,9 @@ There are some minimal guarantees that may be expected of a CPU:
and always in that order. On most systems, smp_read_barrier_depends()
does nothing, but it is required for DEC Alpha. The READ_ONCE()
- and WRITE_ONCE() are required to prevent compiler mischief. Please
- note that you should normally use something like rcu_dereference()
- instead of open-coding smp_read_barrier_depends().
+ is required to prevent compiler mischief. Please note that you
+ should normally use something like rcu_dereference() instead of
+ open-coding smp_read_barrier_depends().
(*) Overlapping loads and stores within a particular CPU will appear to be
ordered within that CPU. This means that for:
@@ -1673,8 +1673,8 @@ There are some more advanced barrier functions:
(*) smp_store_mb(var, value)
This assigns the value to the variable and then inserts a full memory
- barrier after it, depending on the function. It isn't guaranteed to
- insert anything more than a compiler barrier in a UP compilation.
+ barrier after it. It isn't guaranteed to insert anything more than a
+ compiler barrier in a UP compilation.
(*) smp_mb__before_atomic();
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index af70d1541d3a..73c6b1ef0e84 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -551,6 +551,21 @@ the recommended setting is 60.
==============================================================
+panic_on_io_nmi:
+
+Controls the kernel's behavior when a CPU receives an NMI caused by
+an IO error.
+
+0: try to continue operation (default)
+
+1: panic immediately. The IO error triggered an NMI. This indicates a
+ serious system condition which could result in IO data corruption.
+ Rather than continuing, panicking might be a better choice. Some
+ servers issue this sort of NMI when the dump button is pushed,
+ and you can use this option to take a crash dump.
+
+==============================================================
+
panic_on_oops:
Controls the kernel's behaviour when an oops or BUG is encountered.
diff --git a/Documentation/trace/events-msr.txt b/Documentation/trace/events-msr.txt
new file mode 100644
index 000000000000..78c383bf06aa
--- /dev/null
+++ b/Documentation/trace/events-msr.txt
@@ -0,0 +1,37 @@
+
+The x86 kernel supports tracing most MSR (Model Specific Register) accesses.
+To see the definition of the MSRs on Intel systems please see the SDM
+at http://www.intel.com/sdm (Volume 3)
+
+Available trace points:
+
+/sys/kernel/debug/tracing/events/msr/
+
+Trace MSR reads
+
+read_msr
+
+msr: MSR number
+val: Value written
+failed: 1 if the access failed, otherwise 0
+
+
+Trace MSR writes
+
+write_msr
+
+msr: MSR number
+val: Value written
+failed: 1 if the access failed, otherwise 0
+
+
+Trace RDPMC in kernel
+
+rdpmc
+
+The trace data can be post processed with the postprocess/decode_msr.py script
+
+cat /sys/kernel/debug/tracing/trace | decode_msr.py /usr/src/linux/include/asm/msr-index.h
+
+to add symbolic MSR names.
+
diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py
new file mode 100644
index 000000000000..0ab40e0db580
--- /dev/null
+++ b/Documentation/trace/postprocess/decode_msr.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# add symbolic names to read_msr / write_msr in trace
+# decode_msr msr-index.h < trace
+import sys
+import re
+
+msrs = dict()
+
+with open(sys.argv[1] if len(sys.argv) > 1 else "msr-index.h", "r") as f:
+ for j in f:
+ m = re.match(r'#define (MSR_\w+)\s+(0x[0-9a-fA-F]+)', j)
+ if m:
+ msrs[int(m.group(2), 16)] = m.group(1)
+
+extra_ranges = (
+ ( "MSR_LASTBRANCH_%d_FROM_IP", 0x680, 0x69F ),
+ ( "MSR_LASTBRANCH_%d_TO_IP", 0x6C0, 0x6DF ),
+ ( "LBR_INFO_%d", 0xdc0, 0xddf ),
+)
+
+for j in sys.stdin:
+ m = re.search(r'(read|write)_msr:\s+([0-9a-f]+)', j)
+ if m:
+ r = None
+ num = int(m.group(2), 16)
+ if num in msrs:
+ r = msrs[num]
+ else:
+ for er in extra_ranges:
+ if er[1] <= num <= er[2]:
+ r = er[0] % (num - er[1],)
+ break
+ if r:
+ j = j.replace(" " + m.group(2), " " + r + "(" + m.group(2) + ")")
+ print j,
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 233f83464814..a394704a9003 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1896,7 +1896,6 @@ ATMEL AT91 / AT32 MCI DRIVER
M: Ludovic Desroches <ludovic.desroches@atmel.com>
S: Maintained
F: drivers/mmc/host/atmel-mci.c
-F: drivers/mmc/host/atmel-mci-regs.h
ATMEL AT91 / AT32 SERIAL DRIVER
M: Nicolas Ferre <nicolas.ferre@atmel.com>
@@ -5622,9 +5621,7 @@ F: Documentation/trace/intel_th.txt
F: drivers/hwtracing/intel_th/
INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
-M: Richard L Maliszewski <richard.l.maliszewski@intel.com>
-M: Gang Wei <gang.wei@intel.com>
-M: Shane Wang <shane.wang@intel.com>
+M: Ning Sun <ning.sun@intel.com>
L: tboot-devel@lists.sourceforge.net
W: http://tboot.sourceforge.net
T: hg http://tboot.hg.sourceforge.net:8000/hgroot/tboot/tboot
@@ -8369,6 +8366,7 @@ F: drivers/pinctrl/intel/
PIN CONTROLLER - RENESAS
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+M: Geert Uytterhoeven <geert+renesas@glider.be>
L: linux-sh@vger.kernel.org
S: Maintained
F: drivers/pinctrl/sh-pfc/
@@ -8639,6 +8637,7 @@ S: Maintained
F: arch/arm/mach-pxa/
F: drivers/dma/pxa*
F: drivers/pcmcia/pxa2xx*
+F: drivers/pinctrl/pxa/
F: drivers/spi/spi-pxa2xx*
F: drivers/usb/gadget/udc/pxa2*
F: include/sound/pxa2xx-lib.h
@@ -9366,7 +9365,7 @@ M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
F: drivers/thunderbolt/
-TIMEKEEPING, CLOCKSOURCE CORE, NTP
+TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
M: John Stultz <john.stultz@linaro.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
@@ -9379,6 +9378,7 @@ F: include/uapi/linux/time.h
F: include/uapi/linux/timex.h
F: kernel/time/clocksource.c
F: kernel/time/time*.c
+F: kernel/time/alarmtimer.c
F: kernel/time/ntp.c
F: tools/testing/selftests/timers/
diff --git a/Makefile b/Makefile
index 9d94adeceab6..70dea02f1346 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 34e1569a11ee..688dc7b0d951 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -611,6 +611,7 @@ config ARCH_PXA
select AUTO_ZRELADDR
select COMMON_CLK
select CLKDEV_LOOKUP
+ select CLKSRC_PXA
select CLKSRC_MMIO
select CLKSRC_OF
select GENERIC_CLOCKEVENTS
@@ -650,6 +651,8 @@ config ARCH_SA1100
select ARCH_SPARSEMEM_ENABLE
select CLKDEV_LOOKUP
select CLKSRC_MMIO
+ select CLKSRC_PXA
+ select CLKSRC_OF if OF
select CPU_FREQ
select CPU_SA1100
select GENERIC_CLOCKEVENTS
diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
index 314f59c12162..d0c743853318 100644
--- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
+++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi
@@ -25,9 +25,9 @@
cache-sets = <512>;
cache-line-size = <32>;
/* At full speed latency must be >=2 */
- arm,tag-latency = <2>;
- arm,data-latency = <2 2>;
- arm,dirty-latency = <2>;
+ arm,tag-latency = <8>;
+ arm,data-latency = <8 8>;
+ arm,dirty-latency = <8>;
};
mtu0: mtu@101e2000 {
diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts
index 01f40197ea13..3279bf1a17a1 100644
--- a/arch/arm/boot/dts/versatile-ab.dts
+++ b/arch/arm/boot/dts/versatile-ab.dts
@@ -110,7 +110,11 @@
interrupt-parent = <&vic>;
interrupts = <31>; /* Cascaded to vic */
clear-mask = <0xffffffff>;
- valid-mask = <0xffc203f8>;
+ /*
+ * Valid interrupt lines mask according to
+ * table 4-36 page 4-50 of ARM DUI 0225D
+ */
+ valid-mask = <0x0760031b>;
};
dma@10130000 {
@@ -266,8 +270,8 @@
};
mmc@5000 {
compatible = "arm,pl180", "arm,primecell";
- reg = < 0x5000 0x1000>;
- interrupts-extended = <&vic 22 &sic 2>;
+ reg = <0x5000 0x1000>;
+ interrupts-extended = <&vic 22 &sic 1>;
clocks = <&xtal24mhz>, <&pclk>;
clock-names = "mclk", "apb_pclk";
};
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index b83137f66034..33a8eb28374e 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -5,6 +5,16 @@
compatible = "arm,versatile-pb";
amba {
+ /* The Versatile PB is using more SIC IRQ lines than the AB */
+ sic: intc@10003000 {
+ clear-mask = <0xffffffff>;
+ /*
+ * Valid interrupt lines mask according to
+ * figure 3-30 page 3-74 of ARM DUI 0224B
+ */
+ valid-mask = <0x7fe003ff>;
+ };
+
gpio2: gpio@101e6000 {
compatible = "arm,pl061", "arm,primecell";
reg = <0x101e6000 0x1000>;
@@ -67,6 +77,13 @@
};
fpga {
+ mmc@5000 {
+ /*
+ * Overrides the interrupt assignment from
+ * the Versatile AB board file.
+ */
+ interrupts-extended = <&sic 22 &sic 23>;
+ };
uart@9000 {
compatible = "arm,pl011", "arm,primecell";
reg = <0x9000 0x1000>;
@@ -86,7 +103,8 @@
mmc@b000 {
compatible = "arm,pl180", "arm,primecell";
reg = <0xb000 0x1000>;
- interrupts-extended = <&vic 23 &sic 2>;
+ interrupt-parent = <&sic>;
+ interrupts = <1>, <2>;
clocks = <&xtal24mhz>, <&pclk>;
clock-names = "mclk", "apb_pclk";
};
diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi
index b1c59a766a13..e12213d16693 100644
--- a/arch/arm/boot/dts/wm8650.dtsi
+++ b/arch/arm/boot/dts/wm8650.dtsi
@@ -187,6 +187,15 @@
interrupts = <43>;
};
+ sdhc@d800a000 {
+ compatible = "wm,wm8505-sdhc";
+ reg = <0xd800a000 0x400>;
+ interrupts = <20>, <21>;
+ clocks = <&clksdhc>;
+ bus-width = <4>;
+ sdon-inverted;
+ };
+
fb: fb@d8050800 {
compatible = "wm,wm8505-fb";
reg = <0xd8050800 0x200>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 69a22fdb52a5..cd7b198fc79e 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -366,6 +366,7 @@ CONFIG_BATTERY_MAX17042=m
CONFIG_CHARGER_MAX14577=m
CONFIG_CHARGER_MAX77693=m
CONFIG_CHARGER_TPS65090=y
+CONFIG_AXP20X_POWER=m
CONFIG_POWER_RESET_AS3722=y
CONFIG_POWER_RESET_GPIO=y
CONFIG_POWER_RESET_GPIO_RESTART=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 3c36e16fcacf..b503a89441bf 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -84,6 +84,7 @@ CONFIG_SPI_SUN4I=y
CONFIG_SPI_SUN6I=y
CONFIG_GPIO_SYSFS=y
CONFIG_POWER_SUPPLY=y
+CONFIG_AXP20X_POWER=y
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
CONFIG_WATCHDOG=y
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 3a10f1a8317a..ff105399aae4 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -27,6 +27,7 @@ menuconfig ARCH_EXYNOS
select SRAM
select THERMAL
select MFD_SYSCON
+ select CLKSRC_EXYNOS_MCT
help
Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5)
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 17a6f752a436..7b76ce01c21d 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -149,8 +149,8 @@ static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg,
freq = 104;
break;
default:
- freq = 54;
- break;
+ pr_err("onenand rate not detected, bad GPMC async timings?\n");
+ freq = 0;
}
return freq;
@@ -271,6 +271,11 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
struct gpmc_timings t;
int ret;
+ /*
+ * Note that we need to keep sync_write set for the call to
+ * omap2_onenand_set_async_mode() to work to detect the onenand
+ * supported clock rate for the sync timings.
+ */
if (gpmc_onenand_data->of_node) {
gpmc_read_settings_dt(gpmc_onenand_data->of_node,
&onenand_async);
@@ -281,12 +286,9 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
else
gpmc_onenand_data->flags |= ONENAND_SYNC_READ;
onenand_async.sync_read = false;
- onenand_async.sync_write = false;
}
}
- omap2_onenand_set_async_mode(onenand_base);
-
omap2_onenand_calc_async_timings(&t);
ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
@@ -310,6 +312,8 @@ static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr)
if (!freq) {
/* Very first call freq is not known */
freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base);
+ if (!freq)
+ return -ENODEV;
set_onenand_cfg(onenand_base);
}
diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index 125865daaf17..12dd1dc0a041 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -3,6 +3,7 @@ menuconfig ARCH_STI
select ARM_GIC
select ST_IRQCHIP
select ARM_GLOBAL_TIMER
+ select CLKSRC_ST_LPC
select PINCTRL
select PINCTRL_ST
select MFD_SYSCON
diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index c9ac19b24e5a..5eacdd61e61c 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -32,6 +32,7 @@ config UX500_SOC_DB8500
select PINCTRL_AB8540
select REGULATOR
select REGULATOR_DB8500_PRCMU
+ select CLKSRC_DBX500_PRCMU
select PM_GENERIC_DOMAINS if PM
config MACH_MOP500
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 591f9db3bf40..93d0b6d0b63e 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -187,19 +187,6 @@ static inline int mem_words_used(struct jit_ctx *ctx)
return fls(ctx->seen & SEEN_MEM);
}
-static inline bool is_load_to_a(u16 inst)
-{
- switch (inst) {
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- return true;
- default:
- return false;
- }
-}
-
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
@@ -211,7 +198,6 @@ static void jit_fill_hole(void *area, unsigned int size)
static void build_prologue(struct jit_ctx *ctx)
{
u16 reg_set = saved_regs(ctx);
- u16 first_inst = ctx->skf->insns[0].code;
u16 off;
#ifdef CONFIG_FRAME_POINTER
@@ -241,7 +227,7 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_MOV_I(r_X, 0), ctx);
/* do not leak kernel data to userspace */
- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
emit(ARM_MOV_I(r_A, 0), ctx);
/* stack space for the BPF_MEM words */
@@ -770,7 +756,8 @@ load_ind:
case BPF_ALU | BPF_RSH | BPF_K:
if (unlikely(k > 31))
return -1;
- emit(ARM_LSR_I(r_A, r_A, k), ctx);
+ if (k)
+ emit(ARM_LSR_I(r_A, r_A, k), ctx);
break;
case BPF_ALU | BPF_RSH | BPF_X:
update_on_xread(ctx);
diff --git a/arch/blackfin/include/asm/cmpxchg.h b/arch/blackfin/include/asm/cmpxchg.h
index c05868cc61c1..253928854299 100644
--- a/arch/blackfin/include/asm/cmpxchg.h
+++ b/arch/blackfin/include/asm/cmpxchg.h
@@ -128,6 +128,5 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
#endif /* !CONFIG_SMP */
#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
-#define tas(ptr) ((void)xchg((ptr), 1))
#endif /* __ARCH_BLACKFIN_CMPXCHG__ */
diff --git a/arch/c6x/include/asm/cmpxchg.h b/arch/c6x/include/asm/cmpxchg.h
index b27c8cefb8c3..93d0a5a047a2 100644
--- a/arch/c6x/include/asm/cmpxchg.h
+++ b/arch/c6x/include/asm/cmpxchg.h
@@ -47,8 +47,6 @@ static inline unsigned int __xchg(unsigned int x, volatile void *ptr, int size)
#define xchg(ptr, x) \
((__typeof__(*(ptr)))__xchg((unsigned int)(x), (void *) (ptr), \
sizeof(*(ptr))))
-#define tas(ptr) xchg((ptr), 1)
-
#include <asm-generic/cmpxchg-local.h>
diff --git a/arch/frv/include/asm/cmpxchg.h b/arch/frv/include/asm/cmpxchg.h
index 5b04dd0aecab..a899765102ea 100644
--- a/arch/frv/include/asm/cmpxchg.h
+++ b/arch/frv/include/asm/cmpxchg.h
@@ -69,8 +69,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
#endif
-#define tas(ptr) (xchg((ptr), 1))
-
/*****************************************************************************/
/*
* compare and conditionally exchange value with memory
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index dd3ac75776ad..2e20333cbce9 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -17,6 +17,7 @@ config H8300
select HAVE_MEMBLOCK
select HAVE_DMA_ATTRS
select CLKSRC_OF
+ select H8300_TMR8
config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
index bb837cded268..f0e14f3a800d 100644
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -3,40 +3,45 @@
#ifdef __KERNEL__
-#include <asm-generic/io.h>
-
/* H8/300 internal I/O functions */
-static inline unsigned char ctrl_inb(unsigned long addr)
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
{
- return *(volatile unsigned char *)addr;
+ return *(volatile u8 *)addr;
}
-static inline unsigned short ctrl_inw(unsigned long addr)
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
{
- return *(volatile unsigned short *)addr;
+ return *(volatile u16 *)addr;
}
-static inline unsigned long ctrl_inl(unsigned long addr)
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
{
- return *(volatile unsigned long *)addr;
+ return *(volatile u32 *)addr;
}
-static inline void ctrl_outb(unsigned char b, unsigned long addr)
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, const volatile void __iomem *addr)
{
- *(volatile unsigned char *)addr = b;
+ *(volatile u8 *)addr = b;
}
-static inline void ctrl_outw(unsigned short b, unsigned long addr)
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, const volatile void __iomem *addr)
{
- *(volatile unsigned short *)addr = b;
+ *(volatile u16 *)addr = b;
}
-static inline void ctrl_outl(unsigned long b, unsigned long addr)
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, const volatile void __iomem *addr)
{
- *(volatile unsigned long *)addr = b;
+ *(volatile u32 *)addr = b;
}
-static inline void ctrl_bclr(int b, unsigned char *addr)
+static inline void ctrl_bclr(int b, void __iomem *addr)
{
if (__builtin_constant_p(b))
__asm__("bclr %1,%0" : "+WU"(*addr): "i"(b));
@@ -44,7 +49,7 @@ static inline void ctrl_bclr(int b, unsigned char *addr)
__asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b));
}
-static inline void ctrl_bset(int b, unsigned char *addr)
+static inline void ctrl_bset(int b, void __iomem *addr)
{
if (__builtin_constant_p(b))
__asm__("bset %1,%0" : "+WU"(*addr): "i"(b));
@@ -52,6 +57,8 @@ static inline void ctrl_bset(int b, unsigned char *addr)
__asm__("bset %w1,%0" : "+WU"(*addr): "r"(b));
}
+#include <asm-generic/io.h>
+
#endif /* __KERNEL__ */
#endif /* _H8300_IO_H */
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index c772abe6d19c..e4985dfa91dc 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -207,14 +207,14 @@ device_initcall(device_probe);
#define get_wait(base, addr) ({ \
int baddr; \
baddr = ((addr) / 0x200000 * 2); \
- w *= (ctrl_inw((unsigned long)(base) + 2) & (3 << baddr)) + 1; \
+ w *= (readw((base) + 2) & (3 << baddr)) + 1; \
})
#endif
#if defined(CONFIG_CPU_H8S)
#define get_wait(base, addr) ({ \
int baddr; \
baddr = ((addr) / 0x200000 * 16); \
- w *= (ctrl_inl((unsigned long)(base) + 2) & (7 << baddr)) + 1; \
+ w *= (readl((base) + 2) & (7 << baddr)) + 1; \
})
#endif
@@ -228,8 +228,8 @@ static __init int access_timing(void)
bsc = of_find_compatible_node(NULL, NULL, "renesas,h8300-bsc");
base = of_iomap(bsc, 0);
- w = (ctrl_inb((unsigned long)base + 0) & bit)?2:1;
- if (ctrl_inb((unsigned long)base + 1) & bit)
+ w = (readb(base + 0) & bit)?2:1;
+ if (readb(base + 1) & bit)
w *= get_wait(base, addr);
else
w *= 2;
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index df896a1c41d3..209c4b817c95 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -77,7 +77,7 @@ do { \
___p1; \
})
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
/*
* The group barrier in front of the rsm & ssm are necessary to ensure
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 0ec484d2dcbc..b9295793a5e2 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -6,8 +6,6 @@
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
-#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
-
#ifdef __ASSEMBLY__
# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */
#else /* !__ASSEMBLY__ */
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 77cb27309db2..1a8c96035716 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -521,19 +521,6 @@ static inline u16 align_sp(unsigned int num)
return num;
}
-static bool is_load_to_a(u16 inst)
-{
- switch (inst) {
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- return true;
- default:
- return false;
- }
-}
-
static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
int i = 0, real_off = 0;
@@ -614,7 +601,6 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
static void build_prologue(struct jit_ctx *ctx)
{
- u16 first_inst = ctx->skf->insns[0].code;
int sp_off;
/* Calculate the total offset for the stack pointer */
@@ -641,7 +627,7 @@ static void build_prologue(struct jit_ctx *ctx)
emit_jit_reg_move(r_X, r_zero, ctx);
/* Do not leak kernel data to userspace */
- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
emit_jit_reg_move(r_A, r_zero, ctx);
}
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 0eca6efc0631..a7af5fb7b914 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -34,7 +34,7 @@
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#ifdef __SUBARCH_HAS_LWSYNC
# define SMPWMB LWSYNC
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 04782164ee67..2d66a8446198 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -78,18 +78,9 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
PPC_LI(r_X, 0);
}
- switch (filter[0].code) {
- case BPF_RET | BPF_K:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- /* first instruction sets A register (or is RET 'constant') */
- break;
- default:
- /* make sure we dont leak kernel information to user */
+ /* make sure we dont leak kernel information to user */
+ if (bpf_needs_clear_a(&filter[0]))
PPC_LI(r_A, 0);
- }
}
static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d68e11e0df5e..7ffd0b19135c 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -36,7 +36,7 @@
#define smp_mb__before_atomic() smp_mb()
#define smp_mb__after_atomic() smp_mb()
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 22564f5f2364..3e6e05a7c4c2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -420,22 +420,9 @@ void bpf_jit_compile(struct bpf_prog *fp)
}
emit_reg_move(O7, r_saved_O7);
- switch (filter[0].code) {
- case BPF_RET | BPF_K:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- /* The first instruction sets the A register (or is
- * a "RET 'constant'")
- */
- break;
- default:
- /* Make sure we dont leak kernel information to the
- * user.
- */
+ /* Make sure we dont leak kernel information to the user. */
+ if (bpf_needs_clear_a(&filter[0]))
emit_clear(r_A); /* A = 0 */
- }
for (i = 0; i < flen; i++) {
unsigned int K = filter[i].k;
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 106c21bd7f44..8ec7a4599c08 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -176,8 +176,6 @@ config NR_CPUS
smaller kernel memory footprint results from using a smaller
value on chips with fewer tiles.
-if TILEGX
-
choice
prompt "Kernel page size"
default PAGE_SIZE_64KB
@@ -188,8 +186,11 @@ choice
connections, etc., it may be better to select 16KB, which uses
memory more efficiently at some cost in TLB performance.
- Note that this option is TILE-Gx specific; currently
- TILEPro page size is set by rebuilding the hypervisor.
+ Note that for TILEPro, you must also rebuild the hypervisor
+ with a matching page size.
+
+config PAGE_SIZE_4KB
+ bool "4KB" if TILEPRO
config PAGE_SIZE_16KB
bool "16KB"
@@ -199,8 +200,6 @@ config PAGE_SIZE_64KB
endchoice
-endif
-
source "kernel/Kconfig.hz"
config KEXEC
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
index 0ccda3c425be..25d5899497be 100644
--- a/arch/tile/include/asm/cmpxchg.h
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -127,8 +127,6 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n);
#endif
-#define tas(ptr) xchg((ptr), 1)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index a213a8d84a95..8eca6a0e1762 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,15 +20,17 @@
#include <arch/chip.h>
/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#if defined(CONFIG_PAGE_SIZE_16KB)
+#if defined(CONFIG_PAGE_SIZE_4KB) /* tilepro only */
+#define PAGE_SHIFT 12
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_4K
+#elif defined(CONFIG_PAGE_SIZE_16KB)
#define PAGE_SHIFT 14
#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K
#elif defined(CONFIG_PAGE_SIZE_64KB)
#define PAGE_SHIFT 16
#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K
#else
-#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
-#define CTX_PAGE_FLAG 0
+#error Page size not specified in Kconfig
#endif
#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db3622f22b61..258965d56beb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -349,6 +349,17 @@ config X86_FEATURE_NAMES
If in doubt, say Y.
+config X86_FAST_FEATURE_TESTS
+ bool "Fast CPU feature tests" if EMBEDDED
+ default y
+ ---help---
+ Some fast-paths in the kernel depend on the capabilities of the CPU.
+ Say Y here for the kernel to patch in the appropriate code at runtime
+ based on the capabilities of the CPU. The infrastructure for patching
+ code at runtime takes up some additional space; space-constrained
+ embedded systems may wish to say N here to produce smaller, slightly
+ slower code.
+
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
@@ -687,6 +698,14 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
+config QUEUED_LOCK_STAT
+ bool "Paravirt queued spinlock statistics"
+ depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
+ ---help---
+ Enable the collection of statistical data on the slowpath
+ behavior of paravirtualized queued spinlocks and report
+ them on debugfs.
+
source "arch/x86/xen/Kconfig"
config KVM_GUEST
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 137dfa96aa14..110253ce83af 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -69,7 +69,7 @@ config X86_PTDUMP_CORE
def_bool n
config X86_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
+ tristate "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
select X86_PTDUMP_CORE
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 722bacea040e..8baaff5af0b5 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -125,7 +125,7 @@ static struct crypto_alg alg = {
static int __init chacha20_simd_mod_init(void)
{
- if (!cpu_has_ssse3)
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 81a595d75cf5..0e9871693f24 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -257,7 +257,7 @@ static int __init crc32c_intel_mod_init(void)
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
#ifdef CONFIG_X86_64
- if (cpu_has_pclmulqdq) {
+ if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3c71dd947c7b..e32206e09868 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,3 +1,5 @@
+#include <linux/jump_label.h>
+
/*
x86 function call convention, 64-bit:
@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with
#endif /* CONFIG_X86_64 */
+/*
+ * This does 'call enter_from_user_mode' unless we can avoid it based on
+ * kernel config or using the static jump infrastructure.
+ */
+.macro CALL_enter_from_user_mode
+#ifdef CONFIG_CONTEXT_TRACKING
+#ifdef HAVE_JUMP_LABEL
+ STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+#endif
+ call enter_from_user_mode
+.Lafter_call_\@:
+#endif
+.endm
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a89fdbc1f0be..03663740c866 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -421,7 +421,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
regs->ip = landing_pad;
/*
- * Fetch ECX from where the vDSO stashed it.
+ * Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
@@ -432,10 +432,10 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
* Micro-optimization: the pointer we're following is explicitly
* 32 bits, so it can't be out of range.
*/
- __get_user(*(u32 *)&regs->cx,
+ __get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
- get_user(*(u32 *)&regs->cx,
+ get_user(*(u32 *)&regs->bp,
(u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 3eb572ed3d7a..77d8c5112900 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -292,7 +292,7 @@ ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
- pushl %ecx /* pt_regs->cx */
+ pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
@@ -308,8 +308,9 @@ sysenter_past_esp:
movl %esp, %eax
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSEXIT */
TRACE_IRQS_ON /* User mode traces as IRQs on. */
@@ -328,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
- ENABLE_INTERRUPTS_SYSEXIT
+ sti
+ sysexit
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -551,11 +553,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
-
-ENTRY(native_irq_enable_sysexit)
- sti
- sysexit
-END(native_irq_enable_sysexit)
#endif
ENTRY(overflow)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a55697d19824..9d34d3cfceb6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -520,9 +520,7 @@ END(irq_entries_start)
*/
TRACE_IRQS_OFF
-#ifdef CONFIG_CONTEXT_TRACKING
- call enter_from_user_mode
-#endif
+ CALL_enter_from_user_mode
1:
/*
@@ -1066,9 +1064,7 @@ ENTRY(error_entry)
* (which can take locks).
*/
TRACE_IRQS_OFF
-#ifdef CONFIG_CONTEXT_TRACKING
- call enter_from_user_mode
-#endif
+ CALL_enter_from_user_mode
ret
.Lerror_entry_done:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index c3201830a85e..ff1c6d61f332 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -18,13 +18,6 @@
.section .entry.text, "ax"
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret32)
- swapgs
- sysretl
-ENDPROC(native_usergs_sysret32)
-#endif
-
/*
* 32-bit SYSENTER instruction entry.
*
@@ -63,7 +56,7 @@ ENTRY(entry_SYSENTER_compat)
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
- pushq %rcx /* pt_regs->sp */
+ pushq %rbp /* pt_regs->sp (stashed in bp) */
/*
* Push flags. This is nasty. First, interrupts are currently
@@ -82,14 +75,14 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx (will be overwritten) */
+ pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 = 0 */
pushq %r8 /* pt_regs->r9 = 0 */
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
- * NB.: sysenter_fix_flags is a label with the code under it moved
+ * NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
- jnz sysenter_fix_flags
-sysenter_flags_fixed:
+ jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -121,14 +114,15 @@ sysenter_flags_fixed:
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call
-sysenter_fix_flags:
+.Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
popfq
- jmp sysenter_flags_fixed
+ jmp .Lsysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)
/*
@@ -178,7 +172,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx (will be overwritten) */
+ pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
xorq %r8,%r8
pushq %r8 /* pt_regs->r8 = 0 */
@@ -186,7 +180,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r8 /* pt_regs->r10 = 0 */
pushq %r8 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
+ pushq %rbp /* pt_regs->rbp (will be overwritten) */
pushq %r8 /* pt_regs->r12 = 0 */
pushq %r8 /* pt_regs->r13 = 0 */
pushq %r8 /* pt_regs->r14 = 0 */
@@ -200,8 +194,9 @@ ENTRY(entry_SYSCALL_compat)
movq %rsp, %rdi
call do_fast_syscall_32
- testl %eax, %eax
- jz .Lsyscall_32_done
+ /* XEN PV guests always use IRET path */
+ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
+ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
/* Opportunistic SYSRET */
sysret32_from_system_call:
@@ -236,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
- USERGS_SYSRET32
+ swapgs
+ sysretl
END(entry_SYSCALL_compat)
/*
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index ca94fa649251..8602f06c759f 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -17,8 +17,10 @@
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
+#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
+#include <linux/kernel.h>
#define gtod (&VVAR(vsyscall_gtod_data))
@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
}
#endif
-#ifndef BUILD_VDSO32
+#ifdef CONFIG_PARAVIRT_CLOCK
+extern u8 pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
-#include <linux/kernel.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
-#include <asm/pvclock.h>
+#ifndef BUILD_VDSO32
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
-#ifdef CONFIG_PARAVIRT_CLOCK
-
-static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
-{
- const struct pvclock_vsyscall_time_info *pvti_base;
- int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
- int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
-
- BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
-
- pvti_base = (struct pvclock_vsyscall_time_info *)
- __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
-
- return &pvti_base[offset];
-}
-
-static notrace cycle_t vread_pvclock(int *mode)
-{
- const struct pvclock_vsyscall_time_info *pvti;
- cycle_t ret;
- u64 last;
- u32 version;
- u8 flags;
- unsigned cpu, cpu1;
-
-
- /*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
- */
- do {
- cpu = __getcpu() & VGETCPU_CPU_MASK;
- /* TODO: We can put vcpu id into higher bits of pvti.version.
- * This will save a couple of cycles by getting rid of
- * __getcpu() calls (Gleb).
- */
-
- pvti = get_pvti(cpu);
-
- version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
-
- /*
- * Test we're still on the cpu as well as the version.
- * We could have been migrated just after the first
- * vgetcpu but before fetching the version, so we
- * wouldn't notice a version change.
- */
- cpu1 = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1 ||
- (pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
-
- if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
- *mode = VCLOCK_NONE;
-
- /* refer to tsc.c read_tsc() comment for rationale */
- last = gtod->cycle_last;
-
- if (likely(ret >= last))
- return ret;
-
- return last;
-}
-#endif
#else
@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}
+#endif
+
#ifdef CONFIG_PARAVIRT_CLOCK
+static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
+{
+ return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
+}
static notrace cycle_t vread_pvclock(int *mode)
{
- *mode = VCLOCK_NONE;
- return 0;
-}
-#endif
+ const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
+ cycle_t ret;
+ u64 tsc, pvti_tsc;
+ u64 last, delta, pvti_system_time;
+ u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
+ *mode = VCLOCK_NONE;
+ return 0;
+ }
+
+ do {
+ version = pvti->version;
+
+ smp_rmb();
+
+ tsc = rdtsc_ordered();
+ pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
+ pvti_tsc_shift = pvti->tsc_shift;
+ pvti_system_time = pvti->system_time;
+ pvti_tsc = pvti->tsc_timestamp;
+
+ /* Make sure that the version double-check is last. */
+ smp_rmb();
+ } while (unlikely((version & 1) || version != pvti->version));
+
+ delta = tsc - pvti_tsc;
+ ret = pvti_system_time +
+ pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
+ pvti_tsc_shift);
+
+ /* refer to vread_tsc() comment for rationale */
+ last = gtod->cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ return last;
+}
#endif
notrace static cycle_t vread_tsc(void)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index de2c921025f5..4158acc17df0 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/
- vvar_start = . - 2 * PAGE_SIZE;
+ vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR
hpet_page = vvar_start + PAGE_SIZE;
+ pvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 785d9922b106..491020b2826d 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -73,6 +73,7 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@@ -80,6 +81,7 @@ enum {
const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
+ sym_pvclock_page,
};
struct vdso_sym {
@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
+ [sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 93bd8452383f..3a1d9297074b 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -1,5 +1,5 @@
/*
- * Code for the vDSO. This version uses the old int $0x80 method.
+ * AT_SYSINFO entry point
*/
#include <asm/dwarf2.h>
@@ -21,35 +21,67 @@ __kernel_vsyscall:
/*
* Reshuffle regs so that all of any of the entry instructions
* will preserve enough state.
+ *
+ * A really nice entry sequence would be:
+ * pushl %edx
+ * pushl %ecx
+ * movl %esp, %ecx
+ *
+ * Unfortunately, naughty Android versions between July and December
+ * 2015 actually hardcode the traditional Linux SYSENTER entry
+ * sequence. That is severely broken for a number of reasons (ask
+ * anyone with an AMD CPU, for example). Nonetheless, we try to keep
+ * it working approximately as well as it ever worked.
+ *
+ * This link may eludicate some of the history:
+ * https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
+ * personally, I find it hard to understand what's going on there.
+ *
+ * Note to future user developers: DO NOT USE SYSENTER IN YOUR CODE.
+ * Execute an indirect call to the address in the AT_SYSINFO auxv
+ * entry. That is the ONLY correct way to make a fast 32-bit system
+ * call on Linux. (Open-coding int $0x80 is also fine, but it's
+ * slow.)
*/
+ pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
pushl %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx, 0
- pushl %ecx
+ pushl %ebp
CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx, 0
- movl %esp, %ecx
+ CFI_REL_OFFSET ebp, 0
+
+ #define SYSENTER_SEQUENCE "movl %esp, %ebp; sysenter"
+ #define SYSCALL_SEQUENCE "movl %ecx, %ebp; syscall"
#ifdef CONFIG_X86_64
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
- ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
- "syscall", X86_FEATURE_SYSCALL32
+ ALTERNATIVE_2 "", SYSENTER_SEQUENCE, X86_FEATURE_SYSENTER32, \
+ SYSCALL_SEQUENCE, X86_FEATURE_SYSCALL32
#else
- ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+ ALTERNATIVE "", SYSENTER_SEQUENCE, X86_FEATURE_SEP
#endif
/* Enter using int $0x80 */
- movl (%esp), %ecx
int $0x80
GLOBAL(int80_landing_pad)
- /* Restore ECX and EDX in case they were clobbered. */
- popl %ecx
- CFI_RESTORE ecx
+ /*
+ * Restore EDX and ECX in case they were clobbered. EBP is not
+ * clobbered (the kernel restores it), but it's cleaner and
+ * probably faster to pop it than to adjust ESP using addl.
+ */
+ popl %ebp
+ CFI_RESTORE ebp
CFI_ADJUST_CFA_OFFSET -4
popl %edx
CFI_RESTORE edx
CFI_ADJUST_CFA_OFFSET -4
+ popl %ecx
+ CFI_RESTORE ecx
+ CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 64df47148160..b8f69e264ac4 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]",
.pages = no_pages,
};
+ struct pvclock_vsyscall_time_info *pvti;
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
#endif
+ pvti = pvclock_pvti_cpu0_va();
+ if (pvti && image->sym_pvclock_page) {
+ ret = remap_pfn_range(vma,
+ text_start + image->sym_pvclock_page,
+ __pa(pvti) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
+ }
+
up_fail:
if (ret)
current->mm->context.vdso = NULL;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a30316bf801a..c80f6b6f3da2 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -23,6 +23,11 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP 0 /* Default */
+#define APIC_EXTNMI_ALL 1
+#define APIC_EXTNMI_NONE 2
+
/*
* Define the default level of output to be very little
* This can be turned up by using apic=verbose for more
@@ -303,6 +308,7 @@ struct apic {
unsigned int *apicid);
/* ipi */
+ void (*send_IPI)(int cpu, int vector);
void (*send_IPI_mask)(const struct cpumask *mask, int vector);
void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
int vector);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index ae5fb83e6d91..3e8674288198 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/processor.h>
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
#include <asm/rmwcc.h>
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a11c30b77fb5..a984111135b1 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/processor.h>
//#include <asm/cmpxchg.h>
/* An 64bit atomic type */
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index 0d467b338835..a8303ebe089f 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -31,7 +31,7 @@
#include <asm/types.h>
struct iommu_table {
- struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
+ const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
unsigned long it_base; /* mapped address of tce table */
unsigned long it_hint; /* Hint for next alloc */
unsigned long *it_map; /* A simple allocation bitmap for now */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index f7e142926481..e4959d023af8 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -109,6 +109,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
#endif
-#define system_has_cmpxchg_double() cpu_has_cx8
+#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 1af94697aae5..caa23a34c963 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -18,6 +18,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
cmpxchg_local((ptr), (o), (n)); \
})
-#define system_has_cmpxchg_double() cpu_has_cx16
+#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bf2caa1dedc5..678637ad7476 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -36,4 +36,7 @@ extern int _debug_hotplug_cpu(int cpu, int action);
int mwait_usable(const struct cpuinfo_x86 *);
+unsigned int x86_family(unsigned int sig);
+unsigned int x86_model(unsigned int sig);
+unsigned int x86_stepping(unsigned int sig);
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e4f8010f22e0..7ad8c9464297 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
#include <asm/disabled-features.h>
#endif
-#define NCAPINTS 14 /* N 32-bit words worth of info */
+#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -181,22 +181,17 @@
/*
* Auxiliary flags: Linux defined - For features scattered in various
- * CPUID levels like 0x6, 0xA etc, word 7
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
+
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
+
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */
-#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
-#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
-#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
-#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
+
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
/* Virtualization flags: Linux defined, word 8 */
@@ -205,17 +200,9 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
-#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */
-#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */
-#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
-#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
@@ -258,6 +245,30 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+
/*
* BUG word(s)
*/
@@ -278,6 +289,26 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+enum cpuid_leafs
+{
+ CPUID_1_EDX = 0,
+ CPUID_8000_0001_EDX,
+ CPUID_8086_0001_EDX,
+ CPUID_LNX_1,
+ CPUID_1_ECX,
+ CPUID_C000_0001_EDX,
+ CPUID_8000_0001_ECX,
+ CPUID_LNX_2,
+ CPUID_LNX_3,
+ CPUID_7_0_EBX,
+ CPUID_D_1_EAX,
+ CPUID_F_0_EDX,
+ CPUID_F_1_EDX,
+ CPUID_8000_0008_EBX,
+ CPUID_6_EAX,
+ CPUID_8000_000A_EDX,
+};
+
#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
@@ -355,60 +386,31 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
} while (0)
#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_sep boot_cpu_has(X86_FEATURE_SEP)
-#define cpu_has_mtrr boot_cpu_has(X86_FEATURE_MTRR)
-#define cpu_has_mmx boot_cpu_has(X86_FEATURE_MMX)
#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
-#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3)
#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
-#define cpu_has_xstore boot_cpu_has(X86_FEATURE_XSTORE)
-#define cpu_has_xstore_enabled boot_cpu_has(X86_FEATURE_XSTORE_EN)
-#define cpu_has_xcrypt boot_cpu_has(X86_FEATURE_XCRYPT)
-#define cpu_has_xcrypt_enabled boot_cpu_has(X86_FEATURE_XCRYPT_EN)
-#define cpu_has_ace2 boot_cpu_has(X86_FEATURE_ACE2)
-#define cpu_has_ace2_enabled boot_cpu_has(X86_FEATURE_ACE2_EN)
-#define cpu_has_phe boot_cpu_has(X86_FEATURE_PHE)
-#define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN)
-#define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM)
-#define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN)
-#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS)
-#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1)
-#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
-#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
-#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
-#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
-#define cpu_has_perfctr_l2 boot_cpu_has(X86_FEATURE_PERFCTR_L2)
-#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
-#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
-#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
-#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
-#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
-
-#if __GNUC__ >= 4
+/*
+ * Do not add any more of those clumsy macros - use static_cpu_has_safe() for
+ * fast paths and boot_cpu_has() otherwise!
+ */
+
+#if __GNUC__ >= 4 && defined(CONFIG_X86_FAST_FEATURE_TESTS)
extern void warn_pre_alternatives(void);
extern bool __static_cpu_has_safe(u16 bit);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index f80d70009ff8..6d7d0e52ed5a 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -19,7 +19,6 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
-#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -72,10 +71,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_VSYSCALL_EMULATION
VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
#endif
-#ifdef CONFIG_PARAVIRT_CLOCK
- PVCLOCK_FIXMAP_BEGIN,
- PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
-#endif
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 3c3550c3a4a3..eadcdd5bb946 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -224,18 +224,67 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-/* xstate instruction fault handler: */
-#define xstate_fault(__err) \
- \
- ".section .fixup,\"ax\"\n" \
- \
- "3: movl $-2,%[_err]\n" \
- " jmp 2b\n" \
- \
- ".previous\n" \
- \
- _ASM_EXTABLE(1b, 3b) \
- : [_err] "=r" (__err)
+#define XSTATE_OP(op, st, lmask, hmask, err) \
+ asm volatile("1:" op "\n\t" \
+ "xor %[err], %[err]\n" \
+ "2:\n\t" \
+ ".pushsection .fixup,\"ax\"\n\t" \
+ "3: movl $-2,%[err]\n\t" \
+ "jmp 2b\n\t" \
+ ".popsection\n\t" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
+ * format and supervisor states in addition to modified optimization in
+ * XSAVEOPT.
+ *
+ * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
+ * supports modified optimization which is not supported by XSAVE.
+ *
+ * We use XSAVE as a fallback.
+ *
+ * The 661 label is defined in the ALTERNATIVE* macros as the address of the
+ * original instruction which gets replaced. We need to use it here as the
+ * address of the instruction where we might get an exception at.
+ */
+#define XSTATE_XSAVE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE_2(XSAVE, \
+ XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVES, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ "3:\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "4: movl $-2, %[err]\n" \
+ "jmp 3b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 4b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
+ * XSAVE area format.
+ */
+#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE(XRSTOR, \
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ "3:\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "4: movl $-2, %[err]\n" \
+ "jmp 3b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 4b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
/*
* This function is called only during boot time when x86 caps are not set
@@ -246,22 +295,14 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(system_state != SYSTEM_BOOTING);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
@@ -276,22 +317,14 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(system_state != SYSTEM_BOOTING);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault(err)
- : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory");
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
/* We should never fault when copying from a kernel buffer: */
WARN_ON_FPU(err);
@@ -305,33 +338,11 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
u64 mask = -1;
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
WARN_ON(!alternatives_patched);
- /*
- * If xsaves is enabled, xsaves replaces xsaveopt because
- * it supports compact format and supervisor states in addition to
- * modified optimization in xsaveopt.
- *
- * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
- * because xsaveopt supports modified optimization which is not
- * supported by xsave.
- *
- * If none of xsaves and xsaveopt is enabled, use xsave.
- */
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault(err)
- : "0" (err)
- : "memory");
+ XSTATE_XSAVE(xstate, lmask, hmask, err);
/* We should never fault when copying to a kernel buffer: */
WARN_ON_FPU(err);
@@ -344,23 +355,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
- /*
- * Use xrstors to restore context if it is enabled. xrstors supports
- * compacted format of xsave area which is not supported by xrstor.
- */
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault(err)
- : "0" (err)
- : "memory");
+ XSTATE_XRESTORE(xstate, lmask, hmask, err);
/* We should never fault when copying from a kernel buffer: */
WARN_ON_FPU(err);
@@ -388,12 +385,10 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
if (unlikely(err))
return -EFAULT;
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XSAVE"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault(err)
- : "D" (buf), "a" (-1), "d" (-1), "0" (err)
- : "memory");
+ stac();
+ XSTATE_OP(XSAVE, buf, -1, -1, err);
+ clac();
+
return err;
}
@@ -405,14 +400,12 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
struct xregs_state *xstate = ((__force struct xregs_state *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XRSTOR"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault(err)
- : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
- : "memory"); /* memory required? */
+ int err;
+
+ stac();
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+ clac();
+
return err;
}
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
new file mode 100644
index 000000000000..e1a411786bf5
--- /dev/null
+++ b/arch/x86/include/asm/intel_pt.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_X86_INTEL_PT_H
+#define _ASM_X86_INTEL_PT_H
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+void cpu_emergency_stop_pt(void);
+#else
+static inline void cpu_emergency_stop_pt(void) {}
+#endif
+
+#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 615fa9061b57..cfc9a0d2d07c 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -119,6 +119,8 @@ static inline void
native_apic_mem_write(APIC_ICR, cfg);
}
+extern void default_send_IPI_single(int cpu, int vector);
+extern void default_send_IPI_single_phys(int cpu, int vector);
extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 5daeca3d0f9e..adc54c12cbd1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,12 +1,18 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#ifndef __ASSEMBLY__
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <asm/nops.h>
-#include <asm/asm.h>
+#ifndef HAVE_JUMP_LABEL
+/*
+ * For better or for worse, if jump labels (the gcc extension) are missing,
+ * then the entire static branch patching infrastructure is compiled out.
+ * If that happens, the code in here will malfunction. Raise a compiler
+ * error instead.
+ *
+ * In theory, jump labels and the static branch patching infrastructure
+ * could be decoupled to fix this.
+ */
+#error asm/jump_label.h included on a non-jump-label kernel
+#endif
#define JUMP_LABEL_NOP_SIZE 5
@@ -16,6 +22,14 @@
# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
#endif
+#include <asm/asm.h>
+#include <asm/nops.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
@@ -59,5 +73,40 @@ struct jump_entry {
jump_label_t key;
};
-#endif /* __ASSEMBLY__ */
+#else /* __ASSEMBLY__ */
+
+.macro STATIC_JUMP_IF_TRUE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .else
+ .byte STATIC_KEY_INIT_NOP
+ .endif
+ .pushsection __jump_table, "aw"
+ _ASM_ALIGN
+ _ASM_PTR .Lstatic_jump_\@, \target, \key
+ .popsection
+.endm
+
+.macro STATIC_JUMP_IF_FALSE target, key, def
+.Lstatic_jump_\@:
+ .if \def
+ .byte STATIC_KEY_INIT_NOP
+ .else
+ /* Equivalent to "jmp.d32 \target" */
+ .byte 0xe9
+ .long \target - .Lstatic_jump_after_\@
+.Lstatic_jump_after_\@:
+ .endif
+ .pushsection __jump_table, "aw"
+ _ASM_ALIGN
+ _ASM_PTR .Lstatic_jump_\@, \target, \key + 1
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
#endif
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 34e62b1dcfce..1e1b07a5a738 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_MICROCODE_H
#define _ASM_X86_MICROCODE_H
+#include <asm/cpu.h>
#include <linux/earlycpio.h>
#define native_rdmsr(msr, val1, val2) \
@@ -95,14 +96,14 @@ static inline void __exit exit_amd_microcode(void) {}
/*
* In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
+ * x86_cpuid_vendor() gets vendor id for BSP.
*
* In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
+ * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
*
- * x86_vendor() gets vendor information directly from CPUID.
+ * x86_cpuid_vendor() gets vendor information directly from CPUID.
*/
-static inline int x86_vendor(void)
+static inline int x86_cpuid_vendor(void)
{
u32 eax = 0x00000000;
u32 ebx, ecx = 0, edx;
@@ -118,40 +119,14 @@ static inline int x86_vendor(void)
return X86_VENDOR_UNKNOWN;
}
-static inline unsigned int __x86_family(unsigned int sig)
-{
- unsigned int x86;
-
- x86 = (sig >> 8) & 0xf;
-
- if (x86 == 0xf)
- x86 += (sig >> 20) & 0xff;
-
- return x86;
-}
-
-static inline unsigned int x86_family(void)
+static inline unsigned int x86_cpuid_family(void)
{
u32 eax = 0x00000001;
u32 ebx, ecx = 0, edx;
native_cpuid(&eax, &ebx, &ecx, &edx);
- return __x86_family(eax);
-}
-
-static inline unsigned int x86_model(unsigned int sig)
-{
- unsigned int x86, model;
-
- x86 = __x86_family(sig);
-
- model = (sig >> 4) & 0xf;
-
- if (x86 == 0x6 || x86 == 0xf)
- model += ((sig >> 16) & 0xf) << 4;
-
- return model;
+ return x86_family(eax);
}
#ifdef CONFIG_MICROCODE
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index 93724cc62177..eb4b09b41df5 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -1,7 +1,13 @@
#ifndef _ASM_X86_MSI_H
#define _ASM_X86_MSI_H
#include <asm/hw_irq.h>
+#include <asm/irqdomain.h>
typedef struct irq_alloc_info msi_alloc_info_t;
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg);
+
+void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
+
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 690b4027e17c..b05402ef3b84 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -321,6 +321,7 @@
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_IC_CFG 0xc0011021
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/msr-trace.h b/arch/x86/include/asm/msr-trace.h
new file mode 100644
index 000000000000..7567225747d8
--- /dev/null
+++ b/arch/x86/include/asm/msr-trace.h
@@ -0,0 +1,57 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msr
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE msr-trace
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/
+
+#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MSR_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracing for x86 model specific registers. Directly maps to the
+ * RDMSR/WRMSR instructions.
+ */
+
+DECLARE_EVENT_CLASS(msr_trace_class,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed),
+ TP_STRUCT__entry(
+ __field( unsigned, msr )
+ __field( u64, val )
+ __field( int, failed )
+ ),
+ TP_fast_assign(
+ __entry->msr = msr;
+ __entry->val = val;
+ __entry->failed = failed;
+ ),
+ TP_printk("%x, value %llx%s",
+ __entry->msr,
+ __entry->val,
+ __entry->failed ? " #GP" : "")
+);
+
+DEFINE_EVENT(msr_trace_class, read_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, write_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, rdpmc,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+#endif /* _TRACE_MSR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 77d8b284e4a7..93fb7c1cffda 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -32,6 +32,16 @@ struct msr_regs_info {
int err;
};
+struct saved_msr {
+ bool valid;
+ struct msr_info info;
+};
+
+struct saved_msrs {
+ unsigned int num;
+ struct saved_msr *array;
+};
+
static inline unsigned long long native_read_tscp(unsigned int *aux)
{
unsigned long low, high;
@@ -57,11 +67,34 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Be very careful with includes. This header is prone to include loops.
+ */
+#include <asm/atomic.h>
+#include <linux/tracepoint-defs.h>
+
+extern struct tracepoint __tracepoint_read_msr;
+extern struct tracepoint __tracepoint_write_msr;
+extern struct tracepoint __tracepoint_rdpmc;
+#define msr_tracepoint_active(t) static_key_false(&(t).key)
+extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
+extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
+extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
+#else
+#define msr_tracepoint_active(t) false
+static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
+#endif
+
static inline unsigned long long native_read_msr(unsigned int msr)
{
DECLARE_ARGS(val, low, high);
asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
@@ -78,6 +111,8 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), [fault] "i" (-EIO));
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
return EAX_EDX_VAL(val, low, high);
}
@@ -85,6 +120,8 @@ static inline void native_write_msr(unsigned int msr,
unsigned low, unsigned high)
{
asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
/* Can be uninlined because referenced by paravirt */
@@ -102,6 +139,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
: "c" (msr), "0" (low), "d" (high),
[fault] "i" (-EIO)
: "memory");
+ if (msr_tracepoint_active(__tracepoint_read_msr))
+ do_trace_write_msr(msr, ((u64)high << 32 | low), err);
return err;
}
@@ -160,6 +199,8 @@ static inline unsigned long long native_read_pmc(int counter)
DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
+ if (msr_tracepoint_active(__tracepoint_rdpmc))
+ do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
@@ -190,7 +231,7 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
static inline void wrmsrl(unsigned msr, u64 val)
{
- native_write_msr(msr, (u32)val, (u32)(val >> 32));
+ native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
}
/* wrmsr with exception handling */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index cc071c6f7d4d..7bd0099384ca 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -5,9 +5,9 @@
#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT 12
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 10d0596433f8..f6192502149e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,6 +19,12 @@ static inline int paravirt_enabled(void)
return pv_info.paravirt_enabled;
}
+static inline int paravirt_has_feature(unsigned int feature)
+{
+ WARN_ON_ONCE(!pv_info.paravirt_enabled);
+ return (pv_info.features & feature);
+}
+
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
@@ -285,15 +291,6 @@ static inline void slow_down_io(void)
#endif
}
-#ifdef CONFIG_SMP
-static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
- unsigned long start_esp)
-{
- PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
- phys_apicid, start_eip, start_esp);
-}
-#endif
-
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
@@ -375,23 +372,6 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr,
{
PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
-static inline void pmd_update(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp);
-}
-
-static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
-}
-
-static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp);
-}
static inline pte_t __pte(pteval_t val)
{
@@ -922,23 +902,11 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#define USERGS_SYSRET32 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
-
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
-
-#define ENABLE_INTERRUPTS_SYSEXIT \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
-
#else /* !CONFIG_X86_32 */
/*
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 31247b5bff7c..77db5616a473 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -70,9 +70,14 @@ struct pv_info {
#endif
int paravirt_enabled;
+ unsigned int features; /* valid only if paravirt_enabled is set */
const char *name;
};
+#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
+/* Supported features */
+#define PV_SUPPORTED_RTC (1<<0)
+
struct pv_init_ops {
/*
* Patch may replace one of the defined code sequences with
@@ -157,15 +162,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
-#ifdef CONFIG_X86_32
- /*
- * Atomically enable interrupts and return to userspace. This
- * is only used in 32-bit kernels. 64-bit kernels use
- * usergs_sysret32 instead.
- */
- void (*irq_enable_sysexit)(void);
-#endif
-
/*
* Switch to usermode gs and return to 64-bit usermode using
* sysret. Only used in 64-bit kernels to return to 64-bit
@@ -174,14 +170,6 @@ struct pv_cpu_ops {
*/
void (*usergs_sysret64)(void);
- /*
- * Switch to usermode gs and return to 32-bit usermode using
- * sysret. Used to return to 32-on-64 compat processes.
- * Other usermode register state, including %esp, must already
- * be restored.
- */
- void (*usergs_sysret32)(void);
-
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
@@ -215,14 +203,6 @@ struct pv_irq_ops {
#endif
};
-struct pv_apic_ops {
-#ifdef CONFIG_X86_LOCAL_APIC
- void (*startup_ipi_hook)(int phys_apicid,
- unsigned long start_eip,
- unsigned long start_esp);
-#endif
-};
-
struct pv_mmu_ops {
unsigned long (*read_cr2)(void);
void (*write_cr2)(unsigned long);
@@ -274,12 +254,6 @@ struct pv_mmu_ops {
pmd_t *pmdp, pmd_t pmdval);
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
- void (*pte_update_defer)(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
- void (*pmd_update)(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp);
- void (*pmd_update_defer)(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp);
pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
@@ -354,7 +328,6 @@ struct paravirt_patch_template {
struct pv_time_ops pv_time_ops;
struct pv_cpu_ops pv_cpu_ops;
struct pv_irq_ops pv_irq_ops;
- struct pv_apic_ops pv_apic_ops;
struct pv_mmu_ops pv_mmu_ops;
struct pv_lock_ops pv_lock_ops;
};
@@ -364,7 +337,6 @@ extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
-extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;
extern struct pv_lock_ops pv_lock_ops;
@@ -402,10 +374,8 @@ extern struct pv_lock_ops pv_lock_ops;
__visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \
asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
const void *target, u16 tgt_clobbers,
unsigned long addr, u16 site_clobbers,
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6ec0c8b2e9df..d3eee663c41f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -69,9 +69,6 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define pmd_clear(pmd) native_pmd_clear(pmd)
#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
-#define pmd_update(mm, addr, ptep) do { } while (0)
-#define pmd_update_defer(mm, addr, ptep) do { } while (0)
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
@@ -731,14 +728,9 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
* updates should either be sets, clears, or set_pte_atomic for P->P
* transitions, which means this hook should only be called for user PTEs.
* This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush. The notification can optionally be delayed
- * until the TLB flush event by using the pte_update_defer form of the
- * interface, but care must be taken to assure that the flush happens while
- * still holding the same page table lock so that the shadow and primary pages
- * do not become out of sync on SMP.
+ * requires a subsequent TLB flush.
*/
#define pte_update(mm, addr, ptep) do { } while (0)
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
#endif
/*
@@ -830,9 +822,7 @@ static inline int pmd_write(pmd_t pmd)
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp)
{
- pmd_t pmd = native_pmdp_get_and_clear(pmdp);
- pmd_update(mm, addr, pmdp);
- return pmd;
+ return native_pmdp_get_and_clear(pmdp);
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -840,7 +830,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
- pmd_update(mm, addr, pmdp);
}
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 67522256c7ff..2d5a50cb61a2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -472,6 +472,7 @@ static inline unsigned long current_top_of_stack(void)
#else
#define __cpuid native_cpuid
#define paravirt_enabled() 0
+#define paravirt_has(x) 0
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7a6bed5c08bc..fdcc04020636 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -4,6 +4,15 @@
#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
+#ifdef CONFIG_KVM_GUEST
+extern struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void);
+#else
+static inline struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return NULL;
+}
+#endif
+
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
@@ -91,10 +100,5 @@ struct pvclock_vsyscall_time_info {
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
-#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size);
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
#endif /* _ASM_X86_PVCLOCK_H */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index b002e711ba88..9f92c180ed2f 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -1,6 +1,65 @@
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
#define __ASM_QSPINLOCK_PARAVIRT_H
+/*
+ * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
+ * registers. For i386, however, only 1 32-bit register needs to be saved
+ * and restored. So an optimized version of __pv_queued_spin_unlock() is
+ * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit.
+ */
+#ifdef CONFIG_64BIT
+
+PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+#define __pv_queued_spin_unlock __pv_queued_spin_unlock
+#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock"
+#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath"
+
+/*
+ * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
+ * which combines the registers saving trunk and the body of the following
+ * C code:
+ *
+ * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * {
+ * struct __qspinlock *l = (void *)lock;
+ * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ *
+ * if (likely(lockval == _Q_LOCKED_VAL))
+ * return;
+ * pv_queued_spin_unlock_slowpath(lock, lockval);
+ * }
+ *
+ * For x86-64,
+ * rdi = lock (first argument)
+ * rsi = lockval (second argument)
+ * rdx = internal variable (set to 0)
+ */
+asm (".pushsection .text;"
+ ".globl " PV_UNLOCK ";"
+ ".align 4,0x90;"
+ PV_UNLOCK ": "
+ "push %rdx;"
+ "mov $0x1,%eax;"
+ "xor %edx,%edx;"
+ "lock cmpxchg %dl,(%rdi);"
+ "cmp $0x1,%al;"
+ "jne .slowpath;"
+ "pop %rdx;"
+ "ret;"
+ ".slowpath: "
+ "push %rsi;"
+ "movzbl %al,%esi;"
+ "call " PV_UNLOCK_SLOWPATH ";"
+ "pop %rsi;"
+ "pop %rdx;"
+ "ret;"
+ ".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
+ ".popsection");
+
+#else /* CONFIG_64BIT */
+
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+#endif /* CONFIG_64BIT */
#endif
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index a82c4f1b4d83..2cb1cc253d51 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,5 +25,6 @@ void __noreturn machine_real_restart(unsigned int type);
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+void run_crash_ipi_callback(struct pt_regs *regs);
#endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 222a6a3ca2b5..dfcf0727623b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,15 +21,6 @@
extern int smp_num_siblings;
extern unsigned int num_processors;
-static inline bool cpu_has_ht_siblings(void)
-{
- bool has_siblings = false;
-#ifdef CONFIG_SMP
- has_siblings = cpu_has_ht && smp_num_siblings > 1;
-#endif
- return has_siblings;
-}
-
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
/* cpus sharing the last level cache: */
@@ -74,9 +65,6 @@ struct smp_ops {
extern void set_cpu_sibling_map(int cpu);
#ifdef CONFIG_SMP
-#ifndef CONFIG_PARAVIRT
-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
-#endif
extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index d1793f06854d..8e9dbe7b73a1 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -15,6 +15,7 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
struct desc_ptr gdt_desc;
struct desc_ptr idt;
u16 ldt;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7ebf0ebe4e68..6136a18152af 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -24,6 +24,7 @@ struct saved_context {
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
+ struct saved_msrs saved_msrs;
unsigned long efer;
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 09b1b0ab94b7..660458af425d 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -745,5 +745,14 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
#undef __copy_from_user_overflow
#undef __copy_to_user_overflow
+/*
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
+ *
+ * Caller must use pagefault_enable/disable, or run in interrupt context,
+ * and also do a uaccess_ok() check
+ */
+#define __copy_from_user_nmi __copy_from_user_inatomic
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 756de9190aec..deabaf9759b6 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_hpet_page;
+ long sym_pvclock_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index cd0fc0cc78bc..1ae89a2721d6 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -82,13 +82,11 @@ struct x86_init_paging {
* struct x86_init_timers - platform specific timer setup
* @setup_perpcu_clockev: set up the per cpu clock event device for the
* boot cpu
- * @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
* @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
- void (*tsc_pre_init)(void);
void (*timer_init)(void);
void (*wallclock_init)(void);
};
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 5a08bc8bff33..c54beb44c4c1 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -553,7 +553,7 @@ do { \
if (cpu_has_xmm) { \
xor_speed(&xor_block_pIII_sse); \
xor_speed(&xor_block_sse_pf64); \
- } else if (cpu_has_mmx) { \
+ } else if (boot_cpu_has(X86_FEATURE_MMX)) { \
xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \
} else { \
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 03429da2fa80..2184943341bf 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -16,7 +16,7 @@ struct mce {
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
__u8 inject_flags; /* software inject flags */
__u8 severity;
- __u8 usable_addr;
+ __u8 pad;
__u32 cpuid; /* CPUID 1 EAX */
__u8 cs; /* code segment */
__u8 bank; /* machine check bank */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2f69e3b184f6..8a5cddac7d44 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -82,6 +82,12 @@ physid_mask_t phys_cpu_present_map;
static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
/*
+ * This variable controls which CPUs receive external NMIs. By default,
+ * external NMIs are delivered only to the BSP.
+ */
+static int apic_extnmi = APIC_EXTNMI_BSP;
+
+/*
* Map cpu index to physical APIC ID
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
@@ -1161,6 +1167,8 @@ void __init init_bsp_APIC(void)
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
+ if (apic_extnmi == APIC_EXTNMI_NONE)
+ value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
}
@@ -1378,9 +1386,11 @@ void setup_local_APIC(void)
apic_write(APIC_LVT0, value);
/*
- * only the BP should see the LINT1 NMI signal, obviously.
+ * Only the BSP sees the LINT1 NMI signal by default. This can be
+ * modified by apic_extnmi= boot option.
*/
- if (!cpu)
+ if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
+ apic_extnmi == APIC_EXTNMI_ALL)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -2270,6 +2280,7 @@ static struct {
unsigned int apic_tmict;
unsigned int apic_tdcr;
unsigned int apic_thmr;
+ unsigned int apic_cmci;
} apic_pm_state;
static int lapic_suspend(void)
@@ -2299,6 +2310,10 @@ static int lapic_suspend(void)
if (maxlvt >= 5)
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
+#endif
local_irq_save(flags);
disable_local_APIC();
@@ -2355,10 +2370,14 @@ static void lapic_resume(void)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
if (maxlvt >= 5)
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 6)
+ apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
+#endif
if (maxlvt >= 4)
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
@@ -2548,3 +2567,23 @@ static int __init apic_set_disabled_cpu_apicid(char *arg)
return 0;
}
early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
+
+static int __init apic_set_extnmi(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (!strncmp("all", arg, 3))
+ apic_extnmi = APIC_EXTNMI_ALL;
+ else if (!strncmp("none", arg, 4))
+ apic_extnmi = APIC_EXTNMI_NONE;
+ else if (!strncmp("bsp", arg, 3))
+ apic_extnmi = APIC_EXTNMI_BSP;
+ else {
+ pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+early_param("apic_extnmi", apic_set_extnmi);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f92ab36979a2..9968f30cca3e 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,6 +185,7 @@ static struct apic apic_flat = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = default_send_IPI_single,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_allbutself = flat_send_IPI_allbutself,
@@ -230,17 +231,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
-{
- default_send_IPI_mask_sequence_phys(cpumask, vector);
-}
-
-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
- int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpumask, vector);
-}
-
static void physflat_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -248,7 +238,7 @@ static void physflat_send_IPI_allbutself(int vector)
static void physflat_send_IPI_all(int vector)
{
- physflat_send_IPI_mask(cpu_online_mask, vector);
+ default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
}
static int physflat_probe(void)
@@ -292,8 +282,9 @@ static struct apic apic_physflat = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
- .send_IPI_mask = physflat_send_IPI_mask,
- .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+ .send_IPI = default_send_IPI_single_phys,
+ .send_IPI_mask = default_send_IPI_mask_sequence_phys,
+ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_phys,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 0d96749cfcac..331a7a07c48f 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -30,6 +30,7 @@
#include <asm/e820.h>
static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI(int cpu, int vector) { }
static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
static void noop_send_IPI_allbutself(int vector) { }
@@ -144,6 +145,7 @@ struct apic apic_noop = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = noop_send_IPI,
.send_IPI_mask = noop_send_IPI_mask,
.send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
.send_IPI_allbutself = noop_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 38dd5efdd04c..c80c02c6ec49 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -193,20 +193,17 @@ static int __init numachip_system_init(void)
case 1:
init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
numachip_apic_icr_write = numachip1_apic_icr_write;
- x86_init.pci.arch_init = pci_numachip_init;
break;
case 2:
init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
numachip_apic_icr_write = numachip2_apic_icr_write;
-
- /* Use MCFG config cycles rather than locked CF8 cycles */
- raw_pci_ops = &pci_mmcfg;
break;
default:
return 0;
}
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+ x86_init.pci.arch_init = pci_numachip_init;
return 0;
}
@@ -276,6 +273,7 @@ static const struct apic apic_numachip1 __refconst = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
.send_IPI_allbutself = numachip_send_IPI_allbutself,
@@ -327,6 +325,7 @@ static const struct apic apic_numachip2 __refconst = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = numachip_send_IPI_one,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
.send_IPI_allbutself = numachip_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 971cf8875939..cf9bd896c12d 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -96,11 +96,6 @@ static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_phys(mask, vector);
-}
-
static void bigsmp_send_IPI_allbutself(int vector)
{
default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
@@ -108,7 +103,7 @@ static void bigsmp_send_IPI_allbutself(int vector)
static void bigsmp_send_IPI_all(int vector)
{
- bigsmp_send_IPI_mask(cpu_online_mask, vector);
+ default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
}
static int dmi_bigsmp; /* can be set by dmi scanners */
@@ -180,7 +175,8 @@ static struct apic apic_bigsmp = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
- .send_IPI_mask = bigsmp_send_IPI_mask,
+ .send_IPI = default_send_IPI_single_phys,
+ .send_IPI_mask = default_send_IPI_mask_sequence_phys,
.send_IPI_mask_allbutself = NULL,
.send_IPI_allbutself = bigsmp_send_IPI_allbutself,
.send_IPI_all = bigsmp_send_IPI_all,
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 62071569bd50..eb45fc9b6124 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -18,6 +18,16 @@
#include <asm/proto.h>
#include <asm/ipi.h>
+void default_send_IPI_single_phys(int cpu, int vector)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+}
+
void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
{
unsigned long query_cpu;
@@ -55,6 +65,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+/*
+ * Helper function for APICs which insist on cpumasks
+ */
+void default_send_IPI_single(int cpu, int vector)
+{
+ apic->send_IPI_mask(cpumask_of(cpu), vector);
+}
+
#ifdef CONFIG_X86_32
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 5f1feb6854af..ade25320df96 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -96,8 +96,8 @@ static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
return arg->msi_hwirq;
}
-static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
- int nvec, msi_alloc_info_t *arg)
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct msi_desc *desc = first_pci_msi_entry(pdev);
@@ -113,11 +113,13 @@ static int pci_msi_prepare(struct irq_domain *domain, struct device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(pci_msi_prepare);
-static void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
+void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
}
+EXPORT_SYMBOL_GPL(pci_msi_set_desc);
static struct msi_domain_ops pci_msi_domain_ops = {
.get_hwirq = pci_msi_get_hwirq,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7694ae6c1199..f316e34abb42 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,6 +105,7 @@ static struct apic apic_default = {
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .send_IPI = default_send_IPI_single,
.send_IPI_mask = default_send_IPI_mask_logical,
.send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
.send_IPI_allbutself = default_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 861bc59c8f25..908cb37da171 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -29,6 +29,7 @@ struct apic_chip_data {
};
struct irq_domain *x86_vector_domain;
+EXPORT_SYMBOL_GPL(x86_vector_domain);
static DEFINE_RAW_SPINLOCK(vector_lock);
static cpumask_var_t vector_cpumask;
static struct irq_chip lapic_controller;
@@ -66,6 +67,7 @@ struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
return data ? &data->cfg : NULL;
}
+EXPORT_SYMBOL_GPL(irqd_cfg);
struct irq_cfg *irq_cfg(unsigned int irq)
{
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cc8311c4d298..aca8b75c1552 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -23,6 +23,14 @@ static inline u32 x2apic_cluster(int cpu)
return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
}
+static void x2apic_send_IPI(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ x2apic_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
+}
+
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
@@ -266,6 +274,7 @@ static struct apic apic_x2apic_cluster = {
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 662e9150ea6f..a1242e2c12e6 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -36,6 +36,14 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
+static void x2apic_send_IPI(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ x2apic_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
+}
+
static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
@@ -122,6 +130,7 @@ static struct apic apic_x2apic_phys = {
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .send_IPI = x2apic_send_IPI,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 4a139465f1d4..d760c6bb37b5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -406,6 +406,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
+ .send_IPI = uv_send_IPI_one,
.send_IPI_mask = uv_send_IPI_mask,
.send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_allbutself = uv_send_IPI_allbutself,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 439df975bc7a..84a7524b202c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -65,9 +65,6 @@ void common(void) {
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-#ifdef CONFIG_X86_32
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
-#endif
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d8f42f902a0f..f2edafb5f24e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,7 +23,6 @@ int main(void)
{
#ifdef CONFIG_PARAVIRT
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a8816b325162..e678ddeed030 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,7 +304,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
/* get information required for multi-node processors */
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -678,9 +678,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
* Disable it on the affected CPUs.
*/
if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
- if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
+ if (!rdmsrl_safe(MSR_F15H_IC_CFG, &value) && !(value & 0x1E)) {
value |= 0x1E;
- wrmsrl_safe(0xc0011021, value);
+ wrmsrl_safe(MSR_F15H_IC_CFG, value);
}
}
}
@@ -922,7 +922,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
void set_dr_addr_mask(unsigned long mask, int dr)
{
- if (!cpu_has_bpext)
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
return;
switch (dr) {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index d8fba5c15fbd..ae20be6e483c 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -43,7 +43,7 @@ static void init_c3(struct cpuinfo_x86 *c)
/* store Centaur Extended Feature Flags as
* word 5 of the CPU capability bit array
*/
- c->x86_capability[5] = cpuid_edx(0xC0000001);
+ c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001);
}
#ifdef CONFIG_X86_32
/* Cyrix III family needs CX8 & PGE explicitly enabled. */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c2b7522cbf35..37830de8f60a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -581,14 +581,9 @@ void cpu_detect(struct cpuinfo_x86 *c)
u32 junk, tfms, cap0, misc;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
- c->x86 = (tfms >> 8) & 0xf;
- c->x86_model = (tfms >> 4) & 0xf;
- c->x86_mask = tfms & 0xf;
-
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xf) << 4;
+ c->x86 = x86_family(tfms);
+ c->x86_model = x86_model(tfms);
+ c->x86_mask = x86_stepping(tfms);
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -599,50 +594,47 @@ void cpu_detect(struct cpuinfo_x86 *c)
void get_cpu_cap(struct cpuinfo_x86 *c)
{
- u32 tfms, xlvl;
- u32 ebx;
+ u32 eax, ebx, ecx, edx;
/* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
+ c->x86_capability[CPUID_1_ECX] = ecx;
+ c->x86_capability[CPUID_1_EDX] = edx;
}
/* Additional Intel-defined flags: level 0x00000007 */
if (c->cpuid_level >= 0x00000007) {
- u32 eax, ebx, ecx, edx;
-
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[9] = ebx;
+ c->x86_capability[CPUID_7_0_EBX] = ebx;
+
+ c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
}
/* Extended state features: level 0x0000000d */
if (c->cpuid_level >= 0x0000000d) {
- u32 eax, ebx, ecx, edx;
-
cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[10] = eax;
+ c->x86_capability[CPUID_D_1_EAX] = eax;
}
/* Additional Intel-defined flags: level 0x0000000F */
if (c->cpuid_level >= 0x0000000F) {
- u32 eax, ebx, ecx, edx;
/* QoS sub-leaf, EAX=0Fh, ECX=0 */
cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
- c->x86_capability[11] = edx;
+ c->x86_capability[CPUID_F_0_EDX] = edx;
+
if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
/* will be overridden if occupancy monitoring exists */
c->x86_cache_max_rmid = ebx;
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
- c->x86_capability[12] = edx;
+ c->x86_capability[CPUID_F_1_EDX] = edx;
+
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
@@ -654,22 +646,24 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
}
/* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- c->extended_cpuid_level = xlvl;
+ eax = cpuid_eax(0x80000000);
+ c->extended_cpuid_level = eax;
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
+ if ((eax & 0xffff0000) == 0x80000000) {
+ if (eax >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
}
if (c->extended_cpuid_level >= 0x80000008) {
- u32 eax = cpuid_eax(0x80000008);
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
- c->x86_capability[13] = cpuid_ebx(0x80000008);
+ c->x86_capability[CPUID_8000_0008_EBX] = ebx;
}
#ifdef CONFIG_X86_32
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
@@ -679,6 +673,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ if (c->extended_cpuid_level >= 0x8000000a)
+ c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
+
init_scattered_cpuid_features(c);
}
@@ -1185,7 +1182,7 @@ void syscall_init(void)
* They both write to the same internal register. STAR allows to
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
*/
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
@@ -1443,7 +1440,9 @@ void cpu_init(void)
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
- if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de)
+ if (cpu_feature_enabled(X86_FEATURE_VME) ||
+ cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
load_current_idt();
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 209ac1e7d1f0..565648bc1a0a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -445,7 +445,8 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has_xmm2)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
- if (cpu_has_ds) {
+
+ if (boot_cpu_has(X86_FEATURE_DS)) {
unsigned int l1;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index e38d338a6447..0b6c52388cf4 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -591,7 +591,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (cpu_has_topoext)
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT))
cpuid_count(0x8000001d, index, &eax.full,
&ebx.full, &ecx.full, &edx);
else
@@ -637,7 +637,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
num_cache_leaves = find_num_cache_leaves(c);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
@@ -809,7 +809,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
struct cacheinfo *this_leaf;
int i, sibling;
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
unsigned int apicid, nshared, first, last;
this_leaf = this_cpu_ci->info_list + index;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c5b0d562dbf5..a006f4cd792b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -114,7 +114,6 @@ static struct work_struct mce_work;
static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
-static int mce_usable_address(struct mce *m);
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&mce_irq_work);
}
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granuality for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+ if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+ return 0;
+
+ /* Checks after this one are Intel-specific: */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return 1;
+
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
+ return 0;
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
+ return 0;
+ return 1;
+}
+
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (!mce)
return NOTIFY_DONE;
- if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+ if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, MCE_VECTOR, 0);
}
@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m)
struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor == X86_VENDOR_AMD) {
- /*
- * coming soon
- */
- return false;
+ /* ErrCodeExt[20:16] */
+ u8 xec = (m->status >> 16) & 0x1f;
+
+ return (xec == 0x0 || xec == 0x8);
} else if (c->x86_vendor == X86_VENDOR_INTEL) {
/*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
- bool error_logged = false;
+ bool error_seen = false;
struct mce m;
int severity;
int i;
@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
+ error_seen = true;
+
mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
- /*
- * In the cases where we don't have a valid address after all,
- * do not add it into the ring buffer.
- */
- if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
- if (m.status & MCI_STATUS_ADDRV) {
+ if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
+ if (m.status & MCI_STATUS_ADDRV)
m.severity = severity;
- m.usable_addr = mce_usable_address(&m);
-
- if (!mce_gen_pool_add(&m))
- mce_schedule_work();
- }
- }
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
- error_logged = true;
+ if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
+ else if (mce_usable_address(&m)) {
+ /*
+ * Although we skipped logging this, we still want
+ * to take action. Add to the pool so the registered
+ * notifiers will see it.
+ */
+ if (!mce_gen_pool_add(&m))
+ mce_schedule_work();
}
/*
@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
sync_core();
- return error_logged;
+ return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
@@ -931,23 +951,6 @@ reset:
return ret;
}
-/*
- * Check if the address reported by the CPU is in a format we can parse.
- * It would be possible to add code for most other cases, but all would
- * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granuality for now.
- */
-static int mce_usable_address(struct mce *m)
-{
- if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
- return 0;
- if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
- return 0;
- if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
- return 0;
- return 1;
-}
-
static void mce_clear_state(unsigned long *toclear)
{
int i;
@@ -999,6 +1002,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int flags = MF_ACTION_REQUIRED;
int lmce = 0;
+ /* If this CPU is offline, just bail out. */
+ if (cpu_is_offline(smp_processor_id())) {
+ u64 mcgstatus;
+
+ mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (mcgstatus & MCG_STATUS_RIPV) {
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ return;
+ }
+ }
+
ist_enter(regs);
this_cpu_inc(mce_exception_count);
@@ -1089,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* assuming valid severity level != 0 */
m.severity = severity;
- m.usable_addr = mce_usable_address(&m);
mce_log(&m);
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b3e94ef461fd..faec7120c508 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -129,8 +129,8 @@ void __init load_ucode_bsp(void)
if (!have_cpuid_p())
return;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
@@ -165,8 +165,8 @@ void load_ucode_ap(void)
if (!have_cpuid_p())
return;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
@@ -206,8 +206,8 @@ void reload_early_microcode(void)
{
int vendor, family;
- vendor = x86_vendor();
- family = x86_family();
+ vendor = x86_cpuid_vendor();
+ family = x86_cpuid_family();
switch (vendor) {
case X86_VENDOR_INTEL:
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index ce47402eb2f9..ee81c544ee0d 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -145,10 +145,10 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
int ext_sigcount, i;
struct extended_signature *ext_sig;
- fam = __x86_family(sig);
+ fam = x86_family(sig);
model = x86_model(sig);
- fam_ucode = __x86_family(mc_header->sig);
+ fam_ucode = x86_family(mc_header->sig);
model_ucode = x86_model(mc_header->sig);
if (fam == fam_ucode && model == model_ucode)
@@ -163,7 +163,7 @@ matching_model_microcode(struct microcode_header_intel *mc_header,
ext_sigcount = ext_header->count;
for (i = 0; i < ext_sigcount; i++) {
- fam_ucode = __x86_family(ext_sig->sig);
+ fam_ucode = x86_family(ext_sig->sig);
model_ucode = x86_model(ext_sig->sig);
if (fam == fam_ucode && model == model_ucode)
@@ -365,7 +365,7 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
native_cpuid(&eax, &ebx, &ecx, &edx);
csig.sig = eax;
- family = __x86_family(csig.sig);
+ family = x86_family(csig.sig);
model = x86_model(csig.sig);
if ((model >= 5) || (family > 6)) {
@@ -521,16 +521,12 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
{
#ifdef CONFIG_X86_64
unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
- unsigned int family, model, stepping;
char name[30];
native_cpuid(&eax, &ebx, &ecx, &edx);
- family = __x86_family(eax);
- model = x86_model(eax);
- stepping = eax & 0xf;
-
- sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
+ sprintf(name, "intel-ucode/%02x-%02x-%02x",
+ x86_family(eax), x86_model(eax), x86_stepping(eax));
return get_builtin_firmware(cp, name);
#else
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 70d7c93f4550..0d98503c2245 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -593,9 +593,16 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
unsigned long x_remove_base,
unsigned long x_remove_size, int i)
{
- static struct range range_new[RANGE_NUM];
+ /*
+ * range_new should really be an automatic variable, but
+ * putting 4096 bytes on the stack is frowned upon, to put it
+ * mildly. It is safe to make it a static __initdata variable,
+ * since mtrr_calc_range_state is only called during init and
+ * there's no way it will call itself recursively.
+ */
+ static struct range range_new[RANGE_NUM] __initdata;
unsigned long range_sums_new;
- static int nr_range_new;
+ int nr_range_new;
int num_reg;
/* Convert ranges to var ranges state: */
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 3b533cf37c74..c870af161008 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -349,7 +349,7 @@ static void get_fixed_ranges(mtrr_type *frs)
void mtrr_save_fixed_ranges(void *info)
{
- if (cpu_has_mtrr)
+ if (boot_cpu_has(X86_FEATURE_MTRR))
get_fixed_ranges(mtrr_state.fixed_ranges);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index f891b4750f04..5c3d149ee91c 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -682,7 +682,7 @@ void __init mtrr_bp_init(void)
phys_addr = 32;
- if (cpu_has_mtrr) {
+ if (boot_cpu_has(X86_FEATURE_MTRR)) {
mtrr_if = &generic_mtrr_ops;
size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bf79d7c97df..1b443db2db50 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -482,6 +482,9 @@ int x86_pmu_hw_config(struct perf_event *event)
/* Support for IP fixup */
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
+
+ if (x86_pmu.pebs_prec_dist)
+ precise++;
}
if (event->attr.precise_ip > precise)
@@ -1531,6 +1534,7 @@ static void __init filter_events(struct attribute **attrs)
{
struct device_attribute *d;
struct perf_pmu_events_attr *pmu_attr;
+ int offset = 0;
int i, j;
for (i = 0; attrs[i]; i++) {
@@ -1539,7 +1543,7 @@ static void __init filter_events(struct attribute **attrs)
/* str trumps id */
if (pmu_attr->event_str)
continue;
- if (x86_pmu.event_map(i))
+ if (x86_pmu.event_map(i + offset))
continue;
for (j = i; attrs[j]; j++)
@@ -1547,6 +1551,14 @@ static void __init filter_events(struct attribute **attrs)
/* Check the shifted attr. */
i--;
+
+ /*
+ * event_map() is index based, the attrs array is organized
+ * by increasing event index. If we shift the events, then
+ * we need to compensate for the event_map(), otherwise
+ * we are looking up the wrong event in the map
+ */
+ offset++;
}
}
@@ -2250,12 +2262,19 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
ss_base = get_segment_base(regs->ss);
fp = compat_ptr(ss_base + regs->bp);
+ pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
- bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (!access_ok(VERIFY_READ, fp, 8))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
if (bytes != 0)
break;
@@ -2265,6 +2284,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
perf_callchain_store(entry, cs_base + frame.return_address);
fp = compat_ptr(ss_base + frame.next_frame);
}
+ pagefault_enable();
return 1;
}
#else
@@ -2302,12 +2322,19 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
if (perf_callchain_user32(regs, entry))
return;
+ pagefault_disable();
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
- bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+ if (!access_ok(VERIFY_READ, fp, 16))
+ break;
+
+ bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+ if (bytes != 0)
+ break;
+ bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
if (bytes != 0)
break;
@@ -2315,8 +2342,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
break;
perf_callchain_store(entry, frame.return_address);
- fp = frame.next_frame;
+ fp = (void __user *)frame.next_frame;
}
+ pagefault_enable();
}
/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d0e35ebb2adb..7bb61e32fb29 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -14,17 +14,7 @@
#include <linux/perf_event.h>
-#if 0
-#undef wrmsrl
-#define wrmsrl(msr, val) \
-do { \
- unsigned int _msr = (msr); \
- u64 _val = (val); \
- trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \
- (unsigned long long)(_val)); \
- native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \
-} while (0)
-#endif
+/* To enable MSR tracing please use the generic trace points. */
/*
* | NHM/WSM | SNB |
@@ -318,6 +308,10 @@ struct cpu_hw_events {
#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+/* Constraint on specific umask bit only + event */
+#define INTEL_UBIT_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+
/* Like UEVENT_CONSTRAINT, but match flags too */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
@@ -589,7 +583,8 @@ struct x86_pmu {
bts_active :1,
pebs :1,
pebs_active :1,
- pebs_broken :1;
+ pebs_broken :1,
+ pebs_prec_dist :1;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
@@ -907,6 +902,8 @@ void intel_pmu_lbr_init_hsw(void);
void intel_pmu_lbr_init_skl(void);
+void intel_pmu_lbr_init_knl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 1cee5d2d7ece..58610539b048 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -18,7 +18,7 @@ static __initconst const u64 amd_hw_cache_event_ids
[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
+ [ C(RESULT_ACCESS) ] = 0,
[ C(RESULT_MISS) ] = 0,
},
[ C(OP_PREFETCH) ] = {
@@ -160,7 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
if (offset)
return offset;
- if (!cpu_has_perfctr_core)
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
offset = index;
else
offset = index << 1;
@@ -652,7 +652,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int __init amd_core_pmu_init(void)
{
- if (!cpu_has_perfctr_core)
+ if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
switch (boot_cpu_data.x86) {
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index cc6cedb8f25d..49742746a6c9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -523,10 +523,10 @@ static int __init amd_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
goto fail_nodev;
- if (!cpu_has_topoext)
+ if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
goto fail_nodev;
- if (cpu_has_perfctr_nb) {
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
ret = -ENOMEM;
@@ -540,7 +540,7 @@ static int __init amd_uncore_init(void)
ret = 0;
}
- if (cpu_has_perfctr_l2) {
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_l2) {
ret = -ENOMEM;
@@ -583,10 +583,11 @@ fail_online:
/* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
amd_uncore_nb = amd_uncore_l2 = NULL;
- if (cpu_has_perfctr_l2)
+
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
perf_pmu_unregister(&amd_l2_pmu);
fail_l2:
- if (cpu_has_perfctr_nb)
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
perf_pmu_unregister(&amd_nb_pmu);
if (amd_uncore_l2)
free_percpu(amd_uncore_l2);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e2a430021e46..a667078a5180 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -185,6 +185,14 @@ struct event_constraint intel_skl_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7,
+ MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7,
+ MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -255,7 +263,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
EVENT_CONSTRAINT_END
};
@@ -1457,6 +1465,42 @@ static __initconst const u64 slm_hw_cache_event_ids
},
};
+#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
+#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
+#define KNL_MCDRAM_LOCAL BIT_ULL(21)
+#define KNL_MCDRAM_FAR BIT_ULL(22)
+#define KNL_DDR_LOCAL BIT_ULL(23)
+#define KNL_DDR_FAR BIT_ULL(24)
+#define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
+ KNL_DDR_LOCAL | KNL_DDR_FAR)
+#define KNL_L2_READ SLM_DMND_READ
+#define KNL_L2_WRITE SLM_DMND_WRITE
+#define KNL_L2_PREFETCH SLM_DMND_PREFETCH
+#define KNL_L2_ACCESS SLM_LLC_ACCESS
+#define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
+ KNL_DRAM_ANY | SNB_SNP_ANY | \
+ SNB_NON_DRAM)
+
+static __initconst const u64 knl_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
+ [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS,
+ },
+ },
+};
+
/*
* Use from PMIs where the LBRs are already disabled.
*/
@@ -2475,6 +2519,44 @@ static void intel_pebs_aliases_snb(struct perf_event *event)
}
}
+static void intel_pebs_aliases_precdist(struct perf_event *event)
+{
+ if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use INST_RETIRED.PREC_DIST
+ * (0x01c0), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * The PREC_DIST event has special support to minimize sample
+ * shadowing effects. One drawback is that it can be
+ * only programmed on counter 1, but that seems like an
+ * acceptable trade off.
+ */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+}
+
+static void intel_pebs_aliases_ivb(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_snb(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
+static void intel_pebs_aliases_skl(struct perf_event *event)
+{
+ if (event->attr.precise_ip < 3)
+ return intel_pebs_aliases_core2(event);
+ return intel_pebs_aliases_precdist(event);
+}
+
static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
{
unsigned long flags = x86_pmu.free_running_flags;
@@ -3332,6 +3414,7 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_gen_event_constraints;
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
pr_cont("Atom events, ");
break;
@@ -3431,7 +3514,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
if (boot_cpu_data.x86_model == 62)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
@@ -3464,7 +3548,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
@@ -3499,7 +3584,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
@@ -3511,6 +3597,24 @@ __init int intel_pmu_init(void)
pr_cont("Broadwell events, ");
break;
+ case 87: /* Knights Landing Xeon Phi */
+ memcpy(hw_cache_event_ids,
+ slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs,
+ knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ intel_pmu_lbr_init_knl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_knl_extra_regs;
+
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ pr_cont("Knights Landing events, ");
+ break;
+
case 78: /* 14nm Skylake Mobile */
case 94: /* 14nm Skylake Desktop */
x86_pmu.late_ack = true;
@@ -3521,7 +3625,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
- x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
+ x86_pmu.pebs_prec_dist = true;
/* all extra regs are per-cpu when HT is on */
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5db1c7755548..10602f0a438f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -620,6 +620,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
/* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
EVENT_CONSTRAINT_END
};
@@ -686,6 +688,8 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -700,6 +704,8 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x01cd, 0xf), /* MEM_TRANS_RETIRED.* */
/* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@ -718,9 +724,10 @@ struct event_constraint intel_hsw_pebs_event_constraints[] = {
struct event_constraint intel_skl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */
- INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
- /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
- INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+ /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+ /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
+ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
INTEL_PLD_CONSTRAINT(0x1cd, 0xf), /* MEM_TRANS_RETIRED.* */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
@@ -1101,6 +1108,13 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
void *at;
u64 pebs_status;
+ /*
+ * fmt0 does not have a status bitfield (does not use
+ * perf_record_nhm format)
+ */
+ if (x86_pmu.intel_cap.pebs_format < 1)
+ return base;
+
if (base == NULL)
return NULL;
@@ -1186,7 +1200,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
if (!event->attr.precise_ip)
return;
- n = (top - at) / x86_pmu.pebs_record_size;
+ n = top - at;
if (n <= 0)
return;
@@ -1230,12 +1244,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
pebs_status = p->status & cpuc->pebs_enabled;
pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+ /*
+ * On some CPUs the PEBS status can be zero when PEBS is
+ * racing with clearing of GLOBAL_STATUS.
+ *
+ * Normally we would drop that record, but in the
+ * case when there is only a single active PEBS event
+ * we can assume it's for that event.
+ */
+ if (!pebs_status && cpuc->pebs_enabled &&
+ !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
+ pebs_status = cpuc->pebs_enabled;
+
bit = find_first_bit((unsigned long *)&pebs_status,
x86_pmu.max_pebs_events);
- if (WARN(bit >= x86_pmu.max_pebs_events,
- "PEBS record without PEBS event! status=%Lx pebs_enabled=%Lx active_mask=%Lx",
- (unsigned long long)p->status, (unsigned long long)cpuc->pebs_enabled,
- *(unsigned long long *)cpuc->active_mask))
+ if (bit >= x86_pmu.max_pebs_events)
continue;
/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 659f01e165d5..653f88d25987 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -42,6 +42,13 @@ static enum {
#define LBR_FAR_BIT 8 /* do not capture far branches */
#define LBR_CALL_STACK_BIT 9 /* enable call stack */
+/*
+ * Following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the constraint perf code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT 63 /* don't read LBR_INFO. */
+
#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
#define LBR_USER (1 << LBR_USER_BIT)
#define LBR_JCC (1 << LBR_JCC_BIT)
@@ -52,6 +59,7 @@ static enum {
#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
#define LBR_FAR (1 << LBR_FAR_BIT)
#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT)
#define LBR_PLM (LBR_KERNEL | LBR_USER)
@@ -152,8 +160,8 @@ static void __intel_pmu_lbr_enable(bool pmi)
* did not change.
*/
if (cpuc->lbr_sel)
- lbr_select = cpuc->lbr_sel->config;
- if (!pmi)
+ lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+ if (!pmi && cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, lbr_select);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
@@ -422,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
*/
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
+ bool need_info = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -429,8 +438,11 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
int out = 0;
int num = x86_pmu.lbr_nr;
- if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ if (cpuc->lbr_sel) {
+ need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+ if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+ num = tos;
+ }
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
@@ -442,7 +454,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
- if (lbr_format == LBR_FORMAT_INFO) {
+ if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
@@ -590,6 +602,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if (v != LBR_IGN)
mask |= v;
}
+
reg = &event->hw.branch_reg;
reg->idx = EXTRA_REG_LBR;
@@ -600,6 +613,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
*/
reg->config = mask ^ x86_pmu.lbr_sel_mask;
+ if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+ (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+ (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ reg->config |= LBR_NO_INFO;
+
return 0;
}
@@ -1028,3 +1046,17 @@ void __init intel_pmu_lbr_init_atom(void)
*/
pr_cont("8-deep LBR, ");
}
+
+/* Knights Landing */
+void intel_pmu_lbr_init_knl(void)
+{
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ pr_cont("8-deep LBR, ");
+}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 868e1194337f..c0bbd1033b7c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -27,6 +27,7 @@
#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
+#include <asm/intel_pt.h>
#include "perf_event.h"
#include "intel_pt.h"
@@ -1122,6 +1123,14 @@ static int pt_event_init(struct perf_event *event)
return 0;
}
+void cpu_emergency_stop_pt(void)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+ if (pt->handle.event)
+ pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
+}
+
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index ed446bdcbf31..24a351ad628d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -63,7 +63,7 @@
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
#define NR_RAPL_DOMAINS 0x4
-static const char *rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
@@ -109,11 +109,11 @@ static struct kobj_attribute format_attr_##_var = \
#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
-#define RAPL_EVENT_ATTR_STR(_name, v, str) \
-static struct perf_pmu_events_attr event_attr_##v = { \
- .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \
- .id = 0, \
- .event_str = str, \
+#define RAPL_EVENT_ATTR_STR(_name, v, str) \
+static struct perf_pmu_events_attr event_attr_##v = { \
+ .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
+ .id = 0, \
+ .event_str = str, \
};
struct rapl_pmu {
@@ -405,19 +405,6 @@ static struct attribute_group rapl_pmu_attr_group = {
.attrs = rapl_pmu_attrs,
};
-static ssize_t rapl_sysfs_show(struct device *dev,
- struct device_attribute *attr,
- char *page)
-{
- struct perf_pmu_events_attr *pmu_attr = \
- container_of(attr, struct perf_pmu_events_attr, attr);
-
- if (pmu_attr->event_str)
- return sprintf(page, "%s", pmu_attr->event_str);
-
- return 0;
-}
-
RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 61215a69b03d..f97f8075bf04 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -884,6 +884,15 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
* each box has a different function id.
*/
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ /* Knights Landing uses a common PCI device ID for multiple instances of
+ * an uncore PMU device type. There is only one entry per device type in
+ * the knl_uncore_pci_ids table inspite of multiple devices present for
+ * some device types. Hence PCI device idx would be 0 for all devices.
+ * So increment pmu pointer to point to an unused array element.
+ */
+ if (boot_cpu_data.x86_model == 87)
+ while (pmu->func_id >= 0)
+ pmu++;
if (pmu->func_id < 0)
pmu->func_id = pdev->devfn;
else
@@ -966,6 +975,7 @@ static int __init uncore_pci_init(void)
case 63: /* Haswell-EP */
ret = hswep_uncore_pci_init();
break;
+ case 79: /* BDX-EP */
case 86: /* BDX-DE */
ret = bdx_uncore_pci_init();
break;
@@ -982,6 +992,9 @@ static int __init uncore_pci_init(void)
case 61: /* Broadwell */
ret = bdw_uncore_pci_init();
break;
+ case 87: /* Knights Landing */
+ ret = knl_uncore_pci_init();
+ break;
default:
return 0;
}
@@ -1287,9 +1300,13 @@ static int __init uncore_cpu_init(void)
case 63: /* Haswell-EP */
hswep_uncore_cpu_init();
break;
+ case 79: /* BDX-EP */
case 86: /* BDX-DE */
bdx_uncore_cpu_init();
break;
+ case 87: /* Knights Landing */
+ knl_uncore_cpu_init();
+ break;
default:
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 2f0a4a98e16b..07aa2d6bd710 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -338,6 +338,7 @@ int hsw_uncore_pci_init(void);
int bdw_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
int snbep_uncore_pci_init(void);
@@ -348,6 +349,8 @@ int hswep_uncore_pci_init(void);
void hswep_uncore_cpu_init(void);
int bdx_uncore_pci_init(void);
void bdx_uncore_cpu_init(void);
+int knl_uncore_pci_init(void);
+void knl_uncore_cpu_init(void);
/* perf_event_intel_uncore_nhmex.c */
void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 845256158a10..0b934820fafd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -417,7 +417,7 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
}
}
-static int snb_pci2phy_map_init(int devid)
+int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
struct pci2phy_map *map;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index f0f4fcba252e..33acb884ccf1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -209,31 +209,98 @@
#define HSWEP_PCU_MSR_PMON_BOX_CTL 0x710
#define HSWEP_PCU_MSR_PMON_BOX_FILTER 0x715
+/* KNL Ubox */
+#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+/* KNL CHA */
+#define KNL_CHA_MSR_OFFSET 0xc
+#define KNL_CHA_MSR_PMON_CTL_QOR (1 << 16)
+#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff
+#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32)
+
+/* KNL EDC/MC UCLK */
+#define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400
+#define KNL_UCLK_MSR_PMON_CTL0 0x420
+#define KNL_UCLK_MSR_PMON_BOX_CTL 0x430
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW 0x44c
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL 0x454
+#define KNL_PMON_FIXED_CTL_EN 0x1
+
+/* KNL EDC */
+#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW 0xa00
+#define KNL_EDC0_ECLK_MSR_PMON_CTL0 0xa20
+#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL 0xa30
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW 0xa3c
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL 0xa44
+
+/* KNL MC */
+#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW 0xb00
+#define KNL_MC0_CH0_MSR_PMON_CTL0 0xb20
+#define KNL_MC0_CH0_MSR_PMON_BOX_CTL 0xb30
+#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW 0xb3c
+#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL 0xb44
+
+/* KNL IRP */
+#define KNL_IRP_PCI_PMON_BOX_CTL 0xf0
+#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ KNL_CHA_MSR_PMON_CTL_QOR)
+/* KNL PCU */
+#define KNL_PCU_PMON_CTL_EV_SEL_MASK 0x0000007f
+#define KNL_PCU_PMON_CTL_USE_OCC_CTR (1 << 7)
+#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK 0x3f000000
+#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
+ KNL_PCU_PMON_CTL_USE_OCC_CTR | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_CBO_PMON_CTL_TID_EN | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
+ SNBEP_PMON_CTL_INVERT | \
+ KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
@@ -315,8 +382,9 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
- pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
+ pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
}
static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
@@ -1728,6 +1796,419 @@ int ivbep_uncore_pci_init(void)
}
/* end of IvyTown uncore support */
+/* KNL uncore support */
+static struct attribute *knl_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = HSWEP_U_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_U_MSR_PMON_CTL0,
+ .event_mask = KNL_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_ubox_format_group,
+};
+
+static struct attribute *knl_uncore_cha_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid4.attr,
+ &format_attr_filter_link3.attr,
+ &format_attr_filter_state4.attr,
+ &format_attr_filter_local.attr,
+ &format_attr_filter_all_op.attr,
+ &format_attr_filter_nnm.attr,
+ &format_attr_filter_opc3.attr,
+ &format_attr_filter_nc.attr,
+ &format_attr_filter_isoc.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_cha_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_cha_formats_attr,
+};
+
+static struct event_constraint knl_uncore_cha_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg knl_uncore_cha_extra_regs[] = {
+ SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+ SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
+ EVENT_EXTRA_END
+};
+
+static u64 knl_cha_filter_mask(int fields)
+{
+ u64 mask = 0;
+
+ if (fields & 0x1)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
+ if (fields & 0x2)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
+ if (fields & 0x4)
+ mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
+ return mask;
+}
+
+static struct event_constraint *
+knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
+}
+
+static int knl_cha_hw_config(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct extra_reg *er;
+ int idx = 0;
+
+ for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ idx |= er->idx;
+ }
+
+ if (idx) {
+ reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+ KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+ reg1->idx = idx;
+ }
+ return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
+
+static struct intel_uncore_ops knl_uncore_cha_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = hswep_cbox_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .hw_config = knl_cha_hw_config,
+ .get_constraint = knl_cha_get_constraint,
+ .put_constraint = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type knl_uncore_cha = {
+ .name = "cha",
+ .num_counters = 4,
+ .num_boxes = 38,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
+ .event_mask = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = KNL_CHA_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = knl_uncore_cha_constraints,
+ .ops = &knl_uncore_cha_ops,
+ .format_group = &knl_uncore_cha_format_group,
+};
+
+static struct attribute *knl_uncore_pcu_formats_attr[] = {
+ &format_attr_event2.attr,
+ &format_attr_use_occ_ctr.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh6.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge_det.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = HSWEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = HSWEP_PCU_MSR_PMON_CTL0,
+ .event_mask = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_PCU_MSR_PMON_BOX_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &knl_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *knl_msr_uncores[] = {
+ &knl_uncore_ubox,
+ &knl_uncore_cha,
+ &knl_uncore_pcu,
+ NULL,
+};
+
+void knl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = knl_msr_uncores;
+}
+
+static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+
+ pci_write_config_dword(pdev, box_ctl, 0);
+}
+
+static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
+ == UNCORE_FIXED_EVENT)
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | KNL_PMON_FIXED_CTL_EN);
+ else
+ pci_write_config_dword(pdev, hwc->config_base,
+ hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops knl_uncore_imc_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = knl_uncore_imc_enable_box,
+ .read_counter = snbep_uncore_pci_read_counter,
+ .enable_event = knl_uncore_imc_enable_event,
+ .disable_event = snbep_uncore_pci_disable_event,
+};
+
+static struct intel_uncore_type knl_uncore_imc_uclk = {
+ .name = "imc_uclk",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_imc_dclk = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 6,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_MC0_CH0_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
+ .fixed_ctl = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
+ .box_ctl = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_uclk = {
+ .name = "edc_uclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_UCLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_UCLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+ .fixed_ctl = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+ .box_ctl = KNL_UCLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_eclk = {
+ .name = "edc_eclk",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
+ .event_ctl = KNL_EDC0_ECLK_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
+ .fixed_ctl = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
+ .box_ctl = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
+ .ops = &knl_uncore_imc_ops,
+ .format_group = &snbep_uncore_format_group,
+};
+
+static struct event_constraint knl_uncore_m2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type knl_uncore_m2pcie = {
+ .name = "m2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .constraints = knl_uncore_m2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct attribute *knl_uncore_irp_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_qor.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group knl_uncore_irp_format_group = {
+ .name = "format",
+ .attrs = knl_uncore_irp_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_irp = {
+ .name = "irp",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = KNL_IRP_PCI_PMON_BOX_CTL,
+ .ops = &snbep_uncore_pci_ops,
+ .format_group = &knl_uncore_irp_format_group,
+};
+
+enum {
+ KNL_PCI_UNCORE_MC_UCLK,
+ KNL_PCI_UNCORE_MC_DCLK,
+ KNL_PCI_UNCORE_EDC_UCLK,
+ KNL_PCI_UNCORE_EDC_ECLK,
+ KNL_PCI_UNCORE_M2PCIE,
+ KNL_PCI_UNCORE_IRP,
+};
+
+static struct intel_uncore_type *knl_pci_uncores[] = {
+ [KNL_PCI_UNCORE_MC_UCLK] = &knl_uncore_imc_uclk,
+ [KNL_PCI_UNCORE_MC_DCLK] = &knl_uncore_imc_dclk,
+ [KNL_PCI_UNCORE_EDC_UCLK] = &knl_uncore_edc_uclk,
+ [KNL_PCI_UNCORE_EDC_ECLK] = &knl_uncore_edc_eclk,
+ [KNL_PCI_UNCORE_M2PCIE] = &knl_uncore_m2pcie,
+ [KNL_PCI_UNCORE_IRP] = &knl_uncore_irp,
+ NULL,
+};
+
+/*
+ * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
+ * device type. prior to KNL, each instance of a PMU device type had a unique
+ * device ID.
+ *
+ * PCI Device ID Uncore PMU Devices
+ * ----------------------------------
+ * 0x7841 MC0 UClk, MC1 UClk
+ * 0x7843 MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
+ * MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
+ * 0x7833 EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
+ * EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
+ * 0x7835 EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
+ * EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
+ * 0x7817 M2PCIe
+ * 0x7814 IRP
+*/
+
+static const struct pci_device_id knl_uncore_pci_ids[] = {
+ { /* MC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+ },
+ { /* MC DClk Channel */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+ },
+ { /* EDC UClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+ },
+ { /* EDC EClk */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+ },
+ { /* M2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
+ },
+ { /* IRP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
+ .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver knl_uncore_pci_driver = {
+ .name = "knl_uncore",
+ .id_table = knl_uncore_pci_ids,
+};
+
+int knl_uncore_pci_init(void)
+{
+ int ret;
+
+ /* All KNL PCI based PMON units are on the same PCI bus except IRP */
+ ret = snb_pci2phy_map_init(0x7814); /* IRP */
+ if (ret)
+ return ret;
+ ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
+ if (ret)
+ return ret;
+ uncore_pci_uncores = knl_pci_uncores;
+ uncore_pci_driver = &knl_uncore_pci_driver;
+ return 0;
+}
+
+/* end of KNL uncore support */
+
/* Haswell-EP uncore support */
static struct attribute *hswep_uncore_ubox_formats_attr[] = {
&format_attr_event.attr,
@@ -2338,7 +2819,7 @@ int hswep_uncore_pci_init(void)
}
/* end of Haswell-EP uncore support */
-/* BDX-DE uncore support */
+/* BDX uncore support */
static struct intel_uncore_type bdx_uncore_ubox = {
.name = "ubox",
@@ -2360,13 +2841,14 @@ static struct event_constraint bdx_uncore_cbox_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
EVENT_CONSTRAINT_END
};
static struct intel_uncore_type bdx_uncore_cbox = {
.name = "cbox",
.num_counters = 4,
- .num_boxes = 8,
+ .num_boxes = 24,
.perf_ctr_bits = 48,
.event_ctl = HSWEP_C0_MSR_PMON_CTL0,
.perf_ctr = HSWEP_C0_MSR_PMON_CTR0,
@@ -2379,9 +2861,24 @@ static struct intel_uncore_type bdx_uncore_cbox = {
.format_group = &hswep_uncore_cbox_format_group,
};
+static struct intel_uncore_type bdx_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
+ &bdx_uncore_sbox,
&hswep_uncore_pcu,
NULL,
};
@@ -2396,7 +2893,7 @@ void bdx_uncore_cpu_init(void)
static struct intel_uncore_type bdx_uncore_ha = {
.name = "ha",
.num_counters = 4,
- .num_boxes = 1,
+ .num_boxes = 2,
.perf_ctr_bits = 48,
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
@@ -2404,7 +2901,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
static struct intel_uncore_type bdx_uncore_imc = {
.name = "imc",
.num_counters = 5,
- .num_boxes = 2,
+ .num_boxes = 8,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
.fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
@@ -2424,6 +2921,19 @@ static struct intel_uncore_type bdx_uncore_irp = {
.format_group = &snbep_uncore_format_group,
};
+static struct intel_uncore_type bdx_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
@@ -2432,6 +2942,8 @@ static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
EVENT_CONSTRAINT_END
};
@@ -2445,18 +2957,65 @@ static struct intel_uncore_type bdx_uncore_r2pcie = {
SNBEP_UNCORE_PCI_COMMON_INIT(),
};
+static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 3,
+ .perf_ctr_bits = 48,
+ .constraints = bdx_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
enum {
BDX_PCI_UNCORE_HA,
BDX_PCI_UNCORE_IMC,
BDX_PCI_UNCORE_IRP,
+ BDX_PCI_UNCORE_QPI,
BDX_PCI_UNCORE_R2PCIE,
+ BDX_PCI_UNCORE_R3QPI,
};
static struct intel_uncore_type *bdx_pci_uncores[] = {
[BDX_PCI_UNCORE_HA] = &bdx_uncore_ha,
[BDX_PCI_UNCORE_IMC] = &bdx_uncore_imc,
[BDX_PCI_UNCORE_IRP] = &bdx_uncore_irp,
+ [BDX_PCI_UNCORE_QPI] = &bdx_uncore_qpi,
[BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
+ [BDX_PCI_UNCORE_R3QPI] = &bdx_uncore_r3qpi,
NULL,
};
@@ -2465,6 +3024,10 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
},
+ { /* Home Agent 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
+ },
{ /* MC0 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
@@ -2473,14 +3036,74 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
},
+ { /* MC0 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
+ },
+ { /* MC0 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
+ },
+ { /* MC1 Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
+ },
+ { /* MC1 Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
+ },
+ { /* MC1 Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
+ },
+ { /* MC1 Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
+ },
{ /* IRP */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
},
+ { /* QPI0 Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
+ },
+ { /* QPI0 Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
+ },
+ { /* QPI1 Port 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
+ },
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
},
+ { /* R3QPI0 Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
+ },
+ { /* R3QPI0 Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* R3QPI1 Link 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
+ .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+ },
+ { /* QPI Port 1 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+ },
+ { /* QPI Port 2 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+ },
{ /* end: all zeroes */ }
};
@@ -2500,4 +3123,4 @@ int bdx_uncore_pci_init(void)
return 0;
}
-/* end of BDX-DE uncore support */
+/* end of BDX uncore support */
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 136ac74dee82..819d94982e07 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -33,28 +33,27 @@ static int __init x86_rdrand_setup(char *s)
__setup("nordrand", x86_rdrand_setup);
/*
- * Force a reseed cycle; we are architecturally guaranteed a reseed
- * after no more than 512 128-bit chunks of random data. This also
- * acts as a test of the CPU capability.
+ * RDRAND has Built-In-Self-Test (BIST) that runs on every invocation.
+ * Run the instruction a few times as a sanity check.
+ * If it fails, it is simple to disable RDRAND here.
*/
-#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
+#define SANITY_CHECK_LOOPS 8
void x86_init_rdrand(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_ARCH_RANDOM
unsigned long tmp;
- int i, count, ok;
+ int i;
if (!cpu_has(c, X86_FEATURE_RDRAND))
- return; /* Nothing to do */
+ return;
- for (count = i = 0; i < RESEED_LOOP; i++) {
- ok = rdrand_long(&tmp);
- if (ok)
- count++;
+ for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
+ if (!rdrand_long(&tmp)) {
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+ printk_once(KERN_WARNING "rdrand: disabled\n");
+ return;
+ }
}
-
- if (count != RESEED_LOOP)
- clear_cpu_cap(c, X86_FEATURE_RDRAND);
#endif
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 608fb26c7254..8cb57df9398d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,32 +31,12 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
- { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
- { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
- { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
- { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
- { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 },
- { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 },
- { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 },
- { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 },
- { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
- { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
- { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
- { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
- { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
- { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
- { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
- { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
- { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
- { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
- { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 3fa0e5ad86b4..252da7aceca6 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -12,7 +12,7 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
xlvl = cpuid_eax(0x80860000);
if ((xlvl & 0xffff0000) == 0x80860000) {
if (xlvl >= 0x80860001)
- c->x86_capability[2] = cpuid_edx(0x80860001);
+ c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
}
}
@@ -82,7 +82,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
/* Unhide possibly hidden capability flags */
rdmsr(0x80860004, cap_mask, uk);
wrmsr(0x80860004, ~0, uk);
- c->x86_capability[0] = cpuid_edx(0x00000001);
+ c->x86_capability[CPUID_1_EDX] = cpuid_edx(0x00000001);
wrmsr(0x80860004, cap_mask, uk);
/* All Transmeta CPUs have a constant TSC */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2c1910f6717e..58f34319b29a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,6 +35,7 @@
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
+#include <asm/intel_pt.h>
/* Alignment required for elf header segment */
#define ELF_CORE_HEADER_ALIGN 4096
@@ -125,6 +126,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
+ /*
+ * Disable Intel PT to stop its logging
+ */
+ cpu_emergency_stop_pt();
+
disable_local_APIC();
}
@@ -169,6 +175,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
cpu_emergency_vmxoff();
cpu_emergency_svm_disable();
+ /*
+ * Disable Intel PT to stop its logging
+ */
+ cpu_emergency_stop_pt();
+
#ifdef CONFIG_X86_IO_APIC
/* Prevent crash_kexec() from deadlocking on ioapic_lock. */
ioapic_zap_locks();
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index be39b5fde4b9..7b2978ab30df 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -12,7 +12,7 @@
*/
static void fpu__init_cpu_ctx_switch(void)
{
- if (!cpu_has_eager_fpu)
+ if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
stts();
else
clts();
@@ -143,9 +143,18 @@ static void __init fpu__init_system_generic(void)
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
-/* Enforce that 'MEMBER' is the last field of 'TYPE': */
+/* Get alignment of the TYPE. */
+#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
+
+/*
+ * Enforce that 'MEMBER' is the last field of 'TYPE'.
+ *
+ * Align the computed size with alignment of the TYPE,
+ * because that's how C aligns structs.
+ */
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
- BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
+ BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
+ TYPE_ALIGN(TYPE)))
/*
* We append the 'struct fpu' to the task_struct:
@@ -188,7 +197,7 @@ static void __init fpu__init_task_struct_size(void)
*/
static void __init fpu__init_system_xstate_size_legacy(void)
{
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -278,7 +287,7 @@ __setup("eagerfpu=", eager_fpu_setup);
*/
static void __init fpu__init_system_ctx_switch(void)
{
- static bool on_boot_cpu = 1;
+ static bool on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -287,7 +296,7 @@ static void __init fpu__init_system_ctx_switch(void)
current_thread_info()->status = 0;
/* Auto enable eagerfpu for xsaveopt */
- if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+ if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
eagerfpu = ENABLE;
if (xfeatures_mask & XFEATURE_MASK_EAGER) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 70fc312221fc..40f100285984 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -297,7 +297,7 @@ static void __init setup_xstate_comp(void)
*/
static void __init setup_init_fpu_buf(void)
{
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -608,7 +608,7 @@ static void fpu__init_disable_system_xstate(void)
void __init fpu__init_system_xstate(void)
{
unsigned int eax, ebx, ecx, edx;
- static int on_boot_cpu = 1;
+ static int on_boot_cpu __initdata = 1;
int err;
WARN_ON_FPU(!on_boot_cpu);
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 50a3fad5b89f..2bcfb5f2bc44 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -300,6 +300,10 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
return -EINVAL;
+
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
+ return -EOPNOTSUPP;
+
/*
* It's impossible to use a range breakpoint to fake out
* user vs kernel detection because bp_len - 1 can't
@@ -307,8 +311,6 @@ static int arch_build_bp_info(struct perf_event *bp)
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
- if (!cpu_has_bpext)
- return -EOPNOTSUPP;
info->mask = bp->attr.bp_len - 1;
info->len = X86_BREAKPOINT_LEN_1;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2bd81e302427..72cef58693c7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -45,6 +45,11 @@ early_param("no-kvmclock", parse_no_kvmclock);
static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
+struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
+{
+ return hv_clock;
+}
+
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
@@ -305,7 +310,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
int cpu;
- int ret;
u8 flags;
struct pvclock_vcpu_time_info *vcpu_time;
unsigned int size;
@@ -325,11 +329,6 @@ int __init kvm_setup_vsyscall_timeinfo(void)
return 1;
}
- if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
- put_cpu();
- return ret;
- }
-
put_cpu();
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 697f90db0e37..8a2cdd736fa4 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -29,6 +29,7 @@
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#include <asm/reboot.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -231,7 +232,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
#endif
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
@@ -255,8 +256,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
reason, smp_processor_id());
show_regs(regs);
- if (panic_on_io_nmi)
- panic("NMI IOCK error: Not continuing");
+ if (panic_on_io_nmi) {
+ nmi_panic(regs, "NMI IOCK error: Not continuing");
+
+ /*
+ * If we end up here, it means we have received an NMI while
+ * processing panic(). Simply return without delaying and
+ * re-enabling NMIs.
+ */
+ return;
+ }
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
@@ -297,7 +306,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
pr_emerg("Do you have a strange power saving mode enabled?\n");
if (unknown_nmi_panic || panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ nmi_panic(regs, "NMI: Not continuing");
pr_emerg("Dazed and confused, but trying to continue\n");
}
@@ -348,8 +357,19 @@ static void default_do_nmi(struct pt_regs *regs)
return;
}
- /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
- raw_spin_lock(&nmi_reason_lock);
+ /*
+ * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
+ *
+ * Another CPU may be processing panic routines while holding
+ * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping,
+ * and if so, call its callback directly. If there is no CPU preparing
+ * crash dump, we simply loop here.
+ */
+ while (!raw_spin_trylock(&nmi_reason_lock)) {
+ run_crash_ipi_callback(regs);
+ cpu_relax();
+ }
+
reason = x86_platform.get_nmi_reason();
if (reason & NMI_REASON_MASK) {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c2130aef3f9d..f08ac28b8136 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -74,16 +74,6 @@ void __init default_banner(void)
/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
-unsigned paravirt_patch_nop(void)
-{
- return 0;
-}
-
-unsigned paravirt_patch_ignore(unsigned len)
-{
- return len;
-}
-
struct branch {
unsigned char opcode;
u32 delta;
@@ -133,7 +123,6 @@ static void *get_call_destination(u8 type)
.pv_time_ops = pv_time_ops,
.pv_cpu_ops = pv_cpu_ops,
.pv_irq_ops = pv_irq_ops,
- .pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
@@ -152,8 +141,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
/* If there's no function, patch it with a ud2a (BUG) */
ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
else if (opfunc == _paravirt_nop)
- /* If the operation is a nop, then nop the callsite */
- ret = paravirt_patch_nop();
+ ret = 0;
/* identity functions just return their single argument */
else if (opfunc == _paravirt_ident_32)
@@ -162,10 +150,6 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-#ifdef CONFIG_X86_32
- type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-#endif
- type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
/* If operation requires a jmp, then jmp */
ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
@@ -220,8 +204,6 @@ static u64 native_steal_clock(int cpu)
/* These are in entry.S */
extern void native_iret(void);
-extern void native_irq_enable_sysexit(void);
-extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
static struct resource reserve_ioports = {
@@ -379,13 +361,7 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.load_sp0 = native_load_sp0,
-#if defined(CONFIG_X86_32)
- .irq_enable_sysexit = native_irq_enable_sysexit,
-#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
- .usergs_sysret32 = native_usergs_sysret32,
-#endif
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
@@ -403,12 +379,6 @@ NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
-struct pv_apic_ops pv_apic_ops = {
-#ifdef CONFIG_X86_LOCAL_APIC
- .startup_ipi_hook = paravirt_nop,
-#endif
-};
-
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
@@ -444,9 +414,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_pmd = native_set_pmd,
.set_pmd_at = native_set_pmd_at,
.pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
- .pmd_update = paravirt_nop,
- .pmd_update_defer = paravirt_nop,
.ptep_modify_prot_start = __ptep_modify_prot_start,
.ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -492,6 +459,5 @@ struct pv_mmu_ops pv_mmu_ops = {
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
-EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index c89f50a76e97..158dc0650d5d 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,6 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -46,7 +45,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
- PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 8aa05583bc42..e70087a04cc8 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -13,9 +13,7 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, clts, "clts");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
-DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
DEF_NATIVE(, mov32, "mov %edi, %eax");
@@ -55,7 +53,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
- PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
PATCH_SITE(pv_cpu_ops, swapgs);
PATCH_SITE(pv_mmu_ops, read_cr2);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 0497f719977d..833b1d329c47 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -180,13 +180,13 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl);
static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
static void get_tce_space_from_tar(void);
-static struct cal_chipset_ops calgary_chip_ops = {
+static const struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
.tce_cache_blast = calgary_tce_cache_blast,
.dump_error_regs = calgary_dump_error_regs
};
-static struct cal_chipset_ops calioc2_chip_ops = {
+static const struct cal_chipset_ops calioc2_chip_ops = {
.handle_quirks = calioc2_handle_quirks,
.tce_cache_blast = calioc2_tce_cache_blast,
.dump_error_regs = calioc2_dump_error_regs
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index adf0392d549a..7c577a178859 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -88,7 +88,7 @@ int __init pci_swiotlb_detect_4gb(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
- if (!no_iommu && max_pfn > MAX_DMA32_PFN)
+ if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN)
swiotlb = 1;
#endif
return swiotlb;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e835d263a33b..b9d99e0f82c4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -125,7 +125,7 @@ void release_thread(struct task_struct *dead_task)
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
- dead_task->mm->context.ldt,
+ dead_task->mm->context.ldt->entries,
dead_task->mm->context.ldt->size);
BUG();
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 558f50edebca..32e9d9cbb884 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -124,21 +124,6 @@ const char *regs_query_register_name(unsigned int offset)
return NULL;
}
-static const int arg_offs_table[] = {
-#ifdef CONFIG_X86_32
- [0] = offsetof(struct pt_regs, ax),
- [1] = offsetof(struct pt_regs, dx),
- [2] = offsetof(struct pt_regs, cx)
-#else /* CONFIG_X86_64 */
- [0] = offsetof(struct pt_regs, di),
- [1] = offsetof(struct pt_regs, si),
- [2] = offsetof(struct pt_regs, dx),
- [3] = offsetof(struct pt_regs, cx),
- [4] = offsetof(struct pt_regs, r8),
- [5] = offsetof(struct pt_regs, r9)
-#endif
-};
-
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..99bfc025111d 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -140,27 +140,3 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
-
-#ifdef CONFIG_X86_64
-/*
- * Initialize the generic pvclock vsyscall state. This will allocate
- * a/some page(s) for the per-vcpu pvclock information, set up a
- * fixmap mapping for the page(s)
- */
-
-int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
- int size)
-{
- int idx;
-
- WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
-
- for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
- __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
- __pa(i) + (idx*PAGE_SIZE),
- PAGE_KERNEL_VVAR);
- }
-
- return 0;
-}
-#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 02693dd9a079..d64889aa2d46 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -718,6 +718,7 @@ static int crashing_cpu;
static nmi_shootdown_cb shootdown_callback;
static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
{
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
smp_send_nmi_allbutself();
+ /* Kick CPUs looping in NMI context. */
+ WRITE_ONCE(crash_ipi_issued, 1);
+
msecs = 1000; /* Wait at most a second for the other cpus to stop */
while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
mdelay(1);
@@ -788,9 +792,35 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* Leave the nmi callback set */
}
+
+/*
+ * Check if the crash dumping IPI got issued and if so, call its callback
+ * directly. This function is used when we have already been in NMI handler.
+ * It doesn't return.
+ */
+void run_crash_ipi_callback(struct pt_regs *regs)
+{
+ if (crash_ipi_issued)
+ crash_nmi_callback(0, regs);
+}
+
+/* Override the weak function in kernel/panic.c */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+ while (1) {
+ /* If no CPU is preparing crash dump, we simply loop here. */
+ run_crash_ipi_callback(regs);
+ cpu_relax();
+ }
+}
+
#else /* !CONFIG_SMP */
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
/* No other CPUs to shoot down */
}
+
+void run_crash_ipi_callback(struct pt_regs *regs)
+{
+}
#endif
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index cd9685235df9..4af8d063fb36 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -200,6 +200,9 @@ static __init int add_rtc_cmos(void)
}
#endif
+ if (paravirt_enabled() && !paravirt_has(RTC))
+ return -ENODEV;
+
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
"registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d2bbe343fda7..d3d80e6d42a2 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1048,6 +1048,8 @@ void __init setup_arch(char **cmdline_p)
if (mtrr_trim_uncached_memory(max_pfn))
max_pfn = e820_end_of_ram_pfn();
+ max_possible_pfn = max_pfn;
+
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 12c8286206ce..658777cf3851 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -125,12 +125,12 @@ static void native_smp_send_reschedule(int cpu)
WARN_ON(1);
return;
}
- apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
+ apic->send_IPI(cpu, RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
- apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
}
void native_send_call_func_ipi(const struct cpumask *mask)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fbabe4fcc7fb..24d57f77b3c1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -304,7 +304,7 @@ do { \
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (cpu_has_topoext) {
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
@@ -630,13 +630,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
num_starts = 0;
/*
- * Paravirt / VMI wants a startup IPI hook here to set up the
- * target processor state.
- */
- startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
- stack_start);
-
- /*
* Run STARTUP IPI loop.
*/
pr_debug("#startup loops: %d\n", num_starts);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c7c4d9c51e99..3d743da828d3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1185,8 +1185,6 @@ void __init tsc_init(void)
u64 lpj;
int cpu;
- x86_init.timers.tsc_pre_init();
-
if (!cpu_has_tsc) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 524619351961..483231ebbb0b 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -357,8 +357,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
tsk->thread.sp0 += 16;
- if (cpu_has_sep)
+
+ if (static_cpu_has_safe(X86_FEATURE_SEP))
tsk->thread.sysenter_cs = 0;
+
load_sp0(tss, &tsk->thread);
put_cpu();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 3839628d962e..dad5fe9633a3 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -68,7 +68,6 @@ struct x86_init_ops x86_init __initdata = {
.timers = {
.setup_percpu_clockev = setup_boot_APIC_clock,
- .tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
.wallclock_init = x86_init_noop,
},
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 3f5c48ddba45..c8eda1498121 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -2,6 +2,7 @@
#define ARCH_X86_KVM_CPUID_H
#include "x86.h"
+#include <asm/cpu.h>
int kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -178,4 +179,37 @@ static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
}
#undef BIT_NRIPS
+static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_family(best->eax);
+}
+
+static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_model(best->eax);
+}
+
+static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ if (!best)
+ return -1;
+
+ return x86_stepping(best->eax);
+}
+
#endif
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 08116ff227cc..b0ea42b78ccd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -420,6 +420,7 @@ void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_s
u8 saved_mode;
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
+ WARN_ON(channel != 0);
saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
pit_load_count(kvm, channel, val);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 899c40f826dd..22aef20bf9c7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3053,6 +3053,23 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
+ case MSR_F15H_IC_CFG: {
+
+ int family, model;
+
+ family = guest_cpuid_family(vcpu);
+ model = guest_cpuid_model(vcpu);
+
+ if (family < 0 || model < 0)
+ return kvm_get_msr_common(vcpu, msr_info);
+
+ msr_info->data = 0;
+
+ if (family == 0x15 &&
+ (model >= 0x2 && model < 0x20))
+ msr_info->data = 0x1E;
+ }
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ffc224bbe41..97592e190413 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3606,7 +3606,8 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
sizeof(kvm->arch.vpit->pit_state.channels));
kvm->arch.vpit->pit_state.flags = ps->flags;
for (i = 0; i < 3; i++)
- kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count, start);
+ kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+ start && i == 0);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a0d09f6c6533..4ba229ac3f4f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1414,6 +1414,7 @@ __init void lguest_init(void)
pv_info.kernel_rpl = 1;
/* Everyone except Xen runs with this set. */
pv_info.shared_kernel_pmd = 1;
+ pv_info.features = 0;
/*
* We set up all the lguest overrides for sensitive operations. These
@@ -1472,7 +1473,6 @@ __init void lguest_init(void)
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
- pv_mmu_ops.pte_update_defer = lguest_pte_update;
#ifdef CONFIG_X86_LOCAL_APIC
/* APIC read/write intercepts */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f2587888d987..a501fa25da41 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,7 @@ clean-files := inat-tables.c
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
-lib-y := delay.o misc.o cmdline.o
+lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
new file mode 100644
index 000000000000..aa417a97511c
--- /dev/null
+++ b/arch/x86/lib/cpu.c
@@ -0,0 +1,35 @@
+#include <linux/module.h>
+
+unsigned int x86_family(unsigned int sig)
+{
+ unsigned int x86;
+
+ x86 = (sig >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (sig >> 20) & 0xff;
+
+ return x86;
+}
+EXPORT_SYMBOL_GPL(x86_family);
+
+unsigned int x86_model(unsigned int sig)
+{
+ unsigned int fam, model;
+
+ fam = x86_family(sig);
+
+ model = (sig >> 4) & 0xf;
+
+ if (fam >= 0x6)
+ model += ((sig >> 16) & 0xf) << 4;
+
+ return model;
+}
+EXPORT_SYMBOL_GPL(x86_model);
+
+unsigned int x86_stepping(unsigned int sig)
+{
+ return sig & 0xf;
+}
+EXPORT_SYMBOL_GPL(x86_stepping);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 43623739c7cf..004c861b1648 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,6 +1,8 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <asm/msr.h>
+#define CREATE_TRACE_POINTS
+#include <asm/msr-trace.h>
struct msr *msrs_alloc(void)
{
@@ -108,3 +110,27 @@ int msr_clear_bit(u32 msr, u8 bit)
{
return __flip_bit(msr, bit, false);
}
+
+#ifdef CONFIG_TRACEPOINTS
+void do_trace_write_msr(unsigned msr, u64 val, int failed)
+{
+ trace_write_msr(msr, val, failed);
+}
+EXPORT_SYMBOL(do_trace_write_msr);
+EXPORT_TRACEPOINT_SYMBOL(write_msr);
+
+void do_trace_read_msr(unsigned msr, u64 val, int failed)
+{
+ trace_read_msr(msr, val, failed);
+}
+EXPORT_SYMBOL(do_trace_read_msr);
+EXPORT_TRACEPOINT_SYMBOL(read_msr);
+
+void do_trace_rdpmc(unsigned counter, u64 val, int failed)
+{
+ trace_rdpmc(counter, val, failed);
+}
+EXPORT_SYMBOL(do_trace_rdpmc);
+EXPORT_TRACEPOINT_SYMBOL(rdpmc);
+
+#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 65c47fda26fc..f9d38a48e3c8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
new file mode 100644
index 000000000000..bfcffdf6c577
--- /dev/null
+++ b/arch/x86/mm/debug_pagetables.c
@@ -0,0 +1,46 @@
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <asm/pgtable.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ ptdump_walk_pgd_level(m, NULL);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *pe;
+
+static int __init pt_dump_debug_init(void)
+{
+ pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
+ &ptdump_fops);
+ if (!pe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit pt_dump_debug_exit(void)
+{
+ debugfs_remove_recursive(pe);
+}
+
+module_init(pt_dump_debug_init);
+module_exit(pt_dump_debug_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 0f1c6fc3ddd8..4a6f1d9b5106 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -426,38 +426,15 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
}
-#ifdef CONFIG_X86_PTDUMP
-static int ptdump_show(struct seq_file *m, void *v)
+static int __init pt_dump_init(void)
{
- ptdump_walk_pgd_level(m, NULL);
- return 0;
-}
-
-static int ptdump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, ptdump_show, NULL);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif
-
-static int pt_dump_init(void)
-{
-#ifdef CONFIG_X86_PTDUMP
- struct dentry *pe;
-#endif
-
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
@@ -468,13 +445,6 @@ static int pt_dump_init(void)
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif
-#ifdef CONFIG_X86_PTDUMP
- pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
- &ptdump_fops);
- if (!pe)
- return -ENOMEM;
-#endif
-
return 0;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b9c78f3bcd67..0d8d53d1f5cc 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -194,8 +194,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
- WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
- KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+ if (iomem_map_sanity_check(unaligned_phys_addr, unaligned_size))
+ pr_warn("caller %pS mapping multiple BARs\n", caller);
return ret_addr;
err_free_area:
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a3137a4feed1..6000ad7f560c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
*/
void clflush_cache_range(void *vaddr, unsigned int size)
{
- unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+ const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+ void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
void *vend = vaddr + size;
- void *p;
+
+ if (p >= vend)
+ return;
mb();
- for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
- p < vend; p += boot_cpu_data.x86_clflush_size)
+ for (; p < vend; p += clflush_size)
clflushopt(p);
mb();
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 188e3e07eeeb..031782e74231 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -586,7 +586,7 @@ int free_memtype(u64 start, u64 end)
entry = rbt_memtype_erase(start, end);
spin_unlock(&memtype_lock);
- if (!entry) {
+ if (IS_ERR(entry)) {
pr_info("x86/PAT: %s:%d freeing invalid memtype [mem %#010Lx-%#010Lx]\n",
current->comm, current->pid, start, end - 1);
return -EINVAL;
@@ -992,6 +992,16 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
vma->vm_flags &= ~VM_PAT;
}
+/*
+ * untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
+ * with the old vma after its pfnmap page table has been removed. The new
+ * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
+ */
+void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+ vma->vm_flags &= ~VM_PAT;
+}
+
pgprot_t pgprot_writecombine(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) |
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 63931080366a..2f7702253ccf 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -98,8 +98,13 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
return last_lower; /* Returns NULL if there is no overlap */
}
-static struct memtype *memtype_rb_exact_match(struct rb_root *root,
- u64 start, u64 end)
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_rb_match(struct rb_root *root,
+ u64 start, u64 end, int match_type)
{
struct memtype *match;
@@ -107,7 +112,12 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
while (match != NULL && match->start < end) {
struct rb_node *node;
- if (match->start == start && match->end == end)
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
return match;
node = rb_next(&match->rb);
@@ -117,7 +127,7 @@ static struct memtype *memtype_rb_exact_match(struct rb_root *root,
match = NULL;
}
- return NULL; /* Returns NULL if there is no exact match */
+ return NULL; /* Returns NULL if there is no match */
}
static int memtype_rb_check_conflict(struct rb_root *root,
@@ -210,12 +220,36 @@ struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
struct memtype *data;
- data = memtype_rb_exact_match(&memtype_rbroot, start, end);
- if (!data)
- goto out;
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_rb_match(&memtype_rbroot, start, end,
+ MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ } else {
+ /* mremap: update the end value of this node */
+ rb_erase_augmented(&data->rb, &memtype_rbroot,
+ &memtype_rb_augment_cb);
+ data->end = start;
+ data->subtree_max_end = data->end;
+ memtype_rb_insert(&memtype_rbroot, data);
+ return NULL;
+ }
- rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
-out:
return data;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fb0a9dd1d6e4..ee9c2e3a7199 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -414,7 +414,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*ptep = entry;
- pte_update_defer(vma->vm_mm, address, ptep);
+ pte_update(vma->vm_mm, address, ptep);
}
return changed;
@@ -431,7 +431,6 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*pmdp = entry;
- pmd_update_defer(vma->vm_mm, address, pmdp);
/*
* We had a write-protection fault here and changed the pmd
* to to more permissive. No need to flush the TLB for that,
@@ -469,9 +468,6 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
(unsigned long *)pmdp);
- if (ret)
- pmd_update(vma->vm_mm, addr, pmdp);
-
return ret;
}
#endif
@@ -518,7 +514,6 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
(unsigned long *)pmdp);
if (set) {
- pmd_update(vma->vm_mm, address, pmdp);
/* need tlb flush only to serialize against gup-fast */
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 90555bf60aa4..92e2eacb3321 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -31,7 +31,7 @@ early_param("noexec", noexec_setup);
void x86_configure_nx(void)
{
- if (cpu_has_nx && !disable_nx)
+ if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx)
__supported_pte_mask |= _PAGE_NX;
else
__supported_pte_mask &= ~_PAGE_NX;
@@ -39,7 +39,7 @@ void x86_configure_nx(void)
void __init x86_report_nx(void)
{
- if (!cpu_has_nx) {
+ if (!boot_cpu_has(X86_FEATURE_NX)) {
printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
"missing in CPU!\n");
} else {
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index c2aea63bee20..b5f821881465 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -203,6 +203,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
(unsigned long long)start, (unsigned long long)end - 1);
+ max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
+
return 0;
out_err_bad_srat:
bad_srat();
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 327f21c3bde1..8dd80050d705 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -28,6 +28,7 @@
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/clocksource.h>
#include <asm/apic.h>
#include <asm/current.h>
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 9ab52791fed5..d5f64996394a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -23,6 +23,7 @@
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
+#include <linux/dmi.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
@@ -32,6 +33,29 @@ __visible unsigned long saved_context_eflags;
#endif
struct saved_context saved_context;
+static void msr_save_context(struct saved_context *ctxt)
+{
+ struct saved_msr *msr = ctxt->saved_msrs.array;
+ struct saved_msr *end = msr + ctxt->saved_msrs.num;
+
+ while (msr < end) {
+ msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
+ msr++;
+ }
+}
+
+static void msr_restore_context(struct saved_context *ctxt)
+{
+ struct saved_msr *msr = ctxt->saved_msrs.array;
+ struct saved_msr *end = msr + ctxt->saved_msrs.num;
+
+ while (msr < end) {
+ if (msr->valid)
+ wrmsrl(msr->info.msr_no, msr->info.reg.q);
+ msr++;
+ }
+}
+
/**
* __save_processor_state - save CPU registers before creating a
* hibernation image and before restoring the memory state from it
@@ -111,6 +135,7 @@ static void __save_processor_state(struct saved_context *ctxt)
#endif
ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
&ctxt->misc_enable);
+ msr_save_context(ctxt);
}
/* Needed by apm.c */
@@ -229,6 +254,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
perf_restore_debug_store();
+ msr_restore_context(ctxt);
}
/* Needed by apm.c */
@@ -320,3 +346,69 @@ static int __init bsp_pm_check_init(void)
}
core_initcall(bsp_pm_check_init);
+
+static int msr_init_context(const u32 *msr_id, const int total_num)
+{
+ int i = 0;
+ struct saved_msr *msr_array;
+
+ if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) {
+ pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n");
+ return -EINVAL;
+ }
+
+ msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
+ if (!msr_array) {
+ pr_err("x86/pm: Can not allocate memory to save/restore MSRs during suspend.\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < total_num; i++) {
+ msr_array[i].info.msr_no = msr_id[i];
+ msr_array[i].valid = false;
+ msr_array[i].info.reg.q = 0;
+ }
+ saved_context.saved_msrs.num = total_num;
+ saved_context.saved_msrs.array = msr_array;
+
+ return 0;
+}
+
+/*
+ * The following section is a quirk framework for problematic BIOSen:
+ * Sometimes MSRs are modified by the BIOSen after suspended to
+ * RAM, this might cause unexpected behavior after wakeup.
+ * Thus we save/restore these specified MSRs across suspend/resume
+ * in order to work around it.
+ *
+ * For any further problematic BIOSen/platforms,
+ * please add your own function similar to msr_initialize_bdw.
+ */
+static int msr_initialize_bdw(const struct dmi_system_id *d)
+{
+ /* Add any extra MSR ids into this array. */
+ u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
+
+ pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident);
+ return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
+}
+
+static struct dmi_system_id msr_save_dmi_table[] = {
+ {
+ .callback = msr_initialize_bdw,
+ .ident = "BROADWELL BDX_EP",
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "GRANTLEY"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "E63448-400"),
+ },
+ },
+ {}
+};
+
+static int pm_check_save_msr(void)
+{
+ dmi_check_system(msr_save_dmi_table);
+ return 0;
+}
+
+device_initcall(pm_check_save_msr);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5774800ff583..23063923e364 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1192,7 +1192,7 @@ static const struct pv_info xen_info __initconst = {
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
-
+ .features = 0,
.name = "Xen",
};
@@ -1229,10 +1229,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.iret = xen_iret,
#ifdef CONFIG_X86_64
- .usergs_sysret32 = xen_sysret32,
.usergs_sysret64 = xen_sysret64,
-#else
- .irq_enable_sysexit = xen_sysexit,
#endif
.load_tr_desc = paravirt_nop,
@@ -1265,12 +1262,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.end_context_switch = xen_end_context_switch,
};
-static const struct pv_apic_ops xen_apic_ops __initconst = {
-#ifdef CONFIG_X86_LOCAL_APIC
- .startup_ipi_hook = paravirt_nop,
-#endif
-};
-
static void xen_reboot(int reason)
{
struct sched_shutdown r = { .reason = reason };
@@ -1535,8 +1526,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
+ if (xen_initial_domain())
+ pv_info.features |= PV_SUPPORTED_RTC;
pv_init_ops = xen_init_ops;
- pv_apic_ops = xen_apic_ops;
if (!xen_pvh_domain()) {
pv_cpu_ops = xen_cpu_ops;
@@ -1886,8 +1878,10 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
- if (xen_pv_domain())
+ if (xen_pv_domain()) {
clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ set_cpu_cap(c, X86_FEATURE_XENPV);
+ }
}
const struct hypervisor_x86 x86_hyper_xen = {
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb5e266a8bf7..c913ca4f6958 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2436,7 +2436,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.flush_tlb_others = xen_flush_tlb_others,
.pte_update = paravirt_nop,
- .pte_update_defer = paravirt_nop,
.pgd_alloc = xen_pgd_alloc,
.pgd_free = xen_pgd_free,
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index fd92a64d748e..feb6d40a0860 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -35,20 +35,6 @@ check_events:
ret
/*
- * We can't use sysexit directly, because we're not running in ring0.
- * But we can easily fake it up using iret. Assuming xen_sysexit is
- * jumped to with a standard stack frame, we can just strip it back to
- * a standard iret frame and use iret.
- */
-ENTRY(xen_sysexit)
- movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
- orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
- lea PT_EIP(%esp), %esp
-
- jmp xen_iret
-ENDPROC(xen_sysexit)
-
-/*
* This is run where a normal iret would be run, with the same stack setup:
* 8: eflags
* 4: cs
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index f22667abf7b9..cc8acc410ddb 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,25 +68,6 @@ ENTRY(xen_sysret64)
ENDPATCH(xen_sysret64)
RELOC(xen_sysret64, 1b+1)
-ENTRY(xen_sysret32)
- /*
- * We're already on the usermode stack at this point, but
- * still with the kernel gs, so we can easily switch back
- */
- movq %rsp, PER_CPU_VAR(rsp_scratch)
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-
- pushq $__USER32_DS
- pushq PER_CPU_VAR(rsp_scratch)
- pushq %r11
- pushq $__USER32_CS
- pushq %rcx
-
- pushq $0
-1: jmp hypercall_iret
-ENDPATCH(xen_sysret32)
-RELOC(xen_sysret32, 1b+1)
-
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
* means we have:
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1399423f3418..4140b070f2e9 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -139,9 +139,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
-#ifdef CONFIG_X86_32
-__visible void xen_sysexit(void);
-#endif
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
__visible void xen_adjust_exception_frame(void);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e73846a3d08a..e01405a3e8b3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -81,7 +81,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio *new = NULL;
bio_for_each_segment(bv, bio, iter) {
- if (sectors + (bv.bv_len >> 9) > blk_max_size_offset(q, bio->bi_iter.bi_sector))
+ if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
goto split;
/*
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index f8c0b8dbeb75..88bc8e6b2a54 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -53,7 +53,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
struct dmaengine_unmap_data *unmap = NULL;
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
unsigned long dma_prep_flags = 0;
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 5d355e0c2633..c0748bbd4c08 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -188,7 +188,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
/* XORing P/Q is only implemented in software */
if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
@@ -307,7 +307,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
BUG_ON(disks < 4);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT);
if (unmap && disks <= dma_maxpq(device, 0) &&
is_dma_pq_aligned(device, offset, 0, len)) {
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 934a84981495..8fab6275ea1f 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -41,7 +41,7 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
u8 *a, *b, *c;
if (dma)
- unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
if (unmap) {
struct device *dev = dma->dev;
@@ -105,7 +105,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
u8 *d, *s;
if (dma)
- unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOWAIT);
if (unmap) {
dma_addr_t dma_dest[2];
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index e1bce26cd4f9..da75777f2b3f 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -182,7 +182,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
BUG_ON(src_cnt <= 1);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT);
if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
struct dma_async_tx_descriptor *tx;
@@ -278,7 +278,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
BUG_ON(src_cnt <= 1);
if (device)
- unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
+ unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT);
if (unmap && src_cnt <= device->max_xor &&
is_dma_xor_aligned(device, offset, 0, len)) {
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index 707cf6213bc2..b9afb47db7ed 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -104,7 +104,7 @@ static void acpi_expose_nondev_subnodes(struct kobject *kobj,
init_completion(&dn->kobj_done);
ret = kobject_init_and_add(&dn->kobj, &acpi_data_node_ktype,
- kobj, dn->name);
+ kobj, "%s", dn->name);
if (ret)
acpi_handle_err(dn->handle, "Failed to expose (%d)\n", ret);
else
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 6aaa3f81755b..861643ea91b5 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -100,7 +100,7 @@ config SATA_AHCI_PLATFORM
config AHCI_BRCMSTB
tristate "Broadcom STB AHCI SATA support"
- depends on ARCH_BRCMSTB
+ depends on ARCH_BRCMSTB || BMIPS_GENERIC
help
This option enables support for the AHCI SATA3 controller found on
STB SoC's.
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index cdfbcc54821f..594fcabd22cd 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1306,15 +1306,13 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
#endif
/*
- * ahci_init_msix() only implements single MSI-X support, not multiple
- * MSI-X per-port interrupts. This is needed for host controllers that only
- * have MSI-X support implemented, but no MSI or intx.
+ * ahci_init_msix() - optionally enable per-port MSI-X otherwise defer
+ * to single msi.
*/
static int ahci_init_msix(struct pci_dev *pdev, unsigned int n_ports,
- struct ahci_host_priv *hpriv)
+ struct ahci_host_priv *hpriv, unsigned long flags)
{
- int rc, nvec;
- struct msix_entry entry = {};
+ int nvec, i, rc;
/* Do not init MSI-X if MSI is disabled for the device */
if (hpriv->flags & AHCI_HFLAG_NO_MSI)
@@ -1324,22 +1322,39 @@ static int ahci_init_msix(struct pci_dev *pdev, unsigned int n_ports,
if (nvec < 0)
return nvec;
- if (!nvec) {
+ /*
+ * Proper MSI-X implementations will have a vector per-port.
+ * Barring that, we prefer single-MSI over single-MSIX. If this
+ * check fails (not enough MSI-X vectors for all ports) we will
+ * be called again with the flag clear iff ahci_init_msi()
+ * fails.
+ */
+ if (flags & AHCI_HFLAG_MULTI_MSIX) {
+ if (nvec < n_ports)
+ return -ENODEV;
+ nvec = n_ports;
+ } else if (nvec) {
+ nvec = 1;
+ } else {
+ /*
+ * Emit dev_err() since this was the non-legacy irq
+ * method of last resort.
+ */
rc = -ENODEV;
goto fail;
}
- /*
- * There can be more than one vector (e.g. for error detection or
- * hdd hotplug). Only the first vector (entry.entry = 0) is used.
- */
- rc = pci_enable_msix_exact(pdev, &entry, 1);
+ for (i = 0; i < nvec; i++)
+ hpriv->msix[i].entry = i;
+ rc = pci_enable_msix_exact(pdev, hpriv->msix, nvec);
if (rc < 0)
goto fail;
- hpriv->irq = entry.vector;
+ if (nvec > 1)
+ hpriv->flags |= AHCI_HFLAG_MULTI_MSIX;
+ hpriv->irq = hpriv->msix[0].vector; /* for single msi-x */
- return 1;
+ return nvec;
fail:
dev_err(&pdev->dev,
"failed to enable MSI-X with error %d, # of vectors: %d\n",
@@ -1403,20 +1418,25 @@ static int ahci_init_interrupts(struct pci_dev *pdev, unsigned int n_ports,
{
int nvec;
+ /*
+ * Try to enable per-port MSI-X. If the host is not capable
+ * fall back to single MSI before finally attempting single
+ * MSI-X.
+ */
+ nvec = ahci_init_msix(pdev, n_ports, hpriv, AHCI_HFLAG_MULTI_MSIX);
+ if (nvec >= 0)
+ return nvec;
+
nvec = ahci_init_msi(pdev, n_ports, hpriv);
if (nvec >= 0)
return nvec;
- /*
- * Currently, MSI-X support only implements single IRQ mode and
- * exists for controllers which can't do other types of IRQ. Only
- * set it up if MSI fails.
- */
- nvec = ahci_init_msix(pdev, n_ports, hpriv);
+ /* try single-msix */
+ nvec = ahci_init_msix(pdev, n_ports, hpriv, 0);
if (nvec >= 0)
return nvec;
- /* lagacy intx interrupts */
+ /* legacy intx interrupts */
pci_intx(pdev, 1);
hpriv->irq = pdev->irq;
@@ -1578,7 +1598,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!host)
return -ENOMEM;
host->private_data = hpriv;
-
+ hpriv->msix = devm_kzalloc(&pdev->dev,
+ sizeof(struct msix_entry) * n_ports, GFP_KERNEL);
+ if (!hpriv->msix)
+ return -ENOMEM;
ahci_init_interrupts(pdev, n_ports, hpriv);
if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss)
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 45586c1dbbdc..a4faa438889c 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -35,6 +35,7 @@
#ifndef _AHCI_H
#define _AHCI_H
+#include <linux/pci.h>
#include <linux/clk.h>
#include <linux/libata.h>
#include <linux/phy/phy.h>
@@ -237,11 +238,18 @@ enum {
AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on
port start (wait until
error-handling stage) */
- AHCI_HFLAG_MULTI_MSI = (1 << 16), /* multiple PCI MSIs */
AHCI_HFLAG_NO_DEVSLP = (1 << 17), /* no device sleep */
AHCI_HFLAG_NO_FBS = (1 << 18), /* no FBS */
AHCI_HFLAG_EDGE_IRQ = (1 << 19), /* HOST_IRQ_STAT behaves as
Edge Triggered */
+#ifdef CONFIG_PCI_MSI
+ AHCI_HFLAG_MULTI_MSI = (1 << 20), /* multiple PCI MSIs */
+ AHCI_HFLAG_MULTI_MSIX = (1 << 21), /* per-port MSI-X */
+#else
+ /* compile out MSI infrastructure */
+ AHCI_HFLAG_MULTI_MSI = 0,
+ AHCI_HFLAG_MULTI_MSIX = 0,
+#endif
/* ap->flags bits */
@@ -308,7 +316,6 @@ struct ahci_port_priv {
unsigned int ncq_saw_d2h:1;
unsigned int ncq_saw_dmas:1;
unsigned int ncq_saw_sdb:1;
- atomic_t intr_status; /* interrupts to handle */
spinlock_t lock; /* protects parent ata_port */
u32 intr_mask; /* interrupts to enable */
bool fbs_supported; /* set iff FBS is supported */
@@ -343,6 +350,7 @@ struct ahci_host_priv {
* the PHY position in this array.
*/
struct phy **phys;
+ struct msix_entry *msix; /* Optional MSI-X support */
unsigned nports; /* Number of ports */
void *plat_data; /* Other platform data */
unsigned int irq; /* interrupt line */
@@ -354,6 +362,21 @@ struct ahci_host_priv {
void (*start_engine)(struct ata_port *ap);
};
+#ifdef CONFIG_PCI_MSI
+static inline int ahci_irq_vector(struct ahci_host_priv *hpriv, int port)
+{
+ if (hpriv->flags & AHCI_HFLAG_MULTI_MSIX)
+ return hpriv->msix[port].vector;
+ else
+ return hpriv->irq + port;
+}
+#else
+static inline int ahci_irq_vector(struct ahci_host_priv *hpriv, int port)
+{
+ return hpriv->irq;
+}
+#endif
+
extern int ahci_ignore_sss;
extern struct device_attribute *ahci_shost_attrs[];
diff --git a/drivers/ata/ahci_brcmstb.c b/drivers/ata/ahci_brcmstb.c
index 14b7305d2ba0..b36cae2fd04b 100644
--- a/drivers/ata/ahci_brcmstb.c
+++ b/drivers/ata/ahci_brcmstb.c
@@ -52,8 +52,10 @@
#define SATA_TOP_CTRL_2_PHY_GLOBAL_RESET BIT(14)
#define SATA_TOP_CTRL_PHY_OFFS 0x8
#define SATA_TOP_MAX_PHYS 2
-#define SATA_TOP_CTRL_SATA_TP_OUT 0x1c
-#define SATA_TOP_CTRL_CLIENT_INIT_CTRL 0x20
+
+#define SATA_FIRST_PORT_CTRL 0x700
+#define SATA_NEXT_PORT_CTRL_OFFSET 0x80
+#define SATA_PORT_PCTRL6(reg_base) (reg_base + 0x18)
/* On big-endian MIPS, buses are reversed to big endian, so switch them back */
#if defined(CONFIG_MIPS) && defined(__BIG_ENDIAN)
@@ -69,14 +71,21 @@
(DATA_ENDIAN << DMADESC_ENDIAN_SHIFT) | \
(MMIO_ENDIAN << MMIO_ENDIAN_SHIFT))
+enum brcm_ahci_quirks {
+ BRCM_AHCI_QUIRK_NO_NCQ = BIT(0),
+ BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(1),
+};
+
struct brcm_ahci_priv {
struct device *dev;
void __iomem *top_ctrl;
u32 port_mask;
+ u32 quirks;
};
static const struct ata_port_info ahci_brcm_port_info = {
- .flags = AHCI_FLAG_COMMON,
+ .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM,
+ .link_flags = ATA_LFLAG_NO_DB_DELAY,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_platform_ops,
@@ -107,6 +116,34 @@ static inline void brcm_sata_writereg(u32 val, void __iomem *addr)
writel_relaxed(val, addr);
}
+static void brcm_sata_alpm_init(struct ahci_host_priv *hpriv)
+{
+ struct brcm_ahci_priv *priv = hpriv->plat_data;
+ u32 bus_ctrl, port_ctrl, host_caps;
+ int i;
+
+ /* Enable support for ALPM */
+ bus_ctrl = brcm_sata_readreg(priv->top_ctrl +
+ SATA_TOP_CTRL_BUS_CTRL);
+ brcm_sata_writereg(bus_ctrl | OVERRIDE_HWINIT,
+ priv->top_ctrl + SATA_TOP_CTRL_BUS_CTRL);
+ host_caps = readl(hpriv->mmio + HOST_CAP);
+ writel(host_caps | HOST_CAP_ALPM, hpriv->mmio);
+ brcm_sata_writereg(bus_ctrl, priv->top_ctrl + SATA_TOP_CTRL_BUS_CTRL);
+
+ /*
+ * Adjust timeout to allow PLL sufficient time to lock while waking
+ * up from slumber mode.
+ */
+ for (i = 0, port_ctrl = SATA_FIRST_PORT_CTRL;
+ i < SATA_TOP_MAX_PHYS;
+ i++, port_ctrl += SATA_NEXT_PORT_CTRL_OFFSET) {
+ if (priv->port_mask & BIT(i))
+ writel(0xff1003fc,
+ hpriv->mmio + SATA_PORT_PCTRL6(port_ctrl));
+ }
+}
+
static void brcm_sata_phy_enable(struct brcm_ahci_priv *priv, int port)
{
void __iomem *phyctrl = priv->top_ctrl + SATA_TOP_CTRL_PHY_CTRL +
@@ -114,6 +151,9 @@ static void brcm_sata_phy_enable(struct brcm_ahci_priv *priv, int port)
void __iomem *p;
u32 reg;
+ if (priv->quirks & BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE)
+ return;
+
/* clear PHY_DEFAULT_POWER_STATE */
p = phyctrl + SATA_TOP_CTRL_PHY_CTRL_1;
reg = brcm_sata_readreg(p);
@@ -143,6 +183,9 @@ static void brcm_sata_phy_disable(struct brcm_ahci_priv *priv, int port)
void __iomem *p;
u32 reg;
+ if (priv->quirks & BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE)
+ return;
+
/* power-off the PHY digital logic */
p = phyctrl + SATA_TOP_CTRL_PHY_CTRL_2;
reg = brcm_sata_readreg(p);
@@ -230,6 +273,7 @@ static int brcm_ahci_resume(struct device *dev)
brcm_sata_init(priv);
brcm_sata_phys_enable(priv);
+ brcm_sata_alpm_init(hpriv);
return ahci_platform_resume(dev);
}
#endif
@@ -256,6 +300,11 @@ static int brcm_ahci_probe(struct platform_device *pdev)
if (IS_ERR(priv->top_ctrl))
return PTR_ERR(priv->top_ctrl);
+ if (of_device_is_compatible(dev->of_node, "brcm,bcm7425-ahci")) {
+ priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ;
+ priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE;
+ }
+
brcm_sata_init(priv);
priv->port_mask = brcm_ahci_get_portmask(pdev, priv);
@@ -269,10 +318,15 @@ static int brcm_ahci_probe(struct platform_device *pdev)
return PTR_ERR(hpriv);
hpriv->plat_data = priv;
+ brcm_sata_alpm_init(hpriv);
+
ret = ahci_platform_enable_resources(hpriv);
if (ret)
return ret;
+ if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ)
+ hpriv->flags |= AHCI_HFLAG_NO_NCQ;
+
ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info,
&ahci_platform_sht);
if (ret)
@@ -300,6 +354,7 @@ static int brcm_ahci_remove(struct platform_device *pdev)
}
static const struct of_device_id ahci_of_match[] = {
+ {.compatible = "brcm,bcm7425-ahci"},
{.compatible = "brcm,bcm7445-ahci"},
{},
};
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index d0f9de96e4ea..7bdee9bd8786 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -34,14 +34,20 @@
/* port register default value */
#define AHCI_PORT_PHY_1_CFG 0xa003fffe
-#define AHCI_PORT_PHY_2_CFG 0x28183411
-#define AHCI_PORT_PHY_3_CFG 0x0e081004
-#define AHCI_PORT_PHY_4_CFG 0x00480811
-#define AHCI_PORT_PHY_5_CFG 0x192c96a4
-#define AHCI_PORT_TRANS_CFG 0x08000025
+#define AHCI_PORT_TRANS_CFG 0x08000029
+
+/* for ls1021a */
+#define LS1021A_PORT_PHY2 0x28183414
+#define LS1021A_PORT_PHY3 0x0e080e06
+#define LS1021A_PORT_PHY4 0x064a080b
+#define LS1021A_PORT_PHY5 0x2aa86470
#define SATA_ECC_DISABLE 0x00020000
+/* for ls1043a */
+#define LS1043A_PORT_PHY2 0x28184d1f
+#define LS1043A_PORT_PHY3 0x0e081509
+
enum ahci_qoriq_type {
AHCI_LS1021A,
AHCI_LS1043A,
@@ -151,16 +157,23 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
case AHCI_LS1021A:
writel(SATA_ECC_DISABLE, qpriv->ecc_addr);
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
- writel(AHCI_PORT_PHY_2_CFG, reg_base + PORT_PHY2);
- writel(AHCI_PORT_PHY_3_CFG, reg_base + PORT_PHY3);
- writel(AHCI_PORT_PHY_4_CFG, reg_base + PORT_PHY4);
- writel(AHCI_PORT_PHY_5_CFG, reg_base + PORT_PHY5);
+ writel(LS1021A_PORT_PHY2, reg_base + PORT_PHY2);
+ writel(LS1021A_PORT_PHY3, reg_base + PORT_PHY3);
+ writel(LS1021A_PORT_PHY4, reg_base + PORT_PHY4);
+ writel(LS1021A_PORT_PHY5, reg_base + PORT_PHY5);
writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
break;
case AHCI_LS1043A:
+ writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+ writel(LS1043A_PORT_PHY2, reg_base + PORT_PHY2);
+ writel(LS1043A_PORT_PHY3, reg_base + PORT_PHY3);
+ writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
+ break;
+
case AHCI_LS2080A:
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
+ writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
break;
}
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 4665512dae44..d61740e78d6d 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -43,6 +43,7 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <linux/libata.h>
+#include <linux/pci.h>
#include "ahci.h"
#include "libata.h"
@@ -1804,41 +1805,24 @@ static void ahci_port_intr(struct ata_port *ap)
ahci_handle_port_interrupt(ap, port_mmio, status);
}
-static irqreturn_t ahci_port_thread_fn(int irq, void *dev_instance)
+static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
{
struct ata_port *ap = dev_instance;
- struct ahci_port_priv *pp = ap->private_data;
void __iomem *port_mmio = ahci_port_base(ap);
u32 status;
- status = atomic_xchg(&pp->intr_status, 0);
- if (!status)
- return IRQ_NONE;
-
- spin_lock_bh(ap->lock);
- ahci_handle_port_interrupt(ap, port_mmio, status);
- spin_unlock_bh(ap->lock);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t ahci_multi_irqs_intr(int irq, void *dev_instance)
-{
- struct ata_port *ap = dev_instance;
- void __iomem *port_mmio = ahci_port_base(ap);
- struct ahci_port_priv *pp = ap->private_data;
- u32 status;
-
VPRINTK("ENTER\n");
status = readl(port_mmio + PORT_IRQ_STAT);
writel(status, port_mmio + PORT_IRQ_STAT);
- atomic_or(status, &pp->intr_status);
+ spin_lock(ap->lock);
+ ahci_handle_port_interrupt(ap, port_mmio, status);
+ spin_unlock(ap->lock);
VPRINTK("EXIT\n");
- return IRQ_WAKE_THREAD;
+ return IRQ_HANDLED;
}
static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
@@ -2479,9 +2463,10 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv,
}
EXPORT_SYMBOL_GPL(ahci_set_em_messages);
-static int ahci_host_activate_multi_irqs(struct ata_host *host, int irq,
+static int ahci_host_activate_multi_irqs(struct ata_host *host,
struct scsi_host_template *sht)
{
+ struct ahci_host_priv *hpriv = host->private_data;
int i, rc;
rc = ata_host_start(host);
@@ -2493,6 +2478,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, int irq,
*/
for (i = 0; i < host->n_ports; i++) {
struct ahci_port_priv *pp = host->ports[i]->private_data;
+ int irq = ahci_irq_vector(hpriv, i);
/* Do not receive interrupts sent by dummy ports */
if (!pp) {
@@ -2500,14 +2486,14 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host, int irq,
continue;
}
- rc = devm_request_threaded_irq(host->dev, irq + i,
- ahci_multi_irqs_intr,
- ahci_port_thread_fn, 0,
- pp->irq_desc, host->ports[i]);
+ rc = devm_request_irq(host->dev, irq, ahci_multi_irqs_intr_hard,
+ 0, pp->irq_desc, host->ports[i]);
+
if (rc)
return rc;
- ata_port_desc(host->ports[i], "irq %d", irq + i);
+ ata_port_desc(host->ports[i], "irq %d", irq);
}
+
return ata_host_register(host, sht);
}
@@ -2528,8 +2514,8 @@ int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht)
int irq = hpriv->irq;
int rc;
- if (hpriv->flags & AHCI_HFLAG_MULTI_MSI)
- rc = ahci_host_activate_multi_irqs(host, irq, sht);
+ if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX))
+ rc = ahci_host_activate_multi_irqs(host, sht);
else if (hpriv->flags & AHCI_HFLAG_EDGE_IRQ)
rc = ata_host_activate(host, irq, ahci_single_edge_irq_intr,
IRQF_SHARED, sht);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b79cb10e289e..cbb74719d2c1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -50,6 +50,7 @@
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/timer.h>
+#include <linux/time.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/suspend.h>
@@ -3597,7 +3598,8 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params,
* immediately after resuming. Delay 200ms before
* debouncing.
*/
- ata_msleep(link->ap, 200);
+ if (!(link->flags & ATA_LFLAG_NO_DB_DELAY))
+ ata_msleep(link->ap, 200);
/* is SControl restored correctly? */
if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
@@ -6223,6 +6225,7 @@ int ata_host_activate(struct ata_host *host, int irq,
struct scsi_host_template *sht)
{
int i, rc;
+ char *irq_desc;
rc = ata_host_start(host);
if (rc)
@@ -6234,8 +6237,14 @@ int ata_host_activate(struct ata_host *host, int irq,
return ata_host_register(host, sht);
}
+ irq_desc = devm_kasprintf(host->dev, GFP_KERNEL, "%s[%s]",
+ dev_driver_string(host->dev),
+ dev_name(host->dev));
+ if (!irq_desc)
+ return -ENOMEM;
+
rc = devm_request_irq(host->dev, irq, irq_handler, irq_flags,
- dev_name(host->dev), host);
+ irq_desc, host);
if (rc)
return rc;
@@ -6697,7 +6706,12 @@ void ata_msleep(struct ata_port *ap, unsigned int msecs)
if (owns_eh)
ata_eh_release(ap);
- msleep(msecs);
+ if (msecs < 20) {
+ unsigned long usecs = msecs * USEC_PER_MSEC;
+ usleep_range(usecs, usecs + 50);
+ } else {
+ msleep(msecs);
+ }
if (owns_eh)
ata_eh_acquire(ap);
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 8804127b108c..f72d601e300a 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -854,17 +854,14 @@ static struct of_device_id sata_rcar_match[] = {
.compatible = "renesas,sata-r8a7793",
.data = (void *)RCAR_GEN2_SATA
},
+ {
+ .compatible = "renesas,sata-r8a7795",
+ .data = (void *)RCAR_GEN2_SATA
+ },
{ },
};
MODULE_DEVICE_TABLE(of, sata_rcar_match);
-static const struct platform_device_id sata_rcar_id_table[] = {
- { "sata_rcar", RCAR_GEN1_SATA }, /* Deprecated by "sata-r8a7779" */
- { "sata-r8a7779", RCAR_GEN1_SATA },
- { },
-};
-MODULE_DEVICE_TABLE(platform, sata_rcar_id_table);
-
static int sata_rcar_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
@@ -884,11 +881,10 @@ static int sata_rcar_probe(struct platform_device *pdev)
return -ENOMEM;
of_id = of_match_device(sata_rcar_match, &pdev->dev);
- if (of_id)
- priv->type = (enum sata_rcar_type)of_id->data;
- else
- priv->type = platform_get_device_id(pdev)->driver_data;
+ if (!of_id)
+ return -ENODEV;
+ priv->type = (enum sata_rcar_type)of_id->data;
priv->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(priv->clk)) {
dev_err(&pdev->dev, "failed to get access to sata clock\n");
@@ -1018,7 +1014,6 @@ static const struct dev_pm_ops sata_rcar_pm_ops = {
static struct platform_driver sata_rcar_driver = {
.probe = sata_rcar_probe,
.remove = sata_rcar_remove,
- .id_table = sata_rcar_id_table,
.driver = {
.name = DRV_NAME,
.of_match_table = sata_rcar_match,
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index fab504fd9cfd..48301cb3a316 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -1396,6 +1396,8 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host)
addr = 0;
length = size * 1024 * 1024;
buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL);
+ if (!buf)
+ return 1;
while (addr < length) {
pdc20621_put_to_dimm(host, buf, addr,
ECC_ERASE_BUF_SZ);
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 5df4575b5ba7..47c43386786b 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -24,13 +24,17 @@
#include <linux/msi.h>
#include <linux/slab.h>
-#define DEV_ID_SHIFT 24
+#define DEV_ID_SHIFT 21
+#define MAX_DEV_MSIS (1 << (32 - DEV_ID_SHIFT))
/*
* Internal data structure containing a (made up, but unique) devid
* and the callback to write the MSI message.
*/
struct platform_msi_priv_data {
+ struct device *dev;
+ void *host_data;
+ msi_alloc_info_t arg;
irq_write_msi_msg_t write_msg;
int devid;
};
@@ -110,39 +114,49 @@ static void platform_msi_update_chip_ops(struct msi_domain_info *info)
chip->irq_write_msi_msg = platform_msi_write_msg;
}
-static void platform_msi_free_descs(struct device *dev)
+static void platform_msi_free_descs(struct device *dev, int base, int nvec)
{
struct msi_desc *desc, *tmp;
list_for_each_entry_safe(desc, tmp, dev_to_msi_list(dev), list) {
- list_del(&desc->list);
- free_msi_entry(desc);
+ if (desc->platform.msi_index >= base &&
+ desc->platform.msi_index < (base + nvec)) {
+ list_del(&desc->list);
+ free_msi_entry(desc);
+ }
}
}
-static int platform_msi_alloc_descs(struct device *dev, int nvec,
- struct platform_msi_priv_data *data)
+static int platform_msi_alloc_descs_with_irq(struct device *dev, int virq,
+ int nvec,
+ struct platform_msi_priv_data *data)
{
- int i;
+ struct msi_desc *desc;
+ int i, base = 0;
- for (i = 0; i < nvec; i++) {
- struct msi_desc *desc;
+ if (!list_empty(dev_to_msi_list(dev))) {
+ desc = list_last_entry(dev_to_msi_list(dev),
+ struct msi_desc, list);
+ base = desc->platform.msi_index + 1;
+ }
+ for (i = 0; i < nvec; i++) {
desc = alloc_msi_entry(dev);
if (!desc)
break;
desc->platform.msi_priv_data = data;
- desc->platform.msi_index = i;
+ desc->platform.msi_index = base + i;
desc->nvec_used = 1;
+ desc->irq = virq ? virq + i : 0;
list_add_tail(&desc->list, dev_to_msi_list(dev));
}
if (i != nvec) {
/* Clean up the mess */
- platform_msi_free_descs(dev);
+ platform_msi_free_descs(dev, base, nvec);
return -ENOMEM;
}
@@ -150,6 +164,13 @@ static int platform_msi_alloc_descs(struct device *dev, int nvec,
return 0;
}
+static int platform_msi_alloc_descs(struct device *dev, int nvec,
+ struct platform_msi_priv_data *data)
+
+{
+ return platform_msi_alloc_descs_with_irq(dev, 0, nvec, data);
+}
+
/**
* platform_msi_create_irq_domain - Create a platform MSI interrupt domain
* @fwnode: Optional fwnode of the interrupt controller
@@ -180,56 +201,75 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
return domain;
}
-/**
- * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev
- * @dev: The device for which to allocate interrupts
- * @nvec: The number of interrupts to allocate
- * @write_msi_msg: Callback to write an interrupt message for @dev
- *
- * Returns:
- * Zero for success, or an error code in case of failure
- */
-int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
- irq_write_msi_msg_t write_msi_msg)
+static struct platform_msi_priv_data *
+platform_msi_alloc_priv_data(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
{
- struct platform_msi_priv_data *priv_data;
- int err;
-
+ struct platform_msi_priv_data *datap;
/*
* Limit the number of interrupts to 256 per device. Should we
* need to bump this up, DEV_ID_SHIFT should be adjusted
* accordingly (which would impact the max number of MSI
* capable devices).
*/
- if (!dev->msi_domain || !write_msi_msg || !nvec ||
- nvec > (1 << (32 - DEV_ID_SHIFT)))
- return -EINVAL;
+ if (!dev->msi_domain || !write_msi_msg || !nvec || nvec > MAX_DEV_MSIS)
+ return ERR_PTR(-EINVAL);
if (dev->msi_domain->bus_token != DOMAIN_BUS_PLATFORM_MSI) {
dev_err(dev, "Incompatible msi_domain, giving up\n");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
/* Already had a helping of MSI? Greed... */
if (!list_empty(dev_to_msi_list(dev)))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+
+ datap = kzalloc(sizeof(*datap), GFP_KERNEL);
+ if (!datap)
+ return ERR_PTR(-ENOMEM);
+
+ datap->devid = ida_simple_get(&platform_msi_devid_ida,
+ 0, 1 << DEV_ID_SHIFT, GFP_KERNEL);
+ if (datap->devid < 0) {
+ int err = datap->devid;
+ kfree(datap);
+ return ERR_PTR(err);
+ }
- priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
- if (!priv_data)
- return -ENOMEM;
+ datap->write_msg = write_msi_msg;
+ datap->dev = dev;
- priv_data->devid = ida_simple_get(&platform_msi_devid_ida,
- 0, 1 << DEV_ID_SHIFT, GFP_KERNEL);
- if (priv_data->devid < 0) {
- err = priv_data->devid;
- goto out_free_data;
- }
+ return datap;
+}
+
+static void platform_msi_free_priv_data(struct platform_msi_priv_data *data)
+{
+ ida_simple_remove(&platform_msi_devid_ida, data->devid);
+ kfree(data);
+}
+
+/**
+ * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev
+ * @dev: The device for which to allocate interrupts
+ * @nvec: The number of interrupts to allocate
+ * @write_msi_msg: Callback to write an interrupt message for @dev
+ *
+ * Returns:
+ * Zero for success, or an error code in case of failure
+ */
+int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg)
+{
+ struct platform_msi_priv_data *priv_data;
+ int err;
- priv_data->write_msg = write_msi_msg;
+ priv_data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg);
+ if (IS_ERR(priv_data))
+ return PTR_ERR(priv_data);
err = platform_msi_alloc_descs(dev, nvec, priv_data);
if (err)
- goto out_free_id;
+ goto out_free_priv_data;
err = msi_domain_alloc_irqs(dev->msi_domain, dev, nvec);
if (err)
@@ -238,11 +278,9 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
return 0;
out_free_desc:
- platform_msi_free_descs(dev);
-out_free_id:
- ida_simple_remove(&platform_msi_devid_ida, priv_data->devid);
-out_free_data:
- kfree(priv_data);
+ platform_msi_free_descs(dev, 0, nvec);
+out_free_priv_data:
+ platform_msi_free_priv_data(priv_data);
return err;
}
@@ -253,18 +291,126 @@ out_free_data:
*/
void platform_msi_domain_free_irqs(struct device *dev)
{
- struct msi_desc *desc;
+ if (!list_empty(dev_to_msi_list(dev))) {
+ struct msi_desc *desc;
+
+ desc = first_msi_entry(dev);
+ platform_msi_free_priv_data(desc->platform.msi_priv_data);
+ }
+
+ msi_domain_free_irqs(dev->msi_domain, dev);
+ platform_msi_free_descs(dev, 0, MAX_DEV_MSIS);
+}
+
+/**
+ * platform_msi_get_host_data - Query the private data associated with
+ * a platform-msi domain
+ * @domain: The platform-msi domain
+ *
+ * Returns the private data provided when calling
+ * platform_msi_create_device_domain.
+ */
+void *platform_msi_get_host_data(struct irq_domain *domain)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ return data->host_data;
+}
+
+/**
+ * platform_msi_create_device_domain - Create a platform-msi domain
+ *
+ * @dev: The device generating the MSIs
+ * @nvec: The number of MSIs that need to be allocated
+ * @write_msi_msg: Callback to write an interrupt message for @dev
+ * @ops: The hierarchy domain operations to use
+ * @host_data: Private data associated to this domain
+ *
+ * Returns an irqdomain for @nvec interrupts
+ */
+struct irq_domain *
+platform_msi_create_device_domain(struct device *dev,
+ unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct platform_msi_priv_data *data;
+ struct irq_domain *domain;
+ int err;
+
+ data = platform_msi_alloc_priv_data(dev, nvec, write_msi_msg);
+ if (IS_ERR(data))
+ return NULL;
+
+ data->host_data = host_data;
+ domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
+ of_node_to_fwnode(dev->of_node),
+ ops, data);
+ if (!domain)
+ goto free_priv;
- desc = first_msi_entry(dev);
- if (desc) {
- struct platform_msi_priv_data *data;
+ err = msi_domain_prepare_irqs(domain->parent, dev, nvec, &data->arg);
+ if (err)
+ goto free_domain;
+
+ return domain;
- data = desc->platform.msi_priv_data;
+free_domain:
+ irq_domain_remove(domain);
+free_priv:
+ platform_msi_free_priv_data(data);
+ return NULL;
+}
+
+/**
+ * platform_msi_domain_free - Free interrupts associated with a platform-msi
+ * domain
+ *
+ * @domain: The platform-msi domain
+ * @virq: The base irq from which to perform the free operation
+ * @nvec: How many interrupts to free from @virq
+ */
+void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nvec)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ struct msi_desc *desc;
+ for_each_msi_entry(desc, data->dev) {
+ if (WARN_ON(!desc->irq || desc->nvec_used != 1))
+ return;
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
- ida_simple_remove(&platform_msi_devid_ida, data->devid);
- kfree(data);
+ irq_domain_free_irqs_common(domain, desc->irq, 1);
}
+}
- msi_domain_free_irqs(dev->msi_domain, dev);
- platform_msi_free_descs(dev);
+/**
+ * platform_msi_domain_alloc - Allocate interrupts associated with
+ * a platform-msi domain
+ *
+ * @domain: The platform-msi domain
+ * @virq: The base irq from which to perform the allocate operation
+ * @nvec: How many interrupts to free from @virq
+ *
+ * Return 0 on success, or an error code on failure. Must be called
+ * with irq_domain_mutex held (which can only be done as part of a
+ * top-level interrupt allocation).
+ */
+int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct platform_msi_priv_data *data = domain->host_data;
+ int err;
+
+ err = platform_msi_alloc_descs_with_irq(data->dev, virq, nr_irqs, data);
+ if (err)
+ return err;
+
+ err = msi_domain_populate_irqs(domain->parent, data->dev,
+ virq, nr_irqs, &data->arg);
+ if (err)
+ platform_msi_domain_free(domain, virq, nr_irqs);
+
+ return err;
}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 1dd6d3bf1098..73e399466c6e 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -117,6 +117,26 @@ int platform_get_irq(struct platform_device *dev, unsigned int num)
EXPORT_SYMBOL_GPL(platform_get_irq);
/**
+ * platform_irq_count - Count the number of IRQs a platform device uses
+ * @dev: platform device
+ *
+ * Return: Number of IRQs a platform device uses or EPROBE_DEFER
+ */
+int platform_irq_count(struct platform_device *dev)
+{
+ int ret, nr = 0;
+
+ while ((ret = platform_get_irq(dev, nr)) >= 0)
+ nr++;
+
+ if (ret == -EPROBE_DEFER)
+ return ret;
+
+ return nr;
+}
+EXPORT_SYMBOL_GPL(platform_irq_count);
+
+/**
* platform_get_resource_byname - get a resource for a device by name
* @dev: platform device
* @type: resource type
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 0c98a9d51a24..44ce80606944 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -140,7 +140,7 @@ static int via_rng_init(struct hwrng *rng)
* RNG configuration like it used to be the case in this
* register */
if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
- if (!cpu_has_xstore_enabled) {
+ if (!boot_cpu_has(X86_FEATURE_XSTORE_EN)) {
pr_err(PFX "can't enable hardware RNG "
"if XSTORE is not enabled\n");
return -ENODEV;
@@ -200,8 +200,9 @@ static int __init mod_init(void)
{
int err;
- if (!cpu_has_xstore)
+ if (!boot_cpu_has(X86_FEATURE_XSTORE))
return -ENODEV;
+
pr_info("VIA RNG detected\n");
err = hwrng_register(&via_rng);
if (err) {
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 2eb5f0efae90..b251013eef0a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -28,10 +28,16 @@ config CLKSRC_MMIO
bool
config DIGICOLOR_TIMER
- bool
+ bool "Digicolor timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables the support for the digicolor timer driver.
config DW_APB_TIMER
- bool
+ bool "DW APB timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables the support for the dw_apb timer.
config DW_APB_TIMER_OF
bool
@@ -39,47 +45,77 @@ config DW_APB_TIMER_OF
select CLKSRC_OF
config ROCKCHIP_TIMER
- bool
+ bool "Rockchip timer driver" if COMPILE_TEST
+ depends on ARM || ARM64
select CLKSRC_OF
+ help
+ Enables the support for the rockchip timer driver.
config ARMADA_370_XP_TIMER
- bool
+ bool "Armada 370 and XP timer driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
+ help
+ Enables the support for the Armada 370 and XP timer driver.
config MESON6_TIMER
- bool
+ bool "Meson6 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
+ help
+ Enables the support for the Meson6 timer driver.
config ORION_TIMER
+ bool "Orion timer driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
select CLKSRC_MMIO
- bool
+ help
+ Enables the support for the Orion timer driver
config SUN4I_TIMER
+ bool "Sun4i timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
- bool
+ help
+ Enables support for the Sun4i timer.
config SUN5I_HSTIMER
+ bool "Sun5i timer driver" if COMPILE_TEST
select CLKSRC_MMIO
- bool
+ depends on COMMON_CLK
+ help
+ Enables support the Sun5i timer.
config TEGRA_TIMER
- bool
+ bool "Tegra timer driver" if COMPILE_TEST
+ depends on ARM
+ help
+ Enables support for the Tegra driver.
config VT8500_TIMER
- bool
+ bool "VT8500 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ Enables support for the VT8500 driver.
config CADENCE_TTC_TIMER
- bool
+ bool "Cadence TTC timer driver" if COMPILE_TEST
+ depends on COMMON_CLK
+ help
+ Enables support for the cadence ttc driver.
config ASM9260_TIMER
- bool
+ bool "ASM9260 timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
select CLKSRC_OF
+ help
+ Enables support for the ASM9260 timer.
config CLKSRC_NOMADIK_MTU
- bool
- depends on (ARCH_NOMADIK || ARCH_U8500)
+ bool "Nomakdik clocksource driver" if COMPILE_TEST
+ depends on ARM
select CLKSRC_MMIO
help
Support for Multi Timer Unit. MTU provides access
@@ -93,9 +129,8 @@ config CLKSRC_NOMADIK_MTU_SCHED_CLOCK
Use the Multi Timer Unit as the sched_clock.
config CLKSRC_DBX500_PRCMU
- bool "Clocksource PRCMU Timer"
- depends on UX500_SOC_DB8500
- default y
+ bool "Clocksource PRCMU Timer" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
Use the always on PRCMU Timer as clocksource
@@ -116,13 +151,18 @@ config CLKSRC_EFM32
event device.
config CLKSRC_LPC32XX
- bool
+ bool "Clocksource for LPC32XX" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_MMIO
select CLKSRC_OF
+ help
+ Support for the LPC32XX clocksource.
config CLKSRC_PISTACHIO
- bool
+ bool "Clocksource for Pistachio SoC" if COMPILE_TEST
select CLKSRC_OF
+ help
+ Enables the clocksource for the Pistachio SoC.
config CLKSRC_TI_32K
bool "Texas Instruments 32.768 Hz Clocksource" if COMPILE_TEST
@@ -199,13 +239,14 @@ config CLKSRC_METAG_GENERIC
This option enables support for the Meta per-thread timers.
config CLKSRC_EXYNOS_MCT
- def_bool y if ARCH_EXYNOS
- depends on !ARM64
+ bool "Exynos multi core timer driver" if COMPILE_TEST
+ depends on ARM
help
Support for Multi Core Timer controller on Exynos SoCs.
config CLKSRC_SAMSUNG_PWM
- bool
+ bool "PWM timer drvier for Samsung S3C, S5P" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
This is a new clocksource driver for the PWM timer found in
Samsung S3C, S5P and Exynos SoCs, replacing an earlier driver
@@ -213,7 +254,8 @@ config CLKSRC_SAMSUNG_PWM
needed only on systems that do not have the Exynos MCT available.
config FSL_FTM_TIMER
- bool
+ bool "Freescale FlexTimer Module driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
help
Support for Freescale FlexTimer Module (FTM) timer.
@@ -226,9 +268,12 @@ config SYS_SUPPORTS_SH_CMT
bool
config MTK_TIMER
+ bool "Mediatek timer driver" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
select CLKSRC_OF
select CLKSRC_MMIO
- bool
+ help
+ Support for Mediatek timer driver.
config SYS_SUPPORTS_SH_MTU2
bool
@@ -279,7 +324,12 @@ config EM_TIMER_STI
such as EMEV2 from former NEC Electronics.
config CLKSRC_QCOM
- bool
+ bool "Qualcomm MSM timer" if COMPILE_TEST
+ depends on ARM
+ select CLKSRC_OF
+ help
+ This enables the clocksource and the per CPU clockevent driver for the
+ Qualcomm SoCs.
config CLKSRC_VERSATILE
bool "ARM Versatile (Express) reference platforms clock source"
@@ -298,21 +348,40 @@ config CLKSRC_MIPS_GIC
select CLKSRC_OF
config CLKSRC_TANGO_XTAL
- bool
+ bool "Clocksource for Tango SoC" if COMPILE_TEST
+ depends on ARM
select CLKSRC_OF
+ select CLKSRC_MMIO
+ help
+ This enables the clocksource for Tango SoC
config CLKSRC_PXA
- def_bool y if ARCH_PXA || ARCH_SA1100
- select CLKSRC_OF if OF
+ bool "Clocksource for PXA or SA-11x0 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ select CLKSRC_MMIO
help
This enables OST0 support available on PXA and SA-11x0
platforms.
+config H8300_TMR8
+ bool "Clockevent timer for the H8300 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the 8 bits timer for the H8300 platform.
+
config H8300_TMR16
- bool
+ bool "Clockevent timer for the H83069 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the 16 bits timer for the H8300 platform with the
+ H83069 cpu.
config H8300_TPU
- bool
+ bool "Clocksource for the H8300 platform" if COMPILE_TEST
+ depends on GENERIC_CLOCKEVENTS
+ help
+ This enables the clocksource for the H8300 platform with the
+ H8S2678 cpu.
config CLKSRC_IMX_GPT
bool "Clocksource using i.MX GPT" if COMPILE_TEST
@@ -320,8 +389,7 @@ config CLKSRC_IMX_GPT
select CLKSRC_MMIO
config CLKSRC_ST_LPC
- bool
- depends on ARCH_STI
+ bool "Low power clocksource found in the LPC" if COMPILE_TEST
select CLKSRC_OF if OF
help
Enable this option to use the Low Power controller timer
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 56bd16e77ae3..dc2b8997f6e6 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o
obj-$(CONFIG_CLKSRC_TANGO_XTAL) += tango_xtal.o
obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o
obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o
-obj-$(CONFIG_H8300) += h8300_timer8.o
+obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o
obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o
obj-$(CONFIG_H8300_TPU) += h8300_tpu.o
obj-$(CONFIG_CLKSRC_ST_LPC) += clksrc_st_lpc.o
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 6eab88985670..28037d0b8dcd 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -109,10 +109,8 @@ static void acpi_pm_check_blacklist(struct pci_dev *dev)
/* the bug has been fixed in PIIX4M */
if (dev->revision < 3) {
- printk(KERN_WARNING "* Found PM-Timer Bug on the chipset."
- " Due to workarounds for a bug,\n"
- "* this clock source is slow. Consider trying"
- " other clock sources\n");
+ pr_warn("* Found PM-Timer Bug on the chipset. Due to workarounds for a bug,\n"
+ "* this clock source is slow. Consider trying other clock sources\n");
acpi_pm_need_workaround();
}
@@ -125,12 +123,9 @@ static void acpi_pm_check_graylist(struct pci_dev *dev)
if (acpi_pm_good)
return;
- printk(KERN_WARNING "* The chipset may have PM-Timer Bug. Due to"
- " workarounds for a bug,\n"
- "* this clock source is slow. If you are sure your timer"
- " does not have\n"
- "* this bug, please use \"acpi_pm_good\" to disable the"
- " workaround\n");
+ pr_warn("* The chipset may have PM-Timer Bug. Due to workarounds for a bug,\n"
+ "* this clock source is slow. If you are sure your timer does not have\n"
+ "* this bug, please use \"acpi_pm_good\" to disable the workaround\n");
acpi_pm_need_workaround();
}
@@ -162,8 +157,7 @@ static int verify_pmtmr_rate(void)
/* Check that the PMTMR delta is within 5% of what we expect */
if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% "
- "of normal - aborting.\n",
+ pr_info("PM-Timer running at invalid rate: %lu%% of normal - aborting.\n",
100UL * delta / PMTMR_EXPECTED_RATE);
return -1;
}
@@ -199,15 +193,14 @@ static int __init init_acpi_pm_clocksource(void)
break;
if ((value2 < value1) && ((value2) < 0xFFF))
break;
- printk(KERN_INFO "PM-Timer had inconsistent results:"
- " %#llx, %#llx - aborting.\n",
- value1, value2);
+ pr_info("PM-Timer had inconsistent results: %#llx, %#llx - aborting.\n",
+ value1, value2);
pmtmr_ioport = 0;
return -EINVAL;
}
if (i == ACPI_PM_READ_CHECKS) {
- printk(KERN_INFO "PM-Timer failed consistency check "
- " (%#llx) - aborting.\n", value1);
+ pr_info("PM-Timer failed consistency check (%#llx) - aborting.\n",
+ value1);
pmtmr_ioport = 0;
return -ENODEV;
}
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index a2cb6fae9295..d189d8cb69f7 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -99,17 +99,17 @@ static void gt_compare_set(unsigned long delta, int periodic)
counter += delta;
ctrl = GT_CONTROL_TIMER_ENABLE;
- writel(ctrl, gt_base + GT_CONTROL);
- writel(lower_32_bits(counter), gt_base + GT_COMP0);
- writel(upper_32_bits(counter), gt_base + GT_COMP1);
+ writel_relaxed(ctrl, gt_base + GT_CONTROL);
+ writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);
+ writel_relaxed(upper_32_bits(counter), gt_base + GT_COMP1);
if (periodic) {
- writel(delta, gt_base + GT_AUTO_INC);
+ writel_relaxed(delta, gt_base + GT_AUTO_INC);
ctrl |= GT_CONTROL_AUTO_INC;
}
ctrl |= GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE;
- writel(ctrl, gt_base + GT_CONTROL);
+ writel_relaxed(ctrl, gt_base + GT_CONTROL);
}
static int gt_clockevent_shutdown(struct clock_event_device *evt)
@@ -195,12 +195,23 @@ static cycle_t gt_clocksource_read(struct clocksource *cs)
return gt_counter_read();
}
+static void gt_resume(struct clocksource *cs)
+{
+ unsigned long ctrl;
+
+ ctrl = readl(gt_base + GT_CONTROL);
+ if (!(ctrl & GT_CONTROL_TIMER_ENABLE))
+ /* re-enable timer on resume */
+ writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
+}
+
static struct clocksource gt_clocksource = {
.name = "arm_global_timer",
.rating = 300,
.read = gt_clocksource_read,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .resume = gt_resume,
};
#ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c
index c76c75006ea6..63345260244d 100644
--- a/drivers/clocksource/dw_apb_timer.c
+++ b/drivers/clocksource/dw_apb_timer.c
@@ -49,20 +49,31 @@ clocksource_to_dw_apb_clocksource(struct clocksource *cs)
return container_of(cs, struct dw_apb_clocksource, cs);
}
-static unsigned long apbt_readl(struct dw_apb_timer *timer, unsigned long offs)
+static inline u32 apbt_readl(struct dw_apb_timer *timer, unsigned long offs)
{
return readl(timer->base + offs);
}
-static void apbt_writel(struct dw_apb_timer *timer, unsigned long val,
- unsigned long offs)
+static inline void apbt_writel(struct dw_apb_timer *timer, u32 val,
+ unsigned long offs)
{
writel(val, timer->base + offs);
}
+static inline u32 apbt_readl_relaxed(struct dw_apb_timer *timer, unsigned long offs)
+{
+ return readl_relaxed(timer->base + offs);
+}
+
+static inline void apbt_writel_relaxed(struct dw_apb_timer *timer, u32 val,
+ unsigned long offs)
+{
+ writel_relaxed(val, timer->base + offs);
+}
+
static void apbt_disable_int(struct dw_apb_timer *timer)
{
- unsigned long ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
ctrl |= APBTMR_CONTROL_INT;
apbt_writel(timer, ctrl, APBTMR_N_CONTROL);
@@ -81,7 +92,7 @@ void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced)
static void apbt_eoi(struct dw_apb_timer *timer)
{
- apbt_readl(timer, APBTMR_N_EOI);
+ apbt_readl_relaxed(timer, APBTMR_N_EOI);
}
static irqreturn_t dw_apb_clockevent_irq(int irq, void *data)
@@ -103,7 +114,7 @@ static irqreturn_t dw_apb_clockevent_irq(int irq, void *data)
static void apbt_enable_int(struct dw_apb_timer *timer)
{
- unsigned long ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(timer, APBTMR_N_CONTROL);
/* clear pending intr */
apbt_readl(timer, APBTMR_N_EOI);
ctrl &= ~APBTMR_CONTROL_INT;
@@ -113,7 +124,7 @@ static void apbt_enable_int(struct dw_apb_timer *timer)
static int apbt_shutdown(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=shutdown\n", __func__,
cpumask_first(evt->cpumask));
@@ -127,7 +138,7 @@ static int apbt_shutdown(struct clock_event_device *evt)
static int apbt_set_oneshot(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=oneshot\n", __func__,
cpumask_first(evt->cpumask));
@@ -160,7 +171,7 @@ static int apbt_set_periodic(struct clock_event_device *evt)
{
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
unsigned long period = DIV_ROUND_UP(dw_ced->timer.freq, HZ);
- unsigned long ctrl;
+ u32 ctrl;
pr_debug("%s CPU %d state=periodic\n", __func__,
cpumask_first(evt->cpumask));
@@ -196,17 +207,17 @@ static int apbt_resume(struct clock_event_device *evt)
static int apbt_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- unsigned long ctrl;
+ u32 ctrl;
struct dw_apb_clock_event_device *dw_ced = ced_to_dw_apb_ced(evt);
/* Disable timer */
- ctrl = apbt_readl(&dw_ced->timer, APBTMR_N_CONTROL);
+ ctrl = apbt_readl_relaxed(&dw_ced->timer, APBTMR_N_CONTROL);
ctrl &= ~APBTMR_CONTROL_ENABLE;
- apbt_writel(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
+ apbt_writel_relaxed(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
/* write new count */
- apbt_writel(&dw_ced->timer, delta, APBTMR_N_LOAD_COUNT);
+ apbt_writel_relaxed(&dw_ced->timer, delta, APBTMR_N_LOAD_COUNT);
ctrl |= APBTMR_CONTROL_ENABLE;
- apbt_writel(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
+ apbt_writel_relaxed(&dw_ced->timer, ctrl, APBTMR_N_CONTROL);
return 0;
}
@@ -323,7 +334,7 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs)
* start count down from 0xffff_ffff. this is done by toggling the
* enable bit then load initial load count to ~0.
*/
- unsigned long ctrl = apbt_readl(&dw_cs->timer, APBTMR_N_CONTROL);
+ u32 ctrl = apbt_readl(&dw_cs->timer, APBTMR_N_CONTROL);
ctrl &= ~APBTMR_CONTROL_ENABLE;
apbt_writel(&dw_cs->timer, ctrl, APBTMR_N_CONTROL);
@@ -338,11 +349,12 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs)
static cycle_t __apbt_read_clocksource(struct clocksource *cs)
{
- unsigned long current_count;
+ u32 current_count;
struct dw_apb_clocksource *dw_cs =
clocksource_to_dw_apb_clocksource(cs);
- current_count = apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE);
+ current_count = apbt_readl_relaxed(&dw_cs->timer,
+ APBTMR_N_CURRENT_VALUE);
return (cycle_t)~current_count;
}
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index a19a3f619cc7..860843cef572 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -16,6 +16,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/delay.h>
#include <linux/dw_apb_timer.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -130,6 +131,17 @@ static void __init init_sched_clock(void)
sched_clock_register(read_sched_clock, 32, sched_rate);
}
+#ifdef CONFIG_ARM
+static unsigned long dw_apb_delay_timer_read(void)
+{
+ return ~readl_relaxed(sched_io_base);
+}
+
+static struct delay_timer dw_apb_delay_timer = {
+ .read_current_timer = dw_apb_delay_timer_read,
+};
+#endif
+
static int num_called;
static void __init dw_apb_timer_init(struct device_node *timer)
{
@@ -142,6 +154,10 @@ static void __init dw_apb_timer_init(struct device_node *timer)
pr_debug("%s: found clocksource timer\n", __func__);
add_clocksource(timer);
init_sched_clock();
+#ifdef CONFIG_ARM
+ dw_apb_delay_timer.freq = sched_rate;
+ register_current_timer_delay(&dw_apb_delay_timer);
+#endif
break;
default:
break;
diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c
index 0e076c6fc006..75c44079b345 100644
--- a/drivers/clocksource/h8300_timer16.c
+++ b/drivers/clocksource/h8300_timer16.c
@@ -4,85 +4,56 @@
* Copyright 2015 Yoshinori Sato <ysato@users.sourcefoge.jp>
*/
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
#include <linux/clocksource.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
-
-#include <asm/segment.h>
-#include <asm/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#define TSTR 0
-#define TSNC 1
-#define TMDR 2
-#define TOLR 3
-#define TISRA 4
-#define TISRB 5
#define TISRC 6
#define TCR 0
-#define TIOR 1
#define TCNT 2
-#define GRA 4
-#define GRB 6
-
-#define FLAG_REPROGRAM (1 << 0)
-#define FLAG_SKIPEVENT (1 << 1)
-#define FLAG_IRQCONTEXT (1 << 2)
-#define FLAG_STARTED (1 << 3)
-#define ONESHOT 0
-#define PERIODIC 1
-
-#define RELATIVE 0
-#define ABSOLUTE 1
+#define bset(b, a) iowrite8(ioread8(a) | (1 << (b)), (a))
+#define bclr(b, a) iowrite8(ioread8(a) & ~(1 << (b)), (a))
struct timer16_priv {
- struct platform_device *pdev;
struct clocksource cs;
- struct irqaction irqaction;
unsigned long total_cycles;
- unsigned long mapbase;
- unsigned long mapcommon;
- unsigned long flags;
- unsigned short gra;
+ void __iomem *mapbase;
+ void __iomem *mapcommon;
unsigned short cs_enabled;
unsigned char enb;
- unsigned char imfa;
- unsigned char imiea;
unsigned char ovf;
- raw_spinlock_t lock;
- struct clk *clk;
+ unsigned char ovie;
};
static unsigned long timer16_get_counter(struct timer16_priv *p)
{
- unsigned long v1, v2, v3;
- int o1, o2;
+ unsigned short v1, v2, v3;
+ unsigned char o1, o2;
- o1 = ctrl_inb(p->mapcommon + TISRC) & p->ovf;
+ o1 = ioread8(p->mapcommon + TISRC) & p->ovf;
/* Make sure the timer value is stable. Stolen from acpi_pm.c */
do {
o2 = o1;
- v1 = ctrl_inw(p->mapbase + TCNT);
- v2 = ctrl_inw(p->mapbase + TCNT);
- v3 = ctrl_inw(p->mapbase + TCNT);
- o1 = ctrl_inb(p->mapcommon + TISRC) & p->ovf;
+ v1 = ioread16be(p->mapbase + TCNT);
+ v2 = ioread16be(p->mapbase + TCNT);
+ v3 = ioread16be(p->mapbase + TCNT);
+ o1 = ioread8(p->mapcommon + TISRC) & p->ovf;
} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
|| (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
- v2 |= 0x10000;
- return v2;
+ if (likely(!o1))
+ return v2;
+ else
+ return v2 + 0x10000;
}
@@ -90,8 +61,7 @@ static irqreturn_t timer16_interrupt(int irq, void *dev_id)
{
struct timer16_priv *p = (struct timer16_priv *)dev_id;
- ctrl_outb(ctrl_inb(p->mapcommon + TISRA) & ~p->imfa,
- p->mapcommon + TISRA);
+ bclr(p->ovf, p->mapcommon + TISRC);
p->total_cycles += 0x10000;
return IRQ_HANDLED;
@@ -105,13 +75,10 @@ static inline struct timer16_priv *cs_to_priv(struct clocksource *cs)
static cycle_t timer16_clocksource_read(struct clocksource *cs)
{
struct timer16_priv *p = cs_to_priv(cs);
- unsigned long flags, raw;
- unsigned long value;
+ unsigned long raw, value;
- raw_spin_lock_irqsave(&p->lock, flags);
value = p->total_cycles;
raw = timer16_get_counter(p);
- raw_spin_unlock_irqrestore(&p->lock, flags);
return value + raw;
}
@@ -123,10 +90,10 @@ static int timer16_enable(struct clocksource *cs)
WARN_ON(p->cs_enabled);
p->total_cycles = 0;
- ctrl_outw(0x0000, p->mapbase + TCNT);
- ctrl_outb(0x83, p->mapbase + TCR);
- ctrl_outb(ctrl_inb(p->mapcommon + TSTR) | p->enb,
- p->mapcommon + TSTR);
+ iowrite16be(0x0000, p->mapbase + TCNT);
+ iowrite8(0x83, p->mapbase + TCR);
+ bset(p->ovie, p->mapcommon + TISRC);
+ bset(p->enb, p->mapcommon + TSTR);
p->cs_enabled = true;
return 0;
@@ -138,116 +105,83 @@ static void timer16_disable(struct clocksource *cs)
WARN_ON(!p->cs_enabled);
- ctrl_outb(ctrl_inb(p->mapcommon + TSTR) & ~p->enb,
- p->mapcommon + TSTR);
+ bclr(p->ovie, p->mapcommon + TISRC);
+ bclr(p->enb, p->mapcommon + TSTR);
p->cs_enabled = false;
}
+static struct timer16_priv timer16_priv = {
+ .cs = {
+ .name = "h8300_16timer",
+ .rating = 200,
+ .read = timer16_clocksource_read,
+ .enable = timer16_enable,
+ .disable = timer16_disable,
+ .mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+};
+
#define REG_CH 0
#define REG_COMM 1
-static int timer16_setup(struct timer16_priv *p, struct platform_device *pdev)
+static void __init h8300_16timer_init(struct device_node *node)
{
- struct resource *res[2];
+ void __iomem *base[2];
int ret, irq;
unsigned int ch;
+ struct clk *clk;
- p->pdev = pdev;
-
- res[REG_CH] = platform_get_resource(p->pdev,
- IORESOURCE_MEM, REG_CH);
- res[REG_COMM] = platform_get_resource(p->pdev,
- IORESOURCE_MEM, REG_COMM);
- if (!res[REG_CH] || !res[REG_COMM]) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
- }
- irq = platform_get_irq(p->pdev, 0);
- if (irq < 0) {
- dev_err(&p->pdev->dev, "failed to get irq\n");
- return irq;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clocksource\n");
+ return;
}
- p->clk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->clk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->clk);
+ base[REG_CH] = of_iomap(node, 0);
+ if (!base[REG_CH]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto free_clk;
}
- of_property_read_u32(p->pdev->dev.of_node, "renesas,channel", &ch);
-
- p->pdev = pdev;
- p->mapbase = res[REG_CH]->start;
- p->mapcommon = res[REG_COMM]->start;
- p->enb = 1 << ch;
- p->imfa = 1 << ch;
- p->imiea = 1 << (4 + ch);
- p->cs.name = pdev->name;
- p->cs.rating = 200;
- p->cs.read = timer16_clocksource_read;
- p->cs.enable = timer16_enable;
- p->cs.disable = timer16_disable;
- p->cs.mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
- p->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
- ret = request_irq(irq, timer16_interrupt,
- IRQF_TIMER, pdev->name, p);
- if (ret < 0) {
- dev_err(&p->pdev->dev, "failed to request irq %d\n", irq);
- return ret;
+ base[REG_COMM] = of_iomap(node, 1);
+ if (!base[REG_COMM]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto unmap_ch;
}
- clocksource_register_hz(&p->cs, clk_get_rate(p->clk) / 8);
-
- return 0;
-}
-
-static int timer16_probe(struct platform_device *pdev)
-{
- struct timer16_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ pr_err("failed to get irq for clockevent\n");
+ goto unmap_comm;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ of_property_read_u32(node, "renesas,channel", &ch);
- return timer16_setup(p, pdev);
-}
-
-static int timer16_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
+ timer16_priv.mapbase = base[REG_CH];
+ timer16_priv.mapcommon = base[REG_COMM];
+ timer16_priv.enb = ch;
+ timer16_priv.ovf = ch;
+ timer16_priv.ovie = 4 + ch;
-static const struct of_device_id timer16_of_table[] = {
- { .compatible = "renesas,16bit-timer" },
- { }
-};
-static struct platform_driver timer16_driver = {
- .probe = timer16_probe,
- .remove = timer16_remove,
- .driver = {
- .name = "h8300h-16timer",
- .of_match_table = of_match_ptr(timer16_of_table),
+ ret = request_irq(irq, timer16_interrupt,
+ IRQF_TIMER, timer16_priv.cs.name, &timer16_priv);
+ if (ret < 0) {
+ pr_err("failed to request irq %d of clocksource\n", irq);
+ goto unmap_comm;
}
-};
-static int __init timer16_init(void)
-{
- return platform_driver_register(&timer16_driver);
-}
+ clocksource_register_hz(&timer16_priv.cs,
+ clk_get_rate(clk) / 8);
+ return;
-static void __exit timer16_exit(void)
-{
- platform_driver_unregister(&timer16_driver);
+unmap_comm:
+ iounmap(base[REG_COMM]);
+unmap_ch:
+ iounmap(base[REG_CH]);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(timer16_init);
-module_exit(timer16_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8/300H 16bit Timer Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", h8300_16timer_init);
diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c
index 44375d8b9bc4..c151941e1956 100644
--- a/drivers/clocksource/h8300_timer8.c
+++ b/drivers/clocksource/h8300_timer8.c
@@ -8,19 +8,15 @@
*/
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/clockchips.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
-
-#include <asm/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#define _8TCR 0
#define _8TCSR 2
@@ -28,126 +24,74 @@
#define TCORB 6
#define _8TCNT 8
-#define FLAG_REPROGRAM (1 << 0)
-#define FLAG_SKIPEVENT (1 << 1)
-#define FLAG_IRQCONTEXT (1 << 2)
+#define CMIEA 6
+#define CMFA 6
+
#define FLAG_STARTED (1 << 3)
-#define ONESHOT 0
-#define PERIODIC 1
+#define SCALE 64
-#define RELATIVE 0
-#define ABSOLUTE 1
+#define bset(b, a) iowrite8(ioread8(a) | (1 << (b)), (a))
+#define bclr(b, a) iowrite8(ioread8(a) & ~(1 << (b)), (a))
struct timer8_priv {
- struct platform_device *pdev;
struct clock_event_device ced;
- struct irqaction irqaction;
- unsigned long mapbase;
- raw_spinlock_t lock;
+ void __iomem *mapbase;
unsigned long flags;
unsigned int rate;
- unsigned int tcora;
- struct clk *pclk;
};
-static unsigned long timer8_get_counter(struct timer8_priv *p)
-{
- unsigned long v1, v2, v3;
- int o1, o2;
-
- o1 = ctrl_inb(p->mapbase + _8TCSR) & 0x20;
-
- /* Make sure the timer value is stable. Stolen from acpi_pm.c */
- do {
- o2 = o1;
- v1 = ctrl_inw(p->mapbase + _8TCNT);
- v2 = ctrl_inw(p->mapbase + _8TCNT);
- v3 = ctrl_inw(p->mapbase + _8TCNT);
- o1 = ctrl_inb(p->mapbase + _8TCSR) & 0x20;
- } while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
- || (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
-
- v2 |= o1 << 10;
- return v2;
-}
-
static irqreturn_t timer8_interrupt(int irq, void *dev_id)
{
struct timer8_priv *p = dev_id;
- ctrl_outb(ctrl_inb(p->mapbase + _8TCSR) & ~0x40,
- p->mapbase + _8TCSR);
- p->flags |= FLAG_IRQCONTEXT;
- ctrl_outw(p->tcora, p->mapbase + TCORA);
- if (!(p->flags & FLAG_SKIPEVENT)) {
- if (clockevent_state_oneshot(&p->ced))
- ctrl_outw(0x0000, p->mapbase + _8TCR);
- p->ced.event_handler(&p->ced);
- }
- p->flags &= ~(FLAG_SKIPEVENT | FLAG_IRQCONTEXT);
+ if (clockevent_state_oneshot(&p->ced))
+ iowrite16be(0x0000, p->mapbase + _8TCR);
+
+ p->ced.event_handler(&p->ced);
+
+ bclr(CMFA, p->mapbase + _8TCSR);
return IRQ_HANDLED;
}
static void timer8_set_next(struct timer8_priv *p, unsigned long delta)
{
- unsigned long flags;
- unsigned long now;
-
- raw_spin_lock_irqsave(&p->lock, flags);
if (delta >= 0x10000)
- dev_warn(&p->pdev->dev, "delta out of range\n");
- now = timer8_get_counter(p);
- p->tcora = delta;
- ctrl_outb(ctrl_inb(p->mapbase + _8TCR) | 0x40, p->mapbase + _8TCR);
- if (delta > now)
- ctrl_outw(delta, p->mapbase + TCORA);
- else
- ctrl_outw(now + 1, p->mapbase + TCORA);
-
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ pr_warn("delta out of range\n");
+ bclr(CMIEA, p->mapbase + _8TCR);
+ iowrite16be(delta, p->mapbase + TCORA);
+ iowrite16be(0x0000, p->mapbase + _8TCNT);
+ bclr(CMFA, p->mapbase + _8TCSR);
+ bset(CMIEA, p->mapbase + _8TCR);
}
static int timer8_enable(struct timer8_priv *p)
{
- p->rate = clk_get_rate(p->pclk) / 64;
- ctrl_outw(0xffff, p->mapbase + TCORA);
- ctrl_outw(0x0000, p->mapbase + _8TCNT);
- ctrl_outw(0x0c02, p->mapbase + _8TCR);
+ iowrite16be(0xffff, p->mapbase + TCORA);
+ iowrite16be(0x0000, p->mapbase + _8TCNT);
+ iowrite16be(0x0c02, p->mapbase + _8TCR);
return 0;
}
static int timer8_start(struct timer8_priv *p)
{
- int ret = 0;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&p->lock, flags);
-
- if (!(p->flags & FLAG_STARTED))
- ret = timer8_enable(p);
+ int ret;
- if (ret)
- goto out;
- p->flags |= FLAG_STARTED;
+ if ((p->flags & FLAG_STARTED))
+ return 0;
- out:
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ ret = timer8_enable(p);
+ if (!ret)
+ p->flags |= FLAG_STARTED;
return ret;
}
static void timer8_stop(struct timer8_priv *p)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&p->lock, flags);
-
- ctrl_outw(0x0000, p->mapbase + _8TCR);
-
- raw_spin_unlock_irqrestore(&p->lock, flags);
+ iowrite16be(0x0000, p->mapbase + _8TCR);
}
static inline struct timer8_priv *ced_to_priv(struct clock_event_device *ced)
@@ -155,7 +99,7 @@ static inline struct timer8_priv *ced_to_priv(struct clock_event_device *ced)
return container_of(ced, struct timer8_priv, ced);
}
-static void timer8_clock_event_start(struct timer8_priv *p, int periodic)
+static void timer8_clock_event_start(struct timer8_priv *p, unsigned long delta)
{
struct clock_event_device *ced = &p->ced;
@@ -166,7 +110,7 @@ static void timer8_clock_event_start(struct timer8_priv *p, int periodic)
ced->max_delta_ns = clockevent_delta2ns(0xffff, ced);
ced->min_delta_ns = clockevent_delta2ns(0x0001, ced);
- timer8_set_next(p, periodic?(p->rate + HZ/2) / HZ:0x10000);
+ timer8_set_next(p, delta);
}
static int timer8_clock_event_shutdown(struct clock_event_device *ced)
@@ -179,9 +123,9 @@ static int timer8_clock_event_periodic(struct clock_event_device *ced)
{
struct timer8_priv *p = ced_to_priv(ced);
- dev_info(&p->pdev->dev, "used for periodic clock events\n");
+ pr_info("%s: used for periodic clock events\n", ced->name);
timer8_stop(p);
- timer8_clock_event_start(p, PERIODIC);
+ timer8_clock_event_start(p, (p->rate + HZ/2) / HZ);
return 0;
}
@@ -190,9 +134,9 @@ static int timer8_clock_event_oneshot(struct clock_event_device *ced)
{
struct timer8_priv *p = ced_to_priv(ced);
- dev_info(&p->pdev->dev, "used for oneshot clock events\n");
+ pr_info("%s: used for oneshot clock events\n", ced->name);
timer8_stop(p);
- timer8_clock_event_start(p, ONESHOT);
+ timer8_clock_event_start(p, 0x10000);
return 0;
}
@@ -208,110 +152,64 @@ static int timer8_clock_event_next(unsigned long delta,
return 0;
}
-static int timer8_setup(struct timer8_priv *p,
- struct platform_device *pdev)
+static struct timer8_priv timer8_priv = {
+ .ced = {
+ .name = "h8300_8timer",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 200,
+ .set_next_event = timer8_clock_event_next,
+ .set_state_shutdown = timer8_clock_event_shutdown,
+ .set_state_periodic = timer8_clock_event_periodic,
+ .set_state_oneshot = timer8_clock_event_oneshot,
+ },
+};
+
+static void __init h8300_8timer_init(struct device_node *node)
{
- struct resource *res;
+ void __iomem *base;
int irq;
- int ret;
+ struct clk *clk;
- p->pdev = pdev;
-
- res = platform_get_resource(p->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clockevent\n");
+ return;
}
- irq = platform_get_irq(p->pdev, 0);
- if (irq < 0) {
- dev_err(&p->pdev->dev, "failed to get irq\n");
- return -ENXIO;
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("failed to map registers for clockevent\n");
+ goto free_clk;
}
- p->mapbase = res->start;
-
- p->irqaction.name = dev_name(&p->pdev->dev);
- p->irqaction.handler = timer8_interrupt;
- p->irqaction.dev_id = p;
- p->irqaction.flags = IRQF_TIMER;
-
- p->pclk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->pclk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->pclk);
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ pr_err("failed to get irq for clockevent\n");
+ goto unmap_reg;
}
- p->ced.name = pdev->name;
- p->ced.features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT;
- p->ced.rating = 200;
- p->ced.cpumask = cpumask_of(0);
- p->ced.set_next_event = timer8_clock_event_next;
- p->ced.set_state_shutdown = timer8_clock_event_shutdown;
- p->ced.set_state_periodic = timer8_clock_event_periodic;
- p->ced.set_state_oneshot = timer8_clock_event_oneshot;
-
- ret = setup_irq(irq, &p->irqaction);
- if (ret < 0) {
- dev_err(&p->pdev->dev,
- "failed to request irq %d\n", irq);
- return ret;
- }
- clockevents_register_device(&p->ced);
- platform_set_drvdata(pdev, p);
+ timer8_priv.mapbase = base;
- return 0;
-}
-
-static int timer8_probe(struct platform_device *pdev)
-{
- struct timer8_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ timer8_priv.rate = clk_get_rate(clk) / SCALE;
+ if (!timer8_priv.rate) {
+ pr_err("Failed to get rate for the clocksource\n");
+ goto unmap_reg;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- return timer8_setup(p, pdev);
-}
-
-static int timer8_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
-
-static const struct of_device_id timer8_of_table[] __maybe_unused = {
- { .compatible = "renesas,8bit-timer" },
- { }
-};
-
-MODULE_DEVICE_TABLE(of, timer8_of_table);
-static struct platform_driver timer8_driver = {
- .probe = timer8_probe,
- .remove = timer8_remove,
- .driver = {
- .name = "h8300-8timer",
- .of_match_table = of_match_ptr(timer8_of_table),
+ if (request_irq(irq, timer8_interrupt, IRQF_TIMER,
+ timer8_priv.ced.name, &timer8_priv) < 0) {
+ pr_err("failed to request irq %d for clockevent\n", irq);
+ goto unmap_reg;
}
-};
-static int __init timer8_init(void)
-{
- return platform_driver_register(&timer8_driver);
-}
+ clockevents_config_and_register(&timer8_priv.ced,
+ timer8_priv.rate, 1, 0x0000ffff);
-static void __exit timer8_exit(void)
-{
- platform_driver_unregister(&timer8_driver);
+ return;
+unmap_reg:
+ iounmap(base);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(timer8_init);
-module_exit(timer8_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8/300 8bit Timer Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init);
diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c
index 5487410bfabb..d4c1a287c262 100644
--- a/drivers/clocksource/h8300_tpu.c
+++ b/drivers/clocksource/h8300_tpu.c
@@ -1,42 +1,30 @@
/*
- * H8/300 TPU Driver
+ * H8S TPU Driver
*
* Copyright 2015 Yoshinori Sato <ysato@users.sourcefoge.jp>
*
*/
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/kernel.h>
-#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/clocksource.h>
-#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
-#include <asm/irq.h>
+#define TCR 0x0
+#define TSR 0x5
+#define TCNT 0x6
-#define TCR 0
-#define TMDR 1
-#define TIOR 2
-#define TER 4
-#define TSR 5
-#define TCNT 6
-#define TGRA 8
-#define TGRB 10
-#define TGRC 12
-#define TGRD 14
+#define TCFV 0x10
struct tpu_priv {
- struct platform_device *pdev;
struct clocksource cs;
- struct clk *clk;
- unsigned long mapbase1;
- unsigned long mapbase2;
+ void __iomem *mapbase1;
+ void __iomem *mapbase2;
raw_spinlock_t lock;
unsigned int cs_enabled;
};
@@ -45,8 +33,8 @@ static inline unsigned long read_tcnt32(struct tpu_priv *p)
{
unsigned long tcnt;
- tcnt = ctrl_inw(p->mapbase1 + TCNT) << 16;
- tcnt |= ctrl_inw(p->mapbase2 + TCNT);
+ tcnt = ioread16be(p->mapbase1 + TCNT) << 16;
+ tcnt |= ioread16be(p->mapbase2 + TCNT);
return tcnt;
}
@@ -55,7 +43,7 @@ static int tpu_get_counter(struct tpu_priv *p, unsigned long long *val)
unsigned long v1, v2, v3;
int o1, o2;
- o1 = ctrl_inb(p->mapbase1 + TSR) & 0x10;
+ o1 = ioread8(p->mapbase1 + TSR) & TCFV;
/* Make sure the timer value is stable. Stolen from acpi_pm.c */
do {
@@ -63,7 +51,7 @@ static int tpu_get_counter(struct tpu_priv *p, unsigned long long *val)
v1 = read_tcnt32(p);
v2 = read_tcnt32(p);
v3 = read_tcnt32(p);
- o1 = ctrl_inb(p->mapbase1 + TSR) & 0x10;
+ o1 = ioread8(p->mapbase1 + TSR) & TCFV;
} while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3)
|| (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2)));
@@ -96,10 +84,10 @@ static int tpu_clocksource_enable(struct clocksource *cs)
WARN_ON(p->cs_enabled);
- ctrl_outw(0, p->mapbase1 + TCNT);
- ctrl_outw(0, p->mapbase2 + TCNT);
- ctrl_outb(0x0f, p->mapbase1 + TCR);
- ctrl_outb(0x03, p->mapbase2 + TCR);
+ iowrite16be(0, p->mapbase1 + TCNT);
+ iowrite16be(0, p->mapbase2 + TCNT);
+ iowrite8(0x0f, p->mapbase1 + TCR);
+ iowrite8(0x03, p->mapbase2 + TCR);
p->cs_enabled = true;
return 0;
@@ -111,96 +99,59 @@ static void tpu_clocksource_disable(struct clocksource *cs)
WARN_ON(!p->cs_enabled);
- ctrl_outb(0, p->mapbase1 + TCR);
- ctrl_outb(0, p->mapbase2 + TCR);
+ iowrite8(0, p->mapbase1 + TCR);
+ iowrite8(0, p->mapbase2 + TCR);
p->cs_enabled = false;
}
+static struct tpu_priv tpu_priv = {
+ .cs = {
+ .name = "H8S_TPU",
+ .rating = 200,
+ .read = tpu_clocksource_read,
+ .enable = tpu_clocksource_enable,
+ .disable = tpu_clocksource_disable,
+ .mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+};
+
#define CH_L 0
#define CH_H 1
-static int __init tpu_setup(struct tpu_priv *p, struct platform_device *pdev)
+static void __init h8300_tpu_init(struct device_node *node)
{
- struct resource *res[2];
-
- p->pdev = pdev;
+ void __iomem *base[2];
+ struct clk *clk;
- res[CH_L] = platform_get_resource(p->pdev, IORESOURCE_MEM, CH_L);
- res[CH_H] = platform_get_resource(p->pdev, IORESOURCE_MEM, CH_H);
- if (!res[CH_L] || !res[CH_H]) {
- dev_err(&p->pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ pr_err("failed to get clock for clocksource\n");
+ return;
}
- p->clk = clk_get(&p->pdev->dev, "fck");
- if (IS_ERR(p->clk)) {
- dev_err(&p->pdev->dev, "can't get clk\n");
- return PTR_ERR(p->clk);
+ base[CH_L] = of_iomap(node, CH_L);
+ if (!base[CH_L]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto free_clk;
}
-
- p->mapbase1 = res[CH_L]->start;
- p->mapbase2 = res[CH_H]->start;
-
- p->cs.name = pdev->name;
- p->cs.rating = 200;
- p->cs.read = tpu_clocksource_read;
- p->cs.enable = tpu_clocksource_enable;
- p->cs.disable = tpu_clocksource_disable;
- p->cs.mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8);
- p->cs.flags = CLOCK_SOURCE_IS_CONTINUOUS;
- clocksource_register_hz(&p->cs, clk_get_rate(p->clk) / 64);
- platform_set_drvdata(pdev, p);
-
- return 0;
-}
-
-static int tpu_probe(struct platform_device *pdev)
-{
- struct tpu_priv *p = platform_get_drvdata(pdev);
-
- if (p) {
- dev_info(&pdev->dev, "kept as earlytimer\n");
- return 0;
+ base[CH_H] = of_iomap(node, CH_H);
+ if (!base[CH_H]) {
+ pr_err("failed to map registers for clocksource\n");
+ goto unmap_L;
}
- p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ tpu_priv.mapbase1 = base[CH_L];
+ tpu_priv.mapbase2 = base[CH_H];
- return tpu_setup(p, pdev);
-}
-
-static int tpu_remove(struct platform_device *pdev)
-{
- return -EBUSY;
-}
-
-static const struct of_device_id tpu_of_table[] = {
- { .compatible = "renesas,tpu" },
- { }
-};
+ clocksource_register_hz(&tpu_priv.cs, clk_get_rate(clk) / 64);
-static struct platform_driver tpu_driver = {
- .probe = tpu_probe,
- .remove = tpu_remove,
- .driver = {
- .name = "h8s-tpu",
- .of_match_table = of_match_ptr(tpu_of_table),
- }
-};
-
-static int __init tpu_init(void)
-{
- return platform_driver_register(&tpu_driver);
-}
+ return;
-static void __exit tpu_exit(void)
-{
- platform_driver_unregister(&tpu_driver);
+unmap_L:
+ iounmap(base[CH_H]);
+free_clk:
+ clk_put(clk);
}
-subsys_initcall(tpu_init);
-module_exit(tpu_exit);
-MODULE_AUTHOR("Yoshinori Sato");
-MODULE_DESCRIPTION("H8S Timer Pulse Unit Driver");
-MODULE_LICENSE("GPL v2");
+CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init);
diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c
index fbfc74685e6a..d67bc356488f 100644
--- a/drivers/clocksource/mtk_timer.c
+++ b/drivers/clocksource/mtk_timer.c
@@ -16,6 +16,8 @@
* GNU General Public License for more details.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/clk.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
@@ -187,10 +189,8 @@ static void __init mtk_timer_init(struct device_node *node)
struct clk *clk;
evt = kzalloc(sizeof(*evt), GFP_KERNEL);
- if (!evt) {
- pr_warn("Can't allocate mtk clock event driver struct");
+ if (!evt)
return;
- }
evt->dev.name = "mtk_tick";
evt->dev.rating = 300;
@@ -204,31 +204,31 @@ static void __init mtk_timer_init(struct device_node *node)
evt->gpt_base = of_io_request_and_map(node, 0, "mtk-timer");
if (IS_ERR(evt->gpt_base)) {
- pr_warn("Can't get resource\n");
- return;
+ pr_err("Can't get resource\n");
+ goto err_kzalloc;
}
evt->dev.irq = irq_of_parse_and_map(node, 0);
if (evt->dev.irq <= 0) {
- pr_warn("Can't parse IRQ");
+ pr_err("Can't parse IRQ\n");
goto err_mem;
}
clk = of_clk_get(node, 0);
if (IS_ERR(clk)) {
- pr_warn("Can't get timer clock");
+ pr_err("Can't get timer clock\n");
goto err_irq;
}
if (clk_prepare_enable(clk)) {
- pr_warn("Can't prepare clock");
+ pr_err("Can't prepare clock\n");
goto err_clk_put;
}
rate = clk_get_rate(clk);
if (request_irq(evt->dev.irq, mtk_timer_interrupt,
IRQF_TIMER | IRQF_IRQPOLL, "mtk_timer", evt)) {
- pr_warn("failed to setup irq %d\n", evt->dev.irq);
+ pr_err("failed to setup irq %d\n", evt->dev.irq);
goto err_clk_disable;
}
@@ -260,5 +260,7 @@ err_mem:
iounmap(evt->gpt_base);
of_address_to_resource(node, 0, &res);
release_mem_region(res.start, resource_size(&res));
+err_kzalloc:
+ kfree(evt);
}
CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init);
diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c
index d3c1742ded1a..8c77a529d0d4 100644
--- a/drivers/clocksource/rockchip_timer.c
+++ b/drivers/clocksource/rockchip_timer.c
@@ -17,16 +17,16 @@
#define TIMER_NAME "rk_timer"
-#define TIMER_LOAD_COUNT0 0x00
-#define TIMER_LOAD_COUNT1 0x04
-#define TIMER_CONTROL_REG 0x10
-#define TIMER_INT_STATUS 0x18
+#define TIMER_LOAD_COUNT0 0x00
+#define TIMER_LOAD_COUNT1 0x04
+#define TIMER_CONTROL_REG 0x10
+#define TIMER_INT_STATUS 0x18
-#define TIMER_DISABLE 0x0
-#define TIMER_ENABLE 0x1
-#define TIMER_MODE_FREE_RUNNING (0 << 1)
-#define TIMER_MODE_USER_DEFINED_COUNT (1 << 1)
-#define TIMER_INT_UNMASK (1 << 2)
+#define TIMER_DISABLE 0x0
+#define TIMER_ENABLE 0x1
+#define TIMER_MODE_FREE_RUNNING (0 << 1)
+#define TIMER_MODE_USER_DEFINED_COUNT (1 << 1)
+#define TIMER_INT_UNMASK (1 << 2)
struct bc_timer {
struct clock_event_device ce;
@@ -49,14 +49,12 @@ static inline void __iomem *rk_base(struct clock_event_device *ce)
static inline void rk_timer_disable(struct clock_event_device *ce)
{
writel_relaxed(TIMER_DISABLE, rk_base(ce) + TIMER_CONTROL_REG);
- dsb();
}
static inline void rk_timer_enable(struct clock_event_device *ce, u32 flags)
{
writel_relaxed(TIMER_ENABLE | TIMER_INT_UNMASK | flags,
rk_base(ce) + TIMER_CONTROL_REG);
- dsb();
}
static void rk_timer_update_counter(unsigned long cycles,
@@ -64,13 +62,11 @@ static void rk_timer_update_counter(unsigned long cycles,
{
writel_relaxed(cycles, rk_base(ce) + TIMER_LOAD_COUNT0);
writel_relaxed(0, rk_base(ce) + TIMER_LOAD_COUNT1);
- dsb();
}
static void rk_timer_interrupt_clear(struct clock_event_device *ce)
{
writel_relaxed(1, rk_base(ce) + TIMER_INT_STATUS);
- dsb();
}
static inline int rk_timer_set_next_event(unsigned long cycles,
@@ -173,4 +169,5 @@ static void __init rk_timer_init(struct device_node *np)
clockevents_config_and_register(ce, bc_timer.freq, 1, UINT_MAX);
}
+
CLOCKSOURCE_OF_DECLARE(rk_timer, "rockchip,rk3288-timer", rk_timer_init);
diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c
index d297b30d2bc0..2bcecafdeaea 100644
--- a/drivers/clocksource/tango_xtal.c
+++ b/drivers/clocksource/tango_xtal.c
@@ -19,19 +19,6 @@ static u64 notrace read_sched_clock(void)
return read_xtal_counter();
}
-static cycle_t read_clocksource(struct clocksource *cs)
-{
- return read_xtal_counter();
-}
-
-static struct clocksource tango_xtal = {
- .name = "tango-xtal",
- .rating = 350,
- .read = read_clocksource,
- .mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
static void __init tango_clocksource_init(struct device_node *np)
{
struct clk *clk;
@@ -53,8 +40,9 @@ static void __init tango_clocksource_init(struct device_node *np)
delay_timer.freq = xtal_freq;
delay_timer.read_current_timer = read_xtal_counter;
- ret = clocksource_register_hz(&tango_xtal, xtal_freq);
- if (ret != 0) {
+ ret = clocksource_mmio_init(xtal_in_cnt, "tango-xtal", xtal_freq, 350,
+ 32, clocksource_mmio_readl_up);
+ if (!ret) {
pr_err("%s: registration failed\n", np->full_name);
return;
}
diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c
index 6ebda1177e79..38333aba3055 100644
--- a/drivers/clocksource/tegra20_timer.c
+++ b/drivers/clocksource/tegra20_timer.c
@@ -96,7 +96,8 @@ static struct clock_event_device tegra_clockevent = {
.name = "timer0",
.rating = 300,
.features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_PERIODIC,
+ CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_DYNIRQ,
.set_next_event = tegra_timer_set_next_event,
.set_state_shutdown = tegra_timer_shutdown,
.set_state_periodic = tegra_timer_set_periodic,
diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c
index a1c06a2bc77c..1316876b487a 100644
--- a/drivers/clocksource/time-lpc32xx.c
+++ b/drivers/clocksource/time-lpc32xx.c
@@ -125,7 +125,7 @@ static int __init lpc32xx_clocksource_init(struct device_node *np)
clk = of_clk_get_by_name(np, "timerclk");
if (IS_ERR(clk)) {
- pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ pr_err("clock get failed (%ld)\n", PTR_ERR(clk));
return PTR_ERR(clk);
}
@@ -184,7 +184,7 @@ static int __init lpc32xx_clockevent_init(struct device_node *np)
clk = of_clk_get_by_name(np, "timerclk");
if (IS_ERR(clk)) {
- pr_err("clock get failed (%lu)\n", PTR_ERR(clk));
+ pr_err("clock get failed (%ld)\n", PTR_ERR(clk));
return PTR_ERR(clk);
}
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index bba679900054..3269d9ef7a18 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -84,7 +84,7 @@ pistachio_clocksource_read_cycles(struct clocksource *cs)
counter = gpt_readl(pcs->base, TIMER_CURRENT_VALUE, 0);
raw_spin_unlock_irqrestore(&pcs->lock, flags);
- return ~(cycle_t)counter;
+ return (cycle_t)~counter;
}
static u64 notrace pistachio_read_sched_clock(void)
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c
index bca9573e036a..24c83f9efd87 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -152,13 +152,6 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
-{
- struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
-
- return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
-}
-
static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
unsigned long event, void *data)
{
@@ -217,13 +210,8 @@ static int __init sun5i_setup_clocksource(struct device_node *node,
writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
base + TIMER_CTL_REG(1));
- cs->clksrc.name = node->name;
- cs->clksrc.rating = 340;
- cs->clksrc.read = sun5i_clksrc_read;
- cs->clksrc.mask = CLOCKSOURCE_MASK(32);
- cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
-
- ret = clocksource_register_hz(&cs->clksrc, rate);
+ ret = clocksource_mmio_init(base + TIMER_CNTVAL_LO_REG(1), node->name,
+ rate, 340, 32, clocksource_mmio_readl_down);
if (ret) {
pr_err("Couldn't register clock source.\n");
goto err_remove_notifier;
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index a92e94b40b5b..de49805fbb09 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -30,7 +30,6 @@
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
-#include <asm/mach/time.h>
#include <linux/of.h>
#include <linux/of_address.h>
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index d7373ca69c99..25693b045371 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -179,26 +179,21 @@ static int cn_call_callback(struct sk_buff *skb)
*
* It checks skb, netlink header and msg sizes, and calls callback helper.
*/
-static void cn_rx_skb(struct sk_buff *__skb)
+static void cn_rx_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
- struct sk_buff *skb;
int len, err;
- skb = skb_get(__skb);
-
if (skb->len >= NLMSG_HDRLEN) {
nlh = nlmsg_hdr(skb);
len = nlmsg_len(nlh);
if (len < (int)sizeof(struct cn_msg) ||
skb->len < nlh->nlmsg_len ||
- len > CONNECTOR_MAX_MSG_SIZE) {
- kfree_skb(skb);
+ len > CONNECTOR_MAX_MSG_SIZE)
return;
- }
- err = cn_call_callback(skb);
+ err = cn_call_callback(skb_get(skb));
if (err < 0)
kfree_skb(skb);
}
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index da2d6777bd09..97a364694bfc 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -515,7 +515,7 @@ static int __init padlock_init(void)
if (!x86_match_cpu(padlock_cpu_id))
return -ENODEV;
- if (!cpu_has_xcrypt_enabled) {
+ if (!boot_cpu_has(X86_FEATURE_XCRYPT_EN)) {
printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
return -ENODEV;
}
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 4e154c9b9206..8c5f90647b7a 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -540,7 +540,7 @@ static int __init padlock_init(void)
struct shash_alg *sha1;
struct shash_alg *sha256;
- if (!x86_match_cpu(padlock_sha_ids) || !cpu_has_phe_enabled)
+ if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
return -ENODEV;
/* Register the newly added algorithm module if on *
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
index cddfa8dbf4bd..068e920ecb68 100644
--- a/drivers/dma/mic_x100_dma.c
+++ b/drivers/dma/mic_x100_dma.c
@@ -317,7 +317,6 @@ mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
struct device *dev = mic_dma_ch_to_device(mic_ch);
int result;
- struct dma_async_tx_descriptor *tx = NULL;
if (!len && !flags)
return NULL;
@@ -325,13 +324,10 @@ mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
spin_lock(&mic_ch->prep_lock);
result = mic_dma_do_dma(mic_ch, flags, dma_src, dma_dest, len);
if (result >= 0)
- tx = allocate_tx(mic_ch);
-
- if (!tx)
- dev_err(dev, "Error enqueueing dma, error=%d\n", result);
-
+ return allocate_tx(mic_ch);
+ dev_err(dev, "Error enqueueing dma, error=%d\n", result);
spin_unlock(&mic_ch->prep_lock);
- return tx;
+ return NULL;
}
static struct dma_async_tx_descriptor *
@@ -339,14 +335,13 @@ mic_dma_prep_interrupt_lock(struct dma_chan *ch, unsigned long flags)
{
struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
int ret;
- struct dma_async_tx_descriptor *tx = NULL;
spin_lock(&mic_ch->prep_lock);
ret = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
if (!ret)
- tx = allocate_tx(mic_ch);
+ return allocate_tx(mic_ch);
spin_unlock(&mic_ch->prep_lock);
- return tx;
+ return NULL;
}
/* Return the status of the transaction */
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index 9dfa2b0fa5da..9cb93c5b655d 100644
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -29,6 +29,7 @@
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/of_device.h>
@@ -1610,6 +1611,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
/* Register DMA channel rx irq */
for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
chan = &pdma->chan[i];
+ irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
ret = devm_request_irq(chan->dev, chan->rx_irq,
xgene_dma_chan_ring_isr,
0, chan->name, chan);
@@ -1620,6 +1622,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma)
for (j = 0; j < i; j++) {
chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
devm_free_irq(chan->dev, chan->rx_irq, chan);
}
@@ -1640,6 +1643,7 @@ static void xgene_dma_free_irqs(struct xgene_dma *pdma)
for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
devm_free_irq(chan->dev, chan->rx_irq, chan);
}
}
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index ac1ce4a73edf..0e08e665f715 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -521,6 +521,7 @@ static int __init dmi_present(const u8 *buf)
dmi_ver = smbios_ver;
else
dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
+ dmi_ver <<= 8;
dmi_num = get_unaligned_le16(buf + 12);
dmi_len = get_unaligned_le16(buf + 6);
dmi_base = get_unaligned_le32(buf + 8);
@@ -528,15 +529,14 @@ static int __init dmi_present(const u8 *buf)
if (dmi_walk_early(dmi_decode) == 0) {
if (smbios_ver) {
pr_info("SMBIOS %d.%d present.\n",
- dmi_ver >> 8, dmi_ver & 0xFF);
+ dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
} else {
smbios_entry_point_size = 15;
memcpy(smbios_entry_point, buf,
smbios_entry_point_size);
pr_info("Legacy DMI %d.%d present.\n",
- dmi_ver >> 8, dmi_ver & 0xFF);
+ dmi_ver >> 16, (dmi_ver >> 8) & 0xFF);
}
- dmi_ver <<= 8;
dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index ffa902ece872..05a895496fc6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -156,6 +156,7 @@ nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
return -ENOMEM;
nvkm_object_ctor(&nv40_gr_chan, oclass, &chan->object);
chan->gr = gr;
+ chan->fifo = fifoch;
*pobject = &chan->object;
spin_lock_irqsave(&chan->gr->base.engine.lock, flags);
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 3a20db4f8604..72d6182666cb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -21,10 +21,13 @@
#include <linux/device.h>
#include <linux/dma-iommu.h>
+#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/vmalloc.h>
int iommu_dma_init(void)
{
@@ -191,6 +194,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
{
struct page **pages;
unsigned int i = 0, array_size = count * sizeof(*pages);
+ unsigned int order = MAX_ORDER;
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
@@ -204,14 +208,15 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
while (count) {
struct page *page = NULL;
- int j, order = __fls(count);
+ int j;
/*
* Higher-order allocations are a convenience rather
* than a necessity, hence using __GFP_NORETRY until
* falling back to single-page allocations.
*/
- for (order = min(order, MAX_ORDER); order > 0; order--) {
+ for (order = min_t(unsigned int, order, __fls(count));
+ order > 0; order--) {
page = alloc_pages(gfp | __GFP_NORETRY, order);
if (!page)
continue;
@@ -453,7 +458,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
size_t s_offset = iova_offset(iovad, s->offset);
size_t s_length = s->length;
- sg_dma_address(s) = s->offset;
+ sg_dma_address(s) = s_offset;
sg_dma_len(s) = s_length;
s->offset -= s_offset;
s_length = iova_align(iovad, s_length + s_offset);
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 1fae1881648c..c12ba4516df2 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -753,7 +753,7 @@ static inline void set_irq_posting_cap(void)
* should have X86_FEATURE_CX16 support, this has been confirmed
* with Intel hardware guys.
*/
- if ( cpu_has_cx16 )
+ if (boot_cpu_has(X86_FEATURE_CX16))
intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
for_each_iommu(iommu, drhd)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 8cf605fa9946..dfb868e2d129 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -295,7 +295,7 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
{
- phys_addr_t ttbr;
+ u64 ttbr;
/*
* Allocate the page table operations.
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 4d7294e5d982..11fc2a27fa2e 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -8,6 +8,11 @@ config ARM_GIC
select IRQ_DOMAIN_HIERARCHY
select MULTI_IRQ_HANDLER
+config ARM_GIC_MAX_NR
+ int
+ default 2 if ARCH_REALVIEW
+ default 1
+
config ARM_GIC_V2M
bool
depends on ARM_GIC
@@ -27,6 +32,14 @@ config ARM_GIC_V3_ITS
bool
select PCI_MSI_IRQ_DOMAIN
+config HISILICON_IRQ_MBIGEN
+ bool "Support mbigen interrupt controller"
+ default n
+ depends on ARM_GIC_V3 && ARM_GIC_V3_ITS && GENERIC_MSI_IRQ_DOMAIN
+ help
+ Enable the mbigen interrupt controller used on
+ Hisilicon platform.
+
config ARM_NVIC
bool
select IRQ_DOMAIN
@@ -138,6 +151,12 @@ config TB10X_IRQC
select IRQ_DOMAIN
select GENERIC_IRQ_CHIP
+config TS4800_IRQ
+ tristate "TS-4800 IRQ controller"
+ select IRQ_DOMAIN
+ help
+ Support for the TS-4800 FPGA IRQ controller
+
config VERSATILE_FPGA_IRQ
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 177f78f6e6d6..d4c2e4ebc308 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -21,9 +21,11 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o
obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o
obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o
obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o
+obj-$(CONFIG_REALVIEW_DT) += irq-gic-realview.o
obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o
obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o
obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o
+obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o
obj-$(CONFIG_ARM_NVIC) += irq-nvic.o
obj-$(CONFIG_ARM_VIC) += irq-vic.o
obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o
@@ -39,6 +41,7 @@ obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
obj-$(CONFIG_ST_IRQCHIP) += irq-st.o
obj-$(CONFIG_TB10X_IRQC) += irq-tb10x.o
+obj-$(CONFIG_TS4800_IRQ) += irq-ts4800.o
obj-$(CONFIG_XTENSA) += irq-xtensa-pic.o
obj-$(CONFIG_XTENSA_MX) += irq-xtensa-mx.o
obj-$(CONFIG_IRQ_CROSSBAR) += irq-crossbar.o
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index f68708281fcf..963065a0d774 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -21,6 +21,9 @@
#include <linux/irqdomain.h>
#include <asm/exception.h>
+#define LOCAL_CONTROL 0x000
+#define LOCAL_PRESCALER 0x008
+
/*
* The low 2 bits identify the CPU that the GPU IRQ goes to, and the
* next 2 bits identify the CPU that the GPU FIQ goes to.
@@ -50,14 +53,16 @@
/* Same status bits as above, but for FIQ. */
#define LOCAL_FIQ_PENDING0 0x070
/*
- * Mailbox0 write-to-set bits. There are 16 mailboxes, 4 per CPU, and
+ * Mailbox write-to-set bits. There are 16 mailboxes, 4 per CPU, and
* these bits are organized by mailbox number and then CPU number. We
* use mailbox 0 for IPIs. The mailbox's interrupt is raised while
* any bit is set.
*/
#define LOCAL_MAILBOX0_SET0 0x080
-/* Mailbox0 write-to-clear bits. */
+#define LOCAL_MAILBOX3_SET0 0x08c
+/* Mailbox write-to-clear bits. */
#define LOCAL_MAILBOX0_CLR0 0x0c0
+#define LOCAL_MAILBOX3_CLR0 0x0cc
#define LOCAL_IRQ_CNTPSIRQ 0
#define LOCAL_IRQ_CNTPNSIRQ 1
@@ -162,7 +167,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
u32 stat;
stat = readl_relaxed(intc.base + LOCAL_IRQ_PENDING0 + 4 * cpu);
- if (stat & 0x10) {
+ if (stat & BIT(LOCAL_IRQ_MAILBOX0)) {
#ifdef CONFIG_SMP
void __iomem *mailbox0 = (intc.base +
LOCAL_MAILBOX0_CLR0 + 16 * cpu);
@@ -172,7 +177,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
writel(1 << ipi, mailbox0);
handle_IPI(ipi, regs);
#endif
- } else {
+ } else if (stat) {
u32 hwirq = ffs(stat) - 1;
handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
@@ -217,6 +222,24 @@ static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = {
.notifier_call = bcm2836_arm_irqchip_cpu_notify,
.priority = 100,
};
+
+int __init bcm2836_smp_boot_secondary(unsigned int cpu,
+ struct task_struct *idle)
+{
+ unsigned long secondary_startup_phys =
+ (unsigned long)virt_to_phys((void *)secondary_startup);
+
+ dsb();
+ writel(secondary_startup_phys,
+ intc.base + LOCAL_MAILBOX3_SET0 + 16 * cpu);
+
+ return 0;
+}
+
+static const struct smp_operations bcm2836_smp_ops __initconst = {
+ .smp_boot_secondary = bcm2836_smp_boot_secondary,
+};
+
#endif
static const struct irq_domain_ops bcm2836_arm_irqchip_intc_ops = {
@@ -234,9 +257,31 @@ bcm2836_arm_irqchip_smp_init(void)
register_cpu_notifier(&bcm2836_arm_irqchip_cpu_notifier);
set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
+ smp_set_ops(&bcm2836_smp_ops);
#endif
}
+/*
+ * The LOCAL_IRQ_CNT* timer firings are based off of the external
+ * oscillator with some scaling. The firmware sets up CNTFRQ to
+ * report 19.2Mhz, but doesn't set up the scaling registers.
+ */
+static void bcm2835_init_local_timer_frequency(void)
+{
+ /*
+ * Set the timer to source from the 19.2Mhz crystal clock (bit
+ * 8 unset), and only increment by 1 instead of 2 (bit 9
+ * unset).
+ */
+ writel(0, intc.base + LOCAL_CONTROL);
+
+ /*
+ * Set the timer prescaler to 1:1 (timer freq = input freq *
+ * 2**31 / prescaler)
+ */
+ writel(0x80000000, intc.base + LOCAL_PRESCALER);
+}
+
static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
struct device_node *parent)
{
@@ -246,6 +291,8 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node,
node->full_name);
}
+ bcm2835_init_local_timer_frequency();
+
intc.domain = irq_domain_add_linear(node, LAST_IRQ + 1,
&bcm2836_arm_irqchip_intc_ops,
NULL);
diff --git a/drivers/irqchip/irq-gic-realview.c b/drivers/irqchip/irq-gic-realview.c
new file mode 100644
index 000000000000..aa46eb280a7f
--- /dev/null
+++ b/drivers/irqchip/irq-gic-realview.c
@@ -0,0 +1,43 @@
+/*
+ * Special GIC quirks for the ARM RealView
+ * Copyright (C) 2015 Linus Walleij
+ */
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/bitops.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
+
+#define REALVIEW_SYS_LOCK_OFFSET 0x20
+#define REALVIEW_PB11MP_SYS_PLD_CTRL1 0x74
+#define VERSATILE_LOCK_VAL 0xA05F
+#define PLD_INTMODE_MASK BIT(22)|BIT(23)|BIT(24)
+#define PLD_INTMODE_LEGACY 0x0
+#define PLD_INTMODE_NEW_DCC BIT(22)
+#define PLD_INTMODE_NEW_NO_DCC BIT(23)
+#define PLD_INTMODE_FIQ_ENABLE BIT(24)
+
+static int __init
+realview_gic_of_init(struct device_node *node, struct device_node *parent)
+{
+ static struct regmap *map;
+
+ /* The PB11MPCore GIC needs to be configured in the syscon */
+ map = syscon_regmap_lookup_by_compatible("arm,realview-pb11mp-syscon");
+ if (!IS_ERR(map)) {
+ /* new irq mode with no DCC */
+ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
+ VERSATILE_LOCK_VAL);
+ regmap_update_bits(map, REALVIEW_PB11MP_SYS_PLD_CTRL1,
+ PLD_INTMODE_NEW_NO_DCC,
+ PLD_INTMODE_MASK);
+ regmap_write(map, REALVIEW_SYS_LOCK_OFFSET, 0x0000);
+ pr_info("TC11MP GIC: set up interrupt controller to NEW mode, no DCC\n");
+ } else {
+ pr_err("TC11MP GIC setup: could not find syscon\n");
+ return -ENXIO;
+ }
+ return gic_of_init(node, parent);
+}
+IRQCHIP_DECLARE(armtc11mp_gic, "arm,tc11mp-gic", realview_gic_of_init);
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 87f8d104acab..c779f83e511d 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -15,9 +15,11 @@
#define pr_fmt(fmt) "GICv2m: " fmt
+#include <linux/acpi.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
+#include <linux/msi.h>
#include <linux/of_address.h>
#include <linux/of_pci.h>
#include <linux/slab.h>
@@ -55,7 +57,7 @@ static DEFINE_SPINLOCK(v2m_lock);
struct v2m_data {
struct list_head entry;
- struct device_node *node;
+ struct fwnode_handle *fwnode;
struct resource res; /* GICv2m resource */
void __iomem *base; /* GICv2m virt address */
u32 spi_start; /* The SPI number that MSIs start */
@@ -138,6 +140,11 @@ static int gicv2m_irq_gic_domain_alloc(struct irq_domain *domain,
fwspec.param[0] = 0;
fwspec.param[1] = hwirq - 32;
fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+ } else if (is_fwnode_irqchip(domain->parent->fwnode)) {
+ fwspec.fwnode = domain->parent->fwnode;
+ fwspec.param_count = 2;
+ fwspec.param[0] = hwirq;
+ fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
} else {
return -EINVAL;
}
@@ -254,7 +261,9 @@ static void gicv2m_teardown(void)
list_del(&v2m->entry);
kfree(v2m->bm);
iounmap(v2m->base);
- of_node_put(v2m->node);
+ of_node_put(to_of_node(v2m->fwnode));
+ if (is_fwnode_irqchip(v2m->fwnode))
+ irq_domain_free_fwnode(v2m->fwnode);
kfree(v2m);
}
}
@@ -268,7 +277,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
if (!v2m)
return 0;
- inner_domain = irq_domain_create_tree(of_node_to_fwnode(v2m->node),
+ inner_domain = irq_domain_create_tree(v2m->fwnode,
&gicv2m_domain_ops, v2m);
if (!inner_domain) {
pr_err("Failed to create GICv2m domain\n");
@@ -277,10 +286,10 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
inner_domain->bus_token = DOMAIN_BUS_NEXUS;
inner_domain->parent = parent;
- pci_domain = pci_msi_create_irq_domain(of_node_to_fwnode(v2m->node),
+ pci_domain = pci_msi_create_irq_domain(v2m->fwnode,
&gicv2m_msi_domain_info,
inner_domain);
- plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(v2m->node),
+ plat_domain = platform_msi_create_irq_domain(v2m->fwnode,
&gicv2m_pmsi_domain_info,
inner_domain);
if (!pci_domain || !plat_domain) {
@@ -296,8 +305,9 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
return 0;
}
-static int __init gicv2m_init_one(struct device_node *node,
- struct irq_domain *parent)
+static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
+ u32 spi_start, u32 nr_spis,
+ struct resource *res)
{
int ret;
struct v2m_data *v2m;
@@ -309,13 +319,9 @@ static int __init gicv2m_init_one(struct device_node *node,
}
INIT_LIST_HEAD(&v2m->entry);
- v2m->node = node;
+ v2m->fwnode = fwnode;
- ret = of_address_to_resource(node, 0, &v2m->res);
- if (ret) {
- pr_err("Failed to allocate v2m resource.\n");
- goto err_free_v2m;
- }
+ memcpy(&v2m->res, res, sizeof(struct resource));
v2m->base = ioremap(v2m->res.start, resource_size(&v2m->res));
if (!v2m->base) {
@@ -324,10 +330,9 @@ static int __init gicv2m_init_one(struct device_node *node,
goto err_free_v2m;
}
- if (!of_property_read_u32(node, "arm,msi-base-spi", &v2m->spi_start) &&
- !of_property_read_u32(node, "arm,msi-num-spis", &v2m->nr_spis)) {
- pr_info("Overriding V2M MSI_TYPER (base:%u, num:%u)\n",
- v2m->spi_start, v2m->nr_spis);
+ if (spi_start && nr_spis) {
+ v2m->spi_start = spi_start;
+ v2m->nr_spis = nr_spis;
} else {
u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
@@ -359,10 +364,9 @@ static int __init gicv2m_init_one(struct device_node *node,
}
list_add_tail(&v2m->entry, &v2m_nodes);
- pr_info("Node %s: range[%#lx:%#lx], SPI[%d:%d]\n", node->name,
- (unsigned long)v2m->res.start, (unsigned long)v2m->res.end,
- v2m->spi_start, (v2m->spi_start + v2m->nr_spis));
+ pr_info("range%pR, SPI[%d:%d]\n", res,
+ v2m->spi_start, (v2m->spi_start + v2m->nr_spis - 1));
return 0;
err_iounmap:
@@ -377,19 +381,36 @@ static struct of_device_id gicv2m_device_id[] = {
{},
};
-int __init gicv2m_of_init(struct device_node *node, struct irq_domain *parent)
+static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent)
{
int ret = 0;
+ struct device_node *node = to_of_node(parent_handle);
struct device_node *child;
for (child = of_find_matching_node(node, gicv2m_device_id); child;
child = of_find_matching_node(child, gicv2m_device_id)) {
+ u32 spi_start = 0, nr_spis = 0;
+ struct resource res;
+
if (!of_find_property(child, "msi-controller", NULL))
continue;
- ret = gicv2m_init_one(child, parent);
+ ret = of_address_to_resource(child, 0, &res);
+ if (ret) {
+ pr_err("Failed to allocate v2m resource.\n");
+ break;
+ }
+
+ if (!of_property_read_u32(child, "arm,msi-base-spi",
+ &spi_start) &&
+ !of_property_read_u32(child, "arm,msi-num-spis", &nr_spis))
+ pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n",
+ spi_start, nr_spis);
+
+ ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res);
if (ret) {
- of_node_put(node);
+ of_node_put(child);
break;
}
}
@@ -400,3 +421,101 @@ int __init gicv2m_of_init(struct device_node *node, struct irq_domain *parent)
gicv2m_teardown();
return ret;
}
+
+#ifdef CONFIG_ACPI
+static int acpi_num_msi;
+
+static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
+{
+ struct v2m_data *data;
+
+ if (WARN_ON(acpi_num_msi <= 0))
+ return NULL;
+
+ /* We only return the fwnode of the first MSI frame. */
+ data = list_first_entry_or_null(&v2m_nodes, struct v2m_data, entry);
+ if (!data)
+ return NULL;
+
+ return data->fwnode;
+}
+
+static int __init
+acpi_parse_madt_msi(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ int ret;
+ struct resource res;
+ u32 spi_start = 0, nr_spis = 0;
+ struct acpi_madt_generic_msi_frame *m;
+ struct fwnode_handle *fwnode;
+
+ m = (struct acpi_madt_generic_msi_frame *)header;
+ if (BAD_MADT_ENTRY(m, end))
+ return -EINVAL;
+
+ res.start = m->base_address;
+ res.end = m->base_address + SZ_4K - 1;
+ res.flags = IORESOURCE_MEM;
+
+ if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) {
+ spi_start = m->spi_base;
+ nr_spis = m->spi_count;
+
+ pr_info("ACPI overriding V2M MSI_TYPER (base:%u, num:%u)\n",
+ spi_start, nr_spis);
+ }
+
+ fwnode = irq_domain_alloc_fwnode((void *)m->base_address);
+ if (!fwnode) {
+ pr_err("Unable to allocate GICv2m domain token\n");
+ return -EINVAL;
+ }
+
+ ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res);
+ if (ret)
+ irq_domain_free_fwnode(fwnode);
+
+ return ret;
+}
+
+static int __init gicv2m_acpi_init(struct irq_domain *parent)
+{
+ int ret;
+
+ if (acpi_num_msi > 0)
+ return 0;
+
+ acpi_num_msi = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_MSI_FRAME,
+ acpi_parse_madt_msi, 0);
+
+ if (acpi_num_msi <= 0)
+ goto err_out;
+
+ ret = gicv2m_allocate_domains(parent);
+ if (ret)
+ goto err_out;
+
+ pci_msi_register_fwnode_provider(&gicv2m_get_fwnode);
+
+ return 0;
+
+err_out:
+ gicv2m_teardown();
+ return -EINVAL;
+}
+#else /* CONFIG_ACPI */
+static int __init gicv2m_acpi_init(struct irq_domain *parent)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_ACPI */
+
+int __init gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent)
+{
+ if (is_of_node(parent_handle))
+ return gicv2m_of_init(parent_handle, parent);
+
+ return gicv2m_acpi_init(parent);
+}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index abf2ffaed392..911758c056c1 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -69,6 +69,7 @@ union gic_base {
};
struct gic_chip_data {
+ struct irq_chip chip;
union gic_base dist_base;
union gic_base cpu_base;
#ifdef CONFIG_CPU_PM
@@ -99,11 +100,7 @@ static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;
static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
-#ifndef MAX_GIC_NR
-#define MAX_GIC_NR 1
-#endif
-
-static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly;
+static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
#ifdef CONFIG_GIC_NON_BANKED
static void __iomem *gic_get_percpu_base(union gic_base *base)
@@ -336,7 +333,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
irqnr = irqstat & GICC_IAR_INT_ID_MASK;
- if (likely(irqnr > 15 && irqnr < 1021)) {
+ if (likely(irqnr > 15 && irqnr < 1020)) {
if (static_key_true(&supports_deactivate))
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
handle_domain_irq(gic->domain, irqnr, regs);
@@ -383,7 +380,6 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
}
static struct irq_chip gic_chip = {
- .name = "GIC",
.irq_mask = gic_mask_irq,
.irq_unmask = gic_unmask_irq,
.irq_eoi = gic_eoi_irq,
@@ -417,8 +413,7 @@ static struct irq_chip gic_eoimode1_chip = {
void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
{
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq,
&gic_data[gic_nr]);
}
@@ -524,7 +519,7 @@ int gic_cpu_if_down(unsigned int gic_nr)
void __iomem *cpu_base;
u32 val = 0;
- if (gic_nr >= MAX_GIC_NR)
+ if (gic_nr >= CONFIG_ARM_GIC_MAX_NR)
return -EINVAL;
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -548,8 +543,7 @@ static void gic_dist_save(unsigned int gic_nr)
void __iomem *dist_base;
int i;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
@@ -587,8 +581,7 @@ static void gic_dist_restore(unsigned int gic_nr)
unsigned int i;
void __iomem *dist_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
@@ -634,8 +627,7 @@ static void gic_cpu_save(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -664,8 +656,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(&gic_data[gic_nr]);
@@ -703,7 +694,7 @@ static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v)
{
int i;
- for (i = 0; i < MAX_GIC_NR; i++) {
+ for (i = 0; i < CONFIG_ARM_GIC_MAX_NR; i++) {
#ifdef CONFIG_GIC_NON_BANKED
/* Skip over unused GICs */
if (!gic_data[i].get_base)
@@ -835,8 +826,7 @@ void gic_migrate_target(unsigned int new_cpu_id)
int i, ror_val, cpu = smp_processor_id();
u32 val, cur_target_mask, active_mask;
- if (gic_nr >= MAX_GIC_NR)
- BUG();
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
dist_base = gic_data_dist_base(&gic_data[gic_nr]);
if (!dist_base)
@@ -925,20 +915,15 @@ void __init gic_init_physaddr(struct device_node *node)
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hw)
{
- struct irq_chip *chip = &gic_chip;
-
- if (static_key_true(&supports_deactivate)) {
- if (d->host_data == (void *)&gic_data[0])
- chip = &gic_eoimode1_chip;
- }
+ struct gic_chip_data *gic = d->host_data;
if (hw < 32) {
irq_set_percpu_devid(irq);
- irq_domain_set_info(d, irq, hw, chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_percpu_devid_irq, NULL, NULL);
irq_set_status_flags(irq, IRQ_NOAUTOEN);
} else {
- irq_domain_set_info(d, irq, hw, chip, d->host_data,
+ irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
handle_fasteoi_irq, NULL, NULL);
irq_set_probe(irq);
}
@@ -972,7 +957,7 @@ static int gic_irq_domain_translate(struct irq_domain *d,
return 0;
}
- if (fwspec->fwnode->type == FWNODE_IRQCHIP) {
+ if (is_fwnode_irqchip(fwspec->fwnode)) {
if(fwspec->param_count != 2)
return -EINVAL;
@@ -1040,11 +1025,20 @@ static void __init __gic_init_bases(unsigned int gic_nr, int irq_start,
struct gic_chip_data *gic;
int gic_irqs, irq_base, i;
- BUG_ON(gic_nr >= MAX_GIC_NR);
+ BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
gic_check_cpu_features();
gic = &gic_data[gic_nr];
+
+ /* Initialize irq_chip */
+ if (static_key_true(&supports_deactivate) && gic_nr == 0) {
+ gic->chip = gic_eoimode1_chip;
+ } else {
+ gic->chip = gic_chip;
+ gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d", gic_nr);
+ }
+
#ifdef CONFIG_GIC_NON_BANKED
if (percpu_offset) { /* Frankein-GIC without banked registers... */
unsigned int cpu;
@@ -1196,7 +1190,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
return true;
}
-static int __init
+int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *cpu_base;
@@ -1234,7 +1228,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
}
if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
- gicv2m_of_init(node, gic_data[gic_cnt].domain);
+ gicv2m_init(&node->fwnode, gic_data[gic_cnt].domain);
gic_cnt++;
return 0;
@@ -1359,6 +1353,10 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
__gic_init_bases(0, -1, dist_base, cpu_base, 0, domain_handle);
acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
+
+ if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
+ gicv2m_init(NULL, gic_data[0].domain);
+
return 0;
}
IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
new file mode 100644
index 000000000000..4dd3eb8a40b3
--- /dev/null
+++ b/drivers/irqchip/irq-mbigen.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C) 2015 Hisilicon Limited, All Rights Reserved.
+ * Author: Jun Ma <majun258@huawei.com>
+ * Author: Yun Wu <wuyun.wu@huawei.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* Interrupt numbers per mbigen node supported */
+#define IRQS_PER_MBIGEN_NODE 128
+
+/* 64 irqs (Pin0-pin63) are reserved for each mbigen chip */
+#define RESERVED_IRQ_PER_MBIGEN_CHIP 64
+
+/* The maximum IRQ pin number of mbigen chip(start from 0) */
+#define MAXIMUM_IRQ_PIN_NUM 1407
+
+/**
+ * In mbigen vector register
+ * bit[21:12]: event id value
+ * bit[11:0]: device id
+ */
+#define IRQ_EVENT_ID_SHIFT 12
+#define IRQ_EVENT_ID_MASK 0x3ff
+
+/* register range of each mbigen node */
+#define MBIGEN_NODE_OFFSET 0x1000
+
+/* offset of vector register in mbigen node */
+#define REG_MBIGEN_VEC_OFFSET 0x200
+
+/**
+ * offset of clear register in mbigen node
+ * This register is used to clear the status
+ * of interrupt
+ */
+#define REG_MBIGEN_CLEAR_OFFSET 0xa000
+
+/**
+ * offset of interrupt type register
+ * This register is used to configure interrupt
+ * trigger type
+ */
+#define REG_MBIGEN_TYPE_OFFSET 0x0
+
+/**
+ * struct mbigen_device - holds the information of mbigen device.
+ *
+ * @pdev: pointer to the platform device structure of mbigen chip.
+ * @base: mapped address of this mbigen chip.
+ */
+struct mbigen_device {
+ struct platform_device *pdev;
+ void __iomem *base;
+};
+
+static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq)
+{
+ unsigned int nid, pin;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ nid = hwirq / IRQS_PER_MBIGEN_NODE + 1;
+ pin = hwirq % IRQS_PER_MBIGEN_NODE;
+
+ return pin * 4 + nid * MBIGEN_NODE_OFFSET
+ + REG_MBIGEN_VEC_OFFSET;
+}
+
+static inline void get_mbigen_type_reg(irq_hw_number_t hwirq,
+ u32 *mask, u32 *addr)
+{
+ unsigned int nid, irq_ofst, ofst;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ nid = hwirq / IRQS_PER_MBIGEN_NODE + 1;
+ irq_ofst = hwirq % IRQS_PER_MBIGEN_NODE;
+
+ *mask = 1 << (irq_ofst % 32);
+ ofst = irq_ofst / 32 * 4;
+
+ *addr = ofst + nid * MBIGEN_NODE_OFFSET
+ + REG_MBIGEN_TYPE_OFFSET;
+}
+
+static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq,
+ u32 *mask, u32 *addr)
+{
+ unsigned int ofst;
+
+ hwirq -= RESERVED_IRQ_PER_MBIGEN_CHIP;
+ ofst = hwirq / 32 * 4;
+
+ *mask = 1 << (hwirq % 32);
+ *addr = ofst + REG_MBIGEN_CLEAR_OFFSET;
+}
+
+static void mbigen_eoi_irq(struct irq_data *data)
+{
+ void __iomem *base = data->chip_data;
+ u32 mask, addr;
+
+ get_mbigen_clear_reg(data->hwirq, &mask, &addr);
+
+ writel_relaxed(mask, base + addr);
+
+ irq_chip_eoi_parent(data);
+}
+
+static int mbigen_set_type(struct irq_data *data, unsigned int type)
+{
+ void __iomem *base = data->chip_data;
+ u32 mask, addr, val;
+
+ if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
+ return -EINVAL;
+
+ get_mbigen_type_reg(data->hwirq, &mask, &addr);
+
+ val = readl_relaxed(base + addr);
+
+ if (type == IRQ_TYPE_LEVEL_HIGH)
+ val |= mask;
+ else
+ val &= ~mask;
+
+ writel_relaxed(val, base + addr);
+
+ return 0;
+}
+
+static struct irq_chip mbigen_irq_chip = {
+ .name = "mbigen-v2",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = mbigen_eoi_irq,
+ .irq_set_type = mbigen_set_type,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+};
+
+static void mbigen_write_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct irq_data *d = irq_get_irq_data(desc->irq);
+ void __iomem *base = d->chip_data;
+ u32 val;
+
+ base += get_mbigen_vec_reg(d->hwirq);
+ val = readl_relaxed(base);
+
+ val &= ~(IRQ_EVENT_ID_MASK << IRQ_EVENT_ID_SHIFT);
+ val |= (msg->data << IRQ_EVENT_ID_SHIFT);
+
+ /* The address of doorbell is encoded in mbigen register by default
+ * So,we don't need to program the doorbell address at here
+ */
+ writel_relaxed(val, base);
+}
+
+static int mbigen_domain_translate(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *hwirq,
+ unsigned int *type)
+{
+ if (is_of_node(fwspec->fwnode)) {
+ if (fwspec->param_count != 2)
+ return -EINVAL;
+
+ if ((fwspec->param[0] > MAXIMUM_IRQ_PIN_NUM) ||
+ (fwspec->param[0] < RESERVED_IRQ_PER_MBIGEN_CHIP))
+ return -EINVAL;
+ else
+ *hwirq = fwspec->param[0];
+
+ /* If there is no valid irq type, just use the default type */
+ if ((fwspec->param[1] == IRQ_TYPE_EDGE_RISING) ||
+ (fwspec->param[1] == IRQ_TYPE_LEVEL_HIGH))
+ *type = fwspec->param[1];
+ else
+ return -EINVAL;
+
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int mbigen_irq_domain_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs,
+ void *args)
+{
+ struct irq_fwspec *fwspec = args;
+ irq_hw_number_t hwirq;
+ unsigned int type;
+ struct mbigen_device *mgn_chip;
+ int i, err;
+
+ err = mbigen_domain_translate(domain, fwspec, &hwirq, &type);
+ if (err)
+ return err;
+
+ err = platform_msi_domain_alloc(domain, virq, nr_irqs);
+ if (err)
+ return err;
+
+ mgn_chip = platform_msi_get_host_data(domain);
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+ &mbigen_irq_chip, mgn_chip->base);
+
+ return 0;
+}
+
+static struct irq_domain_ops mbigen_domain_ops = {
+ .translate = mbigen_domain_translate,
+ .alloc = mbigen_irq_domain_alloc,
+ .free = irq_domain_free_irqs_common,
+};
+
+static int mbigen_device_probe(struct platform_device *pdev)
+{
+ struct mbigen_device *mgn_chip;
+ struct resource *res;
+ struct irq_domain *domain;
+ u32 num_pins;
+
+ mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL);
+ if (!mgn_chip)
+ return -ENOMEM;
+
+ mgn_chip->pdev = pdev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mgn_chip->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mgn_chip->base))
+ return PTR_ERR(mgn_chip->base);
+
+ if (of_property_read_u32(pdev->dev.of_node, "num-pins", &num_pins) < 0) {
+ dev_err(&pdev->dev, "No num-pins property\n");
+ return -EINVAL;
+ }
+
+ domain = platform_msi_create_device_domain(&pdev->dev, num_pins,
+ mbigen_write_msg,
+ &mbigen_domain_ops,
+ mgn_chip);
+
+ if (!domain)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, mgn_chip);
+
+ dev_info(&pdev->dev, "Allocated %d MSIs\n", num_pins);
+
+ return 0;
+}
+
+static const struct of_device_id mbigen_of_match[] = {
+ { .compatible = "hisilicon,mbigen-v2" },
+ { /* END */ }
+};
+MODULE_DEVICE_TABLE(of, mbigen_of_match);
+
+static struct platform_driver mbigen_platform_driver = {
+ .driver = {
+ .name = "Hisilicon MBIGEN-V2",
+ .owner = THIS_MODULE,
+ .of_match_table = mbigen_of_match,
+ },
+ .probe = mbigen_device_probe,
+};
+
+module_platform_driver(mbigen_platform_driver);
+
+MODULE_AUTHOR("Jun Ma <majun258@huawei.com>");
+MODULE_AUTHOR("Yun Wu <wuyun.wu@huawei.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Hisilicon MBI Generator driver");
diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c
index 8587d0f8d8c0..9d1bcfc33e4c 100644
--- a/drivers/irqchip/irq-omap-intc.c
+++ b/drivers/irqchip/irq-omap-intc.c
@@ -47,6 +47,7 @@
#define INTC_ILR0 0x0100
#define ACTIVEIRQ_MASK 0x7f /* omap2/3 active interrupt bits */
+#define SPURIOUSIRQ_MASK (0x1ffffff << 7)
#define INTCPS_NR_ILR_REGS 128
#define INTCPS_NR_MIR_REGS 4
@@ -207,7 +208,6 @@ static int __init omap_alloc_gc_of(struct irq_domain *d, void __iomem *base)
ct = gc->chip_types;
ct->type = IRQ_TYPE_LEVEL_MASK;
- ct->handler = handle_level_irq;
ct->chip.irq_ack = omap_mask_ack_irq;
ct->chip.irq_mask = irq_gc_mask_disable_reg;
@@ -330,11 +330,35 @@ static int __init omap_init_irq(u32 base, struct device_node *node)
static asmlinkage void __exception_irq_entry
omap_intc_handle_irq(struct pt_regs *regs)
{
+ extern unsigned long irq_err_count;
u32 irqnr;
irqnr = intc_readl(INTC_SIR);
+
+ /*
+ * A spurious IRQ can result if interrupt that triggered the
+ * sorting is no longer active during the sorting (10 INTC
+ * functional clock cycles after interrupt assertion). Or a
+ * change in interrupt mask affected the result during sorting
+ * time. There is no special handling required except ignoring
+ * the SIR register value just read and retrying.
+ * See section 6.2.5 of AM335x TRM Literature Number: SPRUH73K
+ *
+ * Many a times, a spurious interrupt situation has been fixed
+ * by adding a flush for the posted write acking the IRQ in
+ * the device driver. Typically, this is going be the device
+ * driver whose interrupt was handled just before the spurious
+ * IRQ occurred. Pay attention to those device drivers if you
+ * run into hitting the spurious IRQ condition below.
+ */
+ if (unlikely((irqnr & SPURIOUSIRQ_MASK) == SPURIOUSIRQ_MASK)) {
+ pr_err_once("%s: spurious irq!\n", __func__);
+ irq_err_count++;
+ omap_ack_irq(NULL);
+ return;
+ }
+
irqnr &= ACTIVEIRQ_MASK;
- WARN_ONCE(!irqnr, "Spurious IRQ ?\n");
handle_domain_irq(domain, irqnr, regs);
}
diff --git a/drivers/irqchip/irq-renesas-h8300h.c b/drivers/irqchip/irq-renesas-h8300h.c
index 6fd30d5ee14d..c378768d75b3 100644
--- a/drivers/irqchip/irq-renesas-h8300h.c
+++ b/drivers/irqchip/irq-renesas-h8300h.c
@@ -21,9 +21,9 @@ static const char ipr_bit[] = {
10, 10, 10, 10, 9, 9, 9, 9,
};
-static void *intc_baseaddr;
+static void __iomem *intc_baseaddr;
-#define IPR ((unsigned long)intc_baseaddr + 6)
+#define IPR (intc_baseaddr + 6)
static void h8300h_disable_irq(struct irq_data *data)
{
@@ -81,8 +81,8 @@ static int __init h8300h_intc_of_init(struct device_node *intc,
BUG_ON(!intc_baseaddr);
/* All interrupt priority low */
- ctrl_outb(0x00, IPR + 0);
- ctrl_outb(0x00, IPR + 1);
+ writeb(0x00, IPR + 0);
+ writeb(0x00, IPR + 1);
domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL);
BUG_ON(!domain);
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index c325806561be..713177d97c7a 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -31,7 +31,6 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/of_device.h>
-#include <linux/platform_data/irq-renesas-intc-irqpin.h>
#include <linux/pm_runtime.h>
#define INTC_IRQPIN_MAX 8 /* maximum 8 interrupts per driver instance */
@@ -75,18 +74,20 @@ struct intc_irqpin_irq {
struct intc_irqpin_priv {
struct intc_irqpin_iomem iomem[INTC_IRQPIN_REG_NR];
struct intc_irqpin_irq irq[INTC_IRQPIN_MAX];
- struct renesas_intc_irqpin_config config;
- unsigned int number_of_irqs;
+ unsigned int sense_bitfield_width;
struct platform_device *pdev;
struct irq_chip irq_chip;
struct irq_domain *irq_domain;
struct clk *clk;
- bool shared_irqs;
+ unsigned shared_irqs:1;
+ unsigned needs_clk:1;
u8 shared_irq_mask;
};
-struct intc_irqpin_irlm_config {
+struct intc_irqpin_config {
unsigned int irlm_bit;
+ unsigned needs_irlm:1;
+ unsigned needs_clk:1;
};
static unsigned long intc_irqpin_read32(void __iomem *iomem)
@@ -171,7 +172,7 @@ static void intc_irqpin_mask_unmask_prio(struct intc_irqpin_priv *p,
static int intc_irqpin_set_sense(struct intc_irqpin_priv *p, int irq, int value)
{
/* The SENSE register is assumed to be 32-bit. */
- int bitfield_width = p->config.sense_bitfield_width;
+ int bitfield_width = p->sense_bitfield_width;
int shift = 32 - (irq + 1) * bitfield_width;
dev_dbg(&p->pdev->dev, "sense irq = %d, mode = %d\n", irq, value);
@@ -361,8 +362,15 @@ static const struct irq_domain_ops intc_irqpin_irq_domain_ops = {
.xlate = irq_domain_xlate_twocell,
};
-static const struct intc_irqpin_irlm_config intc_irqpin_irlm_r8a777x = {
+static const struct intc_irqpin_config intc_irqpin_irlm_r8a777x = {
.irlm_bit = 23, /* ICR0.IRLM0 */
+ .needs_irlm = 1,
+ .needs_clk = 0,
+};
+
+static const struct intc_irqpin_config intc_irqpin_rmobile = {
+ .needs_irlm = 0,
+ .needs_clk = 1,
};
static const struct of_device_id intc_irqpin_dt_ids[] = {
@@ -371,14 +379,18 @@ static const struct of_device_id intc_irqpin_dt_ids[] = {
.data = &intc_irqpin_irlm_r8a777x },
{ .compatible = "renesas,intc-irqpin-r8a7779",
.data = &intc_irqpin_irlm_r8a777x },
+ { .compatible = "renesas,intc-irqpin-r8a7740",
+ .data = &intc_irqpin_rmobile },
+ { .compatible = "renesas,intc-irqpin-sh73a0",
+ .data = &intc_irqpin_rmobile },
{},
};
MODULE_DEVICE_TABLE(of, intc_irqpin_dt_ids);
static int intc_irqpin_probe(struct platform_device *pdev)
{
+ const struct intc_irqpin_config *config = NULL;
struct device *dev = &pdev->dev;
- struct renesas_intc_irqpin_config *pdata = dev->platform_data;
const struct of_device_id *of_id;
struct intc_irqpin_priv *p;
struct intc_irqpin_iomem *i;
@@ -388,6 +400,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
void (*enable_fn)(struct irq_data *d);
void (*disable_fn)(struct irq_data *d);
const char *name = dev_name(dev);
+ bool control_parent;
+ unsigned int nirqs;
int ref_irq;
int ret;
int k;
@@ -399,23 +413,28 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* deal with driver instance configuration */
- if (pdata) {
- memcpy(&p->config, pdata, sizeof(*pdata));
- } else {
- of_property_read_u32(dev->of_node, "sense-bitfield-width",
- &p->config.sense_bitfield_width);
- p->config.control_parent = of_property_read_bool(dev->of_node,
- "control-parent");
- }
- if (!p->config.sense_bitfield_width)
- p->config.sense_bitfield_width = 4; /* default to 4 bits */
+ of_property_read_u32(dev->of_node, "sense-bitfield-width",
+ &p->sense_bitfield_width);
+ control_parent = of_property_read_bool(dev->of_node, "control-parent");
+ if (!p->sense_bitfield_width)
+ p->sense_bitfield_width = 4; /* default to 4 bits */
p->pdev = pdev;
platform_set_drvdata(pdev, p);
+ of_id = of_match_device(intc_irqpin_dt_ids, dev);
+ if (of_id && of_id->data) {
+ config = of_id->data;
+ p->needs_clk = config->needs_clk;
+ }
+
p->clk = devm_clk_get(dev, NULL);
if (IS_ERR(p->clk)) {
- dev_warn(dev, "unable to get clock\n");
+ if (p->needs_clk) {
+ dev_err(dev, "unable to get clock\n");
+ ret = PTR_ERR(p->clk);
+ goto err0;
+ }
p->clk = NULL;
}
@@ -443,8 +462,8 @@ static int intc_irqpin_probe(struct platform_device *pdev)
p->irq[k].requested_irq = irq->start;
}
- p->number_of_irqs = k;
- if (p->number_of_irqs < 1) {
+ nirqs = k;
+ if (nirqs < 1) {
dev_err(dev, "not enough IRQ resources\n");
ret = -EINVAL;
goto err0;
@@ -485,20 +504,16 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* configure "individual IRQ mode" where needed */
- of_id = of_match_device(intc_irqpin_dt_ids, dev);
- if (of_id && of_id->data) {
- const struct intc_irqpin_irlm_config *irlm_config = of_id->data;
-
+ if (config && config->needs_irlm) {
if (io[INTC_IRQPIN_REG_IRLM])
intc_irqpin_read_modify_write(p, INTC_IRQPIN_REG_IRLM,
- irlm_config->irlm_bit,
- 1, 1);
+ config->irlm_bit, 1, 1);
else
dev_warn(dev, "unable to select IRLM mode\n");
}
/* mask all interrupts using priority */
- for (k = 0; k < p->number_of_irqs; k++)
+ for (k = 0; k < nirqs; k++)
intc_irqpin_mask_unmask_prio(p, k, 1);
/* clear all pending interrupts */
@@ -506,16 +521,16 @@ static int intc_irqpin_probe(struct platform_device *pdev)
/* scan for shared interrupt lines */
ref_irq = p->irq[0].requested_irq;
- p->shared_irqs = true;
- for (k = 1; k < p->number_of_irqs; k++) {
+ p->shared_irqs = 1;
+ for (k = 1; k < nirqs; k++) {
if (ref_irq != p->irq[k].requested_irq) {
- p->shared_irqs = false;
+ p->shared_irqs = 0;
break;
}
}
/* use more severe masking method if requested */
- if (p->config.control_parent) {
+ if (control_parent) {
enable_fn = intc_irqpin_irq_enable_force;
disable_fn = intc_irqpin_irq_disable_force;
} else if (!p->shared_irqs) {
@@ -534,9 +549,7 @@ static int intc_irqpin_probe(struct platform_device *pdev)
irq_chip->irq_set_wake = intc_irqpin_irq_set_wake;
irq_chip->flags = IRQCHIP_MASK_ON_SUSPEND;
- p->irq_domain = irq_domain_add_simple(dev->of_node,
- p->number_of_irqs,
- p->config.irq_base,
+ p->irq_domain = irq_domain_add_simple(dev->of_node, nirqs, 0,
&intc_irqpin_irq_domain_ops, p);
if (!p->irq_domain) {
ret = -ENXIO;
@@ -555,7 +568,7 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
} else {
/* request interrupts one by one */
- for (k = 0; k < p->number_of_irqs; k++) {
+ for (k = 0; k < nirqs; k++) {
if (devm_request_irq(dev, p->irq[k].requested_irq,
intc_irqpin_irq_handler, 0, name,
&p->irq[k])) {
@@ -567,17 +580,10 @@ static int intc_irqpin_probe(struct platform_device *pdev)
}
/* unmask all interrupts on prio level */
- for (k = 0; k < p->number_of_irqs; k++)
+ for (k = 0; k < nirqs; k++)
intc_irqpin_mask_unmask_prio(p, k, 0);
- dev_info(dev, "driving %d irqs\n", p->number_of_irqs);
-
- /* warn in case of mismatch if irq base is specified */
- if (p->config.irq_base) {
- if (p->config.irq_base != p->irq[0].domain_irq)
- dev_warn(dev, "irq base mismatch (%d/%d)\n",
- p->config.irq_base, p->irq[0].domain_irq);
- }
+ dev_info(dev, "driving %d irqs\n", nirqs);
return 0;
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 4ef178078e5b..0820f67cc9a7 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -50,6 +50,12 @@ static struct sunxi_sc_nmi_reg_offs sun6i_reg_offs = {
.enable = 0x34,
};
+static struct sunxi_sc_nmi_reg_offs sun9i_reg_offs = {
+ .ctrl = 0x00,
+ .pend = 0x08,
+ .enable = 0x04,
+};
+
static inline void sunxi_sc_nmi_write(struct irq_chip_generic *gc, u32 off,
u32 val)
{
@@ -207,3 +213,10 @@ static int __init sun7i_sc_nmi_irq_init(struct device_node *node,
return sunxi_sc_nmi_irq_init(node, &sun7i_reg_offs);
}
IRQCHIP_DECLARE(sun7i_sc_nmi, "allwinner,sun7i-a20-sc-nmi", sun7i_sc_nmi_irq_init);
+
+static int __init sun9i_nmi_irq_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return sunxi_sc_nmi_irq_init(node, &sun9i_reg_offs);
+}
+IRQCHIP_DECLARE(sun9i_nmi, "allwinner,sun9i-a80-nmi", sun9i_nmi_irq_init);
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
new file mode 100644
index 000000000000..4192bdcd2734
--- /dev/null
+++ b/drivers/irqchip/irq-ts4800.c
@@ -0,0 +1,163 @@
+/*
+ * Multiplexed-IRQs driver for TS-4800's FPGA
+ *
+ * Copyright (c) 2015 - Savoir-faire Linux
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#define IRQ_MASK 0x4
+#define IRQ_STATUS 0x8
+
+struct ts4800_irq_data {
+ void __iomem *base;
+ struct irq_domain *domain;
+ struct irq_chip irq_chip;
+};
+
+static void ts4800_irq_mask(struct irq_data *d)
+{
+ struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+ u16 reg = readw(data->base + IRQ_MASK);
+ u16 mask = 1 << d->hwirq;
+
+ writew(reg | mask, data->base + IRQ_MASK);
+}
+
+static void ts4800_irq_unmask(struct irq_data *d)
+{
+ struct ts4800_irq_data *data = irq_data_get_irq_chip_data(d);
+ u16 reg = readw(data->base + IRQ_MASK);
+ u16 mask = 1 << d->hwirq;
+
+ writew(reg & ~mask, data->base + IRQ_MASK);
+}
+
+static int ts4800_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct ts4800_irq_data *data = d->host_data;
+
+ irq_set_chip_and_handler(irq, &data->irq_chip, handle_simple_irq);
+ irq_set_chip_data(irq, data);
+ irq_set_noprobe(irq);
+
+ return 0;
+}
+
+struct irq_domain_ops ts4800_ic_ops = {
+ .map = ts4800_irqdomain_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static void ts4800_ic_chained_handle_irq(struct irq_desc *desc)
+{
+ struct ts4800_irq_data *data = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ u16 status = readw(data->base + IRQ_STATUS);
+
+ chained_irq_enter(chip, desc);
+
+ if (unlikely(status == 0)) {
+ handle_bad_irq(desc);
+ goto out;
+ }
+
+ do {
+ unsigned int bit = __ffs(status);
+ int irq = irq_find_mapping(data->domain, bit);
+
+ status &= ~(1 << bit);
+ generic_handle_irq(irq);
+ } while (status);
+
+out:
+ chained_irq_exit(chip, desc);
+}
+
+static int ts4800_ic_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct ts4800_irq_data *data;
+ struct irq_chip *irq_chip;
+ struct resource *res;
+ int parent_irq;
+
+ data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(data->base))
+ return PTR_ERR(data->base);
+
+ writew(0xFFFF, data->base + IRQ_MASK);
+
+ parent_irq = irq_of_parse_and_map(node, 0);
+ if (!parent_irq) {
+ dev_err(&pdev->dev, "failed to get parent IRQ\n");
+ return -EINVAL;
+ }
+
+ irq_chip = &data->irq_chip;
+ irq_chip->name = dev_name(&pdev->dev);
+ irq_chip->irq_mask = ts4800_irq_mask;
+ irq_chip->irq_unmask = ts4800_irq_unmask;
+
+ data->domain = irq_domain_add_linear(node, 8, &ts4800_ic_ops, data);
+ if (!data->domain) {
+ dev_err(&pdev->dev, "cannot add IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ irq_set_chained_handler_and_data(parent_irq,
+ ts4800_ic_chained_handle_irq, data);
+
+ platform_set_drvdata(pdev, data);
+
+ return 0;
+}
+
+static int ts4800_ic_remove(struct platform_device *pdev)
+{
+ struct ts4800_irq_data *data = platform_get_drvdata(pdev);
+
+ irq_domain_remove(data->domain);
+
+ return 0;
+}
+
+static const struct of_device_id ts4800_ic_of_match[] = {
+ { .compatible = "technologic,ts4800-irqc", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ts4800_ic_of_match);
+
+static struct platform_driver ts4800_ic_driver = {
+ .probe = ts4800_ic_probe,
+ .remove = ts4800_ic_remove,
+ .driver = {
+ .name = "ts4800-irqc",
+ .of_match_table = ts4800_ic_of_match,
+ },
+};
+module_platform_driver(ts4800_ic_driver);
+
+MODULE_AUTHOR("Damien Riegel <damien.riegel@savoirfairelinux.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:ts4800_irqc");
diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c
index 4c48fa88a03d..cb9d8ec37507 100644
--- a/drivers/irqchip/irq-zevio.c
+++ b/drivers/irqchip/irq-zevio.c
@@ -43,8 +43,7 @@ static void __iomem *zevio_irq_io;
static void zevio_irq_ack(struct irq_data *irqd)
{
struct irq_chip_generic *gc = irq_data_get_irq_chip_data(irqd);
- struct irq_chip_regs *regs =
- &container_of(irqd->chip, struct irq_chip_type, chip)->regs;
+ struct irq_chip_regs *regs = &irq_data_get_chip_type(irqd)->regs;
readl(gc->reg_base + regs->ack);
}
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index d8486168415a..5914263090fc 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -171,11 +171,7 @@ static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
static inline int mmc_get_devidx(struct gendisk *disk)
{
- int devmaj = MAJOR(disk_devt(disk));
- int devidx = MINOR(disk_devt(disk)) / perdev_minors;
-
- if (!devmaj)
- devidx = disk->first_minor / perdev_minors;
+ int devidx = disk->first_minor / perdev_minors;
return devidx;
}
@@ -344,7 +340,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
struct mmc_blk_ioc_data *idata;
int err;
- idata = kzalloc(sizeof(*idata), GFP_KERNEL);
+ idata = kmalloc(sizeof(*idata), GFP_KERNEL);
if (!idata) {
err = -ENOMEM;
goto out;
@@ -364,7 +360,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
if (!idata->buf_bytes)
return idata;
- idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL);
+ idata->buf = kmalloc(idata->buf_bytes, GFP_KERNEL);
if (!idata->buf) {
err = -ENOMEM;
goto idata_err;
@@ -2244,6 +2240,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
md->disk->queue = md->queue.queue;
md->disk->driverfs_dev = parent;
set_disk_ro(md->disk, md->read_only || default_ro);
+ md->disk->flags = GENHD_FL_EXT_DEVT;
if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
md->disk->flags |= GENHD_FL_NO_PART_SCAN;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 972ff844cf5a..4bc48f10452f 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -349,6 +349,8 @@ int mmc_add_card(struct mmc_card *card)
card->dev.of_node = mmc_of_find_child_device(card->host, 0);
+ device_enable_async_suspend(&card->dev);
+
ret = device_add(&card->dev);
if (ret)
return ret;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5ae89e48fd85..f95d41ffc766 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -55,7 +55,6 @@
*/
#define MMC_BKOPS_MAX_TIMEOUT (4 * 60 * 1000) /* max time to wait in ms */
-static struct workqueue_struct *workqueue;
static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
/*
@@ -66,21 +65,16 @@ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
bool use_spi_crc = 1;
module_param(use_spi_crc, bool, 0);
-/*
- * Internal function. Schedule delayed work in the MMC work queue.
- */
static int mmc_schedule_delayed_work(struct delayed_work *work,
unsigned long delay)
{
- return queue_delayed_work(workqueue, work, delay);
-}
-
-/*
- * Internal function. Flush all scheduled work from the MMC work queue.
- */
-static void mmc_flush_scheduled_work(void)
-{
- flush_workqueue(workqueue);
+ /*
+ * We use the system_freezable_wq, because of two reasons.
+ * First, it allows several works (not the same work item) to be
+ * executed simultaneously. Second, the queue becomes frozen when
+ * userspace becomes frozen during system PM.
+ */
+ return queue_delayed_work(system_freezable_wq, work, delay);
}
#ifdef CONFIG_FAIL_MMC_REQUEST
@@ -1485,7 +1479,7 @@ int mmc_regulator_get_supply(struct mmc_host *mmc)
if (IS_ERR(mmc->supply.vmmc)) {
if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
return -EPROBE_DEFER;
- dev_info(dev, "No vmmc regulator found\n");
+ dev_dbg(dev, "No vmmc regulator found\n");
} else {
ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
if (ret > 0)
@@ -1497,7 +1491,7 @@ int mmc_regulator_get_supply(struct mmc_host *mmc)
if (IS_ERR(mmc->supply.vqmmc)) {
if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
return -EPROBE_DEFER;
- dev_info(dev, "No vqmmc regulator found\n");
+ dev_dbg(dev, "No vqmmc regulator found\n");
}
return 0;
@@ -2476,15 +2470,20 @@ static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
* sdio_reset sends CMD52 to reset card. Since we do not know
* if the card is being re-initialized, just send it. CMD52
* should be ignored by SD/eMMC cards.
+ * Skip it if we already know that we do not support SDIO commands
*/
- sdio_reset(host);
+ if (!(host->caps2 & MMC_CAP2_NO_SDIO))
+ sdio_reset(host);
+
mmc_go_idle(host);
mmc_send_if_cond(host, host->ocr_avail);
/* Order's important: probe SDIO, then SD, then MMC */
- if (!mmc_attach_sdio(host))
- return 0;
+ if (!(host->caps2 & MMC_CAP2_NO_SDIO))
+ if (!mmc_attach_sdio(host))
+ return 0;
+
if (!mmc_attach_sd(host))
return 0;
if (!mmc_attach_mmc(host))
@@ -2498,9 +2497,6 @@ int _mmc_detect_card_removed(struct mmc_host *host)
{
int ret;
- if (host->caps & MMC_CAP_NONREMOVABLE)
- return 0;
-
if (!host->card || mmc_card_removed(host->card))
return 1;
@@ -2536,6 +2532,9 @@ int mmc_detect_card_removed(struct mmc_host *host)
if (!card)
return 1;
+ if (host->caps & MMC_CAP_NONREMOVABLE)
+ return 0;
+
ret = mmc_card_removed(card);
/*
* The card will be considered unchanged unless we have been asked to
@@ -2567,11 +2566,6 @@ void mmc_rescan(struct work_struct *work)
container_of(work, struct mmc_host, detect.work);
int i;
- if (host->trigger_card_event && host->ops->card_event) {
- host->ops->card_event(host);
- host->trigger_card_event = false;
- }
-
if (host->rescan_disable)
return;
@@ -2580,6 +2574,13 @@ void mmc_rescan(struct work_struct *work)
return;
host->rescan_entered = 1;
+ if (host->trigger_card_event && host->ops->card_event) {
+ mmc_claim_host(host);
+ host->ops->card_event(host);
+ mmc_release_host(host);
+ host->trigger_card_event = false;
+ }
+
mmc_bus_get(host);
/*
@@ -2611,15 +2612,14 @@ void mmc_rescan(struct work_struct *work)
*/
mmc_bus_put(host);
+ mmc_claim_host(host);
if (!(host->caps & MMC_CAP_NONREMOVABLE) && host->ops->get_cd &&
host->ops->get_cd(host) == 0) {
- mmc_claim_host(host);
mmc_power_off(host);
mmc_release_host(host);
goto out;
}
- mmc_claim_host(host);
for (i = 0; i < ARRAY_SIZE(freqs); i++) {
if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
break;
@@ -2663,7 +2663,6 @@ void mmc_stop_host(struct mmc_host *host)
host->rescan_disable = 1;
cancel_delayed_work_sync(&host->detect);
- mmc_flush_scheduled_work();
/* clear pm flags now and let card drivers set them as needed */
host->pm_flags = 0;
@@ -2759,14 +2758,13 @@ int mmc_flush_cache(struct mmc_card *card)
}
EXPORT_SYMBOL(mmc_flush_cache);
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
/* Do the card removal on suspend if card is assumed removeable
* Do that in pm notifier while userspace isn't yet frozen, so we will be able
to sync the card.
*/
-int mmc_pm_notify(struct notifier_block *notify_block,
- unsigned long mode, void *unused)
+static int mmc_pm_notify(struct notifier_block *notify_block,
+ unsigned long mode, void *unused)
{
struct mmc_host *host = container_of(
notify_block, struct mmc_host, pm_notify);
@@ -2813,6 +2811,17 @@ int mmc_pm_notify(struct notifier_block *notify_block,
return 0;
}
+
+void mmc_register_pm_notifier(struct mmc_host *host)
+{
+ host->pm_notify.notifier_call = mmc_pm_notify;
+ register_pm_notifier(&host->pm_notify);
+}
+
+void mmc_unregister_pm_notifier(struct mmc_host *host)
+{
+ unregister_pm_notifier(&host->pm_notify);
+}
#endif
/**
@@ -2836,13 +2845,9 @@ static int __init mmc_init(void)
{
int ret;
- workqueue = alloc_ordered_workqueue("kmmcd", 0);
- if (!workqueue)
- return -ENOMEM;
-
ret = mmc_register_bus();
if (ret)
- goto destroy_workqueue;
+ return ret;
ret = mmc_register_host_class();
if (ret)
@@ -2858,9 +2863,6 @@ unregister_host_class:
mmc_unregister_host_class();
unregister_bus:
mmc_unregister_bus();
-destroy_workqueue:
- destroy_workqueue(workqueue);
-
return ret;
}
@@ -2869,7 +2871,6 @@ static void __exit mmc_exit(void)
sdio_unregister_bus();
mmc_unregister_host_class();
mmc_unregister_bus();
- destroy_workqueue(workqueue);
}
subsys_initcall(mmc_init);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 09241e56d628..0fa86a2afc26 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -90,5 +90,13 @@ int mmc_execute_tuning(struct mmc_card *card);
int mmc_hs200_to_hs400(struct mmc_card *card);
int mmc_hs400_to_hs200(struct mmc_card *card);
+#ifdef CONFIG_PM_SLEEP
+void mmc_register_pm_notifier(struct mmc_host *host);
+void mmc_unregister_pm_notifier(struct mmc_host *host);
+#else
+static inline void mmc_register_pm_notifier(struct mmc_host *host) { }
+static inline void mmc_unregister_pm_notifier(struct mmc_host *host) { }
+#endif
+
#endif
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index da950c44204d..0aecd5c00b86 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -21,7 +21,6 @@
#include <linux/export.h>
#include <linux/leds.h>
#include <linux/slab.h>
-#include <linux/suspend.h>
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
@@ -275,7 +274,8 @@ int mmc_of_parse(struct mmc_host *host)
host->caps2 |= MMC_CAP2_FULL_PWR_CYCLE;
if (of_property_read_bool(np, "keep-power-in-suspend"))
host->pm_caps |= MMC_PM_KEEP_POWER;
- if (of_property_read_bool(np, "enable-sdio-wakeup"))
+ if (of_property_read_bool(np, "wakeup-source") ||
+ of_property_read_bool(np, "enable-sdio-wakeup")) /* legacy */
host->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
if (of_property_read_bool(np, "mmc-ddr-1_8v"))
host->caps |= MMC_CAP_1_8V_DDR;
@@ -348,9 +348,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
spin_lock_init(&host->lock);
init_waitqueue_head(&host->wq);
INIT_DELAYED_WORK(&host->detect, mmc_rescan);
-#ifdef CONFIG_PM
- host->pm_notify.notifier_call = mmc_pm_notify;
-#endif
setup_timer(&host->retune_timer, mmc_retune_timer, (unsigned long)host);
/*
@@ -395,7 +392,7 @@ int mmc_add_host(struct mmc_host *host)
#endif
mmc_start_host(host);
- register_pm_notifier(&host->pm_notify);
+ mmc_register_pm_notifier(host);
return 0;
}
@@ -412,7 +409,7 @@ EXPORT_SYMBOL(mmc_add_host);
*/
void mmc_remove_host(struct mmc_host *host)
{
- unregister_pm_notifier(&host->pm_notify);
+ mmc_unregister_pm_notifier(host);
mmc_stop_host(host);
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 3a9a79ec4343..bf49e44571f2 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1076,8 +1076,7 @@ static int mmc_select_hs400(struct mmc_card *card)
mmc_set_clock(host, max_dtr);
/* Switch card to HS mode */
- val = EXT_CSD_TIMING_HS |
- card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
+ val = EXT_CSD_TIMING_HS;
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
EXT_CSD_HS_TIMING, val,
card->ext_csd.generic_cmd6_time,
@@ -1160,8 +1159,7 @@ int mmc_hs400_to_hs200(struct mmc_card *card)
mmc_set_clock(host, max_dtr);
/* Switch HS400 to HS DDR */
- val = EXT_CSD_TIMING_HS |
- card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
+ val = EXT_CSD_TIMING_HS;
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING,
val, card->ext_csd.generic_cmd6_time,
true, send_status, true);
@@ -1907,16 +1905,8 @@ static int mmc_shutdown(struct mmc_host *host)
*/
static int mmc_resume(struct mmc_host *host)
{
- int err = 0;
-
- if (!(host->caps & MMC_CAP_RUNTIME_RESUME)) {
- err = _mmc_resume(host);
- pm_runtime_set_active(&host->card->dev);
- pm_runtime_mark_last_busy(&host->card->dev);
- }
pm_runtime_enable(&host->card->dev);
-
- return err;
+ return 0;
}
/*
@@ -1944,12 +1934,9 @@ static int mmc_runtime_resume(struct mmc_host *host)
{
int err;
- if (!(host->caps & (MMC_CAP_AGGRESSIVE_PM | MMC_CAP_RUNTIME_RESUME)))
- return 0;
-
err = _mmc_resume(host);
- if (err)
- pr_err("%s: error %d doing aggressive resume\n",
+ if (err && err != -ENOMEDIUM)
+ pr_err("%s: error %d doing runtime resume\n",
mmc_hostname(host), err);
return 0;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 1f444269ebbe..2c90635c89af 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -489,6 +489,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
unsigned long timeout;
u32 status = 0;
bool use_r1b_resp = use_busy_signal;
+ bool expired = false;
mmc_retune_hold(host);
@@ -545,6 +546,12 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
if (send_status) {
+ /*
+ * Due to the possibility of being preempted after
+ * sending the status command, check the expiration
+ * time first.
+ */
+ expired = time_after(jiffies, timeout);
err = __mmc_send_status(card, &status, ignore_crc);
if (err)
goto out;
@@ -565,7 +572,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
}
/* Timeout if the device never leaves the program state. */
- if (time_after(jiffies, timeout)) {
+ if (expired && R1_CURRENT_STATE(status) == R1_STATE_PRG) {
pr_err("%s: Card stuck in programming state! %s\n",
mmc_hostname(host), __func__);
err = -ETIMEDOUT;
diff --git a/drivers/mmc/core/pwrseq.h b/drivers/mmc/core/pwrseq.h
index 096da48c6a7e..133de0426687 100644
--- a/drivers/mmc/core/pwrseq.h
+++ b/drivers/mmc/core/pwrseq.h
@@ -16,7 +16,7 @@ struct mmc_pwrseq_ops {
};
struct mmc_pwrseq {
- struct mmc_pwrseq_ops *ops;
+ const struct mmc_pwrseq_ops *ops;
};
#ifdef CONFIG_OF
diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c
index ad4f94ec7e8d..4a82bc77fe49 100644
--- a/drivers/mmc/core/pwrseq_emmc.c
+++ b/drivers/mmc/core/pwrseq_emmc.c
@@ -51,7 +51,7 @@ static void mmc_pwrseq_emmc_free(struct mmc_host *host)
kfree(pwrseq);
}
-static struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
+static const struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
.post_power_on = mmc_pwrseq_emmc_reset,
.free = mmc_pwrseq_emmc_free,
};
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index d10538bb5e07..2b16263458af 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -87,7 +87,7 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
kfree(pwrseq);
}
-static struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
+static const struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
.pre_power_on = mmc_pwrseq_simple_pre_power_on,
.post_power_on = mmc_pwrseq_simple_post_power_on,
.power_off = mmc_pwrseq_simple_power_off,
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 141eaa923e18..f2b164b214ae 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1128,16 +1128,8 @@ out:
*/
static int mmc_sd_resume(struct mmc_host *host)
{
- int err = 0;
-
- if (!(host->caps & MMC_CAP_RUNTIME_RESUME)) {
- err = _mmc_sd_resume(host);
- pm_runtime_set_active(&host->card->dev);
- pm_runtime_mark_last_busy(&host->card->dev);
- }
pm_runtime_enable(&host->card->dev);
-
- return err;
+ return 0;
}
/*
@@ -1165,12 +1157,9 @@ static int mmc_sd_runtime_resume(struct mmc_host *host)
{
int err;
- if (!(host->caps & (MMC_CAP_AGGRESSIVE_PM | MMC_CAP_RUNTIME_RESUME)))
- return 0;
-
err = _mmc_sd_resume(host);
- if (err)
- pr_err("%s: error %d doing aggressive resume\n",
+ if (err && err != -ENOMEDIUM)
+ pr_err("%s: error %d doing runtime resume\n",
mmc_hostname(host), err);
return 0;
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 16d838e6d623..d61ba1a0495e 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -630,7 +630,7 @@ try_again:
*/
if (!powered_resume && (rocr & ocr & R4_18V_PRESENT)) {
err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
- ocr);
+ ocr_card);
if (err == -EAGAIN) {
sdio_reset(host);
mmc_go_idle(host);
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 7e327a6dd53d..86f5b3223aae 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -322,6 +322,7 @@ int sdio_add_func(struct sdio_func *func)
sdio_set_of_node(func);
sdio_acpi_set_handle(func);
+ device_enable_async_suspend(&func->dev);
ret = device_add(&func->dev);
if (ret == 0)
sdio_func_set_present(func);
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 1dee533634c9..1526b8a10b09 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -455,6 +455,7 @@ config MMC_TIFM_SD
config MMC_MVSDIO
tristate "Marvell MMC/SD/SDIO host driver"
depends on PLAT_ORION
+ depends on OF
---help---
This selects the Marvell SDIO host driver.
SDIO may currently be found on the Kirkwood 88F6281 and 88F6192
diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
deleted file mode 100644
index 0aa44e679df4..000000000000
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Atmel MultiMedia Card Interface driver
- *
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Superset of MCI IP registers integrated in Atmel AVR32 and AT91 Processors
- * Registers and bitfields marked with [2] are only available in MCI2
- */
-
-#ifndef __DRIVERS_MMC_ATMEL_MCI_H__
-#define __DRIVERS_MMC_ATMEL_MCI_H__
-
-/* MCI Register Definitions */
-#define ATMCI_CR 0x0000 /* Control */
-# define ATMCI_CR_MCIEN ( 1 << 0) /* MCI Enable */
-# define ATMCI_CR_MCIDIS ( 1 << 1) /* MCI Disable */
-# define ATMCI_CR_PWSEN ( 1 << 2) /* Power Save Enable */
-# define ATMCI_CR_PWSDIS ( 1 << 3) /* Power Save Disable */
-# define ATMCI_CR_SWRST ( 1 << 7) /* Software Reset */
-#define ATMCI_MR 0x0004 /* Mode */
-# define ATMCI_MR_CLKDIV(x) ((x) << 0) /* Clock Divider */
-# define ATMCI_MR_PWSDIV(x) ((x) << 8) /* Power Saving Divider */
-# define ATMCI_MR_RDPROOF ( 1 << 11) /* Read Proof */
-# define ATMCI_MR_WRPROOF ( 1 << 12) /* Write Proof */
-# define ATMCI_MR_PDCFBYTE ( 1 << 13) /* Force Byte Transfer */
-# define ATMCI_MR_PDCPADV ( 1 << 14) /* Padding Value */
-# define ATMCI_MR_PDCMODE ( 1 << 15) /* PDC-oriented Mode */
-# define ATMCI_MR_CLKODD(x) ((x) << 16) /* LSB of Clock Divider */
-#define ATMCI_DTOR 0x0008 /* Data Timeout */
-# define ATMCI_DTOCYC(x) ((x) << 0) /* Data Timeout Cycles */
-# define ATMCI_DTOMUL(x) ((x) << 4) /* Data Timeout Multiplier */
-#define ATMCI_SDCR 0x000c /* SD Card / SDIO */
-# define ATMCI_SDCSEL_SLOT_A ( 0 << 0) /* Select SD slot A */
-# define ATMCI_SDCSEL_SLOT_B ( 1 << 0) /* Select SD slot A */
-# define ATMCI_SDCSEL_MASK ( 3 << 0)
-# define ATMCI_SDCBUS_1BIT ( 0 << 6) /* 1-bit data bus */
-# define ATMCI_SDCBUS_4BIT ( 2 << 6) /* 4-bit data bus */
-# define ATMCI_SDCBUS_8BIT ( 3 << 6) /* 8-bit data bus[2] */
-# define ATMCI_SDCBUS_MASK ( 3 << 6)
-#define ATMCI_ARGR 0x0010 /* Command Argument */
-#define ATMCI_CMDR 0x0014 /* Command */
-# define ATMCI_CMDR_CMDNB(x) ((x) << 0) /* Command Opcode */
-# define ATMCI_CMDR_RSPTYP_NONE ( 0 << 6) /* No response */
-# define ATMCI_CMDR_RSPTYP_48BIT ( 1 << 6) /* 48-bit response */
-# define ATMCI_CMDR_RSPTYP_136BIT ( 2 << 6) /* 136-bit response */
-# define ATMCI_CMDR_SPCMD_INIT ( 1 << 8) /* Initialization command */
-# define ATMCI_CMDR_SPCMD_SYNC ( 2 << 8) /* Synchronized command */
-# define ATMCI_CMDR_SPCMD_INT ( 4 << 8) /* Interrupt command */
-# define ATMCI_CMDR_SPCMD_INTRESP ( 5 << 8) /* Interrupt response */
-# define ATMCI_CMDR_OPDCMD ( 1 << 11) /* Open Drain */
-# define ATMCI_CMDR_MAXLAT_5CYC ( 0 << 12) /* Max latency 5 cycles */
-# define ATMCI_CMDR_MAXLAT_64CYC ( 1 << 12) /* Max latency 64 cycles */
-# define ATMCI_CMDR_START_XFER ( 1 << 16) /* Start data transfer */
-# define ATMCI_CMDR_STOP_XFER ( 2 << 16) /* Stop data transfer */
-# define ATMCI_CMDR_TRDIR_WRITE ( 0 << 18) /* Write data */
-# define ATMCI_CMDR_TRDIR_READ ( 1 << 18) /* Read data */
-# define ATMCI_CMDR_BLOCK ( 0 << 19) /* Single-block transfer */
-# define ATMCI_CMDR_MULTI_BLOCK ( 1 << 19) /* Multi-block transfer */
-# define ATMCI_CMDR_STREAM ( 2 << 19) /* MMC Stream transfer */
-# define ATMCI_CMDR_SDIO_BYTE ( 4 << 19) /* SDIO Byte transfer */
-# define ATMCI_CMDR_SDIO_BLOCK ( 5 << 19) /* SDIO Block transfer */
-# define ATMCI_CMDR_SDIO_SUSPEND ( 1 << 24) /* SDIO Suspend Command */
-# define ATMCI_CMDR_SDIO_RESUME ( 2 << 24) /* SDIO Resume Command */
-#define ATMCI_BLKR 0x0018 /* Block */
-# define ATMCI_BCNT(x) ((x) << 0) /* Data Block Count */
-# define ATMCI_BLKLEN(x) ((x) << 16) /* Data Block Length */
-#define ATMCI_CSTOR 0x001c /* Completion Signal Timeout[2] */
-# define ATMCI_CSTOCYC(x) ((x) << 0) /* CST cycles */
-# define ATMCI_CSTOMUL(x) ((x) << 4) /* CST multiplier */
-#define ATMCI_RSPR 0x0020 /* Response 0 */
-#define ATMCI_RSPR1 0x0024 /* Response 1 */
-#define ATMCI_RSPR2 0x0028 /* Response 2 */
-#define ATMCI_RSPR3 0x002c /* Response 3 */
-#define ATMCI_RDR 0x0030 /* Receive Data */
-#define ATMCI_TDR 0x0034 /* Transmit Data */
-#define ATMCI_SR 0x0040 /* Status */
-#define ATMCI_IER 0x0044 /* Interrupt Enable */
-#define ATMCI_IDR 0x0048 /* Interrupt Disable */
-#define ATMCI_IMR 0x004c /* Interrupt Mask */
-# define ATMCI_CMDRDY ( 1 << 0) /* Command Ready */
-# define ATMCI_RXRDY ( 1 << 1) /* Receiver Ready */
-# define ATMCI_TXRDY ( 1 << 2) /* Transmitter Ready */
-# define ATMCI_BLKE ( 1 << 3) /* Data Block Ended */
-# define ATMCI_DTIP ( 1 << 4) /* Data Transfer In Progress */
-# define ATMCI_NOTBUSY ( 1 << 5) /* Data Not Busy */
-# define ATMCI_ENDRX ( 1 << 6) /* End of RX Buffer */
-# define ATMCI_ENDTX ( 1 << 7) /* End of TX Buffer */
-# define ATMCI_SDIOIRQA ( 1 << 8) /* SDIO IRQ in slot A */
-# define ATMCI_SDIOIRQB ( 1 << 9) /* SDIO IRQ in slot B */
-# define ATMCI_SDIOWAIT ( 1 << 12) /* SDIO Read Wait Operation Status */
-# define ATMCI_CSRCV ( 1 << 13) /* CE-ATA Completion Signal Received */
-# define ATMCI_RXBUFF ( 1 << 14) /* RX Buffer Full */
-# define ATMCI_TXBUFE ( 1 << 15) /* TX Buffer Empty */
-# define ATMCI_RINDE ( 1 << 16) /* Response Index Error */
-# define ATMCI_RDIRE ( 1 << 17) /* Response Direction Error */
-# define ATMCI_RCRCE ( 1 << 18) /* Response CRC Error */
-# define ATMCI_RENDE ( 1 << 19) /* Response End Bit Error */
-# define ATMCI_RTOE ( 1 << 20) /* Response Time-Out Error */
-# define ATMCI_DCRCE ( 1 << 21) /* Data CRC Error */
-# define ATMCI_DTOE ( 1 << 22) /* Data Time-Out Error */
-# define ATMCI_CSTOE ( 1 << 23) /* Completion Signal Time-out Error */
-# define ATMCI_BLKOVRE ( 1 << 24) /* DMA Block Overrun Error */
-# define ATMCI_DMADONE ( 1 << 25) /* DMA Transfer Done */
-# define ATMCI_FIFOEMPTY ( 1 << 26) /* FIFO Empty Flag */
-# define ATMCI_XFRDONE ( 1 << 27) /* Transfer Done Flag */
-# define ATMCI_ACKRCV ( 1 << 28) /* Boot Operation Acknowledge Received */
-# define ATMCI_ACKRCVE ( 1 << 29) /* Boot Operation Acknowledge Error */
-# define ATMCI_OVRE ( 1 << 30) /* RX Overrun Error */
-# define ATMCI_UNRE ( 1 << 31) /* TX Underrun Error */
-#define ATMCI_DMA 0x0050 /* DMA Configuration[2] */
-# define ATMCI_DMA_OFFSET(x) ((x) << 0) /* DMA Write Buffer Offset */
-# define ATMCI_DMA_CHKSIZE(x) ((x) << 4) /* DMA Channel Read and Write Chunk Size */
-# define ATMCI_DMAEN ( 1 << 8) /* DMA Hardware Handshaking Enable */
-#define ATMCI_CFG 0x0054 /* Configuration[2] */
-# define ATMCI_CFG_FIFOMODE_1DATA ( 1 << 0) /* MCI Internal FIFO control mode */
-# define ATMCI_CFG_FERRCTRL_COR ( 1 << 4) /* Flow Error flag reset control mode */
-# define ATMCI_CFG_HSMODE ( 1 << 8) /* High Speed Mode */
-# define ATMCI_CFG_LSYNC ( 1 << 12) /* Synchronize on the last block */
-#define ATMCI_WPMR 0x00e4 /* Write Protection Mode[2] */
-# define ATMCI_WP_EN ( 1 << 0) /* WP Enable */
-# define ATMCI_WP_KEY (0x4d4349 << 8) /* WP Key */
-#define ATMCI_WPSR 0x00e8 /* Write Protection Status[2] */
-# define ATMCI_GET_WP_VS(x) ((x) & 0x0f)
-# define ATMCI_GET_WP_VSRC(x) (((x) >> 8) & 0xffff)
-#define ATMCI_VERSION 0x00FC /* Version */
-#define ATMCI_FIFO_APERTURE 0x0200 /* FIFO Aperture[2] */
-
-/* This is not including the FIFO Aperture on MCI2 */
-#define ATMCI_REGS_SIZE 0x100
-
-/* Register access macros */
-#ifdef CONFIG_AVR32
-#define atmci_readl(port, reg) \
- __raw_readl((port)->regs + reg)
-#define atmci_writel(port, reg, value) \
- __raw_writel((value), (port)->regs + reg)
-#else
-#define atmci_readl(port, reg) \
- readl_relaxed((port)->regs + reg)
-#define atmci_writel(port, reg, value) \
- writel_relaxed((value), (port)->regs + reg)
-#endif
-
-/* On AVR chips the Peripheral DMA Controller is not connected to MCI. */
-#ifdef CONFIG_AVR32
-# define ATMCI_PDC_CONNECTED 0
-#else
-# define ATMCI_PDC_CONNECTED 1
-#endif
-
-/*
- * Fix sconfig's burst size according to atmel MCI. We need to convert them as:
- * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
- *
- * This can be done by finding most significant bit set.
- */
-static inline unsigned int atmci_convert_chksize(unsigned int maxburst)
-{
- if (maxburst > 1)
- return fls(maxburst) - 2;
- else
- return 0;
-}
-
-#endif /* __DRIVERS_MMC_ATMEL_MCI_H__ */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index bf62e429f7fc..a36ebdae2388 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -44,7 +44,141 @@
#include <asm/io.h>
#include <asm/unaligned.h>
-#include "atmel-mci-regs.h"
+/*
+ * Superset of MCI IP registers integrated in Atmel AVR32 and AT91 Processors
+ * Registers and bitfields marked with [2] are only available in MCI2
+ */
+
+/* MCI Register Definitions */
+#define ATMCI_CR 0x0000 /* Control */
+#define ATMCI_CR_MCIEN BIT(0) /* MCI Enable */
+#define ATMCI_CR_MCIDIS BIT(1) /* MCI Disable */
+#define ATMCI_CR_PWSEN BIT(2) /* Power Save Enable */
+#define ATMCI_CR_PWSDIS BIT(3) /* Power Save Disable */
+#define ATMCI_CR_SWRST BIT(7) /* Software Reset */
+#define ATMCI_MR 0x0004 /* Mode */
+#define ATMCI_MR_CLKDIV(x) ((x) << 0) /* Clock Divider */
+#define ATMCI_MR_PWSDIV(x) ((x) << 8) /* Power Saving Divider */
+#define ATMCI_MR_RDPROOF BIT(11) /* Read Proof */
+#define ATMCI_MR_WRPROOF BIT(12) /* Write Proof */
+#define ATMCI_MR_PDCFBYTE BIT(13) /* Force Byte Transfer */
+#define ATMCI_MR_PDCPADV BIT(14) /* Padding Value */
+#define ATMCI_MR_PDCMODE BIT(15) /* PDC-oriented Mode */
+#define ATMCI_MR_CLKODD(x) ((x) << 16) /* LSB of Clock Divider */
+#define ATMCI_DTOR 0x0008 /* Data Timeout */
+#define ATMCI_DTOCYC(x) ((x) << 0) /* Data Timeout Cycles */
+#define ATMCI_DTOMUL(x) ((x) << 4) /* Data Timeout Multiplier */
+#define ATMCI_SDCR 0x000c /* SD Card / SDIO */
+#define ATMCI_SDCSEL_SLOT_A (0 << 0) /* Select SD slot A */
+#define ATMCI_SDCSEL_SLOT_B (1 << 0) /* Select SD slot A */
+#define ATMCI_SDCSEL_MASK (3 << 0)
+#define ATMCI_SDCBUS_1BIT (0 << 6) /* 1-bit data bus */
+#define ATMCI_SDCBUS_4BIT (2 << 6) /* 4-bit data bus */
+#define ATMCI_SDCBUS_8BIT (3 << 6) /* 8-bit data bus[2] */
+#define ATMCI_SDCBUS_MASK (3 << 6)
+#define ATMCI_ARGR 0x0010 /* Command Argument */
+#define ATMCI_CMDR 0x0014 /* Command */
+#define ATMCI_CMDR_CMDNB(x) ((x) << 0) /* Command Opcode */
+#define ATMCI_CMDR_RSPTYP_NONE (0 << 6) /* No response */
+#define ATMCI_CMDR_RSPTYP_48BIT (1 << 6) /* 48-bit response */
+#define ATMCI_CMDR_RSPTYP_136BIT (2 << 6) /* 136-bit response */
+#define ATMCI_CMDR_SPCMD_INIT (1 << 8) /* Initialization command */
+#define ATMCI_CMDR_SPCMD_SYNC (2 << 8) /* Synchronized command */
+#define ATMCI_CMDR_SPCMD_INT (4 << 8) /* Interrupt command */
+#define ATMCI_CMDR_SPCMD_INTRESP (5 << 8) /* Interrupt response */
+#define ATMCI_CMDR_OPDCMD (1 << 11) /* Open Drain */
+#define ATMCI_CMDR_MAXLAT_5CYC (0 << 12) /* Max latency 5 cycles */
+#define ATMCI_CMDR_MAXLAT_64CYC (1 << 12) /* Max latency 64 cycles */
+#define ATMCI_CMDR_START_XFER (1 << 16) /* Start data transfer */
+#define ATMCI_CMDR_STOP_XFER (2 << 16) /* Stop data transfer */
+#define ATMCI_CMDR_TRDIR_WRITE (0 << 18) /* Write data */
+#define ATMCI_CMDR_TRDIR_READ (1 << 18) /* Read data */
+#define ATMCI_CMDR_BLOCK (0 << 19) /* Single-block transfer */
+#define ATMCI_CMDR_MULTI_BLOCK (1 << 19) /* Multi-block transfer */
+#define ATMCI_CMDR_STREAM (2 << 19) /* MMC Stream transfer */
+#define ATMCI_CMDR_SDIO_BYTE (4 << 19) /* SDIO Byte transfer */
+#define ATMCI_CMDR_SDIO_BLOCK (5 << 19) /* SDIO Block transfer */
+#define ATMCI_CMDR_SDIO_SUSPEND (1 << 24) /* SDIO Suspend Command */
+#define ATMCI_CMDR_SDIO_RESUME (2 << 24) /* SDIO Resume Command */
+#define ATMCI_BLKR 0x0018 /* Block */
+#define ATMCI_BCNT(x) ((x) << 0) /* Data Block Count */
+#define ATMCI_BLKLEN(x) ((x) << 16) /* Data Block Length */
+#define ATMCI_CSTOR 0x001c /* Completion Signal Timeout[2] */
+#define ATMCI_CSTOCYC(x) ((x) << 0) /* CST cycles */
+#define ATMCI_CSTOMUL(x) ((x) << 4) /* CST multiplier */
+#define ATMCI_RSPR 0x0020 /* Response 0 */
+#define ATMCI_RSPR1 0x0024 /* Response 1 */
+#define ATMCI_RSPR2 0x0028 /* Response 2 */
+#define ATMCI_RSPR3 0x002c /* Response 3 */
+#define ATMCI_RDR 0x0030 /* Receive Data */
+#define ATMCI_TDR 0x0034 /* Transmit Data */
+#define ATMCI_SR 0x0040 /* Status */
+#define ATMCI_IER 0x0044 /* Interrupt Enable */
+#define ATMCI_IDR 0x0048 /* Interrupt Disable */
+#define ATMCI_IMR 0x004c /* Interrupt Mask */
+#define ATMCI_CMDRDY BIT(0) /* Command Ready */
+#define ATMCI_RXRDY BIT(1) /* Receiver Ready */
+#define ATMCI_TXRDY BIT(2) /* Transmitter Ready */
+#define ATMCI_BLKE BIT(3) /* Data Block Ended */
+#define ATMCI_DTIP BIT(4) /* Data Transfer In Progress */
+#define ATMCI_NOTBUSY BIT(5) /* Data Not Busy */
+#define ATMCI_ENDRX BIT(6) /* End of RX Buffer */
+#define ATMCI_ENDTX BIT(7) /* End of TX Buffer */
+#define ATMCI_SDIOIRQA BIT(8) /* SDIO IRQ in slot A */
+#define ATMCI_SDIOIRQB BIT(9) /* SDIO IRQ in slot B */
+#define ATMCI_SDIOWAIT BIT(12) /* SDIO Read Wait Operation Status */
+#define ATMCI_CSRCV BIT(13) /* CE-ATA Completion Signal Received */
+#define ATMCI_RXBUFF BIT(14) /* RX Buffer Full */
+#define ATMCI_TXBUFE BIT(15) /* TX Buffer Empty */
+#define ATMCI_RINDE BIT(16) /* Response Index Error */
+#define ATMCI_RDIRE BIT(17) /* Response Direction Error */
+#define ATMCI_RCRCE BIT(18) /* Response CRC Error */
+#define ATMCI_RENDE BIT(19) /* Response End Bit Error */
+#define ATMCI_RTOE BIT(20) /* Response Time-Out Error */
+#define ATMCI_DCRCE BIT(21) /* Data CRC Error */
+#define ATMCI_DTOE BIT(22) /* Data Time-Out Error */
+#define ATMCI_CSTOE BIT(23) /* Completion Signal Time-out Error */
+#define ATMCI_BLKOVRE BIT(24) /* DMA Block Overrun Error */
+#define ATMCI_DMADONE BIT(25) /* DMA Transfer Done */
+#define ATMCI_FIFOEMPTY BIT(26) /* FIFO Empty Flag */
+#define ATMCI_XFRDONE BIT(27) /* Transfer Done Flag */
+#define ATMCI_ACKRCV BIT(28) /* Boot Operation Acknowledge Received */
+#define ATMCI_ACKRCVE BIT(29) /* Boot Operation Acknowledge Error */
+#define ATMCI_OVRE BIT(30) /* RX Overrun Error */
+#define ATMCI_UNRE BIT(31) /* TX Underrun Error */
+#define ATMCI_DMA 0x0050 /* DMA Configuration[2] */
+#define ATMCI_DMA_OFFSET(x) ((x) << 0) /* DMA Write Buffer Offset */
+#define ATMCI_DMA_CHKSIZE(x) ((x) << 4) /* DMA Channel Read and Write Chunk Size */
+#define ATMCI_DMAEN BIT(8) /* DMA Hardware Handshaking Enable */
+#define ATMCI_CFG 0x0054 /* Configuration[2] */
+#define ATMCI_CFG_FIFOMODE_1DATA BIT(0) /* MCI Internal FIFO control mode */
+#define ATMCI_CFG_FERRCTRL_COR BIT(4) /* Flow Error flag reset control mode */
+#define ATMCI_CFG_HSMODE BIT(8) /* High Speed Mode */
+#define ATMCI_CFG_LSYNC BIT(12) /* Synchronize on the last block */
+#define ATMCI_WPMR 0x00e4 /* Write Protection Mode[2] */
+#define ATMCI_WP_EN BIT(0) /* WP Enable */
+#define ATMCI_WP_KEY (0x4d4349 << 8) /* WP Key */
+#define ATMCI_WPSR 0x00e8 /* Write Protection Status[2] */
+#define ATMCI_GET_WP_VS(x) ((x) & 0x0f)
+#define ATMCI_GET_WP_VSRC(x) (((x) >> 8) & 0xffff)
+#define ATMCI_VERSION 0x00FC /* Version */
+#define ATMCI_FIFO_APERTURE 0x0200 /* FIFO Aperture[2] */
+
+/* This is not including the FIFO Aperture on MCI2 */
+#define ATMCI_REGS_SIZE 0x100
+
+/* Register access macros */
+#define atmci_readl(port, reg) \
+ __raw_readl((port)->regs + reg)
+#define atmci_writel(port, reg, value) \
+ __raw_writel((value), (port)->regs + reg)
+
+/* On AVR chips the Peripheral DMA Controller is not connected to MCI. */
+#ifdef CONFIG_AVR32
+# define ATMCI_PDC_CONNECTED 0
+#else
+# define ATMCI_PDC_CONNECTED 1
+#endif
#define AUTOSUSPEND_DELAY 50
@@ -584,6 +718,29 @@ static inline unsigned int atmci_get_version(struct atmel_mci *host)
return atmci_readl(host, ATMCI_VERSION) & 0x00000fff;
}
+/*
+ * Fix sconfig's burst size according to atmel MCI. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+ * With version 0x600, we need to convert them as: 1 -> 0, 2 -> 1, 4 -> 2,
+ * 8 -> 3, 16 -> 4.
+ *
+ * This can be done by finding most significant bit set.
+ */
+static inline unsigned int atmci_convert_chksize(struct atmel_mci *host,
+ unsigned int maxburst)
+{
+ unsigned int version = atmci_get_version(host);
+ unsigned int offset = 2;
+
+ if (version >= 0x600)
+ offset = 1;
+
+ if (maxburst > 1)
+ return fls(maxburst) - offset;
+ else
+ return 0;
+}
+
static void atmci_timeout_timer(unsigned long data)
{
struct atmel_mci *host;
@@ -1034,11 +1191,13 @@ atmci_prepare_data_dma(struct atmel_mci *host, struct mmc_data *data)
if (data->flags & MMC_DATA_READ) {
direction = DMA_FROM_DEVICE;
host->dma_conf.direction = slave_dirn = DMA_DEV_TO_MEM;
- maxburst = atmci_convert_chksize(host->dma_conf.src_maxburst);
+ maxburst = atmci_convert_chksize(host,
+ host->dma_conf.src_maxburst);
} else {
direction = DMA_TO_DEVICE;
host->dma_conf.direction = slave_dirn = DMA_MEM_TO_DEV;
- maxburst = atmci_convert_chksize(host->dma_conf.dst_maxburst);
+ maxburst = atmci_convert_chksize(host,
+ host->dma_conf.dst_maxburst);
}
if (host->caps.has_dma_conf_reg)
diff --git a/drivers/mmc/host/cb710-mmc.h b/drivers/mmc/host/cb710-mmc.h
index 8984ec878fc9..8ecd9e56636a 100644
--- a/drivers/mmc/host/cb710-mmc.h
+++ b/drivers/mmc/host/cb710-mmc.h
@@ -29,8 +29,7 @@ static inline struct mmc_host *cb710_slot_to_mmc(struct cb710_slot *slot)
static inline struct cb710_slot *cb710_mmc_to_slot(struct mmc_host *mmc)
{
- struct platform_device *pdev = container_of(mmc_dev(mmc),
- struct platform_device, dev);
+ struct platform_device *pdev = to_platform_device(mmc_dev(mmc));
return cb710_pdev_to_slot(pdev);
}
diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c
index 7e1d13b68b06..81bdeeb05a4d 100644
--- a/drivers/mmc/host/dw_mmc-pltfm.c
+++ b/drivers/mmc/host/dw_mmc-pltfm.c
@@ -60,7 +60,7 @@ int dw_mci_pltfm_register(struct platform_device *pdev,
regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
/* Get registers' physical base address */
- host->phy_regs = (void *)(regs->start);
+ host->phy_regs = regs->start;
host->regs = devm_ioremap_resource(&pdev->dev, regs);
if (IS_ERR(host->regs))
return PTR_ERR(host->regs);
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 9becebeeccd1..d9c92f31da64 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -239,20 +239,12 @@ static int dw_mci_rockchip_init(struct dw_mci *host)
return 0;
}
-/* Common capabilities of RK3288 SoC */
-static unsigned long dw_mci_rk3288_dwmmc_caps[4] = {
- MMC_CAP_RUNTIME_RESUME, /* emmc */
- MMC_CAP_RUNTIME_RESUME, /* sdmmc */
- MMC_CAP_RUNTIME_RESUME, /* sdio0 */
- MMC_CAP_RUNTIME_RESUME, /* sdio1 */
-};
static const struct dw_mci_drv_data rk2928_drv_data = {
.prepare_command = dw_mci_rockchip_prepare_command,
.init = dw_mci_rockchip_init,
};
static const struct dw_mci_drv_data rk3288_drv_data = {
- .caps = dw_mci_rk3288_dwmmc_caps,
.prepare_command = dw_mci_rockchip_prepare_command,
.set_ios = dw_mci_rk3288_set_ios,
.execute_tuning = dw_mci_rk3288_execute_tuning,
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 7a6cedbe48a8..712835177e8b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -699,7 +699,7 @@ static int dw_mci_edmac_start_dma(struct dw_mci *host,
int ret = 0;
/* Set external dma config: burst size, burst width */
- cfg.dst_addr = (dma_addr_t)(host->phy_regs + fifo_offset);
+ cfg.dst_addr = host->phy_regs + fifo_offset;
cfg.src_addr = cfg.dst_addr;
cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1634,12 +1634,6 @@ static int dw_mci_command_complete(struct dw_mci *host, struct mmc_command *cmd)
else
cmd->error = 0;
- if (cmd->error) {
- /* newer ip versions need a delay between retries */
- if (host->quirks & DW_MCI_QUIRK_RETRY_DELAY)
- mdelay(20);
- }
-
return cmd->error;
}
@@ -2355,16 +2349,6 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
pending = mci_readl(host, MINTSTS); /* read-only mask reg */
- /*
- * DTO fix - version 2.10a and below, and only if internal DMA
- * is configured.
- */
- if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
- if (!pending &&
- ((mci_readl(host, STATUS) >> 17) & 0x1fff))
- pending |= SDMMC_INT_DATA_OVER;
- }
-
if (pending) {
/* Check volt switch first, since it can look like an error */
if ((host->state == STATE_SENDING_CMD11) &&
@@ -3165,9 +3149,6 @@ int dw_mci_probe(struct dw_mci *host)
/* Now that slots are all setup, we can enable card detect */
dw_mci_enable_cd(host);
- if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
- dev_info(host->dev, "Internal DMAC interrupt fix enabled.\n");
-
return 0;
err_dmaunmap:
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 33dfd7e72516..82a97ac4e956 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -972,7 +972,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
if ((events & MSDC_INT_XFER_COMPL) && (!stop || !stop->error)) {
data->bytes_xfered = data->blocks * data->blksz;
} else {
- dev_err(host->dev, "interrupt events: %x\n", events);
+ dev_dbg(host->dev, "interrupt events: %x\n", events);
msdc_reset_hw(host);
host->error |= REQ_DAT_ERR;
data->bytes_xfered = 0;
@@ -982,10 +982,10 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
else if (events & MSDC_INT_DATCRCERR)
data->error = -EILSEQ;
- dev_err(host->dev, "%s: cmd=%d; blocks=%d",
+ dev_dbg(host->dev, "%s: cmd=%d; blocks=%d",
__func__, mrq->cmd->opcode, data->blocks);
- dev_err(host->dev, "data_error=%d xfer_size=%d\n",
- (int)data->error, data->bytes_xfered);
+ dev_dbg(host->dev, "data_error=%d xfer_size=%d\n",
+ (int)data->error, data->bytes_xfered);
}
msdc_data_xfer_next(host, mrq, data);
@@ -1543,7 +1543,6 @@ static int msdc_drv_probe(struct platform_device *pdev)
mmc->f_min = host->src_clk_freq / (4 * 255);
mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23;
- mmc->caps |= MMC_CAP_RUNTIME_RESUME;
/* MMC core transfer sizes tunable parameters */
mmc->max_segs = MAX_BD_NUM;
mmc->max_seg_size = BDMA_DESC_BUFLEN;
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index a448498e3af2..42296e55b9de 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -20,15 +20,12 @@
#include <linux/scatterlist.h>
#include <linux/irq.h>
#include <linux/clk.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>
#include <asm/sizes.h>
#include <asm/unaligned.h>
-#include <linux/platform_data/mmc-mvsdio.h>
#include "mvsdio.h"
@@ -704,6 +701,10 @@ static int mvsd_probe(struct platform_device *pdev)
struct resource *r;
int ret, irq;
+ if (!np) {
+ dev_err(&pdev->dev, "no DT node\n");
+ return -ENODEV;
+ }
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
if (!r || irq < 0)
@@ -727,8 +728,12 @@ static int mvsd_probe(struct platform_device *pdev)
* fixed rate clock).
*/
host->clk = devm_clk_get(&pdev->dev, NULL);
- if (!IS_ERR(host->clk))
- clk_prepare_enable(host->clk);
+ if (IS_ERR(host->clk)) {
+ dev_err(&pdev->dev, "no clock associated\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ clk_prepare_enable(host->clk);
mmc->ops = &mvsd_ops;
@@ -744,45 +749,10 @@ static int mvsd_probe(struct platform_device *pdev)
mmc->max_seg_size = mmc->max_blk_size * mmc->max_blk_count;
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
- if (np) {
- if (IS_ERR(host->clk)) {
- dev_err(&pdev->dev, "DT platforms must have a clock associated\n");
- ret = -EINVAL;
- goto out;
- }
-
- host->base_clock = clk_get_rate(host->clk) / 2;
- ret = mmc_of_parse(mmc);
- if (ret < 0)
- goto out;
- } else {
- const struct mvsdio_platform_data *mvsd_data;
-
- mvsd_data = pdev->dev.platform_data;
- if (!mvsd_data) {
- ret = -ENXIO;
- goto out;
- }
- mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ |
- MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
- host->base_clock = mvsd_data->clock / 2;
- /* GPIO 0 regarded as invalid for backward compatibility */
- if (mvsd_data->gpio_card_detect &&
- gpio_is_valid(mvsd_data->gpio_card_detect)) {
- ret = mmc_gpio_request_cd(mmc,
- mvsd_data->gpio_card_detect,
- 0);
- if (ret)
- goto out;
- } else {
- mmc->caps |= MMC_CAP_NEEDS_POLL;
- }
-
- if (mvsd_data->gpio_write_protect &&
- gpio_is_valid(mvsd_data->gpio_write_protect))
- mmc_gpio_request_ro(mmc, mvsd_data->gpio_write_protect);
- }
-
+ host->base_clock = clk_get_rate(host->clk) / 2;
+ ret = mmc_of_parse(mmc);
+ if (ret < 0)
+ goto out;
if (maxfreq)
mmc->f_max = maxfreq;
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index 6e218fb1a669..660170cd04d9 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -55,8 +55,8 @@ static int of_mmc_spi_init(struct device *dev,
{
struct of_mmc_spi *oms = to_of_mmc_spi(dev);
- return request_threaded_irq(oms->detect_irq, NULL, irqhandler, 0,
- dev_name(dev), mmc);
+ return request_threaded_irq(oms->detect_irq, NULL, irqhandler,
+ IRQF_ONESHOT, dev_name(dev), mmc);
}
static void of_mmc_spi_exit(struct device *dev, void *mmc)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 7fb0753abe30..b6639ea0bf18 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2250,10 +2250,8 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
pm_runtime_get_sync(host->dev);
mmc_remove_host(host->mmc);
- if (host->tx_chan)
- dma_release_channel(host->tx_chan);
- if (host->rx_chan)
- dma_release_channel(host->rx_chan);
+ dma_release_channel(host->tx_chan);
+ dma_release_channel(host->rx_chan);
pm_runtime_put_sync(host->dev);
pm_runtime_disable(host->dev);
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 1f1582f6cccb..f25f29253595 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -76,6 +76,7 @@
#define ESDHC_STD_TUNING_EN (1 << 24)
/* NOTE: the minimum valid tuning start tap for mx6sl is 1 */
#define ESDHC_TUNING_START_TAP 0x1
+#define ESDHC_TUNING_STEP_MASK 0x00070000
#define ESDHC_TUNING_STEP_SHIFT 16
/* pinctrl state */
@@ -489,9 +490,11 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
m |= ESDHC_MIX_CTRL_FBCLK_SEL;
tuning_ctrl = readl(host->ioaddr + ESDHC_TUNING_CTRL);
tuning_ctrl |= ESDHC_STD_TUNING_EN | ESDHC_TUNING_START_TAP;
- if (imx_data->boarddata.tuning_step)
+ if (imx_data->boarddata.tuning_step) {
+ tuning_ctrl &= ~ESDHC_TUNING_STEP_MASK;
tuning_ctrl |= imx_data->boarddata.tuning_step << ESDHC_TUNING_STEP_SHIFT;
- writel(tuning_ctrl, host->ioaddr + ESDHC_TUNING_CTRL);
+ }
+ writel(tuning_ctrl, host->ioaddr + ESDHC_TUNING_CTRL);
} else {
v &= ~ESDHC_MIX_CTRL_EXE_TUNE;
}
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 06d0b50dfe71..7e7d8f0c9438 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -21,6 +21,8 @@
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
#include "sdhci-pltfm.h"
@@ -51,6 +53,60 @@ static const struct of_device_id sdhci_at91_dt_match[] = {
{}
};
+#ifdef CONFIG_PM
+static int sdhci_at91_runtime_suspend(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_at91_priv *priv = pltfm_host->priv;
+ int ret;
+
+ ret = sdhci_runtime_suspend_host(host);
+
+ clk_disable_unprepare(priv->gck);
+ clk_disable_unprepare(priv->hclock);
+ clk_disable_unprepare(priv->mainck);
+
+ return ret;
+}
+
+static int sdhci_at91_runtime_resume(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_at91_priv *priv = pltfm_host->priv;
+ int ret;
+
+ ret = clk_prepare_enable(priv->mainck);
+ if (ret) {
+ dev_err(dev, "can't enable mainck\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(priv->hclock);
+ if (ret) {
+ dev_err(dev, "can't enable hclock\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(priv->gck);
+ if (ret) {
+ dev_err(dev, "can't enable gck\n");
+ return ret;
+ }
+
+ return sdhci_runtime_resume_host(host);
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops sdhci_at91_dev_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(sdhci_at91_runtime_suspend,
+ sdhci_at91_runtime_resume,
+ NULL)
+};
+
static int sdhci_at91_probe(struct platform_device *pdev)
{
const struct of_device_id *match;
@@ -144,12 +200,23 @@ static int sdhci_at91_probe(struct platform_device *pdev)
sdhci_get_of_property(pdev);
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
+ pm_runtime_use_autosuspend(&pdev->dev);
+
ret = sdhci_add_host(host);
if (ret)
- goto clocks_disable_unprepare;
+ goto pm_runtime_disable;
+
+ pm_runtime_put_autosuspend(&pdev->dev);
return 0;
+pm_runtime_disable:
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
clocks_disable_unprepare:
clk_disable_unprepare(priv->gck);
clk_disable_unprepare(priv->mainck);
@@ -165,6 +232,10 @@ static int sdhci_at91_remove(struct platform_device *pdev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_at91_priv *priv = pltfm_host->priv;
+ pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+
sdhci_pltfm_unregister(pdev);
clk_disable_unprepare(priv->gck);
@@ -178,7 +249,7 @@ static struct platform_driver sdhci_at91_driver = {
.driver = {
.name = "sdhci-at91",
.of_match_table = sdhci_at91_dt_match,
- .pm = SDHCI_PLTFM_PMOPS,
+ .pm = &sdhci_at91_dev_pm_ops,
},
.probe = sdhci_at91_probe,
.remove = sdhci_at91_remove,
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 90e94a028a49..83b1226471c1 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -584,6 +584,8 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
struct device_node *np;
+ struct sdhci_pltfm_host *pltfm_host;
+ struct sdhci_esdhc *esdhc;
int ret;
np = pdev->dev.of_node;
@@ -600,6 +602,14 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
sdhci_get_of_property(pdev);
+ pltfm_host = sdhci_priv(host);
+ esdhc = pltfm_host->priv;
+ if (esdhc->vendor_ver == VENDOR_V_22)
+ host->quirks2 |= SDHCI_QUIRK2_HOST_NO_CMD23;
+
+ if (esdhc->vendor_ver > VENDOR_V_22)
+ host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ;
+
if (of_device_is_compatible(np, "fsl,p5040-esdhc") ||
of_device_is_compatible(np, "fsl,p5020-esdhc") ||
of_device_is_compatible(np, "fsl,p4080-esdhc") ||
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index cf7ad458b4f4..cc851b065d0a 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -277,7 +277,7 @@ static int spt_select_drive_strength(struct sdhci_host *host,
if (sdhci_pci_spt_drive_strength > 0)
drive_strength = sdhci_pci_spt_drive_strength & 0xf;
else
- drive_strength = 1; /* 33-ohm */
+ drive_strength = 0; /* Default 50-ohm */
if ((mmc_driver_type_mask(drive_strength) & card_drv) == 0)
drive_strength = 0; /* Default 50-ohm */
@@ -1464,7 +1464,7 @@ static int sdhci_pci_resume(struct device *dev)
static int sdhci_pci_runtime_suspend(struct device *dev)
{
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
struct sdhci_pci_chip *chip;
struct sdhci_pci_slot *slot;
int i, ret;
@@ -1500,7 +1500,7 @@ err_pci_runtime_suspend:
static int sdhci_pci_runtime_resume(struct device *dev)
{
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
struct sdhci_pci_chip *chip;
struct sdhci_pci_slot *slot;
int i, ret;
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 87fb5ea8ebe7..072bb27a65cf 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -104,7 +104,8 @@ void sdhci_get_of_property(struct platform_device *pdev)
if (of_find_property(np, "keep-power-in-suspend", NULL))
host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
- if (of_find_property(np, "enable-sdio-wakeup", NULL))
+ if (of_property_read_bool(np, "wakeup-source") ||
+ of_property_read_bool(np, "enable-sdio-wakeup")) /* legacy */
host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
}
#else
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index ad28b49f0203..83c4bf7bc16c 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -22,12 +22,20 @@
#include <linux/of_device.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
#include <linux/mmc/slot-gpio.h>
#include <linux/gpio/consumer.h>
#include "sdhci-pltfm.h"
/* Tegra SDHOST controller vendor register definitions */
+#define SDHCI_TEGRA_VENDOR_CLOCK_CTRL 0x100
+#define SDHCI_CLOCK_CTRL_TAP_MASK 0x00ff0000
+#define SDHCI_CLOCK_CTRL_TAP_SHIFT 16
+#define SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE BIT(5)
+#define SDHCI_CLOCK_CTRL_PADPIPE_CLKEN_OVERRIDE BIT(3)
+#define SDHCI_CLOCK_CTRL_SPI_MODE_CLKEN_OVERRIDE BIT(2)
+
#define SDHCI_TEGRA_VENDOR_MISC_CTRL 0x120
#define SDHCI_MISC_CTRL_ENABLE_SDR104 0x8
#define SDHCI_MISC_CTRL_ENABLE_SDR50 0x10
@@ -37,9 +45,9 @@
#define NVQUIRK_FORCE_SDHCI_SPEC_200 BIT(0)
#define NVQUIRK_ENABLE_BLOCK_GAP_DET BIT(1)
#define NVQUIRK_ENABLE_SDHCI_SPEC_300 BIT(2)
-#define NVQUIRK_DISABLE_SDR50 BIT(3)
-#define NVQUIRK_DISABLE_SDR104 BIT(4)
-#define NVQUIRK_DISABLE_DDR50 BIT(5)
+#define NVQUIRK_ENABLE_SDR50 BIT(3)
+#define NVQUIRK_ENABLE_SDR104 BIT(4)
+#define NVQUIRK_ENABLE_DDR50 BIT(5)
struct sdhci_tegra_soc_data {
const struct sdhci_pltfm_data *pdata;
@@ -49,6 +57,7 @@ struct sdhci_tegra_soc_data {
struct sdhci_tegra {
const struct sdhci_tegra_soc_data *soc_data;
struct gpio_desc *power_gpio;
+ bool ddr_signaling;
};
static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -124,25 +133,33 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = pltfm_host->priv;
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
- u32 misc_ctrl;
+ u32 misc_ctrl, clk_ctrl;
sdhci_reset(host, mask);
if (!(mask & SDHCI_RESET_ALL))
return;
- misc_ctrl = sdhci_readw(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+ misc_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
/* Erratum: Enable SDHCI spec v3.00 support */
if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
- /* Don't advertise UHS modes which aren't supported yet */
- if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR50)
- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR50;
- if (soc_data->nvquirks & NVQUIRK_DISABLE_DDR50)
- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_DDR50;
- if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR104)
- misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR104;
- sdhci_writew(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+ /* Advertise UHS modes as supported by host */
+ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50)
+ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR50;
+ if (soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
+ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50;
+ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104)
+ misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104;
+ sdhci_writel(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+
+ clk_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+ clk_ctrl &= ~SDHCI_CLOCK_CTRL_SPI_MODE_CLKEN_OVERRIDE;
+ if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50)
+ clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE;
+ sdhci_writel(host, clk_ctrl, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+
+ tegra_host->ddr_signaling = false;
}
static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
@@ -164,15 +181,99 @@ static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
}
+static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_tegra *tegra_host = pltfm_host->priv;
+ unsigned long host_clk;
+
+ if (!clock)
+ return;
+
+ host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
+ clk_set_rate(pltfm_host->clk, host_clk);
+ host->max_clk = clk_get_rate(pltfm_host->clk);
+
+ return sdhci_set_clock(host, clock);
+}
+
+static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
+ unsigned timing)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_tegra *tegra_host = pltfm_host->priv;
+
+ if (timing == MMC_TIMING_UHS_DDR50)
+ tegra_host->ddr_signaling = true;
+
+ return sdhci_set_uhs_signaling(host, timing);
+}
+
+static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ /*
+ * DDR modes require the host to run at double the card frequency, so
+ * the maximum rate we can support is half of the module input clock.
+ */
+ return clk_round_rate(pltfm_host->clk, UINT_MAX) / 2;
+}
+
+static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap)
+{
+ u32 reg;
+
+ reg = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+ reg &= ~SDHCI_CLOCK_CTRL_TAP_MASK;
+ reg |= tap << SDHCI_CLOCK_CTRL_TAP_SHIFT;
+ sdhci_writel(host, reg, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
+}
+
+static int tegra_sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
+{
+ unsigned int min, max;
+
+ /*
+ * Start search for minimum tap value at 10, as smaller values are
+ * may wrongly be reported as working but fail at higher speeds,
+ * according to the TRM.
+ */
+ min = 10;
+ while (min < 255) {
+ tegra_sdhci_set_tap(host, min);
+ if (!mmc_send_tuning(host->mmc, opcode, NULL))
+ break;
+ min++;
+ }
+
+ /* Find the maximum tap value that still passes. */
+ max = min + 1;
+ while (max < 255) {
+ tegra_sdhci_set_tap(host, max);
+ if (mmc_send_tuning(host->mmc, opcode, NULL)) {
+ max--;
+ break;
+ }
+ max++;
+ }
+
+ /* The TRM states the ideal tap value is at 75% in the passing range. */
+ tegra_sdhci_set_tap(host, min + ((max - min) * 3 / 4));
+
+ return mmc_send_tuning(host->mmc, opcode, NULL);
+}
+
static const struct sdhci_ops tegra_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
.write_l = tegra_sdhci_writel,
- .set_clock = sdhci_set_clock,
+ .set_clock = tegra_sdhci_set_clock,
.set_bus_width = tegra_sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
- .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .platform_execute_tuning = tegra_sdhci_execute_tuning,
+ .set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
+ .get_max_clock = tegra_sdhci_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
@@ -184,7 +285,7 @@ static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
.ops = &tegra_sdhci_ops,
};
-static struct sdhci_tegra_soc_data soc_data_tegra20 = {
+static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
.pdata = &sdhci_tegra20_pdata,
.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
NVQUIRK_ENABLE_BLOCK_GAP_DET,
@@ -197,14 +298,15 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = {
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra_sdhci_ops,
};
-static struct sdhci_tegra_soc_data soc_data_tegra30 = {
+static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
.pdata = &sdhci_tegra30_pdata,
.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
- NVQUIRK_DISABLE_SDR50 |
- NVQUIRK_DISABLE_SDR104,
+ NVQUIRK_ENABLE_SDR50 |
+ NVQUIRK_ENABLE_SDR104,
};
static const struct sdhci_ops tegra114_sdhci_ops = {
@@ -212,11 +314,12 @@ static const struct sdhci_ops tegra114_sdhci_ops = {
.read_w = tegra_sdhci_readw,
.write_w = tegra_sdhci_writew,
.write_l = tegra_sdhci_writel,
- .set_clock = sdhci_set_clock,
+ .set_clock = tegra_sdhci_set_clock,
.set_bus_width = tegra_sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
- .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .platform_execute_tuning = tegra_sdhci_execute_tuning,
+ .set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
+ .get_max_clock = tegra_sdhci_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
@@ -226,17 +329,34 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra114_sdhci_ops,
};
-static struct sdhci_tegra_soc_data soc_data_tegra114 = {
+static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
.pdata = &sdhci_tegra114_pdata,
- .nvquirks = NVQUIRK_DISABLE_SDR50 |
- NVQUIRK_DISABLE_DDR50 |
- NVQUIRK_DISABLE_SDR104,
+ .nvquirks = NVQUIRK_ENABLE_SDR50 |
+ NVQUIRK_ENABLE_DDR50 |
+ NVQUIRK_ENABLE_SDR104,
+};
+
+static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
+ .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+ SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+ SDHCI_QUIRK_SINGLE_POWER_WRITE |
+ SDHCI_QUIRK_NO_HISPD_BIT |
+ SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+ .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
+ .ops = &tegra114_sdhci_ops,
+};
+
+static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
+ .pdata = &sdhci_tegra210_pdata,
};
static const struct of_device_id sdhci_tegra_dt_match[] = {
+ { .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra114 },
{ .compatible = "nvidia,tegra114-sdhci", .data = &soc_data_tegra114 },
{ .compatible = "nvidia,tegra30-sdhci", .data = &soc_data_tegra30 },
@@ -271,6 +391,7 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
rc = -ENOMEM;
goto err_alloc_tegra_host;
}
+ tegra_host->ddr_signaling = false;
tegra_host->soc_data = soc_data;
pltfm_host->priv = tegra_host;
@@ -278,6 +399,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
if (rc)
goto err_parse_dt;
+ if (tegra_host->soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
+ host->mmc->caps |= MMC_CAP_1_8V_DDR;
+
tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
GPIOD_OUT_HIGH);
if (IS_ERR(tegra_host->power_gpio)) {
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index b48565ed5616..d622435d1bcc 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -492,7 +492,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
host->align_buffer, host->align_buffer_sz, direction);
if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
goto fail;
- BUG_ON(host->align_addr & host->align_mask);
+ BUG_ON(host->align_addr & SDHCI_ADMA2_MASK);
host->sg_count = sdhci_pre_dma_transfer(host, data);
if (host->sg_count < 0)
@@ -514,8 +514,8 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
* the (up to three) bytes that screw up the
* alignment.
*/
- offset = (host->align_sz - (addr & host->align_mask)) &
- host->align_mask;
+ offset = (SDHCI_ADMA2_ALIGN - (addr & SDHCI_ADMA2_MASK)) &
+ SDHCI_ADMA2_MASK;
if (offset) {
if (data->flags & MMC_DATA_WRITE) {
buffer = sdhci_kmap_atomic(sg, &flags);
@@ -529,8 +529,8 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
BUG_ON(offset > 65536);
- align += host->align_sz;
- align_addr += host->align_sz;
+ align += SDHCI_ADMA2_ALIGN;
+ align_addr += SDHCI_ADMA2_ALIGN;
desc += host->desc_sz;
@@ -540,9 +540,12 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
BUG_ON(len > 65536);
- /* tran, valid */
- sdhci_adma_write_desc(host, desc, addr, len, ADMA2_TRAN_VALID);
- desc += host->desc_sz;
+ if (len) {
+ /* tran, valid */
+ sdhci_adma_write_desc(host, desc, addr, len,
+ ADMA2_TRAN_VALID);
+ desc += host->desc_sz;
+ }
/*
* If this triggers then we have a calculation bug
@@ -608,7 +611,7 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
/* Do a quick scan of the SG list for any unaligned mappings */
has_unaligned = false;
for_each_sg(data->sg, sg, host->sg_count, i)
- if (sg_dma_address(sg) & host->align_mask) {
+ if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
has_unaligned = true;
break;
}
@@ -620,15 +623,15 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
align = host->align_buffer;
for_each_sg(data->sg, sg, host->sg_count, i) {
- if (sg_dma_address(sg) & host->align_mask) {
- size = host->align_sz -
- (sg_dma_address(sg) & host->align_mask);
+ if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
+ size = SDHCI_ADMA2_ALIGN -
+ (sg_dma_address(sg) & SDHCI_ADMA2_MASK);
buffer = sdhci_kmap_atomic(sg, &flags);
memcpy(buffer, align, size);
sdhci_kunmap_atomic(buffer, &flags);
- align += host->align_sz;
+ align += SDHCI_ADMA2_ALIGN;
}
}
}
@@ -768,8 +771,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
if (unlikely(broken)) {
for_each_sg(data->sg, sg, data->sg_len, i) {
if (sg->length & 0x3) {
- DBG("Reverting to PIO because of "
- "transfer size (%d)\n",
+ DBG("Reverting to PIO because of transfer size (%d)\n",
sg->length);
host->flags &= ~SDHCI_REQ_USE_DMA;
break;
@@ -803,8 +805,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
if (unlikely(broken)) {
for_each_sg(data->sg, sg, data->sg_len, i) {
if (sg->offset & 0x3) {
- DBG("Reverting to PIO because of "
- "bad alignment\n");
+ DBG("Reverting to PIO because of bad alignment\n");
host->flags &= ~SDHCI_REQ_USE_DMA;
break;
}
@@ -1016,8 +1017,8 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
while (sdhci_readl(host, SDHCI_PRESENT_STATE) & mask) {
if (timeout == 0) {
- pr_err("%s: Controller never released "
- "inhibit bit(s).\n", mmc_hostname(host->mmc));
+ pr_err("%s: Controller never released inhibit bit(s).\n",
+ mmc_hostname(host->mmc));
sdhci_dumpregs(host);
cmd->error = -EIO;
tasklet_schedule(&host->finish_tasklet);
@@ -1254,8 +1255,8 @@ clock_set:
while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
& SDHCI_CLOCK_INT_STABLE)) {
if (timeout == 0) {
- pr_err("%s: Internal clock never "
- "stabilised.\n", mmc_hostname(host->mmc));
+ pr_err("%s: Internal clock never stabilised.\n",
+ mmc_hostname(host->mmc));
sdhci_dumpregs(host);
return;
}
@@ -1274,19 +1275,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
struct mmc_host *mmc = host->mmc;
u8 pwr = 0;
- if (!IS_ERR(mmc->supply.vmmc)) {
- spin_unlock_irq(&host->lock);
- mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
- spin_lock_irq(&host->lock);
-
- if (mode != MMC_POWER_OFF)
- sdhci_writeb(host, SDHCI_POWER_ON, SDHCI_POWER_CONTROL);
- else
- sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
-
- return;
- }
-
if (mode != MMC_POWER_OFF) {
switch (1 << vdd) {
case MMC_VDD_165_195:
@@ -1301,7 +1289,9 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
pwr = SDHCI_POWER_330;
break;
default:
- BUG();
+ WARN(1, "%s: Invalid vdd %#x\n",
+ mmc_hostname(host->mmc), vdd);
+ break;
}
}
@@ -1345,6 +1335,12 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
mdelay(10);
}
+
+ if (!IS_ERR(mmc->supply.vmmc)) {
+ spin_unlock_irq(&host->lock);
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+ spin_lock_irq(&host->lock);
+ }
}
/*****************************************************************************\
@@ -1540,8 +1536,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
else if (ios->drv_type == MMC_SET_DRIVER_TYPE_D)
ctrl_2 |= SDHCI_CTRL_DRV_TYPE_D;
else {
- pr_warn("%s: invalid driver type, default to "
- "driver type B\n", mmc_hostname(mmc));
+ pr_warn("%s: invalid driver type, default to driver type B\n",
+ mmc_hostname(mmc));
ctrl_2 |= SDHCI_CTRL_DRV_TYPE_B;
}
@@ -2015,10 +2011,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
spin_lock_irqsave(&host->lock, flags);
if (!host->tuning_done) {
- pr_info(DRIVER_NAME ": Timeout waiting for "
- "Buffer Read Ready interrupt during tuning "
- "procedure, falling back to fixed sampling "
- "clock\n");
+ pr_info(DRIVER_NAME ": Timeout waiting for Buffer Read Ready interrupt during tuning procedure, falling back to fixed sampling clock\n");
ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
ctrl &= ~SDHCI_CTRL_TUNED_CLK;
ctrl &= ~SDHCI_CTRL_EXEC_TUNING;
@@ -2046,9 +2039,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
}
if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
- pr_info(DRIVER_NAME ": Tuning procedure"
- " failed, falling back to fixed sampling"
- " clock\n");
+ pr_info(DRIVER_NAME ": Tuning procedure failed, falling back to fixed sampling clock\n");
err = -EIO;
}
@@ -2293,8 +2284,8 @@ static void sdhci_timeout_timer(unsigned long data)
spin_lock_irqsave(&host->lock, flags);
if (host->mrq) {
- pr_err("%s: Timeout waiting for hardware "
- "interrupt.\n", mmc_hostname(host->mmc));
+ pr_err("%s: Timeout waiting for hardware interrupt.\n",
+ mmc_hostname(host->mmc));
sdhci_dumpregs(host);
if (host->data) {
@@ -2325,9 +2316,8 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
BUG_ON(intmask == 0);
if (!host->cmd) {
- pr_err("%s: Got command interrupt 0x%08x even "
- "though no command operation was in progress.\n",
- mmc_hostname(host->mmc), (unsigned)intmask);
+ pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
+ mmc_hostname(host->mmc), (unsigned)intmask);
sdhci_dumpregs(host);
return;
}
@@ -2356,8 +2346,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
*/
if (host->cmd->flags & MMC_RSP_BUSY) {
if (host->cmd->data)
- DBG("Cannot wait for busy signal when also "
- "doing a data transfer");
+ DBG("Cannot wait for busy signal when also doing a data transfer");
else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ)
&& !host->busy_handle) {
/* Mark that command complete before busy is ended */
@@ -2451,9 +2440,8 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
}
}
- pr_err("%s: Got data interrupt 0x%08x even "
- "though no data operation was in progress.\n",
- mmc_hostname(host->mmc), (unsigned)intmask);
+ pr_err("%s: Got data interrupt 0x%08x even though no data operation was in progress.\n",
+ mmc_hostname(host->mmc), (unsigned)intmask);
sdhci_dumpregs(host);
return;
@@ -2760,7 +2748,7 @@ static int sdhci_runtime_pm_put(struct sdhci_host *host)
static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
{
- if (host->runtime_suspended || host->bus_on)
+ if (host->bus_on)
return;
host->bus_on = true;
pm_runtime_get_noresume(host->mmc->parent);
@@ -2768,7 +2756,7 @@ static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
{
- if (host->runtime_suspended || !host->bus_on)
+ if (!host->bus_on)
return;
host->bus_on = false;
pm_runtime_put_noidle(host->mmc->parent);
@@ -2896,9 +2884,8 @@ int sdhci_add_host(struct sdhci_host *host)
host->version = (host->version & SDHCI_SPEC_VER_MASK)
>> SDHCI_SPEC_VER_SHIFT;
if (host->version > SDHCI_SPEC_300) {
- pr_err("%s: Unknown controller version (%d). "
- "You may experience problems.\n", mmc_hostname(mmc),
- host->version);
+ pr_err("%s: Unknown controller version (%d). You may experience problems.\n",
+ mmc_hostname(mmc), host->version);
}
caps[0] = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps :
@@ -2967,24 +2954,17 @@ int sdhci_add_host(struct sdhci_host *host)
if (host->flags & SDHCI_USE_64_BIT_DMA) {
host->adma_table_sz = (SDHCI_MAX_SEGS * 2 + 1) *
SDHCI_ADMA2_64_DESC_SZ;
- host->align_buffer_sz = SDHCI_MAX_SEGS *
- SDHCI_ADMA2_64_ALIGN;
host->desc_sz = SDHCI_ADMA2_64_DESC_SZ;
- host->align_sz = SDHCI_ADMA2_64_ALIGN;
- host->align_mask = SDHCI_ADMA2_64_ALIGN - 1;
} else {
host->adma_table_sz = (SDHCI_MAX_SEGS * 2 + 1) *
SDHCI_ADMA2_32_DESC_SZ;
- host->align_buffer_sz = SDHCI_MAX_SEGS *
- SDHCI_ADMA2_32_ALIGN;
host->desc_sz = SDHCI_ADMA2_32_DESC_SZ;
- host->align_sz = SDHCI_ADMA2_32_ALIGN;
- host->align_mask = SDHCI_ADMA2_32_ALIGN - 1;
}
host->adma_table = dma_alloc_coherent(mmc_dev(mmc),
host->adma_table_sz,
&host->adma_addr,
GFP_KERNEL);
+ host->align_buffer_sz = SDHCI_MAX_SEGS * SDHCI_ADMA2_ALIGN;
host->align_buffer = kmalloc(host->align_buffer_sz, GFP_KERNEL);
if (!host->adma_table || !host->align_buffer) {
if (host->adma_table)
@@ -2998,7 +2978,7 @@ int sdhci_add_host(struct sdhci_host *host)
host->flags &= ~SDHCI_USE_ADMA;
host->adma_table = NULL;
host->align_buffer = NULL;
- } else if (host->adma_addr & host->align_mask) {
+ } else if (host->adma_addr & (SDHCI_ADMA2_DESC_ALIGN - 1)) {
pr_warn("%s: unable to allocate aligned ADMA descriptor\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_ADMA;
@@ -3031,8 +3011,8 @@ int sdhci_add_host(struct sdhci_host *host)
if (host->max_clk == 0 || host->quirks &
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN) {
if (!host->ops->get_max_clock) {
- pr_err("%s: Hardware doesn't specify base clock "
- "frequency.\n", mmc_hostname(mmc));
+ pr_err("%s: Hardware doesn't specify base clock frequency.\n",
+ mmc_hostname(mmc));
return -ENODEV;
}
host->max_clk = host->ops->get_max_clock(host);
@@ -3294,8 +3274,8 @@ int sdhci_add_host(struct sdhci_host *host)
mmc->ocr_avail_mmc &= host->ocr_avail_mmc;
if (mmc->ocr_avail == 0) {
- pr_err("%s: Hardware doesn't report any "
- "support voltages.\n", mmc_hostname(mmc));
+ pr_err("%s: Hardware doesn't report any support voltages.\n",
+ mmc_hostname(mmc));
return -ENODEV;
}
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 9d4aa31b683a..7654ae5d2b4e 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -272,22 +272,27 @@
/* ADMA2 32-bit DMA descriptor size */
#define SDHCI_ADMA2_32_DESC_SZ 8
-/* ADMA2 32-bit DMA alignment */
-#define SDHCI_ADMA2_32_ALIGN 4
-
/* ADMA2 32-bit descriptor */
struct sdhci_adma2_32_desc {
__le16 cmd;
__le16 len;
__le32 addr;
-} __packed __aligned(SDHCI_ADMA2_32_ALIGN);
+} __packed __aligned(4);
+
+/* ADMA2 data alignment */
+#define SDHCI_ADMA2_ALIGN 4
+#define SDHCI_ADMA2_MASK (SDHCI_ADMA2_ALIGN - 1)
+
+/*
+ * ADMA2 descriptor alignment. Some controllers (e.g. Intel) require 8 byte
+ * alignment for the descriptor table even in 32-bit DMA mode. Memory
+ * allocation is at least 8 byte aligned anyway, so just stipulate 8 always.
+ */
+#define SDHCI_ADMA2_DESC_ALIGN 8
/* ADMA2 64-bit DMA descriptor size */
#define SDHCI_ADMA2_64_DESC_SZ 12
-/* ADMA2 64-bit DMA alignment */
-#define SDHCI_ADMA2_64_ALIGN 8
-
/*
* ADMA2 64-bit descriptor. Note 12-byte descriptor can't always be 8-byte
* aligned.
@@ -482,8 +487,6 @@ struct sdhci_host {
dma_addr_t align_addr; /* Mapped bounce buffer */
unsigned int desc_sz; /* ADMA descriptor size */
- unsigned int align_sz; /* ADMA alignment */
- unsigned int align_mask; /* ADMA alignment mask */
struct tasklet_struct finish_tasklet; /* Tasklet structures */
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index ad9ffea7d659..1ca8a1359cbc 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -397,38 +397,26 @@ static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
}
static struct dma_chan *
-sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
- struct sh_mmcif_plat_data *pdata,
- enum dma_transfer_direction direction)
+sh_mmcif_request_dma_pdata(struct sh_mmcif_host *host, uintptr_t slave_id)
{
- struct dma_slave_config cfg = { 0, };
- struct dma_chan *chan;
- void *slave_data = NULL;
- struct resource *res;
- struct device *dev = sh_mmcif_host_to_dev(host);
dma_cap_mask_t mask;
- int ret;
dma_cap_zero(mask);
dma_cap_set(DMA_SLAVE, mask);
+ if (slave_id <= 0)
+ return NULL;
- if (pdata)
- slave_data = direction == DMA_MEM_TO_DEV ?
- (void *)pdata->slave_id_tx :
- (void *)pdata->slave_id_rx;
-
- chan = dma_request_slave_channel_compat(mask, shdma_chan_filter,
- slave_data, dev,
- direction == DMA_MEM_TO_DEV ? "tx" : "rx");
-
- dev_dbg(dev, "%s: %s: got channel %p\n", __func__,
- direction == DMA_MEM_TO_DEV ? "TX" : "RX", chan);
+ return dma_request_channel(mask, shdma_chan_filter, (void *)slave_id);
+}
- if (!chan)
- return NULL;
+static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host,
+ struct dma_chan *chan,
+ enum dma_transfer_direction direction)
+{
+ struct resource *res;
+ struct dma_slave_config cfg = { 0, };
res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
-
cfg.direction = direction;
if (direction == DMA_DEV_TO_MEM) {
@@ -439,38 +427,42 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
}
- ret = dmaengine_slave_config(chan, &cfg);
- if (ret < 0) {
- dma_release_channel(chan);
- return NULL;
- }
-
- return chan;
+ return dmaengine_slave_config(chan, &cfg);
}
-static void sh_mmcif_request_dma(struct sh_mmcif_host *host,
- struct sh_mmcif_plat_data *pdata)
+static void sh_mmcif_request_dma(struct sh_mmcif_host *host)
{
struct device *dev = sh_mmcif_host_to_dev(host);
host->dma_active = false;
- if (pdata) {
- if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0)
- return;
- } else if (!dev->of_node) {
- return;
+ /* We can only either use DMA for both Tx and Rx or not use it at all */
+ if (IS_ENABLED(CONFIG_SUPERH) && dev->platform_data) {
+ struct sh_mmcif_plat_data *pdata = dev->platform_data;
+
+ host->chan_tx = sh_mmcif_request_dma_pdata(host,
+ pdata->slave_id_tx);
+ host->chan_rx = sh_mmcif_request_dma_pdata(host,
+ pdata->slave_id_rx);
+ } else {
+ host->chan_tx = dma_request_slave_channel(dev, "tx");
+ host->chan_tx = dma_request_slave_channel(dev, "rx");
}
+ dev_dbg(dev, "%s: got channel TX %p RX %p\n", __func__, host->chan_tx,
+ host->chan_rx);
- /* We can only either use DMA for both Tx and Rx or not use it at all */
- host->chan_tx = sh_mmcif_request_dma_one(host, pdata, DMA_MEM_TO_DEV);
- if (!host->chan_tx)
- return;
+ if (!host->chan_tx || !host->chan_rx ||
+ sh_mmcif_dma_slave_config(host, host->chan_tx, DMA_MEM_TO_DEV) ||
+ sh_mmcif_dma_slave_config(host, host->chan_rx, DMA_DEV_TO_MEM))
+ goto error;
- host->chan_rx = sh_mmcif_request_dma_one(host, pdata, DMA_DEV_TO_MEM);
- if (!host->chan_rx) {
+ return;
+
+error:
+ if (host->chan_tx)
dma_release_channel(host->chan_tx);
- host->chan_tx = NULL;
- }
+ if (host->chan_rx)
+ dma_release_channel(host->chan_rx);
+ host->chan_tx = host->chan_rx = NULL;
}
static void sh_mmcif_release_dma(struct sh_mmcif_host *host)
@@ -1102,7 +1094,7 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
if (ios->power_mode == MMC_POWER_UP) {
if (!host->card_present) {
/* See if we also get DMA */
- sh_mmcif_request_dma(host, dev->platform_data);
+ sh_mmcif_request_dma(host);
host->card_present = true;
}
sh_mmcif_set_power(host, ios);
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 4498e92116b8..b47122d3e8d8 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -1634,7 +1634,7 @@ static void usdhi6_timeout_work(struct work_struct *work)
struct usdhi6_host *host = container_of(d, struct usdhi6_host, timeout_work);
struct mmc_request *mrq = host->mrq;
struct mmc_data *data = mrq ? mrq->data : NULL;
- struct scatterlist *sg = host->sg ?: data->sg;
+ struct scatterlist *sg;
dev_warn(mmc_dev(host->mmc),
"%s timeout wait %u CMD%d: IRQ 0x%08x:0x%08x, last IRQ 0x%08x\n",
@@ -1666,6 +1666,7 @@ static void usdhi6_timeout_work(struct work_struct *work)
case USDHI6_WAIT_FOR_MWRITE:
case USDHI6_WAIT_FOR_READ:
case USDHI6_WAIT_FOR_WRITE:
+ sg = host->sg ?: data->sg;
dev_dbg(mmc_dev(host->mmc),
"%c: page #%u @ +0x%zx %ux%u in SG%u. Current SG %u bytes @ %u\n",
data->flags & MMC_DATA_READ ? 'R' : 'W', host->page_idx,
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 95c13b2ffa79..ffa288474820 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -426,15 +426,6 @@ int add_mtd_device(struct mtd_info *mtd)
mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
- if (mtd->dev.parent) {
- if (!mtd->owner && mtd->dev.parent->driver)
- mtd->owner = mtd->dev.parent->driver->owner;
- if (!mtd->name)
- mtd->name = dev_name(mtd->dev.parent);
- } else {
- pr_debug("mtd device won't show a device symlink in sysfs\n");
- }
-
/* Some chips always power up locked. Unlock them now */
if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) {
error = mtd_unlock(mtd, 0, mtd->size);
@@ -549,6 +540,21 @@ static int mtd_add_device_partitions(struct mtd_info *mtd,
return 0;
}
+/*
+ * Set a few defaults based on the parent devices, if not provided by the
+ * driver
+ */
+static void mtd_set_dev_defaults(struct mtd_info *mtd)
+{
+ if (mtd->dev.parent) {
+ if (!mtd->owner && mtd->dev.parent->driver)
+ mtd->owner = mtd->dev.parent->driver->owner;
+ if (!mtd->name)
+ mtd->name = dev_name(mtd->dev.parent);
+ } else {
+ pr_debug("mtd device won't show a device symlink in sysfs\n");
+ }
+}
/**
* mtd_device_parse_register - parse partitions and register an MTD device.
@@ -587,6 +593,8 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
int ret;
struct mtd_partition *real_parts = NULL;
+ mtd_set_dev_defaults(mtd);
+
ret = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
if (ret <= 0 && nr_parts && parts) {
real_parts = kmemdup(parts, sizeof(*parts) * nr_parts,
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 49883905a434..32477c4eb421 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -516,8 +516,8 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
status_old = read_sr(nor);
/* Cannot unlock; would unlock larger region than requested */
- if (stm_is_locked_sr(nor, status_old, ofs - mtd->erasesize,
- mtd->erasesize))
+ if (stm_is_locked_sr(nor, ofs - mtd->erasesize, mtd->erasesize,
+ status_old))
return -EINVAL;
/*
@@ -1200,8 +1200,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
if (JEDEC_MFR(info) == SNOR_MFR_ATMEL ||
JEDEC_MFR(info) == SNOR_MFR_INTEL ||
- JEDEC_MFR(info) == SNOR_MFR_SST ||
- JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+ JEDEC_MFR(info) == SNOR_MFR_SST) {
write_enable(nor);
write_sr(nor, 0);
}
@@ -1217,8 +1216,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
mtd->_read = spi_nor_read;
/* NOR protection support for STmicro/Micron chips and similar */
- if (JEDEC_MFR(info) == SNOR_MFR_MICRON ||
- JEDEC_MFR(info) == SNOR_MFR_WINBOND) {
+ if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
nor->flash_lock = stm_lock;
nor->flash_unlock = stm_unlock;
nor->flash_is_locked = stm_is_locked;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index c308429dd9c7..11dd91e4db56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -295,6 +295,10 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
INIT_LIST_HEAD(&ctbl->hash_list[i]);
cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry));
+ if (!cl_list) {
+ t4_free_mem(ctbl);
+ return NULL;
+ }
ctbl->cl_list = (void *)cl_list;
for (i = 0; i < clipt_size; i++) {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index a5f422f26cb4..daf05155b732 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -772,8 +772,10 @@ int qlcnic_82xx_config_intrpt(struct qlcnic_adapter *adapter, u8 op_type)
int i, err = 0;
for (i = 0; i < ahw->num_msix; i++) {
- qlcnic_alloc_mbx_args(&cmd, adapter,
- QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+ err = qlcnic_alloc_mbx_args(&cmd, adapter,
+ QLCNIC_CMD_MQ_TX_CONFIG_INTR);
+ if (err)
+ return err;
type = op_type ? QLCNIC_INTRPT_ADD : QLCNIC_INTRPT_DEL;
val = type | (ahw->intr_tbl[i].type << 4);
if (ahw->intr_tbl[i].type == QLCNIC_INTRPT_MSIX)
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 9f0b1c342b77..5a1e98547031 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -683,6 +683,12 @@ static void sixpack_close(struct tty_struct *tty)
if (!atomic_dec_and_test(&sp->refcnt))
down(&sp->dead_sem);
+ /* We must stop the queue to avoid potentially scribbling
+ * on the free buffers. The sp->dead_sem is not sufficient
+ * to protect us from sp->xbuff access.
+ */
+ netif_stop_queue(sp->dev);
+
del_timer_sync(&sp->tx_t);
del_timer_sync(&sp->resync_t);
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 0b72b9de5207..85828f153445 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -797,6 +797,11 @@ static void mkiss_close(struct tty_struct *tty)
*/
if (!atomic_dec_and_test(&ax->refcnt))
down(&ax->dead_sem);
+ /*
+ * Halt the transmit queue so that a new transmit cannot scribble
+ * on our buffers
+ */
+ netif_stop_queue(ax->dev);
/* Free all AX25 frame buffers. */
kfree(ax->rbuff);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9a5be8b85186..5fccc5a8153f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -742,6 +742,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
+ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 2e32c41536ae..2fb637ad594a 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3525,6 +3525,14 @@ static int rtl8152_resume(struct usb_interface *intf)
return 0;
}
+static int rtl8152_reset_resume(struct usb_interface *intf)
+{
+ struct r8152 *tp = usb_get_intfdata(intf);
+
+ clear_bit(SELECTIVE_SUSPEND, &tp->flags);
+ return rtl8152_resume(intf);
+}
+
static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct r8152 *tp = netdev_priv(dev);
@@ -4276,7 +4284,7 @@ static struct usb_driver rtl8152_driver = {
.disconnect = rtl8152_disconnect,
.suspend = rtl8152_suspend,
.resume = rtl8152_resume,
- .reset_resume = rtl8152_resume,
+ .reset_resume = rtl8152_reset_resume,
.pre_reset = rtl8152_pre_reset,
.post_reset = rtl8152_post_reset,
.supports_autosuspend = 1,
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 417903715437..0cbf520cea77 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1380,10 +1380,10 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
skip_page_frags = true;
goto rcd_done;
}
- new_dma_addr = dma_map_page(&adapter->pdev->dev
- , rbi->page,
- 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ new_dma_addr = dma_map_page(&adapter->pdev->dev,
+ new_page,
+ 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
if (dma_mapping_error(&adapter->pdev->dev,
new_dma_addr)) {
put_page(new_page);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 4c58c83dc225..bdb8a6c0f8aa 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.4.4.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.4.5.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01040400
+#define VMXNET3_DRIVER_VERSION_NUM 0x01040500
#if defined(CONFIG_PCI_MSI)
/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4f9748457f5a..0a242b200df4 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -800,7 +800,7 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
}
/* called under rcu_read_lock */
-static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
+static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
{
struct fib_result res = { .tclassid = 0 };
struct net *net = dev_net(dev);
@@ -808,9 +808,10 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
u8 flags = fl4->flowi4_flags;
u8 scope = fl4->flowi4_scope;
u8 tos = RT_FL_TOS(fl4);
+ int rc;
if (unlikely(!fl4->daddr))
- return;
+ return 0;
fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
@@ -818,7 +819,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
- if (!fib_lookup(net, fl4, &res, 0)) {
+ rc = fib_lookup(net, fl4, &res, 0);
+ if (!rc) {
if (res.type == RTN_LOCAL)
fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
else
@@ -828,6 +830,8 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_flags = flags;
fl4->flowi4_tos = orig_tos;
fl4->flowi4_scope = scope;
+
+ return rc;
}
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index f131ba947dc6..c0ad9aaa16a7 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -5,6 +5,7 @@ config PCI_DRA7XX
bool "TI DRA7xx PCIe controller"
select PCIE_DW
depends on OF && HAS_IOMEM && TI_PIPE3
+ depends on BROKEN
help
Enables support for the PCIe controller in the DRA7xx SoC. There
are two instances of PCIe controller in DRA7xx. This controller can
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 7eaa4c87fec7..7a0df3fdbfae 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -257,6 +257,7 @@ void pci_msi_mask_irq(struct irq_data *data)
{
msi_set_mask_bit(data, 1);
}
+EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
/**
* pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts
@@ -266,6 +267,7 @@ void pci_msi_unmask_irq(struct irq_data *data)
{
msi_set_mask_bit(data, 0);
}
+EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
void default_restore_msi_irqs(struct pci_dev *dev)
{
@@ -1126,6 +1128,7 @@ struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
return to_pci_dev(desc->dev);
}
+EXPORT_SYMBOL(msi_desc_to_pci_dev);
void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
{
@@ -1285,6 +1288,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
domain->bus_token = DOMAIN_BUS_PCI_MSI;
return domain;
}
+EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);
/**
* pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index a32ba753e413..d3f32d6417ef 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -9,7 +9,9 @@
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/irqdomain.h>
#include <linux/pci.h>
+#include <linux/msi.h>
#include <linux/pci_hotplug.h>
#include <linux/module.h>
#include <linux/pci-aspm.h>
@@ -689,6 +691,46 @@ static struct acpi_bus_type acpi_pci_bus = {
.cleanup = pci_acpi_cleanup,
};
+
+static struct fwnode_handle *(*pci_msi_get_fwnode_cb)(struct device *dev);
+
+/**
+ * pci_msi_register_fwnode_provider - Register callback to retrieve fwnode
+ * @fn: Callback matching a device to a fwnode that identifies a PCI
+ * MSI domain.
+ *
+ * This should be called by irqchip driver, which is the parent of
+ * the MSI domain to provide callback interface to query fwnode.
+ */
+void
+pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *))
+{
+ pci_msi_get_fwnode_cb = fn;
+}
+
+/**
+ * pci_host_bridge_acpi_msi_domain - Retrieve MSI domain of a PCI host bridge
+ * @bus: The PCI host bridge bus.
+ *
+ * This function uses the callback function registered by
+ * pci_msi_register_fwnode_provider() to retrieve the irq_domain with
+ * type DOMAIN_BUS_PCI_MSI of the specified host bridge bus.
+ * This returns NULL on error or when the domain is not found.
+ */
+struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus)
+{
+ struct fwnode_handle *fwnode;
+
+ if (!pci_msi_get_fwnode_cb)
+ return NULL;
+
+ fwnode = pci_msi_get_fwnode_cb(&bus->dev);
+ if (!fwnode)
+ return NULL;
+
+ return irq_find_matching_fwnode(fwnode, DOMAIN_BUS_PCI_MSI);
+}
+
static int __init acpi_pci_init(void)
{
int ret;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index edb1984201e9..553a029e37f1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -672,6 +672,8 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus)
* should be called from here.
*/
d = pci_host_bridge_of_msi_domain(bus);
+ if (!d)
+ d = pci_host_bridge_acpi_msi_domain(bus);
return d;
}
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 312c78b27a32..99a4c10ed43f 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -244,7 +244,7 @@ config PINCTRL_ZYNQ
select PINMUX
select GENERIC_PINCONF
help
- This selectes the pinctrl driver for Xilinx Zynq.
+ This selects the pinctrl driver for Xilinx Zynq.
source "drivers/pinctrl/bcm/Kconfig"
source "drivers/pinctrl/berlin/Kconfig"
@@ -252,6 +252,7 @@ source "drivers/pinctrl/freescale/Kconfig"
source "drivers/pinctrl/intel/Kconfig"
source "drivers/pinctrl/mvebu/Kconfig"
source "drivers/pinctrl/nomadik/Kconfig"
+source "drivers/pinctrl/pxa/Kconfig"
source "drivers/pinctrl/qcom/Kconfig"
source "drivers/pinctrl/samsung/Kconfig"
source "drivers/pinctrl/sh-pfc/Kconfig"
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index 738cb4929a49..bf1b5ca5180b 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -41,15 +41,16 @@ obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o
obj-$(CONFIG_PINCTRL_ZYNQ) += pinctrl-zynq.o
obj-$(CONFIG_ARCH_BCM) += bcm/
-obj-$(CONFIG_ARCH_BERLIN) += berlin/
+obj-$(CONFIG_PINCTRL_BERLIN) += berlin/
obj-y += freescale/
obj-$(CONFIG_X86) += intel/
-obj-$(CONFIG_PLAT_ORION) += mvebu/
+obj-$(CONFIG_PINCTRL_MVEBU) += mvebu/
obj-y += nomadik/
+obj-$(CONFIG_ARCH_PXA) += pxa/
obj-$(CONFIG_ARCH_QCOM) += qcom/
obj-$(CONFIG_PINCTRL_SAMSUNG) += samsung/
obj-$(CONFIG_PINCTRL_SH_PFC) += sh-pfc/
-obj-$(CONFIG_PLAT_SPEAR) += spear/
+obj-$(CONFIG_PINCTRL_SPEAR) += spear/
obj-$(CONFIG_ARCH_SUNXI) += sunxi/
obj-$(CONFIG_PINCTRL_UNIPHIER) += uniphier/
obj-$(CONFIG_ARCH_VT8500) += vt8500/
diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig
index cd11d4d9ad58..2cc74384cafa 100644
--- a/drivers/pinctrl/bcm/Kconfig
+++ b/drivers/pinctrl/bcm/Kconfig
@@ -9,6 +9,7 @@ config PINCTRL_BCM281XX
select PINCONF
select GENERIC_PINCONF
select REGMAP_MMIO
+ default ARCH_BCM_MOBILE
help
Say Y here to support Broadcom BCM281xx pinctrl driver, which is used
for the BCM281xx SoC family, including BCM11130, BCM11140, BCM11351,
@@ -20,27 +21,41 @@ config PINCTRL_BCM2835
select PINMUX
select PINCONF
-config PINCTRL_CYGNUS_GPIO
- bool "Broadcom Cygnus GPIO (with PINCONF) driver"
- depends on OF_GPIO && ARCH_BCM_CYGNUS
+config PINCTRL_IPROC_GPIO
+ bool "Broadcom iProc GPIO (with PINCONF) driver"
+ depends on OF_GPIO && (ARCH_BCM_IPROC || COMPILE_TEST)
select GPIOLIB_IRQCHIP
select PINCONF
select GENERIC_PINCONF
- default ARCH_BCM_CYGNUS
+ default ARCH_BCM_IPROC
help
- Say yes here to enable the Broadcom Cygnus GPIO driver.
+ Say yes here to enable the Broadcom iProc GPIO driver.
+
+ The Broadcom iProc based SoCs- Cygnus, NS2, NSP and Stingray, use
+ same GPIO Controller IP hence this driver could be used for all.
The Broadcom Cygnus SoC has 3 GPIO controllers including the ASIU
GPIO controller (ASIU), the chipCommonG GPIO controller (CCM), and
the always-ON GPIO controller (CRMU/AON). All 3 GPIO controllers are
supported by this driver.
- All 3 Cygnus GPIO controllers support basic PINCONF functions such
+ The Broadcom NSP has two GPIO controllers including the ChipcommonA
+ GPIO, the ChipcommonB GPIO. Later controller is supported by this
+ driver.
+
+ The Broadcom NS2 has two GPIO controller including the CRMU GPIO,
+ the ChipcommonG GPIO. Both controllers are supported by this driver.
+
+ The Broadcom Stingray GPIO controllers are supported by this driver.
+
+ All above SoCs GPIO controllers support basic PINCONF functions such
as bias pull up, pull down, and drive strength configurations, when
these pins are muxed to GPIO.
- Pins from the ASIU GPIO can be individually muxed to GPIO function,
- through interaction with the Cygnus IOMUX controller.
+ It provides the framework where pins from the individual GPIO can be
+ individually muxed to GPIO function, through interaction with the
+ SoCs IOMUX controller. This features could be used only on SoCs which
+ support individual pin muxing.
config PINCTRL_CYGNUS_MUX
bool "Broadcom Cygnus IOMUX driver"
@@ -54,3 +69,20 @@ config PINCTRL_CYGNUS_MUX
The Broadcom Cygnus IOMUX driver supports group based IOMUX
configuration, with the exception that certain individual pins
can be overrided to GPIO function
+
+config PINCTRL_NSP_GPIO
+ bool "Broadcom NSP GPIO (with PINCONF) driver"
+ depends on OF_GPIO && (ARCH_BCM_NSP || COMPILE_TEST)
+ select GPIOLIB_IRQCHIP
+ select PINCONF
+ select GENERIC_PINCONF
+ default ARCH_BCM_NSP
+ help
+ Say yes here to enable the Broadcom NSP GPIO driver.
+
+ The Broadcom Northstar Plus SoC ChipcommonA GPIO controller is
+ supported by this driver.
+
+ The ChipcommonA GPIO controller support basic PINCONF functions such
+ as bias pull up, pull down, and drive strength configurations, when
+ these pins are muxed to GPIO.
diff --git a/drivers/pinctrl/bcm/Makefile b/drivers/pinctrl/bcm/Makefile
index 2b2f70ee804c..6148367d5e8c 100644
--- a/drivers/pinctrl/bcm/Makefile
+++ b/drivers/pinctrl/bcm/Makefile
@@ -2,5 +2,6 @@
obj-$(CONFIG_PINCTRL_BCM281XX) += pinctrl-bcm281xx.o
obj-$(CONFIG_PINCTRL_BCM2835) += pinctrl-bcm2835.o
-obj-$(CONFIG_PINCTRL_CYGNUS_GPIO) += pinctrl-cygnus-gpio.o
+obj-$(CONFIG_PINCTRL_IPROC_GPIO) += pinctrl-iproc-gpio.o
obj-$(CONFIG_PINCTRL_CYGNUS_MUX) += pinctrl-cygnus-mux.o
+obj-$(CONFIG_PINCTRL_NSP_GPIO) += pinctrl-nsp-gpio.o
diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
index 2e6ca69635aa..75b0d8c8f058 100644
--- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c
+++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c
@@ -795,7 +795,7 @@ static int bcm2835_pctl_dt_node_to_map(struct pinctrl_dev *pctldev,
return 0;
out:
- kfree(maps);
+ bcm2835_pctl_dt_free_map(pctldev, maps, num_pins * maps_per_pin);
return err;
}
diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
index 12a48f498b75..314591a4609b 100644
--- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
@@ -10,14 +10,16 @@
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * This file contains the Broadcom Cygnus GPIO driver that supports 3
- * GPIO controllers on Cygnus including the ASIU GPIO controller, the
+ * This file contains the Broadcom Iproc GPIO driver that supports 3
+ * GPIO controllers on Iproc including the ASIU GPIO controller, the
* chipCommonG GPIO controller, and the always-on GPIO controller. Basic
* PINCONF such as bias pull up/down, and drive strength are also supported
* in this driver.
*
- * Pins from the ASIU GPIO can be individually muxed to GPIO function,
- * through the interaction with the Cygnus IOMUX controller
+ * It provides the functionality where pins from the GPIO can be
+ * individually muxed to GPIO function, if individual pad
+ * configuration is supported, through the interaction with respective
+ * SoCs IOMUX controller.
*/
#include <linux/kernel.h>
@@ -34,42 +36,42 @@
#include "../pinctrl-utils.h"
-#define CYGNUS_GPIO_DATA_IN_OFFSET 0x00
-#define CYGNUS_GPIO_DATA_OUT_OFFSET 0x04
-#define CYGNUS_GPIO_OUT_EN_OFFSET 0x08
-#define CYGNUS_GPIO_INT_TYPE_OFFSET 0x0c
-#define CYGNUS_GPIO_INT_DE_OFFSET 0x10
-#define CYGNUS_GPIO_INT_EDGE_OFFSET 0x14
-#define CYGNUS_GPIO_INT_MSK_OFFSET 0x18
-#define CYGNUS_GPIO_INT_STAT_OFFSET 0x1c
-#define CYGNUS_GPIO_INT_MSTAT_OFFSET 0x20
-#define CYGNUS_GPIO_INT_CLR_OFFSET 0x24
-#define CYGNUS_GPIO_PAD_RES_OFFSET 0x34
-#define CYGNUS_GPIO_RES_EN_OFFSET 0x38
+#define IPROC_GPIO_DATA_IN_OFFSET 0x00
+#define IPROC_GPIO_DATA_OUT_OFFSET 0x04
+#define IPROC_GPIO_OUT_EN_OFFSET 0x08
+#define IPROC_GPIO_INT_TYPE_OFFSET 0x0c
+#define IPROC_GPIO_INT_DE_OFFSET 0x10
+#define IPROC_GPIO_INT_EDGE_OFFSET 0x14
+#define IPROC_GPIO_INT_MSK_OFFSET 0x18
+#define IPROC_GPIO_INT_STAT_OFFSET 0x1c
+#define IPROC_GPIO_INT_MSTAT_OFFSET 0x20
+#define IPROC_GPIO_INT_CLR_OFFSET 0x24
+#define IPROC_GPIO_PAD_RES_OFFSET 0x34
+#define IPROC_GPIO_RES_EN_OFFSET 0x38
/* drive strength control for ASIU GPIO */
-#define CYGNUS_GPIO_ASIU_DRV0_CTRL_OFFSET 0x58
+#define IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET 0x58
/* drive strength control for CCM/CRMU (AON) GPIO */
-#define CYGNUS_GPIO_DRV0_CTRL_OFFSET 0x00
+#define IPROC_GPIO_DRV0_CTRL_OFFSET 0x00
#define GPIO_BANK_SIZE 0x200
#define NGPIOS_PER_BANK 32
#define GPIO_BANK(pin) ((pin) / NGPIOS_PER_BANK)
-#define CYGNUS_GPIO_REG(pin, reg) (GPIO_BANK(pin) * GPIO_BANK_SIZE + (reg))
-#define CYGNUS_GPIO_SHIFT(pin) ((pin) % NGPIOS_PER_BANK)
+#define IPROC_GPIO_REG(pin, reg) (GPIO_BANK(pin) * GPIO_BANK_SIZE + (reg))
+#define IPROC_GPIO_SHIFT(pin) ((pin) % NGPIOS_PER_BANK)
#define GPIO_DRV_STRENGTH_BIT_SHIFT 20
#define GPIO_DRV_STRENGTH_BITS 3
#define GPIO_DRV_STRENGTH_BIT_MASK ((1 << GPIO_DRV_STRENGTH_BITS) - 1)
/*
- * Cygnus GPIO core
+ * Iproc GPIO core
*
* @dev: pointer to device
- * @base: I/O register base for Cygnus GPIO controller
- * @io_ctrl: I/O register base for certain type of Cygnus GPIO controller that
+ * @base: I/O register base for Iproc GPIO controller
+ * @io_ctrl: I/O register base for certain type of Iproc GPIO controller that
* has the PINCONF support implemented outside of the GPIO block
* @lock: lock to protect access to I/O registers
* @gc: GPIO chip
@@ -79,7 +81,7 @@
* @pctl: pointer to pinctrl_dev
* @pctldesc: pinctrl descriptor
*/
-struct cygnus_gpio {
+struct iproc_gpio {
struct device *dev;
void __iomem *base;
@@ -96,33 +98,33 @@ struct cygnus_gpio {
struct pinctrl_desc pctldesc;
};
-static inline struct cygnus_gpio *to_cygnus_gpio(struct gpio_chip *gc)
+static inline struct iproc_gpio *to_iproc_gpio(struct gpio_chip *gc)
{
- return container_of(gc, struct cygnus_gpio, gc);
+ return container_of(gc, struct iproc_gpio, gc);
}
/*
* Mapping from PINCONF pins to GPIO pins is 1-to-1
*/
-static inline unsigned cygnus_pin_to_gpio(unsigned pin)
+static inline unsigned iproc_pin_to_gpio(unsigned pin)
{
return pin;
}
/**
- * cygnus_set_bit - set or clear one bit (corresponding to the GPIO pin) in a
- * Cygnus GPIO register
+ * iproc_set_bit - set or clear one bit (corresponding to the GPIO pin) in a
+ * Iproc GPIO register
*
- * @cygnus_gpio: Cygnus GPIO device
+ * @iproc_gpio: Iproc GPIO device
* @reg: register offset
* @gpio: GPIO pin
* @set: set or clear
*/
-static inline void cygnus_set_bit(struct cygnus_gpio *chip, unsigned int reg,
+static inline void iproc_set_bit(struct iproc_gpio *chip, unsigned int reg,
unsigned gpio, bool set)
{
- unsigned int offset = CYGNUS_GPIO_REG(gpio, reg);
- unsigned int shift = CYGNUS_GPIO_SHIFT(gpio);
+ unsigned int offset = IPROC_GPIO_REG(gpio, reg);
+ unsigned int shift = IPROC_GPIO_SHIFT(gpio);
u32 val;
val = readl(chip->base + offset);
@@ -133,19 +135,19 @@ static inline void cygnus_set_bit(struct cygnus_gpio *chip, unsigned int reg,
writel(val, chip->base + offset);
}
-static inline bool cygnus_get_bit(struct cygnus_gpio *chip, unsigned int reg,
+static inline bool iproc_get_bit(struct iproc_gpio *chip, unsigned int reg,
unsigned gpio)
{
- unsigned int offset = CYGNUS_GPIO_REG(gpio, reg);
- unsigned int shift = CYGNUS_GPIO_SHIFT(gpio);
+ unsigned int offset = IPROC_GPIO_REG(gpio, reg);
+ unsigned int shift = IPROC_GPIO_SHIFT(gpio);
return !!(readl(chip->base + offset) & BIT(shift));
}
-static void cygnus_gpio_irq_handler(struct irq_desc *desc)
+static void iproc_gpio_irq_handler(struct irq_desc *desc)
{
struct gpio_chip *gc = irq_desc_get_handler_data(desc);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
struct irq_chip *irq_chip = irq_desc_get_chip(desc);
int i, bit;
@@ -154,7 +156,7 @@ static void cygnus_gpio_irq_handler(struct irq_desc *desc)
/* go through the entire GPIO banks and handle all interrupts */
for (i = 0; i < chip->num_banks; i++) {
unsigned long val = readl(chip->base + (i * GPIO_BANK_SIZE) +
- CYGNUS_GPIO_INT_MSTAT_OFFSET);
+ IPROC_GPIO_INT_MSTAT_OFFSET);
for_each_set_bit(bit, &val, NGPIOS_PER_BANK) {
unsigned pin = NGPIOS_PER_BANK * i + bit;
@@ -165,7 +167,7 @@ static void cygnus_gpio_irq_handler(struct irq_desc *desc)
* handler, so we do not leave any window
*/
writel(BIT(bit), chip->base + (i * GPIO_BANK_SIZE) +
- CYGNUS_GPIO_INT_CLR_OFFSET);
+ IPROC_GPIO_INT_CLR_OFFSET);
generic_handle_irq(child_irq);
}
@@ -175,60 +177,60 @@ static void cygnus_gpio_irq_handler(struct irq_desc *desc)
}
-static void cygnus_gpio_irq_ack(struct irq_data *d)
+static void iproc_gpio_irq_ack(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned gpio = d->hwirq;
- unsigned int offset = CYGNUS_GPIO_REG(gpio,
- CYGNUS_GPIO_INT_CLR_OFFSET);
- unsigned int shift = CYGNUS_GPIO_SHIFT(gpio);
+ unsigned int offset = IPROC_GPIO_REG(gpio,
+ IPROC_GPIO_INT_CLR_OFFSET);
+ unsigned int shift = IPROC_GPIO_SHIFT(gpio);
u32 val = BIT(shift);
writel(val, chip->base + offset);
}
/**
- * cygnus_gpio_irq_set_mask - mask/unmask a GPIO interrupt
+ * iproc_gpio_irq_set_mask - mask/unmask a GPIO interrupt
*
* @d: IRQ chip data
* @unmask: mask/unmask GPIO interrupt
*/
-static void cygnus_gpio_irq_set_mask(struct irq_data *d, bool unmask)
+static void iproc_gpio_irq_set_mask(struct irq_data *d, bool unmask)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned gpio = d->hwirq;
- cygnus_set_bit(chip, CYGNUS_GPIO_INT_MSK_OFFSET, gpio, unmask);
+ iproc_set_bit(chip, IPROC_GPIO_INT_MSK_OFFSET, gpio, unmask);
}
-static void cygnus_gpio_irq_mask(struct irq_data *d)
+static void iproc_gpio_irq_mask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- cygnus_gpio_irq_set_mask(d, false);
+ iproc_gpio_irq_set_mask(d, false);
spin_unlock_irqrestore(&chip->lock, flags);
}
-static void cygnus_gpio_irq_unmask(struct irq_data *d)
+static void iproc_gpio_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- cygnus_gpio_irq_set_mask(d, true);
+ iproc_gpio_irq_set_mask(d, true);
spin_unlock_irqrestore(&chip->lock, flags);
}
-static int cygnus_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+static int iproc_gpio_irq_set_type(struct irq_data *d, unsigned int type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned gpio = d->hwirq;
bool level_triggered = false;
bool dual_edge = false;
@@ -263,10 +265,10 @@ static int cygnus_gpio_irq_set_type(struct irq_data *d, unsigned int type)
}
spin_lock_irqsave(&chip->lock, flags);
- cygnus_set_bit(chip, CYGNUS_GPIO_INT_TYPE_OFFSET, gpio,
+ iproc_set_bit(chip, IPROC_GPIO_INT_TYPE_OFFSET, gpio,
level_triggered);
- cygnus_set_bit(chip, CYGNUS_GPIO_INT_DE_OFFSET, gpio, dual_edge);
- cygnus_set_bit(chip, CYGNUS_GPIO_INT_EDGE_OFFSET, gpio,
+ iproc_set_bit(chip, IPROC_GPIO_INT_DE_OFFSET, gpio, dual_edge);
+ iproc_set_bit(chip, IPROC_GPIO_INT_EDGE_OFFSET, gpio,
rising_or_high);
spin_unlock_irqrestore(&chip->lock, flags);
@@ -277,32 +279,32 @@ static int cygnus_gpio_irq_set_type(struct irq_data *d, unsigned int type)
return 0;
}
-static struct irq_chip cygnus_gpio_irq_chip = {
- .name = "bcm-cygnus-gpio",
- .irq_ack = cygnus_gpio_irq_ack,
- .irq_mask = cygnus_gpio_irq_mask,
- .irq_unmask = cygnus_gpio_irq_unmask,
- .irq_set_type = cygnus_gpio_irq_set_type,
+static struct irq_chip iproc_gpio_irq_chip = {
+ .name = "bcm-iproc-gpio",
+ .irq_ack = iproc_gpio_irq_ack,
+ .irq_mask = iproc_gpio_irq_mask,
+ .irq_unmask = iproc_gpio_irq_unmask,
+ .irq_set_type = iproc_gpio_irq_set_type,
};
/*
- * Request the Cygnus IOMUX pinmux controller to mux individual pins to GPIO
+ * Request the Iproc IOMUX pinmux controller to mux individual pins to GPIO
*/
-static int cygnus_gpio_request(struct gpio_chip *gc, unsigned offset)
+static int iproc_gpio_request(struct gpio_chip *gc, unsigned offset)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned gpio = gc->base + offset;
- /* not all Cygnus GPIO pins can be muxed individually */
+ /* not all Iproc GPIO pins can be muxed individually */
if (!chip->pinmux_is_supported)
return 0;
return pinctrl_request_gpio(gpio);
}
-static void cygnus_gpio_free(struct gpio_chip *gc, unsigned offset)
+static void iproc_gpio_free(struct gpio_chip *gc, unsigned offset)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned gpio = gc->base + offset;
if (!chip->pinmux_is_supported)
@@ -311,13 +313,13 @@ static void cygnus_gpio_free(struct gpio_chip *gc, unsigned offset)
pinctrl_free_gpio(gpio);
}
-static int cygnus_gpio_direction_input(struct gpio_chip *gc, unsigned gpio)
+static int iproc_gpio_direction_input(struct gpio_chip *gc, unsigned gpio)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- cygnus_set_bit(chip, CYGNUS_GPIO_OUT_EN_OFFSET, gpio, false);
+ iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, false);
spin_unlock_irqrestore(&chip->lock, flags);
dev_dbg(chip->dev, "gpio:%u set input\n", gpio);
@@ -325,15 +327,15 @@ static int cygnus_gpio_direction_input(struct gpio_chip *gc, unsigned gpio)
return 0;
}
-static int cygnus_gpio_direction_output(struct gpio_chip *gc, unsigned gpio,
+static int iproc_gpio_direction_output(struct gpio_chip *gc, unsigned gpio,
int val)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- cygnus_set_bit(chip, CYGNUS_GPIO_OUT_EN_OFFSET, gpio, true);
- cygnus_set_bit(chip, CYGNUS_GPIO_DATA_OUT_OFFSET, gpio, !!(val));
+ iproc_set_bit(chip, IPROC_GPIO_OUT_EN_OFFSET, gpio, true);
+ iproc_set_bit(chip, IPROC_GPIO_DATA_OUT_OFFSET, gpio, !!(val));
spin_unlock_irqrestore(&chip->lock, flags);
dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val);
@@ -341,29 +343,29 @@ static int cygnus_gpio_direction_output(struct gpio_chip *gc, unsigned gpio,
return 0;
}
-static void cygnus_gpio_set(struct gpio_chip *gc, unsigned gpio, int val)
+static void iproc_gpio_set(struct gpio_chip *gc, unsigned gpio, int val)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- cygnus_set_bit(chip, CYGNUS_GPIO_DATA_OUT_OFFSET, gpio, !!(val));
+ iproc_set_bit(chip, IPROC_GPIO_DATA_OUT_OFFSET, gpio, !!(val));
spin_unlock_irqrestore(&chip->lock, flags);
dev_dbg(chip->dev, "gpio:%u set, value:%d\n", gpio, val);
}
-static int cygnus_gpio_get(struct gpio_chip *gc, unsigned gpio)
+static int iproc_gpio_get(struct gpio_chip *gc, unsigned gpio)
{
- struct cygnus_gpio *chip = to_cygnus_gpio(gc);
- unsigned int offset = CYGNUS_GPIO_REG(gpio,
- CYGNUS_GPIO_DATA_IN_OFFSET);
- unsigned int shift = CYGNUS_GPIO_SHIFT(gpio);
+ struct iproc_gpio *chip = to_iproc_gpio(gc);
+ unsigned int offset = IPROC_GPIO_REG(gpio,
+ IPROC_GPIO_DATA_IN_OFFSET);
+ unsigned int shift = IPROC_GPIO_SHIFT(gpio);
return !!(readl(chip->base + offset) & BIT(shift));
}
-static int cygnus_get_groups_count(struct pinctrl_dev *pctldev)
+static int iproc_get_groups_count(struct pinctrl_dev *pctldev)
{
return 1;
}
@@ -372,20 +374,20 @@ static int cygnus_get_groups_count(struct pinctrl_dev *pctldev)
* Only one group: "gpio_grp", since this local pinctrl device only performs
* GPIO specific PINCONF configurations
*/
-static const char *cygnus_get_group_name(struct pinctrl_dev *pctldev,
+static const char *iproc_get_group_name(struct pinctrl_dev *pctldev,
unsigned selector)
{
return "gpio_grp";
}
-static const struct pinctrl_ops cygnus_pctrl_ops = {
- .get_groups_count = cygnus_get_groups_count,
- .get_group_name = cygnus_get_group_name,
+static const struct pinctrl_ops iproc_pctrl_ops = {
+ .get_groups_count = iproc_get_groups_count,
+ .get_group_name = iproc_get_group_name,
.dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
.dt_free_map = pinctrl_utils_dt_free_map,
};
-static int cygnus_gpio_set_pull(struct cygnus_gpio *chip, unsigned gpio,
+static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio,
bool disable, bool pull_up)
{
unsigned long flags;
@@ -393,11 +395,11 @@ static int cygnus_gpio_set_pull(struct cygnus_gpio *chip, unsigned gpio,
spin_lock_irqsave(&chip->lock, flags);
if (disable) {
- cygnus_set_bit(chip, CYGNUS_GPIO_RES_EN_OFFSET, gpio, false);
+ iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, false);
} else {
- cygnus_set_bit(chip, CYGNUS_GPIO_PAD_RES_OFFSET, gpio,
+ iproc_set_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio,
pull_up);
- cygnus_set_bit(chip, CYGNUS_GPIO_RES_EN_OFFSET, gpio, true);
+ iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, true);
}
spin_unlock_irqrestore(&chip->lock, flags);
@@ -407,18 +409,18 @@ static int cygnus_gpio_set_pull(struct cygnus_gpio *chip, unsigned gpio,
return 0;
}
-static void cygnus_gpio_get_pull(struct cygnus_gpio *chip, unsigned gpio,
+static void iproc_gpio_get_pull(struct iproc_gpio *chip, unsigned gpio,
bool *disable, bool *pull_up)
{
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
- *disable = !cygnus_get_bit(chip, CYGNUS_GPIO_RES_EN_OFFSET, gpio);
- *pull_up = cygnus_get_bit(chip, CYGNUS_GPIO_PAD_RES_OFFSET, gpio);
+ *disable = !iproc_get_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio);
+ *pull_up = iproc_get_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio);
spin_unlock_irqrestore(&chip->lock, flags);
}
-static int cygnus_gpio_set_strength(struct cygnus_gpio *chip, unsigned gpio,
+static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio,
unsigned strength)
{
void __iomem *base;
@@ -432,14 +434,14 @@ static int cygnus_gpio_set_strength(struct cygnus_gpio *chip, unsigned gpio,
if (chip->io_ctrl) {
base = chip->io_ctrl;
- offset = CYGNUS_GPIO_DRV0_CTRL_OFFSET;
+ offset = IPROC_GPIO_DRV0_CTRL_OFFSET;
} else {
base = chip->base;
- offset = CYGNUS_GPIO_REG(gpio,
- CYGNUS_GPIO_ASIU_DRV0_CTRL_OFFSET);
+ offset = IPROC_GPIO_REG(gpio,
+ IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET);
}
- shift = CYGNUS_GPIO_SHIFT(gpio);
+ shift = IPROC_GPIO_SHIFT(gpio);
dev_dbg(chip->dev, "gpio:%u set drive strength:%d mA\n", gpio,
strength);
@@ -458,7 +460,7 @@ static int cygnus_gpio_set_strength(struct cygnus_gpio *chip, unsigned gpio,
return 0;
}
-static int cygnus_gpio_get_strength(struct cygnus_gpio *chip, unsigned gpio,
+static int iproc_gpio_get_strength(struct iproc_gpio *chip, unsigned gpio,
u16 *strength)
{
void __iomem *base;
@@ -468,14 +470,14 @@ static int cygnus_gpio_get_strength(struct cygnus_gpio *chip, unsigned gpio,
if (chip->io_ctrl) {
base = chip->io_ctrl;
- offset = CYGNUS_GPIO_DRV0_CTRL_OFFSET;
+ offset = IPROC_GPIO_DRV0_CTRL_OFFSET;
} else {
base = chip->base;
- offset = CYGNUS_GPIO_REG(gpio,
- CYGNUS_GPIO_ASIU_DRV0_CTRL_OFFSET);
+ offset = IPROC_GPIO_REG(gpio,
+ IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET);
}
- shift = CYGNUS_GPIO_SHIFT(gpio);
+ shift = IPROC_GPIO_SHIFT(gpio);
spin_lock_irqsave(&chip->lock, flags);
*strength = 0;
@@ -493,44 +495,43 @@ static int cygnus_gpio_get_strength(struct cygnus_gpio *chip, unsigned gpio,
return 0;
}
-static int cygnus_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
+static int iproc_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
unsigned long *config)
{
- struct cygnus_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
+ struct iproc_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
enum pin_config_param param = pinconf_to_config_param(*config);
- unsigned gpio = cygnus_pin_to_gpio(pin);
+ unsigned gpio = iproc_pin_to_gpio(pin);
u16 arg;
bool disable, pull_up;
int ret;
switch (param) {
case PIN_CONFIG_BIAS_DISABLE:
- cygnus_gpio_get_pull(chip, gpio, &disable, &pull_up);
+ iproc_gpio_get_pull(chip, gpio, &disable, &pull_up);
if (disable)
return 0;
else
return -EINVAL;
case PIN_CONFIG_BIAS_PULL_UP:
- cygnus_gpio_get_pull(chip, gpio, &disable, &pull_up);
+ iproc_gpio_get_pull(chip, gpio, &disable, &pull_up);
if (!disable && pull_up)
return 0;
else
return -EINVAL;
case PIN_CONFIG_BIAS_PULL_DOWN:
- cygnus_gpio_get_pull(chip, gpio, &disable, &pull_up);
+ iproc_gpio_get_pull(chip, gpio, &disable, &pull_up);
if (!disable && !pull_up)
return 0;
else
return -EINVAL;
case PIN_CONFIG_DRIVE_STRENGTH:
- ret = cygnus_gpio_get_strength(chip, gpio, &arg);
+ ret = iproc_gpio_get_strength(chip, gpio, &arg);
if (ret)
return ret;
- else
- *config = pinconf_to_config_packed(param, arg);
+ *config = pinconf_to_config_packed(param, arg);
return 0;
@@ -541,13 +542,13 @@ static int cygnus_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
return -ENOTSUPP;
}
-static int cygnus_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin,
+static int iproc_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin,
unsigned long *configs, unsigned num_configs)
{
- struct cygnus_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
+ struct iproc_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
enum pin_config_param param;
u16 arg;
- unsigned i, gpio = cygnus_pin_to_gpio(pin);
+ unsigned i, gpio = iproc_pin_to_gpio(pin);
int ret = -ENOTSUPP;
for (i = 0; i < num_configs; i++) {
@@ -556,25 +557,25 @@ static int cygnus_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin,
switch (param) {
case PIN_CONFIG_BIAS_DISABLE:
- ret = cygnus_gpio_set_pull(chip, gpio, true, false);
+ ret = iproc_gpio_set_pull(chip, gpio, true, false);
if (ret < 0)
goto out;
break;
case PIN_CONFIG_BIAS_PULL_UP:
- ret = cygnus_gpio_set_pull(chip, gpio, false, true);
+ ret = iproc_gpio_set_pull(chip, gpio, false, true);
if (ret < 0)
goto out;
break;
case PIN_CONFIG_BIAS_PULL_DOWN:
- ret = cygnus_gpio_set_pull(chip, gpio, false, false);
+ ret = iproc_gpio_set_pull(chip, gpio, false, false);
if (ret < 0)
goto out;
break;
case PIN_CONFIG_DRIVE_STRENGTH:
- ret = cygnus_gpio_set_strength(chip, gpio, arg);
+ ret = iproc_gpio_set_strength(chip, gpio, arg);
if (ret < 0)
goto out;
break;
@@ -589,20 +590,20 @@ out:
return ret;
}
-static const struct pinconf_ops cygnus_pconf_ops = {
+static const struct pinconf_ops iproc_pconf_ops = {
.is_generic = true,
- .pin_config_get = cygnus_pin_config_get,
- .pin_config_set = cygnus_pin_config_set,
+ .pin_config_get = iproc_pin_config_get,
+ .pin_config_set = iproc_pin_config_set,
};
/*
- * Cygnus GPIO controller supports some PINCONF related configurations such as
+ * Iproc GPIO controller supports some PINCONF related configurations such as
* pull up, pull down, and drive strength, when the pin is configured to GPIO
*
* Here a local pinctrl device is created with simple 1-to-1 pin mapping to the
* local GPIO pins
*/
-static int cygnus_gpio_register_pinconf(struct cygnus_gpio *chip)
+static int iproc_gpio_register_pinconf(struct iproc_gpio *chip)
{
struct pinctrl_desc *pctldesc = &chip->pctldesc;
struct pinctrl_pin_desc *pins;
@@ -622,10 +623,10 @@ static int cygnus_gpio_register_pinconf(struct cygnus_gpio *chip)
}
pctldesc->name = dev_name(chip->dev);
- pctldesc->pctlops = &cygnus_pctrl_ops;
+ pctldesc->pctlops = &iproc_pctrl_ops;
pctldesc->pins = pins;
pctldesc->npins = gc->ngpio;
- pctldesc->confops = &cygnus_pconf_ops;
+ pctldesc->confops = &iproc_pconf_ops;
chip->pctl = pinctrl_register(pctldesc, chip->dev, chip);
if (IS_ERR(chip->pctl)) {
@@ -636,59 +637,27 @@ static int cygnus_gpio_register_pinconf(struct cygnus_gpio *chip)
return 0;
}
-static void cygnus_gpio_unregister_pinconf(struct cygnus_gpio *chip)
+static void iproc_gpio_unregister_pinconf(struct iproc_gpio *chip)
{
- if (chip->pctl)
- pinctrl_unregister(chip->pctl);
+ pinctrl_unregister(chip->pctl);
}
-struct cygnus_gpio_data {
- unsigned num_gpios;
-};
-
-static const struct cygnus_gpio_data cygnus_cmm_gpio_data = {
- .num_gpios = 24,
-};
-
-static const struct cygnus_gpio_data cygnus_asiu_gpio_data = {
- .num_gpios = 146,
+static const struct of_device_id iproc_gpio_of_match[] = {
+ { .compatible = "brcm,cygnus-ccm-gpio" },
+ { .compatible = "brcm,cygnus-asiu-gpio" },
+ { .compatible = "brcm,cygnus-crmu-gpio" },
+ { .compatible = "brcm,iproc-gpio" },
+ { }
};
-static const struct cygnus_gpio_data cygnus_crmu_gpio_data = {
- .num_gpios = 6,
-};
-
-static const struct of_device_id cygnus_gpio_of_match[] = {
- {
- .compatible = "brcm,cygnus-ccm-gpio",
- .data = &cygnus_cmm_gpio_data,
- },
- {
- .compatible = "brcm,cygnus-asiu-gpio",
- .data = &cygnus_asiu_gpio_data,
- },
- {
- .compatible = "brcm,cygnus-crmu-gpio",
- .data = &cygnus_crmu_gpio_data,
- }
-};
-
-static int cygnus_gpio_probe(struct platform_device *pdev)
+static int iproc_gpio_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *res;
- struct cygnus_gpio *chip;
+ struct iproc_gpio *chip;
struct gpio_chip *gc;
u32 ngpios;
int irq, ret;
- const struct of_device_id *match;
- const struct cygnus_gpio_data *gpio_data;
-
- match = of_match_device(cygnus_gpio_of_match, dev);
- if (!match)
- return -ENODEV;
- gpio_data = match->data;
- ngpios = gpio_data->num_gpios;
chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
if (!chip)
@@ -713,6 +682,11 @@ static int cygnus_gpio_probe(struct platform_device *pdev)
}
}
+ if (of_property_read_u32(dev->of_node, "ngpios", &ngpios)) {
+ dev_err(&pdev->dev, "missing ngpios DT property\n");
+ return -ENODEV;
+ }
+
spin_lock_init(&chip->lock);
gc = &chip->gc;
@@ -722,12 +696,12 @@ static int cygnus_gpio_probe(struct platform_device *pdev)
gc->label = dev_name(dev);
gc->dev = dev;
gc->of_node = dev->of_node;
- gc->request = cygnus_gpio_request;
- gc->free = cygnus_gpio_free;
- gc->direction_input = cygnus_gpio_direction_input;
- gc->direction_output = cygnus_gpio_direction_output;
- gc->set = cygnus_gpio_set;
- gc->get = cygnus_gpio_get;
+ gc->request = iproc_gpio_request;
+ gc->free = iproc_gpio_free;
+ gc->direction_input = iproc_gpio_direction_input;
+ gc->direction_output = iproc_gpio_direction_output;
+ gc->set = iproc_gpio_set;
+ gc->get = iproc_gpio_get;
chip->pinmux_is_supported = of_property_read_bool(dev->of_node,
"gpio-ranges");
@@ -738,7 +712,7 @@ static int cygnus_gpio_probe(struct platform_device *pdev)
return ret;
}
- ret = cygnus_gpio_register_pinconf(chip);
+ ret = iproc_gpio_register_pinconf(chip);
if (ret) {
dev_err(dev, "unable to register pinconf\n");
goto err_rm_gpiochip;
@@ -747,21 +721,21 @@ static int cygnus_gpio_probe(struct platform_device *pdev)
/* optional GPIO interrupt support */
irq = platform_get_irq(pdev, 0);
if (irq) {
- ret = gpiochip_irqchip_add(gc, &cygnus_gpio_irq_chip, 0,
+ ret = gpiochip_irqchip_add(gc, &iproc_gpio_irq_chip, 0,
handle_simple_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(dev, "no GPIO irqchip\n");
goto err_unregister_pinconf;
}
- gpiochip_set_chained_irqchip(gc, &cygnus_gpio_irq_chip, irq,
- cygnus_gpio_irq_handler);
+ gpiochip_set_chained_irqchip(gc, &iproc_gpio_irq_chip, irq,
+ iproc_gpio_irq_handler);
}
return 0;
err_unregister_pinconf:
- cygnus_gpio_unregister_pinconf(chip);
+ iproc_gpio_unregister_pinconf(chip);
err_rm_gpiochip:
gpiochip_remove(gc);
@@ -769,16 +743,16 @@ err_rm_gpiochip:
return ret;
}
-static struct platform_driver cygnus_gpio_driver = {
+static struct platform_driver iproc_gpio_driver = {
.driver = {
- .name = "cygnus-gpio",
- .of_match_table = cygnus_gpio_of_match,
+ .name = "iproc-gpio",
+ .of_match_table = iproc_gpio_of_match,
},
- .probe = cygnus_gpio_probe,
+ .probe = iproc_gpio_probe,
};
-static int __init cygnus_gpio_init(void)
+static int __init iproc_gpio_init(void)
{
- return platform_driver_probe(&cygnus_gpio_driver, cygnus_gpio_probe);
+ return platform_driver_probe(&iproc_gpio_driver, iproc_gpio_probe);
}
-arch_initcall_sync(cygnus_gpio_init);
+arch_initcall_sync(iproc_gpio_init);
diff --git a/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
new file mode 100644
index 000000000000..725c36f917f9
--- /dev/null
+++ b/drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
@@ -0,0 +1,749 @@
+/*
+ * Copyright (C) 2015 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * This file contains the Broadcom Northstar Plus (NSP) GPIO driver that
+ * supports the chipCommonA GPIO controller. Basic PINCONF such as bias,
+ * pull up/down, slew and drive strength are also supported in this driver.
+ *
+ * Pins from the chipCommonA GPIO can be individually muxed to GPIO function,
+ * through the interaction with the NSP IOMUX controller.
+ */
+
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/slab.h>
+
+#include "../pinctrl-utils.h"
+
+#define NSP_CHIP_A_INT_STATUS 0x00
+#define NSP_CHIP_A_INT_MASK 0x04
+#define NSP_GPIO_DATA_IN 0x40
+#define NSP_GPIO_DATA_OUT 0x44
+#define NSP_GPIO_OUT_EN 0x48
+#define NSP_GPIO_INT_POLARITY 0x50
+#define NSP_GPIO_INT_MASK 0x54
+#define NSP_GPIO_EVENT 0x58
+#define NSP_GPIO_EVENT_INT_MASK 0x5c
+#define NSP_GPIO_EVENT_INT_POLARITY 0x64
+#define NSP_CHIP_A_GPIO_INT_BIT 0x01
+
+/* I/O parameters offset for chipcommon A GPIO */
+#define NSP_GPIO_DRV_CTRL 0x00
+#define NSP_GPIO_HYSTERESIS_EN 0x10
+#define NSP_GPIO_SLEW_RATE_EN 0x14
+#define NSP_PULL_UP_EN 0x18
+#define NSP_PULL_DOWN_EN 0x1c
+#define GPIO_DRV_STRENGTH_BITS 0x03
+
+/*
+ * nsp GPIO core
+ *
+ * @dev: pointer to device
+ * @base: I/O register base for nsp GPIO controller
+ * @io_ctrl: I/O register base for PINCONF support outside the GPIO block
+ * @gc: GPIO chip
+ * @pctl: pointer to pinctrl_dev
+ * @pctldesc: pinctrl descriptor
+ * @irq_domain: pointer to irq domain
+ * @lock: lock to protect access to I/O registers
+ */
+struct nsp_gpio {
+ struct device *dev;
+ void __iomem *base;
+ void __iomem *io_ctrl;
+ struct gpio_chip gc;
+ struct pinctrl_dev *pctl;
+ struct pinctrl_desc pctldesc;
+ struct irq_domain *irq_domain;
+ spinlock_t lock;
+};
+
+enum base_type {
+ REG,
+ IO_CTRL
+};
+
+static inline struct nsp_gpio *to_nsp_gpio(struct gpio_chip *gc)
+{
+ return container_of(gc, struct nsp_gpio, gc);
+}
+
+/*
+ * Mapping from PINCONF pins to GPIO pins is 1-to-1
+ */
+static inline unsigned nsp_pin_to_gpio(unsigned pin)
+{
+ return pin;
+}
+
+/*
+ * nsp_set_bit - set or clear one bit (corresponding to the GPIO pin) in a
+ * nsp GPIO register
+ *
+ * @nsp_gpio: nsp GPIO device
+ * @base_type: reg base to modify
+ * @reg: register offset
+ * @gpio: GPIO pin
+ * @set: set or clear
+ */
+static inline void nsp_set_bit(struct nsp_gpio *chip, enum base_type address,
+ unsigned int reg, unsigned gpio, bool set)
+{
+ u32 val;
+ void __iomem *base_address;
+
+ if (address == IO_CTRL)
+ base_address = chip->io_ctrl;
+ else
+ base_address = chip->base;
+
+ val = readl(base_address + reg);
+ if (set)
+ val |= BIT(gpio);
+ else
+ val &= ~BIT(gpio);
+
+ writel(val, base_address + reg);
+}
+
+/*
+ * nsp_get_bit - get one bit (corresponding to the GPIO pin) in a
+ * nsp GPIO register
+ */
+static inline bool nsp_get_bit(struct nsp_gpio *chip, enum base_type address,
+ unsigned int reg, unsigned gpio)
+{
+ if (address == IO_CTRL)
+ return !!(readl(chip->io_ctrl + reg) & BIT(gpio));
+ else
+ return !!(readl(chip->base + reg) & BIT(gpio));
+}
+
+static irqreturn_t nsp_gpio_irq_handler(int irq, void *data)
+{
+ struct nsp_gpio *chip = (struct nsp_gpio *)data;
+ struct gpio_chip gc = chip->gc;
+ int bit;
+ unsigned long int_bits = 0;
+ u32 int_status;
+
+ /* go through the entire GPIOs and handle all interrupts */
+ int_status = readl(chip->base + NSP_CHIP_A_INT_STATUS);
+ if (int_status & NSP_CHIP_A_GPIO_INT_BIT) {
+ unsigned int event, level;
+
+ /* Get level and edge interrupts */
+ event = readl(chip->base + NSP_GPIO_EVENT_INT_MASK) &
+ readl(chip->base + NSP_GPIO_EVENT);
+ level = readl(chip->base + NSP_GPIO_DATA_IN) ^
+ readl(chip->base + NSP_GPIO_INT_POLARITY);
+ level &= readl(chip->base + NSP_GPIO_INT_MASK);
+ int_bits = level | event;
+
+ for_each_set_bit(bit, &int_bits, gc.ngpio) {
+ /*
+ * Clear the interrupt before invoking the
+ * handler, so we do not leave any window
+ */
+ writel(BIT(bit), chip->base + NSP_GPIO_EVENT);
+ generic_handle_irq(
+ irq_linear_revmap(chip->irq_domain, bit));
+ }
+ }
+
+ return int_bits ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void nsp_gpio_irq_ack(struct irq_data *d)
+{
+ struct nsp_gpio *chip = irq_data_get_irq_chip_data(d);
+ unsigned gpio = d->hwirq;
+ u32 val = BIT(gpio);
+ u32 trigger_type;
+
+ trigger_type = irq_get_trigger_type(d->irq);
+ if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
+ nsp_set_bit(chip, REG, NSP_GPIO_EVENT, gpio, val);
+}
+
+/*
+ * nsp_gpio_irq_set_mask - mask/unmask a GPIO interrupt
+ *
+ * @d: IRQ chip data
+ * @unmask: mask/unmask GPIO interrupt
+ */
+static void nsp_gpio_irq_set_mask(struct irq_data *d, bool unmask)
+{
+ struct nsp_gpio *chip = irq_data_get_irq_chip_data(d);
+ unsigned gpio = d->hwirq;
+ u32 trigger_type;
+
+ trigger_type = irq_get_trigger_type(d->irq);
+ if (trigger_type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
+ nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_MASK, gpio, unmask);
+ else
+ nsp_set_bit(chip, REG, NSP_GPIO_INT_MASK, gpio, unmask);
+}
+
+static void nsp_gpio_irq_mask(struct irq_data *d)
+{
+ struct nsp_gpio *chip = irq_data_get_irq_chip_data(d);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_gpio_irq_set_mask(d, false);
+ spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static void nsp_gpio_irq_unmask(struct irq_data *d)
+{
+ struct nsp_gpio *chip = irq_data_get_irq_chip_data(d);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_gpio_irq_set_mask(d, true);
+ spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int nsp_gpio_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ struct nsp_gpio *chip = irq_data_get_irq_chip_data(d);
+ unsigned gpio = d->hwirq;
+ bool level_low;
+ bool falling;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ falling = nsp_get_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio);
+ level_low = nsp_get_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio);
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_EDGE_RISING:
+ falling = false;
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ falling = true;
+ break;
+
+ case IRQ_TYPE_LEVEL_HIGH:
+ level_low = false;
+ break;
+
+ case IRQ_TYPE_LEVEL_LOW:
+ level_low = true;
+ break;
+
+ default:
+ dev_err(chip->dev, "invalid GPIO IRQ type 0x%x\n",
+ type);
+ spin_unlock_irqrestore(&chip->lock, flags);
+ return -EINVAL;
+ }
+
+ nsp_set_bit(chip, REG, NSP_GPIO_EVENT_INT_POLARITY, gpio, falling);
+ nsp_set_bit(chip, REG, NSP_GPIO_INT_POLARITY, gpio, level_low);
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ dev_dbg(chip->dev, "gpio:%u level_low:%s falling:%s\n", gpio,
+ level_low ? "true" : "false", falling ? "true" : "false");
+ return 0;
+}
+
+static struct irq_chip nsp_gpio_irq_chip = {
+ .name = "gpio-a",
+ .irq_enable = nsp_gpio_irq_unmask,
+ .irq_disable = nsp_gpio_irq_mask,
+ .irq_ack = nsp_gpio_irq_ack,
+ .irq_mask = nsp_gpio_irq_mask,
+ .irq_unmask = nsp_gpio_irq_unmask,
+ .irq_set_type = nsp_gpio_irq_set_type,
+};
+
+/*
+ * Request the nsp IOMUX pinmux controller to mux individual pins to GPIO
+ */
+static int nsp_gpio_request(struct gpio_chip *gc, unsigned offset)
+{
+ unsigned gpio = gc->base + offset;
+
+ return pinctrl_request_gpio(gpio);
+}
+
+static void nsp_gpio_free(struct gpio_chip *gc, unsigned offset)
+{
+ unsigned gpio = gc->base + offset;
+
+ pinctrl_free_gpio(gpio);
+}
+
+static int nsp_gpio_direction_input(struct gpio_chip *gc, unsigned gpio)
+{
+ struct nsp_gpio *chip = to_nsp_gpio(gc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, false);
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ dev_dbg(chip->dev, "gpio:%u set input\n", gpio);
+ return 0;
+}
+
+static int nsp_gpio_direction_output(struct gpio_chip *gc, unsigned gpio,
+ int val)
+{
+ struct nsp_gpio *chip = to_nsp_gpio(gc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_set_bit(chip, REG, NSP_GPIO_OUT_EN, gpio, true);
+ nsp_set_bit(chip, REG, NSP_GPIO_DATA_OUT, gpio, !!(val));
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ dev_dbg(chip->dev, "gpio:%u set output, value:%d\n", gpio, val);
+ return 0;
+}
+
+static void nsp_gpio_set(struct gpio_chip *gc, unsigned gpio, int val)
+{
+ struct nsp_gpio *chip = to_nsp_gpio(gc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_set_bit(chip, REG, NSP_GPIO_DATA_OUT, gpio, !!(val));
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ dev_dbg(chip->dev, "gpio:%u set, value:%d\n", gpio, val);
+}
+
+static int nsp_gpio_get(struct gpio_chip *gc, unsigned gpio)
+{
+ struct nsp_gpio *chip = to_nsp_gpio(gc);
+
+ return !!(readl(chip->base + NSP_GPIO_DATA_IN) & BIT(gpio));
+}
+
+static int nsp_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
+{
+ struct nsp_gpio *chip = to_nsp_gpio(gc);
+
+ return irq_linear_revmap(chip->irq_domain, offset);
+}
+
+static int nsp_get_groups_count(struct pinctrl_dev *pctldev)
+{
+ return 1;
+}
+
+/*
+ * Only one group: "gpio_grp", since this local pinctrl device only performs
+ * GPIO specific PINCONF configurations
+ */
+static const char *nsp_get_group_name(struct pinctrl_dev *pctldev,
+ unsigned selector)
+{
+ return "gpio_grp";
+}
+
+static const struct pinctrl_ops nsp_pctrl_ops = {
+ .get_groups_count = nsp_get_groups_count,
+ .get_group_name = nsp_get_group_name,
+ .dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
+ .dt_free_map = pinctrl_utils_dt_free_map,
+};
+
+static int nsp_gpio_set_slew(struct nsp_gpio *chip, unsigned gpio, u16 slew)
+{
+ if (slew)
+ nsp_set_bit(chip, IO_CTRL, NSP_GPIO_SLEW_RATE_EN, gpio, true);
+ else
+ nsp_set_bit(chip, IO_CTRL, NSP_GPIO_SLEW_RATE_EN, gpio, false);
+
+ return 0;
+}
+
+static int nsp_gpio_set_pull(struct nsp_gpio *chip, unsigned gpio,
+ bool pull_up, bool pull_down)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ nsp_set_bit(chip, IO_CTRL, NSP_PULL_DOWN_EN, gpio, pull_down);
+ nsp_set_bit(chip, IO_CTRL, NSP_PULL_UP_EN, gpio, pull_up);
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ dev_dbg(chip->dev, "gpio:%u set pullup:%d pulldown: %d\n",
+ gpio, pull_up, pull_down);
+ return 0;
+}
+
+static void nsp_gpio_get_pull(struct nsp_gpio *chip, unsigned gpio,
+ bool *pull_up, bool *pull_down)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ *pull_up = nsp_get_bit(chip, IO_CTRL, NSP_PULL_UP_EN, gpio);
+ *pull_down = nsp_get_bit(chip, IO_CTRL, NSP_PULL_DOWN_EN, gpio);
+ spin_unlock_irqrestore(&chip->lock, flags);
+}
+
+static int nsp_gpio_set_strength(struct nsp_gpio *chip, unsigned gpio,
+ u16 strength)
+{
+ u32 offset, shift, i;
+ u32 val;
+ unsigned long flags;
+
+ /* make sure drive strength is supported */
+ if (strength < 2 || strength > 16 || (strength % 2))
+ return -ENOTSUPP;
+
+ shift = gpio;
+ offset = NSP_GPIO_DRV_CTRL;
+ dev_dbg(chip->dev, "gpio:%u set drive strength:%d mA\n", gpio,
+ strength);
+ spin_lock_irqsave(&chip->lock, flags);
+ strength = (strength / 2) - 1;
+ for (i = GPIO_DRV_STRENGTH_BITS; i > 0; i--) {
+ val = readl(chip->io_ctrl + offset);
+ val &= ~BIT(shift);
+ val |= ((strength >> (i-1)) & 0x1) << shift;
+ writel(val, chip->io_ctrl + offset);
+ offset += 4;
+ }
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ return 0;
+}
+
+static int nsp_gpio_get_strength(struct nsp_gpio *chip, unsigned gpio,
+ u16 *strength)
+{
+ unsigned int offset, shift;
+ u32 val;
+ unsigned long flags;
+ int i;
+
+ offset = NSP_GPIO_DRV_CTRL;
+ shift = gpio;
+
+ spin_lock_irqsave(&chip->lock, flags);
+ *strength = 0;
+ for (i = (GPIO_DRV_STRENGTH_BITS - 1); i >= 0; i--) {
+ val = readl(chip->io_ctrl + offset) & BIT(shift);
+ val >>= shift;
+ *strength += (val << i);
+ offset += 4;
+ }
+
+ /* convert to mA */
+ *strength = (*strength + 1) * 2;
+ spin_unlock_irqrestore(&chip->lock, flags);
+
+ return 0;
+}
+
+int nsp_pin_config_group_get(struct pinctrl_dev *pctldev, unsigned selector,
+ unsigned long *config)
+{
+ return 0;
+}
+
+int nsp_pin_config_group_set(struct pinctrl_dev *pctldev, unsigned selector,
+ unsigned long *configs, unsigned num_configs)
+{
+ return 0;
+}
+
+static int nsp_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin,
+ unsigned long *config)
+{
+ struct nsp_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
+ enum pin_config_param param = pinconf_to_config_param(*config);
+ unsigned int gpio;
+ u16 arg = 0;
+ bool pull_up, pull_down;
+ int ret;
+
+ gpio = nsp_pin_to_gpio(pin);
+ switch (param) {
+ case PIN_CONFIG_BIAS_DISABLE:
+ nsp_gpio_get_pull(chip, gpio, &pull_up, &pull_down);
+ if ((pull_up == false) && (pull_down == false))
+ return 0;
+ else
+ return -EINVAL;
+
+ case PIN_CONFIG_BIAS_PULL_UP:
+ nsp_gpio_get_pull(chip, gpio, &pull_up, &pull_down);
+ if (pull_up)
+ return 0;
+ else
+ return -EINVAL;
+
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+ nsp_gpio_get_pull(chip, gpio, &pull_up, &pull_down);
+ if (pull_down)
+ return 0;
+ else
+ return -EINVAL;
+
+ case PIN_CONFIG_DRIVE_STRENGTH:
+ ret = nsp_gpio_get_strength(chip, gpio, &arg);
+ if (ret)
+ return ret;
+ *config = pinconf_to_config_packed(param, arg);
+ return 0;
+
+ default:
+ return -ENOTSUPP;
+ }
+}
+
+static int nsp_pin_config_set(struct pinctrl_dev *pctldev, unsigned pin,
+ unsigned long *configs, unsigned num_configs)
+{
+ struct nsp_gpio *chip = pinctrl_dev_get_drvdata(pctldev);
+ enum pin_config_param param;
+ u16 arg;
+ unsigned int i, gpio;
+ int ret = -ENOTSUPP;
+
+ gpio = nsp_pin_to_gpio(pin);
+ for (i = 0; i < num_configs; i++) {
+ param = pinconf_to_config_param(configs[i]);
+ arg = pinconf_to_config_argument(configs[i]);
+
+ switch (param) {
+ case PIN_CONFIG_BIAS_DISABLE:
+ ret = nsp_gpio_set_pull(chip, gpio, false, false);
+ if (ret < 0)
+ goto out;
+ break;
+
+ case PIN_CONFIG_BIAS_PULL_UP:
+ ret = nsp_gpio_set_pull(chip, gpio, true, false);
+ if (ret < 0)
+ goto out;
+ break;
+
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+ ret = nsp_gpio_set_pull(chip, gpio, false, true);
+ if (ret < 0)
+ goto out;
+ break;
+
+ case PIN_CONFIG_DRIVE_STRENGTH:
+ ret = nsp_gpio_set_strength(chip, gpio, arg);
+ if (ret < 0)
+ goto out;
+ break;
+
+ case PIN_CONFIG_SLEW_RATE:
+ ret = nsp_gpio_set_slew(chip, gpio, arg);
+ if (ret < 0)
+ goto out;
+ break;
+
+ default:
+ dev_err(chip->dev, "invalid configuration\n");
+ return -ENOTSUPP;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static const struct pinconf_ops nsp_pconf_ops = {
+ .is_generic = true,
+ .pin_config_get = nsp_pin_config_get,
+ .pin_config_set = nsp_pin_config_set,
+ .pin_config_group_get = nsp_pin_config_group_get,
+ .pin_config_group_set = nsp_pin_config_group_set,
+};
+
+/*
+ * NSP GPIO controller supports some PINCONF related configurations such as
+ * pull up, pull down, slew and drive strength, when the pin is configured
+ * to GPIO.
+ *
+ * Here a local pinctrl device is created with simple 1-to-1 pin mapping to the
+ * local GPIO pins
+ */
+static int nsp_gpio_register_pinconf(struct nsp_gpio *chip)
+{
+ struct pinctrl_desc *pctldesc = &chip->pctldesc;
+ struct pinctrl_pin_desc *pins;
+ struct gpio_chip *gc = &chip->gc;
+ int i;
+
+ pins = devm_kcalloc(chip->dev, gc->ngpio, sizeof(*pins), GFP_KERNEL);
+ if (!pins)
+ return -ENOMEM;
+ for (i = 0; i < gc->ngpio; i++) {
+ pins[i].number = i;
+ pins[i].name = devm_kasprintf(chip->dev, GFP_KERNEL,
+ "gpio-%d", i);
+ if (!pins[i].name)
+ return -ENOMEM;
+ }
+ pctldesc->name = dev_name(chip->dev);
+ pctldesc->pctlops = &nsp_pctrl_ops;
+ pctldesc->pins = pins;
+ pctldesc->npins = gc->ngpio;
+ pctldesc->confops = &nsp_pconf_ops;
+
+ chip->pctl = pinctrl_register(pctldesc, chip->dev, chip);
+ if (IS_ERR(chip->pctl)) {
+ dev_err(chip->dev, "unable to register pinctrl device\n");
+ return PTR_ERR(chip->pctl);
+ }
+
+ return 0;
+}
+
+static const struct of_device_id nsp_gpio_of_match[] = {
+ {.compatible = "brcm,nsp-gpio-a",},
+ {}
+};
+
+static int nsp_gpio_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ struct nsp_gpio *chip;
+ struct gpio_chip *gc;
+ u32 val, count;
+ int irq, ret;
+
+ if (of_property_read_u32(pdev->dev.of_node, "ngpios", &val)) {
+ dev_err(&pdev->dev, "Missing ngpios OF property\n");
+ return -ENODEV;
+ }
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->dev = dev;
+ platform_set_drvdata(pdev, chip);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ chip->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(chip->base)) {
+ dev_err(dev, "unable to map I/O memory\n");
+ return PTR_ERR(chip->base);
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ chip->io_ctrl = devm_ioremap_resource(dev, res);
+ if (IS_ERR(chip->io_ctrl)) {
+ dev_err(dev, "unable to map I/O memory\n");
+ return PTR_ERR(chip->io_ctrl);
+ }
+
+ spin_lock_init(&chip->lock);
+ gc = &chip->gc;
+ gc->base = -1;
+ gc->can_sleep = false;
+ gc->ngpio = val;
+ gc->label = dev_name(dev);
+ gc->dev = dev;
+ gc->of_node = dev->of_node;
+ gc->request = nsp_gpio_request;
+ gc->free = nsp_gpio_free;
+ gc->direction_input = nsp_gpio_direction_input;
+ gc->direction_output = nsp_gpio_direction_output;
+ gc->set = nsp_gpio_set;
+ gc->get = nsp_gpio_get;
+ gc->to_irq = nsp_gpio_to_irq;
+
+ /* optional GPIO interrupt support */
+ irq = platform_get_irq(pdev, 0);
+ if (irq > 0) {
+ /* Create irq domain so that each pin can be assigned an IRQ.*/
+ chip->irq_domain = irq_domain_add_linear(gc->of_node, gc->ngpio,
+ &irq_domain_simple_ops,
+ chip);
+ if (!chip->irq_domain) {
+ dev_err(&pdev->dev, "Couldn't allocate IRQ domain\n");
+ return -ENXIO;
+ }
+
+ /* Map each gpio to an IRQ and set the handler for gpiolib. */
+ for (count = 0; count < gc->ngpio; count++) {
+ int irq = irq_create_mapping(chip->irq_domain, count);
+
+ irq_set_chip_and_handler(irq, &nsp_gpio_irq_chip,
+ handle_simple_irq);
+ irq_set_chip_data(irq, chip);
+ }
+
+ /* Install ISR for this GPIO controller. */
+ ret = devm_request_irq(&pdev->dev, irq, nsp_gpio_irq_handler,
+ IRQF_SHARED, "gpio-a", chip);
+ if (ret) {
+ dev_err(&pdev->dev, "Unable to request IRQ%d: %d\n",
+ irq, ret);
+ goto err_rm_gpiochip;
+ }
+
+ val = readl(chip->base + NSP_CHIP_A_INT_MASK);
+ val = val | NSP_CHIP_A_GPIO_INT_BIT;
+ writel(val, (chip->base + NSP_CHIP_A_INT_MASK));
+ }
+
+ ret = gpiochip_add(gc);
+ if (ret < 0) {
+ dev_err(dev, "unable to add GPIO chip\n");
+ return ret;
+ }
+
+ ret = nsp_gpio_register_pinconf(chip);
+ if (ret) {
+ dev_err(dev, "unable to register pinconf\n");
+ goto err_rm_gpiochip;
+ }
+
+ return 0;
+
+err_rm_gpiochip:
+ gpiochip_remove(gc);
+
+ return ret;
+}
+
+static struct platform_driver nsp_gpio_driver = {
+ .driver = {
+ .name = "nsp-gpio-a",
+ .of_match_table = nsp_gpio_of_match,
+ },
+ .probe = nsp_gpio_probe,
+};
+
+static int __init nsp_gpio_init(void)
+{
+ return platform_driver_probe(&nsp_gpio_driver, nsp_gpio_probe);
+}
+arch_initcall_sync(nsp_gpio_init);
diff --git a/drivers/pinctrl/berlin/Makefile b/drivers/pinctrl/berlin/Makefile
index 06f94029ad66..6f641ce2c830 100644
--- a/drivers/pinctrl/berlin/Makefile
+++ b/drivers/pinctrl/berlin/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_PINCTRL_BERLIN) += berlin.o
+obj-y += berlin.o
obj-$(CONFIG_PINCTRL_BERLIN_BG2) += berlin-bg2.o
obj-$(CONFIG_PINCTRL_BERLIN_BG2CD) += berlin-bg2cd.o
obj-$(CONFIG_PINCTRL_BERLIN_BG2Q) += berlin-bg2q.o
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8127.c b/drivers/pinctrl/mediatek/pinctrl-mt8127.c
index b317b0b664ea..98e0bebfdf92 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8127.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8127.c
@@ -351,7 +351,7 @@ static int __init mtk_pinctrl_init(void)
return platform_driver_register(&mtk_pinctrl_driver);
}
-module_init(mtk_pinctrl_init);
+arch_initcall(mtk_pinctrl_init);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MediaTek MT8127 Pinctrl Driver");
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8135.c b/drivers/pinctrl/mediatek/pinctrl-mt8135.c
index 404f1178511d..1c153b860f36 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8135.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8135.c
@@ -366,7 +366,7 @@ static int __init mtk_pinctrl_init(void)
return platform_driver_register(&mtk_pinctrl_driver);
}
-module_init(mtk_pinctrl_init);
+arch_initcall(mtk_pinctrl_init);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MediaTek Pinctrl Driver");
diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8173.c b/drivers/pinctrl/mediatek/pinctrl-mt8173.c
index ad271840d865..a62514eb2129 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt8173.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt8173.c
@@ -394,7 +394,7 @@ static int __init mtk_pinctrl_init(void)
return platform_driver_register(&mtk_pinctrl_driver);
}
-module_init(mtk_pinctrl_init);
+arch_initcall(mtk_pinctrl_init);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MediaTek Pinctrl Driver");
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 5c717275a7fa..e22cbaf9f9cf 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -509,6 +509,9 @@ static int mtk_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
err = pinconf_generic_parse_dt_config(node, pctldev, &configs,
&num_configs);
+ if (err)
+ return err;
+
if (num_configs)
has_config = 1;
@@ -520,21 +523,23 @@ static int mtk_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
if (has_config && num_pins >= 1)
maps_per_pin++;
- if (!num_pins || !maps_per_pin)
- return -EINVAL;
+ if (!num_pins || !maps_per_pin) {
+ err = -EINVAL;
+ goto exit;
+ }
reserve = num_pins * maps_per_pin;
err = pinctrl_utils_reserve_map(pctldev, map,
reserved_maps, num_maps, reserve);
if (err < 0)
- goto fail;
+ goto exit;
for (i = 0; i < num_pins; i++) {
err = of_property_read_u32_index(node, "pinmux",
i, &pinfunc);
if (err)
- goto fail;
+ goto exit;
pin = MTK_GET_PIN_NO(pinfunc);
func = MTK_GET_PIN_FUNC(pinfunc);
@@ -543,20 +548,21 @@ static int mtk_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
func >= ARRAY_SIZE(mtk_gpio_functions)) {
dev_err(pctl->dev, "invalid pins value.\n");
err = -EINVAL;
- goto fail;
+ goto exit;
}
grp = mtk_pctrl_find_group_by_pin(pctl, pin);
if (!grp) {
dev_err(pctl->dev, "unable to match pin %d to group\n",
pin);
- return -EINVAL;
+ err = -EINVAL;
+ goto exit;
}
err = mtk_pctrl_dt_node_to_map_func(pctl, pin, func, grp, map,
reserved_maps, num_maps);
if (err < 0)
- goto fail;
+ goto exit;
if (has_config) {
err = pinctrl_utils_add_map_configs(pctldev, map,
@@ -564,13 +570,14 @@ static int mtk_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
configs, num_configs,
PIN_MAP_TYPE_CONFIGS_GROUP);
if (err < 0)
- goto fail;
+ goto exit;
}
}
- return 0;
+ err = 0;
-fail:
+exit:
+ kfree(configs);
return err;
}
@@ -591,6 +598,7 @@ static int mtk_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev,
&reserved_maps, num_maps);
if (ret < 0) {
pinctrl_utils_dt_free_map(pctldev, *map, *num_maps);
+ of_node_put(np);
return ret;
}
}
diff --git a/drivers/pinctrl/mvebu/Makefile b/drivers/pinctrl/mvebu/Makefile
index 554d8af14eeb..18270cd5ea43 100644
--- a/drivers/pinctrl/mvebu/Makefile
+++ b/drivers/pinctrl/mvebu/Makefile
@@ -1,4 +1,4 @@
-obj-$(CONFIG_PINCTRL_MVEBU) += pinctrl-mvebu.o
+obj-y += pinctrl-mvebu.o
obj-$(CONFIG_PINCTRL_DOVE) += pinctrl-dove.o
obj-$(CONFIG_PINCTRL_KIRKWOOD) += pinctrl-kirkwood.o
obj-$(CONFIG_PINCTRL_ARMADA_370) += pinctrl-armada-370.o
diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
index 77d2221d379d..e4d473811bb3 100644
--- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c
+++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c
@@ -663,28 +663,20 @@ int mvebu_pinctrl_probe(struct platform_device *pdev)
/* assign mpp modes to groups */
for (n = 0; n < soc->nmodes; n++) {
struct mvebu_mpp_mode *mode = &soc->modes[n];
- struct mvebu_pinctrl_group *grp =
- mvebu_pinctrl_find_group_by_pid(pctl, mode->pid);
+ struct mvebu_mpp_ctrl_setting *set = &mode->settings[0];
+ struct mvebu_pinctrl_group *grp;
unsigned num_settings;
- if (!grp) {
- dev_warn(&pdev->dev, "unknown pinctrl group %d\n",
- mode->pid);
- continue;
- }
-
- for (num_settings = 0; ;) {
- struct mvebu_mpp_ctrl_setting *set =
- &mode->settings[num_settings];
-
+ for (num_settings = 0; ; set++) {
if (!set->name)
break;
- num_settings++;
/* skip unsupported settings for this variant */
if (pctl->variant && !(pctl->variant & set->variant))
continue;
+ num_settings++;
+
/* find gpio/gpo/gpi settings */
if (strcmp(set->name, "gpio") == 0)
set->flags = MVEBU_SETTING_GPI |
@@ -695,6 +687,17 @@ int mvebu_pinctrl_probe(struct platform_device *pdev)
set->flags = MVEBU_SETTING_GPI;
}
+ /* skip modes with no settings for this variant */
+ if (!num_settings)
+ continue;
+
+ grp = mvebu_pinctrl_find_group_by_pid(pctl, mode->pid);
+ if (!grp) {
+ dev_warn(&pdev->dev, "unknown pinctrl group %d\n",
+ mode->pid);
+ continue;
+ }
+
grp->settings = mode->settings;
grp->num_settings = num_settings;
}
diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 099a3442ff42..79e6159712c2 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -220,6 +220,7 @@ static void parse_dt_cfg(struct device_node *np,
* parse the config properties into generic pinconfig values.
* @np: node containing the pinconfig properties
* @configs: array with nconfigs entries containing the generic pinconf values
+ * must be freed when no longer necessary.
* @nconfigs: umber of configurations
*/
int pinconf_generic_parse_dt_config(struct device_node *np,
diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c
index fd342dffe4dc..8e9e8eab59ba 100644
--- a/drivers/pinctrl/pinctrl-adi2.c
+++ b/drivers/pinctrl/pinctrl-adi2.c
@@ -1102,32 +1102,24 @@ static struct platform_driver adi_gpio_driver = {
},
};
+static struct platform_driver * const drivers[] = {
+ &adi_pinctrl_driver,
+ &adi_gpio_pint_driver,
+ &adi_gpio_driver,
+};
+
static int __init adi_pinctrl_setup(void)
{
int ret;
- ret = platform_driver_register(&adi_pinctrl_driver);
+ ret = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
if (ret)
return ret;
- ret = platform_driver_register(&adi_gpio_pint_driver);
- if (ret)
- goto pint_error;
-
- ret = platform_driver_register(&adi_gpio_driver);
- if (ret)
- goto gpio_error;
-
#ifdef CONFIG_PM
register_syscore_ops(&gpio_pm_syscore_ops);
#endif
- return ret;
-gpio_error:
- platform_driver_unregister(&adi_gpio_pint_driver);
-pint_error:
- platform_driver_unregister(&adi_pinctrl_driver);
-
- return ret;
+ return 0;
}
arch_initcall(adi_pinctrl_setup);
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 33edd07d9149..d5bdcebc6aa6 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -500,7 +500,8 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
if (!num_pins) {
dev_err(pctldev->dev, "no pins found in node %s\n",
of_node_full_name(np));
- return -EINVAL;
+ ret = -EINVAL;
+ goto exit;
}
/*
@@ -514,19 +515,19 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps,
reserve);
if (ret < 0)
- return ret;
+ goto exit;
for (i = 0; i < num_pins; i++) {
const char *group, *func;
ret = of_property_read_u32_index(np, "pinmux", i, &pinfunc);
if (ret)
- return ret;
+ goto exit;
ret = atmel_pctl_xlate_pinfunc(pctldev, np, pinfunc, &group,
&func);
if (ret)
- return ret;
+ goto exit;
pinctrl_utils_add_map_mux(pctldev, map, reserved_maps, num_maps,
group, func);
@@ -537,11 +538,13 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
configs, num_configs,
PIN_MAP_TYPE_CONFIGS_GROUP);
if (ret < 0)
- return ret;
+ goto exit;
}
}
- return 0;
+exit:
+ kfree(configs);
+ return ret;
}
static int atmel_pctl_dt_node_to_map(struct pinctrl_dev *pctldev,
@@ -1000,7 +1003,7 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
atmel_pioctrl->irqs[i] = res->start;
irq_set_chained_handler(res->start, atmel_gpio_irq_handler);
irq_set_handler_data(res->start, atmel_pioctrl);
- dev_dbg(dev, "bank %i: hwirq=%u\n", i, res->start);
+ dev_dbg(dev, "bank %i: irq=%pr\n", i, res);
}
atmel_pioctrl->irq_domain = irq_domain_add_linear(dev->of_node,
diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index 0d2fc0cff35e..47b625b1b789 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -1828,20 +1828,20 @@ static struct platform_driver at91_pinctrl_driver = {
.remove = at91_pinctrl_remove,
};
+static struct platform_driver * const drivers[] = {
+ &at91_gpio_driver,
+ &at91_pinctrl_driver,
+};
+
static int __init at91_pinctrl_init(void)
{
- int ret;
-
- ret = platform_driver_register(&at91_gpio_driver);
- if (ret)
- return ret;
- return platform_driver_register(&at91_pinctrl_driver);
+ return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
}
arch_initcall(at91_pinctrl_init);
static void __exit at91_pinctrl_exit(void)
{
- platform_driver_unregister(&at91_pinctrl_driver);
+ platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
}
module_exit(at91_pinctrl_exit);
diff --git a/drivers/pinctrl/pinctrl-lantiq.h b/drivers/pinctrl/pinctrl-lantiq.h
index eb89ba045228..e137d139e494 100644
--- a/drivers/pinctrl/pinctrl-lantiq.h
+++ b/drivers/pinctrl/pinctrl-lantiq.h
@@ -162,6 +162,14 @@ enum ltq_pin {
GPIO53,
GPIO54,
GPIO55,
+ GPIO56,
+ GPIO57,
+ GPIO58,
+ GPIO59,
+ GPIO60, /* 60 */
+ GPIO61,
+ GPIO62,
+ GPIO63,
GPIO64,
GPIO65,
diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c
index a0651128e23a..91288265e856 100644
--- a/drivers/pinctrl/pinctrl-rockchip.c
+++ b/drivers/pinctrl/pinctrl-rockchip.c
@@ -614,6 +614,40 @@ static void rk3288_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
}
}
+#define RK3228_PULL_OFFSET 0x100
+
+static void rk3228_calc_pull_reg_and_bit(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit)
+{
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+ *regmap = info->regmap_base;
+ *reg = RK3228_PULL_OFFSET;
+ *reg += bank->bank_num * RK3188_PULL_BANK_STRIDE;
+ *reg += ((pin_num / RK3188_PULL_PINS_PER_REG) * 4);
+
+ *bit = (pin_num % RK3188_PULL_PINS_PER_REG);
+ *bit *= RK3188_PULL_BITS_PER_PIN;
+}
+
+#define RK3228_DRV_GRF_OFFSET 0x200
+
+static void rk3228_calc_drv_reg_and_bit(struct rockchip_pin_bank *bank,
+ int pin_num, struct regmap **regmap,
+ int *reg, u8 *bit)
+{
+ struct rockchip_pinctrl *info = bank->drvdata;
+
+ *regmap = info->regmap_base;
+ *reg = RK3228_DRV_GRF_OFFSET;
+ *reg += bank->bank_num * RK3288_DRV_BANK_STRIDE;
+ *reg += ((pin_num / RK3288_DRV_PINS_PER_REG) * 4);
+
+ *bit = (pin_num % RK3288_DRV_PINS_PER_REG);
+ *bit *= RK3288_DRV_BITS_PER_PIN;
+}
+
#define RK3368_PULL_GRF_OFFSET 0x100
#define RK3368_PULL_PMU_OFFSET 0x10
@@ -1258,8 +1292,10 @@ static int rockchip_pinctrl_parse_functions(struct device_node *np,
func->groups[i] = child->name;
grp = &info->groups[grp_index++];
ret = rockchip_pinctrl_parse_groups(child, grp, info, i++);
- if (ret)
+ if (ret) {
+ of_node_put(child);
return ret;
+ }
}
return 0;
@@ -1304,6 +1340,7 @@ static int rockchip_pinctrl_parse_dt(struct platform_device *pdev,
ret = rockchip_pinctrl_parse_functions(child, info, i++);
if (ret) {
dev_err(&pdev->dev, "failed to parse function\n");
+ of_node_put(child);
return ret;
}
}
@@ -2143,6 +2180,23 @@ static struct rockchip_pin_ctrl rk3188_pin_ctrl = {
.pull_calc_reg = rk3188_calc_pull_reg_and_bit,
};
+static struct rockchip_pin_bank rk3228_pin_banks[] = {
+ PIN_BANK(0, 32, "gpio0"),
+ PIN_BANK(1, 32, "gpio1"),
+ PIN_BANK(2, 32, "gpio2"),
+ PIN_BANK(3, 32, "gpio3"),
+};
+
+static struct rockchip_pin_ctrl rk3228_pin_ctrl = {
+ .pin_banks = rk3228_pin_banks,
+ .nr_banks = ARRAY_SIZE(rk3228_pin_banks),
+ .label = "RK3228-GPIO",
+ .type = RK3288,
+ .grf_mux_offset = 0x0,
+ .pull_calc_reg = rk3228_calc_pull_reg_and_bit,
+ .drv_calc_reg = rk3228_calc_drv_reg_and_bit,
+};
+
static struct rockchip_pin_bank rk3288_pin_banks[] = {
PIN_BANK_IOMUX_FLAGS(0, 24, "gpio0", IOMUX_SOURCE_PMU,
IOMUX_SOURCE_PMU,
@@ -2220,6 +2274,8 @@ static const struct of_device_id rockchip_pinctrl_dt_match[] = {
.data = (void *)&rk3066b_pin_ctrl },
{ .compatible = "rockchip,rk3188-pinctrl",
.data = (void *)&rk3188_pin_ctrl },
+ { .compatible = "rockchip,rk3228-pinctrl",
+ .data = (void *)&rk3228_pin_ctrl },
{ .compatible = "rockchip,rk3288-pinctrl",
.data = (void *)&rk3288_pin_ctrl },
{ .compatible = "rockchip,rk3368-pinctrl",
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index ef04b962c3d5..d24e5f1d1525 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1484,10 +1484,7 @@ static void pcs_irq_free(struct pcs_device *pcs)
static void pcs_free_resources(struct pcs_device *pcs)
{
pcs_irq_free(pcs);
-
- if (pcs->pctl)
- pinctrl_unregister(pcs->pctl);
-
+ pinctrl_unregister(pcs->pctl);
pcs_free_funcs(pcs);
pcs_free_pingroups(pcs);
}
diff --git a/drivers/pinctrl/pinctrl-tegra-xusb.c b/drivers/pinctrl/pinctrl-tegra-xusb.c
index 84a43e612952..bd3aa5a4fd6d 100644
--- a/drivers/pinctrl/pinctrl-tegra-xusb.c
+++ b/drivers/pinctrl/pinctrl-tegra-xusb.c
@@ -253,8 +253,10 @@ static int tegra_xusb_padctl_dt_node_to_map(struct pinctrl_dev *pinctrl,
err = tegra_xusb_padctl_parse_subnode(padctl, np, maps,
&reserved_maps,
num_maps);
- if (err < 0)
+ if (err < 0) {
+ of_node_put(np);
return err;
+ }
}
return 0;
diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c
index 0fd7fd2b0f72..9da4da219a07 100644
--- a/drivers/pinctrl/pinctrl-tegra.c
+++ b/drivers/pinctrl/pinctrl-tegra.c
@@ -217,6 +217,7 @@ static int tegra_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev,
if (ret < 0) {
pinctrl_utils_dt_free_map(pctldev, *map,
*num_maps);
+ of_node_put(np);
return ret;
}
}
diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c
index ae724bdab3d3..7db74699fda4 100644
--- a/drivers/pinctrl/pinctrl-xway.c
+++ b/drivers/pinctrl/pinctrl-xway.c
@@ -7,6 +7,7 @@
* publishhed by the Free Software Foundation.
*
* Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2015 Martin Schiller <mschiller@tdt.de>
*/
#include <linux/err.h>
@@ -24,7 +25,7 @@
#include <lantiq_soc.h>
-/* we have 3 1/2 banks of 16 bit each */
+/* we have up to 4 banks of 16 bit each */
#define PINS 16
#define PORT3 3
#define PORT(x) (x / PINS)
@@ -35,7 +36,7 @@
#define MUX_ALT1 0x2
/*
- * each bank has this offset apart from the 1/2 bank that is mixed into the
+ * each bank has this offset apart from the 4th bank that is mixed into the
* other 3 ranges
*/
#define REG_OFF 0x30
@@ -51,7 +52,7 @@
#define GPIO_PUDSEL(p) (GPIO_BASE(p) + 0x1c)
#define GPIO_PUDEN(p) (GPIO_BASE(p) + 0x20)
-/* the 1/2 port needs special offsets for some registers */
+/* the 4th port needs special offsets for some registers */
#define GPIO3_OD (GPIO_BASE(0) + 0x24)
#define GPIO3_PUDSEL (GPIO_BASE(0) + 0x28)
#define GPIO3_PUDEN (GPIO_BASE(0) + 0x2C)
@@ -80,17 +81,18 @@
#define FUNC_MUX(f, m) \
{ .func = f, .mux = XWAY_MUX_##m, }
-#define XWAY_MAX_PIN 32
-#define XR9_MAX_PIN 56
-
enum xway_mux {
XWAY_MUX_GPIO = 0,
XWAY_MUX_SPI,
XWAY_MUX_ASC,
+ XWAY_MUX_USIF,
XWAY_MUX_PCI,
+ XWAY_MUX_CBUS,
XWAY_MUX_CGU,
XWAY_MUX_EBU,
+ XWAY_MUX_EBU2,
XWAY_MUX_JTAG,
+ XWAY_MUX_MCD,
XWAY_MUX_EXIN,
XWAY_MUX_TDM,
XWAY_MUX_STP,
@@ -103,9 +105,15 @@ enum xway_mux {
XWAY_MUX_DFE,
XWAY_MUX_SDIO,
XWAY_MUX_GPHY,
+ XWAY_MUX_SSI,
+ XWAY_MUX_WIFI,
XWAY_MUX_NONE = 0xffff,
};
+/* --------- DEPRECATED: xr9 related code --------- */
+/* ---------- use xrx100/xrx200 instead ---------- */
+#define XR9_MAX_PIN 56
+
static const struct ltq_mfp_pin xway_mfp[] = {
/* pin f0 f1 f2 f3 */
MFP_XWAY(GPIO0, GPIO, EXIN, NONE, TDM),
@@ -113,7 +121,7 @@ static const struct ltq_mfp_pin xway_mfp[] = {
MFP_XWAY(GPIO2, GPIO, CGU, EXIN, GPHY),
MFP_XWAY(GPIO3, GPIO, CGU, NONE, PCI),
MFP_XWAY(GPIO4, GPIO, STP, NONE, ASC),
- MFP_XWAY(GPIO5, GPIO, STP, NONE, GPHY),
+ MFP_XWAY(GPIO5, GPIO, STP, GPHY, NONE),
MFP_XWAY(GPIO6, GPIO, STP, GPT, ASC),
MFP_XWAY(GPIO7, GPIO, CGU, PCI, GPHY),
MFP_XWAY(GPIO8, GPIO, CGU, NMI, NONE),
@@ -152,10 +160,10 @@ static const struct ltq_mfp_pin xway_mfp[] = {
MFP_XWAY(GPIO41, GPIO, NONE, NONE, NONE),
MFP_XWAY(GPIO42, GPIO, MDIO, NONE, NONE),
MFP_XWAY(GPIO43, GPIO, MDIO, NONE, NONE),
- MFP_XWAY(GPIO44, GPIO, NONE, GPHY, SIN),
+ MFP_XWAY(GPIO44, GPIO, MII, SIN, GPHY),
MFP_XWAY(GPIO45, GPIO, NONE, GPHY, SIN),
MFP_XWAY(GPIO46, GPIO, NONE, NONE, EXIN),
- MFP_XWAY(GPIO47, GPIO, NONE, GPHY, SIN),
+ MFP_XWAY(GPIO47, GPIO, MII, GPHY, SIN),
MFP_XWAY(GPIO48, GPIO, EBU, NONE, NONE),
MFP_XWAY(GPIO49, GPIO, EBU, NONE, NONE),
MFP_XWAY(GPIO50, GPIO, NONE, NONE, NONE),
@@ -166,42 +174,6 @@ static const struct ltq_mfp_pin xway_mfp[] = {
MFP_XWAY(GPIO55, GPIO, NONE, NONE, NONE),
};
-static const struct ltq_mfp_pin ase_mfp[] = {
- /* pin f0 f1 f2 f3 */
- MFP_XWAY(GPIO0, GPIO, EXIN, MII, TDM),
- MFP_XWAY(GPIO1, GPIO, STP, DFE, EBU),
- MFP_XWAY(GPIO2, GPIO, STP, DFE, EPHY),
- MFP_XWAY(GPIO3, GPIO, STP, EPHY, EBU),
- MFP_XWAY(GPIO4, GPIO, GPT, EPHY, MII),
- MFP_XWAY(GPIO5, GPIO, MII, ASC, GPT),
- MFP_XWAY(GPIO6, GPIO, MII, ASC, EXIN),
- MFP_XWAY(GPIO7, GPIO, SPI, MII, JTAG),
- MFP_XWAY(GPIO8, GPIO, SPI, MII, JTAG),
- MFP_XWAY(GPIO9, GPIO, SPI, MII, JTAG),
- MFP_XWAY(GPIO10, GPIO, SPI, MII, JTAG),
- MFP_XWAY(GPIO11, GPIO, EBU, CGU, JTAG),
- MFP_XWAY(GPIO12, GPIO, EBU, MII, SDIO),
- MFP_XWAY(GPIO13, GPIO, EBU, MII, CGU),
- MFP_XWAY(GPIO14, GPIO, EBU, SPI, CGU),
- MFP_XWAY(GPIO15, GPIO, EBU, SPI, SDIO),
- MFP_XWAY(GPIO16, GPIO, NONE, NONE, NONE),
- MFP_XWAY(GPIO17, GPIO, NONE, NONE, NONE),
- MFP_XWAY(GPIO18, GPIO, NONE, NONE, NONE),
- MFP_XWAY(GPIO19, GPIO, EBU, MII, SDIO),
- MFP_XWAY(GPIO20, GPIO, EBU, MII, SDIO),
- MFP_XWAY(GPIO21, GPIO, EBU, MII, SDIO),
- MFP_XWAY(GPIO22, GPIO, EBU, MII, CGU),
- MFP_XWAY(GPIO23, GPIO, EBU, MII, CGU),
- MFP_XWAY(GPIO24, GPIO, EBU, NONE, MII),
- MFP_XWAY(GPIO25, GPIO, EBU, MII, GPT),
- MFP_XWAY(GPIO26, GPIO, EBU, MII, SDIO),
- MFP_XWAY(GPIO27, GPIO, EBU, NONE, MII),
- MFP_XWAY(GPIO28, GPIO, MII, EBU, SDIO),
- MFP_XWAY(GPIO29, GPIO, EBU, MII, EXIN),
- MFP_XWAY(GPIO30, GPIO, NONE, NONE, NONE),
- MFP_XWAY(GPIO31, GPIO, NONE, NONE, NONE),
-};
-
static const unsigned pins_jtag[] = {GPIO15, GPIO16, GPIO17, GPIO19, GPIO35};
static const unsigned pins_asc0[] = {GPIO11, GPIO12};
static const unsigned pins_asc0_cts_rts[] = {GPIO9, GPIO10};
@@ -231,6 +203,8 @@ static const unsigned pins_nand_cle[] = {GPIO24};
static const unsigned pins_nand_rdy[] = {GPIO48};
static const unsigned pins_nand_rd[] = {GPIO49};
+static const unsigned xway_exin_pin_map[] = {GPIO0, GPIO1, GPIO2, GPIO39, GPIO46, GPIO9};
+
static const unsigned pins_exin0[] = {GPIO0};
static const unsigned pins_exin1[] = {GPIO1};
static const unsigned pins_exin2[] = {GPIO2};
@@ -240,7 +214,7 @@ static const unsigned pins_exin5[] = {GPIO9};
static const unsigned pins_spi[] = {GPIO16, GPIO17, GPIO18};
static const unsigned pins_spi_cs1[] = {GPIO15};
-static const unsigned pins_spi_cs2[] = {GPIO21};
+static const unsigned pins_spi_cs2[] = {GPIO22};
static const unsigned pins_spi_cs3[] = {GPIO13};
static const unsigned pins_spi_cs4[] = {GPIO10};
static const unsigned pins_spi_cs5[] = {GPIO9};
@@ -264,25 +238,6 @@ static const unsigned pins_pci_req2[] = {GPIO31};
static const unsigned pins_pci_req3[] = {GPIO3};
static const unsigned pins_pci_req4[] = {GPIO37};
-static const unsigned ase_pins_jtag[] = {GPIO7, GPIO8, GPIO9, GPIO10, GPIO11};
-static const unsigned ase_pins_asc[] = {GPIO5, GPIO6};
-static const unsigned ase_pins_stp[] = {GPIO1, GPIO2, GPIO3};
-static const unsigned ase_pins_ephy[] = {GPIO2, GPIO3, GPIO4};
-static const unsigned ase_pins_dfe[] = {GPIO1, GPIO2};
-
-static const unsigned ase_pins_spi[] = {GPIO8, GPIO9, GPIO10};
-static const unsigned ase_pins_spi_cs1[] = {GPIO7};
-static const unsigned ase_pins_spi_cs2[] = {GPIO15};
-static const unsigned ase_pins_spi_cs3[] = {GPIO14};
-
-static const unsigned ase_pins_exin0[] = {GPIO6};
-static const unsigned ase_pins_exin1[] = {GPIO29};
-static const unsigned ase_pins_exin2[] = {GPIO0};
-
-static const unsigned ase_pins_gpt1[] = {GPIO5};
-static const unsigned ase_pins_gpt2[] = {GPIO4};
-static const unsigned ase_pins_gpt3[] = {GPIO25};
-
static const struct ltq_pin_group xway_grps[] = {
GRP_MUX("exin0", EXIN, pins_exin0),
GRP_MUX("exin1", EXIN, pins_exin1),
@@ -338,24 +293,6 @@ static const struct ltq_pin_group xway_grps[] = {
GRP_MUX("gphy1 led2", GPHY, pins_gphy1_led2),
};
-static const struct ltq_pin_group ase_grps[] = {
- GRP_MUX("exin0", EXIN, ase_pins_exin0),
- GRP_MUX("exin1", EXIN, ase_pins_exin1),
- GRP_MUX("exin2", EXIN, ase_pins_exin2),
- GRP_MUX("jtag", JTAG, ase_pins_jtag),
- GRP_MUX("stp", STP, ase_pins_stp),
- GRP_MUX("asc", ASC, ase_pins_asc),
- GRP_MUX("gpt1", GPT, ase_pins_gpt1),
- GRP_MUX("gpt2", GPT, ase_pins_gpt2),
- GRP_MUX("gpt3", GPT, ase_pins_gpt3),
- GRP_MUX("ephy", EPHY, ase_pins_ephy),
- GRP_MUX("dfe", DFE, ase_pins_dfe),
- GRP_MUX("spi", SPI, ase_pins_spi),
- GRP_MUX("spi_cs1", SPI, ase_pins_spi_cs1),
- GRP_MUX("spi_cs2", SPI, ase_pins_spi_cs2),
- GRP_MUX("spi_cs3", SPI, ase_pins_spi_cs3),
-};
-
static const char * const xway_pci_grps[] = {"gnt1", "gnt2",
"gnt3", "req1",
"req2", "req3"};
@@ -395,30 +332,6 @@ static const char * const xrx_pci_grps[] = {"gnt1", "gnt2",
"req1", "req2",
"req3", "req4"};
-/* ase */
-static const char * const ase_exin_grps[] = {"exin0", "exin1", "exin2"};
-static const char * const ase_gpt_grps[] = {"gpt1", "gpt2", "gpt3"};
-static const char * const ase_dfe_grps[] = {"dfe"};
-static const char * const ase_ephy_grps[] = {"ephy"};
-static const char * const ase_asc_grps[] = {"asc"};
-static const char * const ase_jtag_grps[] = {"jtag"};
-static const char * const ase_stp_grps[] = {"stp"};
-static const char * const ase_spi_grps[] = {"spi", "spi_cs1",
- "spi_cs2", "spi_cs3"};
-
-static const struct ltq_pmx_func danube_funcs[] = {
- {"spi", ARRAY_AND_SIZE(xway_spi_grps)},
- {"asc", ARRAY_AND_SIZE(xway_asc_grps)},
- {"cgu", ARRAY_AND_SIZE(xway_cgu_grps)},
- {"jtag", ARRAY_AND_SIZE(xway_jtag_grps)},
- {"exin", ARRAY_AND_SIZE(xway_exin_grps)},
- {"stp", ARRAY_AND_SIZE(xway_stp_grps)},
- {"gpt", ARRAY_AND_SIZE(xway_gpt_grps)},
- {"nmi", ARRAY_AND_SIZE(xway_nmi_grps)},
- {"pci", ARRAY_AND_SIZE(xway_pci_grps)},
- {"ebu", ARRAY_AND_SIZE(xway_ebu_grps)},
-};
-
static const struct ltq_pmx_func xrx_funcs[] = {
{"spi", ARRAY_AND_SIZE(xway_spi_grps)},
{"asc", ARRAY_AND_SIZE(xway_asc_grps)},
@@ -434,17 +347,991 @@ static const struct ltq_pmx_func xrx_funcs[] = {
{"gphy", ARRAY_AND_SIZE(xrx_gphy_grps)},
};
+/* --------- ase related code --------- */
+#define ASE_MAX_PIN 32
+
+static const struct ltq_mfp_pin ase_mfp[] = {
+ /* pin f0 f1 f2 f3 */
+ MFP_XWAY(GPIO0, GPIO, EXIN, MII, TDM),
+ MFP_XWAY(GPIO1, GPIO, STP, DFE, EBU),
+ MFP_XWAY(GPIO2, GPIO, STP, DFE, EPHY),
+ MFP_XWAY(GPIO3, GPIO, STP, EPHY, EBU),
+ MFP_XWAY(GPIO4, GPIO, GPT, EPHY, MII),
+ MFP_XWAY(GPIO5, GPIO, MII, ASC, GPT),
+ MFP_XWAY(GPIO6, GPIO, MII, ASC, EXIN),
+ MFP_XWAY(GPIO7, GPIO, SPI, MII, JTAG),
+ MFP_XWAY(GPIO8, GPIO, SPI, MII, JTAG),
+ MFP_XWAY(GPIO9, GPIO, SPI, MII, JTAG),
+ MFP_XWAY(GPIO10, GPIO, SPI, MII, JTAG),
+ MFP_XWAY(GPIO11, GPIO, EBU, CGU, JTAG),
+ MFP_XWAY(GPIO12, GPIO, EBU, MII, SDIO),
+ MFP_XWAY(GPIO13, GPIO, EBU, MII, CGU),
+ MFP_XWAY(GPIO14, GPIO, EBU, SPI, CGU),
+ MFP_XWAY(GPIO15, GPIO, EBU, SPI, SDIO),
+ MFP_XWAY(GPIO16, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO17, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO18, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO19, GPIO, EBU, MII, SDIO),
+ MFP_XWAY(GPIO20, GPIO, EBU, MII, SDIO),
+ MFP_XWAY(GPIO21, GPIO, EBU, MII, EBU2),
+ MFP_XWAY(GPIO22, GPIO, EBU, MII, CGU),
+ MFP_XWAY(GPIO23, GPIO, EBU, MII, CGU),
+ MFP_XWAY(GPIO24, GPIO, EBU, EBU2, MDIO),
+ MFP_XWAY(GPIO25, GPIO, EBU, MII, GPT),
+ MFP_XWAY(GPIO26, GPIO, EBU, MII, SDIO),
+ MFP_XWAY(GPIO27, GPIO, EBU, NONE, MDIO),
+ MFP_XWAY(GPIO28, GPIO, MII, EBU, SDIO),
+ MFP_XWAY(GPIO29, GPIO, EBU, MII, EXIN),
+ MFP_XWAY(GPIO30, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO31, GPIO, NONE, NONE, NONE),
+};
+
+static const unsigned ase_exin_pin_map[] = {GPIO6, GPIO29, GPIO0};
+
+static const unsigned ase_pins_exin0[] = {GPIO6};
+static const unsigned ase_pins_exin1[] = {GPIO29};
+static const unsigned ase_pins_exin2[] = {GPIO0};
+
+static const unsigned ase_pins_jtag[] = {GPIO7, GPIO8, GPIO9, GPIO10, GPIO11};
+static const unsigned ase_pins_asc[] = {GPIO5, GPIO6};
+static const unsigned ase_pins_stp[] = {GPIO1, GPIO2, GPIO3};
+static const unsigned ase_pins_mdio[] = {GPIO24, GPIO27};
+static const unsigned ase_pins_ephy_led0[] = {GPIO2};
+static const unsigned ase_pins_ephy_led1[] = {GPIO3};
+static const unsigned ase_pins_ephy_led2[] = {GPIO4};
+static const unsigned ase_pins_dfe_led0[] = {GPIO1};
+static const unsigned ase_pins_dfe_led1[] = {GPIO2};
+
+static const unsigned ase_pins_spi[] = {GPIO8, GPIO9, GPIO10}; /* DEPRECATED */
+static const unsigned ase_pins_spi_di[] = {GPIO8};
+static const unsigned ase_pins_spi_do[] = {GPIO9};
+static const unsigned ase_pins_spi_clk[] = {GPIO10};
+static const unsigned ase_pins_spi_cs1[] = {GPIO7};
+static const unsigned ase_pins_spi_cs2[] = {GPIO15};
+static const unsigned ase_pins_spi_cs3[] = {GPIO14};
+
+static const unsigned ase_pins_gpt1[] = {GPIO5};
+static const unsigned ase_pins_gpt2[] = {GPIO4};
+static const unsigned ase_pins_gpt3[] = {GPIO25};
+
+static const unsigned ase_pins_clkout0[] = {GPIO23};
+static const unsigned ase_pins_clkout1[] = {GPIO22};
+static const unsigned ase_pins_clkout2[] = {GPIO14};
+
+static const struct ltq_pin_group ase_grps[] = {
+ GRP_MUX("exin0", EXIN, ase_pins_exin0),
+ GRP_MUX("exin1", EXIN, ase_pins_exin1),
+ GRP_MUX("exin2", EXIN, ase_pins_exin2),
+ GRP_MUX("jtag", JTAG, ase_pins_jtag),
+ GRP_MUX("spi", SPI, ase_pins_spi), /* DEPRECATED */
+ GRP_MUX("spi_di", SPI, ase_pins_spi_di),
+ GRP_MUX("spi_do", SPI, ase_pins_spi_do),
+ GRP_MUX("spi_clk", SPI, ase_pins_spi_clk),
+ GRP_MUX("spi_cs1", SPI, ase_pins_spi_cs1),
+ GRP_MUX("spi_cs2", SPI, ase_pins_spi_cs2),
+ GRP_MUX("spi_cs3", SPI, ase_pins_spi_cs3),
+ GRP_MUX("asc", ASC, ase_pins_asc),
+ GRP_MUX("stp", STP, ase_pins_stp),
+ GRP_MUX("gpt1", GPT, ase_pins_gpt1),
+ GRP_MUX("gpt2", GPT, ase_pins_gpt2),
+ GRP_MUX("gpt3", GPT, ase_pins_gpt3),
+ GRP_MUX("clkout0", CGU, ase_pins_clkout0),
+ GRP_MUX("clkout1", CGU, ase_pins_clkout1),
+ GRP_MUX("clkout2", CGU, ase_pins_clkout2),
+ GRP_MUX("mdio", MDIO, ase_pins_mdio),
+ GRP_MUX("dfe led0", DFE, ase_pins_dfe_led0),
+ GRP_MUX("dfe led1", DFE, ase_pins_dfe_led1),
+ GRP_MUX("ephy led0", EPHY, ase_pins_ephy_led0),
+ GRP_MUX("ephy led1", EPHY, ase_pins_ephy_led1),
+ GRP_MUX("ephy led2", EPHY, ase_pins_ephy_led2),
+};
+
+static const char * const ase_exin_grps[] = {"exin0", "exin1", "exin2"};
+static const char * const ase_gpt_grps[] = {"gpt1", "gpt2", "gpt3"};
+static const char * const ase_cgu_grps[] = {"clkout0", "clkout1",
+ "clkout2"};
+static const char * const ase_mdio_grps[] = {"mdio"};
+static const char * const ase_dfe_grps[] = {"dfe led0", "dfe led1"};
+static const char * const ase_ephy_grps[] = {"ephy led0", "ephy led1",
+ "ephy led2"};
+static const char * const ase_asc_grps[] = {"asc"};
+static const char * const ase_jtag_grps[] = {"jtag"};
+static const char * const ase_stp_grps[] = {"stp"};
+static const char * const ase_spi_grps[] = {"spi", /* DEPRECATED */
+ "spi_di", "spi_do",
+ "spi_clk", "spi_cs1",
+ "spi_cs2", "spi_cs3"};
+
static const struct ltq_pmx_func ase_funcs[] = {
{"spi", ARRAY_AND_SIZE(ase_spi_grps)},
{"asc", ARRAY_AND_SIZE(ase_asc_grps)},
+ {"cgu", ARRAY_AND_SIZE(ase_cgu_grps)},
{"jtag", ARRAY_AND_SIZE(ase_jtag_grps)},
{"exin", ARRAY_AND_SIZE(ase_exin_grps)},
{"stp", ARRAY_AND_SIZE(ase_stp_grps)},
{"gpt", ARRAY_AND_SIZE(ase_gpt_grps)},
+ {"mdio", ARRAY_AND_SIZE(ase_mdio_grps)},
{"ephy", ARRAY_AND_SIZE(ase_ephy_grps)},
{"dfe", ARRAY_AND_SIZE(ase_dfe_grps)},
};
+/* --------- danube related code --------- */
+#define DANUBE_MAX_PIN 32
+
+static const struct ltq_mfp_pin danube_mfp[] = {
+ /* pin f0 f1 f2 f3 */
+ MFP_XWAY(GPIO0, GPIO, EXIN, SDIO, TDM),
+ MFP_XWAY(GPIO1, GPIO, EXIN, CBUS, MII),
+ MFP_XWAY(GPIO2, GPIO, CGU, EXIN, MII),
+ MFP_XWAY(GPIO3, GPIO, CGU, SDIO, PCI),
+ MFP_XWAY(GPIO4, GPIO, STP, DFE, ASC),
+ MFP_XWAY(GPIO5, GPIO, STP, MII, DFE),
+ MFP_XWAY(GPIO6, GPIO, STP, GPT, ASC),
+ MFP_XWAY(GPIO7, GPIO, CGU, CBUS, MII),
+ MFP_XWAY(GPIO8, GPIO, CGU, NMI, MII),
+ MFP_XWAY(GPIO9, GPIO, ASC, SPI, MII),
+ MFP_XWAY(GPIO10, GPIO, ASC, SPI, MII),
+ MFP_XWAY(GPIO11, GPIO, ASC, CBUS, SPI),
+ MFP_XWAY(GPIO12, GPIO, ASC, CBUS, MCD),
+ MFP_XWAY(GPIO13, GPIO, EBU, SPI, MII),
+ MFP_XWAY(GPIO14, GPIO, CGU, CBUS, MII),
+ MFP_XWAY(GPIO15, GPIO, SPI, SDIO, JTAG),
+ MFP_XWAY(GPIO16, GPIO, SPI, SDIO, JTAG),
+ MFP_XWAY(GPIO17, GPIO, SPI, SDIO, JTAG),
+ MFP_XWAY(GPIO18, GPIO, SPI, SDIO, JTAG),
+ MFP_XWAY(GPIO19, GPIO, PCI, SDIO, MII),
+ MFP_XWAY(GPIO20, GPIO, JTAG, SDIO, MII),
+ MFP_XWAY(GPIO21, GPIO, PCI, EBU, GPT),
+ MFP_XWAY(GPIO22, GPIO, SPI, MCD, MII),
+ MFP_XWAY(GPIO23, GPIO, EBU, PCI, STP),
+ MFP_XWAY(GPIO24, GPIO, EBU, TDM, PCI),
+ MFP_XWAY(GPIO25, GPIO, TDM, SDIO, ASC),
+ MFP_XWAY(GPIO26, GPIO, EBU, TDM, SDIO),
+ MFP_XWAY(GPIO27, GPIO, TDM, SDIO, ASC),
+ MFP_XWAY(GPIO28, GPIO, GPT, MII, SDIO),
+ MFP_XWAY(GPIO29, GPIO, PCI, CBUS, MII),
+ MFP_XWAY(GPIO30, GPIO, PCI, CBUS, MII),
+ MFP_XWAY(GPIO31, GPIO, EBU, PCI, MII),
+};
+
+static const unsigned danube_exin_pin_map[] = {GPIO0, GPIO1, GPIO2};
+
+static const unsigned danube_pins_exin0[] = {GPIO0};
+static const unsigned danube_pins_exin1[] = {GPIO1};
+static const unsigned danube_pins_exin2[] = {GPIO2};
+
+static const unsigned danube_pins_jtag[] = {GPIO15, GPIO16, GPIO17, GPIO18, GPIO20};
+static const unsigned danube_pins_asc0[] = {GPIO11, GPIO12};
+static const unsigned danube_pins_asc0_cts_rts[] = {GPIO9, GPIO10};
+static const unsigned danube_pins_stp[] = {GPIO4, GPIO5, GPIO6};
+static const unsigned danube_pins_nmi[] = {GPIO8};
+
+static const unsigned danube_pins_dfe_led0[] = {GPIO4};
+static const unsigned danube_pins_dfe_led1[] = {GPIO5};
+
+static const unsigned danube_pins_ebu_a24[] = {GPIO13};
+static const unsigned danube_pins_ebu_clk[] = {GPIO21};
+static const unsigned danube_pins_ebu_cs1[] = {GPIO23};
+static const unsigned danube_pins_ebu_a23[] = {GPIO24};
+static const unsigned danube_pins_ebu_wait[] = {GPIO26};
+static const unsigned danube_pins_ebu_a25[] = {GPIO31};
+
+static const unsigned danube_pins_nand_ale[] = {GPIO13};
+static const unsigned danube_pins_nand_cs1[] = {GPIO23};
+static const unsigned danube_pins_nand_cle[] = {GPIO24};
+
+static const unsigned danube_pins_spi[] = {GPIO16, GPIO17, GPIO18}; /* DEPRECATED */
+static const unsigned danube_pins_spi_di[] = {GPIO16};
+static const unsigned danube_pins_spi_do[] = {GPIO17};
+static const unsigned danube_pins_spi_clk[] = {GPIO18};
+static const unsigned danube_pins_spi_cs1[] = {GPIO15};
+static const unsigned danube_pins_spi_cs2[] = {GPIO21};
+static const unsigned danube_pins_spi_cs3[] = {GPIO13};
+static const unsigned danube_pins_spi_cs4[] = {GPIO10};
+static const unsigned danube_pins_spi_cs5[] = {GPIO9};
+static const unsigned danube_pins_spi_cs6[] = {GPIO11};
+
+static const unsigned danube_pins_gpt1[] = {GPIO28};
+static const unsigned danube_pins_gpt2[] = {GPIO21};
+static const unsigned danube_pins_gpt3[] = {GPIO6};
+
+static const unsigned danube_pins_clkout0[] = {GPIO8};
+static const unsigned danube_pins_clkout1[] = {GPIO7};
+static const unsigned danube_pins_clkout2[] = {GPIO3};
+static const unsigned danube_pins_clkout3[] = {GPIO2};
+
+static const unsigned danube_pins_pci_gnt1[] = {GPIO30};
+static const unsigned danube_pins_pci_gnt2[] = {GPIO23};
+static const unsigned danube_pins_pci_gnt3[] = {GPIO19};
+static const unsigned danube_pins_pci_req1[] = {GPIO29};
+static const unsigned danube_pins_pci_req2[] = {GPIO31};
+static const unsigned danube_pins_pci_req3[] = {GPIO3};
+
+static const struct ltq_pin_group danube_grps[] = {
+ GRP_MUX("exin0", EXIN, danube_pins_exin0),
+ GRP_MUX("exin1", EXIN, danube_pins_exin1),
+ GRP_MUX("exin2", EXIN, danube_pins_exin2),
+ GRP_MUX("jtag", JTAG, danube_pins_jtag),
+ GRP_MUX("ebu a23", EBU, danube_pins_ebu_a23),
+ GRP_MUX("ebu a24", EBU, danube_pins_ebu_a24),
+ GRP_MUX("ebu a25", EBU, danube_pins_ebu_a25),
+ GRP_MUX("ebu clk", EBU, danube_pins_ebu_clk),
+ GRP_MUX("ebu cs1", EBU, danube_pins_ebu_cs1),
+ GRP_MUX("ebu wait", EBU, danube_pins_ebu_wait),
+ GRP_MUX("nand ale", EBU, danube_pins_nand_ale),
+ GRP_MUX("nand cs1", EBU, danube_pins_nand_cs1),
+ GRP_MUX("nand cle", EBU, danube_pins_nand_cle),
+ GRP_MUX("spi", SPI, danube_pins_spi), /* DEPRECATED */
+ GRP_MUX("spi_di", SPI, danube_pins_spi_di),
+ GRP_MUX("spi_do", SPI, danube_pins_spi_do),
+ GRP_MUX("spi_clk", SPI, danube_pins_spi_clk),
+ GRP_MUX("spi_cs1", SPI, danube_pins_spi_cs1),
+ GRP_MUX("spi_cs2", SPI, danube_pins_spi_cs2),
+ GRP_MUX("spi_cs3", SPI, danube_pins_spi_cs3),
+ GRP_MUX("spi_cs4", SPI, danube_pins_spi_cs4),
+ GRP_MUX("spi_cs5", SPI, danube_pins_spi_cs5),
+ GRP_MUX("spi_cs6", SPI, danube_pins_spi_cs6),
+ GRP_MUX("asc0", ASC, danube_pins_asc0),
+ GRP_MUX("asc0 cts rts", ASC, danube_pins_asc0_cts_rts),
+ GRP_MUX("stp", STP, danube_pins_stp),
+ GRP_MUX("nmi", NMI, danube_pins_nmi),
+ GRP_MUX("gpt1", GPT, danube_pins_gpt1),
+ GRP_MUX("gpt2", GPT, danube_pins_gpt2),
+ GRP_MUX("gpt3", GPT, danube_pins_gpt3),
+ GRP_MUX("clkout0", CGU, danube_pins_clkout0),
+ GRP_MUX("clkout1", CGU, danube_pins_clkout1),
+ GRP_MUX("clkout2", CGU, danube_pins_clkout2),
+ GRP_MUX("clkout3", CGU, danube_pins_clkout3),
+ GRP_MUX("gnt1", PCI, danube_pins_pci_gnt1),
+ GRP_MUX("gnt2", PCI, danube_pins_pci_gnt2),
+ GRP_MUX("gnt3", PCI, danube_pins_pci_gnt3),
+ GRP_MUX("req1", PCI, danube_pins_pci_req1),
+ GRP_MUX("req2", PCI, danube_pins_pci_req2),
+ GRP_MUX("req3", PCI, danube_pins_pci_req3),
+ GRP_MUX("dfe led0", DFE, danube_pins_dfe_led0),
+ GRP_MUX("dfe led1", DFE, danube_pins_dfe_led1),
+};
+
+static const char * const danube_pci_grps[] = {"gnt1", "gnt2",
+ "gnt3", "req1",
+ "req2", "req3"};
+static const char * const danube_spi_grps[] = {"spi", /* DEPRECATED */
+ "spi_di", "spi_do",
+ "spi_clk", "spi_cs1",
+ "spi_cs2", "spi_cs3",
+ "spi_cs4", "spi_cs5",
+ "spi_cs6"};
+static const char * const danube_cgu_grps[] = {"clkout0", "clkout1",
+ "clkout2", "clkout3"};
+static const char * const danube_ebu_grps[] = {"ebu a23", "ebu a24",
+ "ebu a25", "ebu cs1",
+ "ebu wait", "ebu clk",
+ "nand ale", "nand cs1",
+ "nand cle"};
+static const char * const danube_dfe_grps[] = {"dfe led0", "dfe led1"};
+static const char * const danube_exin_grps[] = {"exin0", "exin1", "exin2"};
+static const char * const danube_gpt_grps[] = {"gpt1", "gpt2", "gpt3"};
+static const char * const danube_asc_grps[] = {"asc0", "asc0 cts rts"};
+static const char * const danube_jtag_grps[] = {"jtag"};
+static const char * const danube_stp_grps[] = {"stp"};
+static const char * const danube_nmi_grps[] = {"nmi"};
+
+static const struct ltq_pmx_func danube_funcs[] = {
+ {"spi", ARRAY_AND_SIZE(danube_spi_grps)},
+ {"asc", ARRAY_AND_SIZE(danube_asc_grps)},
+ {"cgu", ARRAY_AND_SIZE(danube_cgu_grps)},
+ {"jtag", ARRAY_AND_SIZE(danube_jtag_grps)},
+ {"exin", ARRAY_AND_SIZE(danube_exin_grps)},
+ {"stp", ARRAY_AND_SIZE(danube_stp_grps)},
+ {"gpt", ARRAY_AND_SIZE(danube_gpt_grps)},
+ {"nmi", ARRAY_AND_SIZE(danube_nmi_grps)},
+ {"pci", ARRAY_AND_SIZE(danube_pci_grps)},
+ {"ebu", ARRAY_AND_SIZE(danube_ebu_grps)},
+ {"dfe", ARRAY_AND_SIZE(danube_dfe_grps)},
+};
+
+/* --------- xrx100 related code --------- */
+#define XRX100_MAX_PIN 56
+
+static const struct ltq_mfp_pin xrx100_mfp[] = {
+ /* pin f0 f1 f2 f3 */
+ MFP_XWAY(GPIO0, GPIO, EXIN, SDIO, TDM),
+ MFP_XWAY(GPIO1, GPIO, EXIN, CBUS, SIN),
+ MFP_XWAY(GPIO2, GPIO, CGU, EXIN, NONE),
+ MFP_XWAY(GPIO3, GPIO, CGU, SDIO, PCI),
+ MFP_XWAY(GPIO4, GPIO, STP, DFE, ASC),
+ MFP_XWAY(GPIO5, GPIO, STP, NONE, DFE),
+ MFP_XWAY(GPIO6, GPIO, STP, GPT, ASC),
+ MFP_XWAY(GPIO7, GPIO, CGU, CBUS, NONE),
+ MFP_XWAY(GPIO8, GPIO, CGU, NMI, NONE),
+ MFP_XWAY(GPIO9, GPIO, ASC, SPI, EXIN),
+ MFP_XWAY(GPIO10, GPIO, ASC, SPI, EXIN),
+ MFP_XWAY(GPIO11, GPIO, ASC, CBUS, SPI),
+ MFP_XWAY(GPIO12, GPIO, ASC, CBUS, MCD),
+ MFP_XWAY(GPIO13, GPIO, EBU, SPI, NONE),
+ MFP_XWAY(GPIO14, GPIO, CGU, NONE, NONE),
+ MFP_XWAY(GPIO15, GPIO, SPI, SDIO, MCD),
+ MFP_XWAY(GPIO16, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO17, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO18, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO19, GPIO, PCI, SDIO, CGU),
+ MFP_XWAY(GPIO20, GPIO, NONE, SDIO, EBU),
+ MFP_XWAY(GPIO21, GPIO, PCI, EBU, GPT),
+ MFP_XWAY(GPIO22, GPIO, SPI, NONE, EBU),
+ MFP_XWAY(GPIO23, GPIO, EBU, PCI, STP),
+ MFP_XWAY(GPIO24, GPIO, EBU, TDM, PCI),
+ MFP_XWAY(GPIO25, GPIO, TDM, SDIO, ASC),
+ MFP_XWAY(GPIO26, GPIO, EBU, TDM, SDIO),
+ MFP_XWAY(GPIO27, GPIO, TDM, SDIO, ASC),
+ MFP_XWAY(GPIO28, GPIO, GPT, NONE, SDIO),
+ MFP_XWAY(GPIO29, GPIO, PCI, CBUS, NONE),
+ MFP_XWAY(GPIO30, GPIO, PCI, CBUS, NONE),
+ MFP_XWAY(GPIO31, GPIO, EBU, PCI, NONE),
+ MFP_XWAY(GPIO32, GPIO, MII, NONE, EBU),
+ MFP_XWAY(GPIO33, GPIO, MII, NONE, EBU),
+ MFP_XWAY(GPIO34, GPIO, SIN, SSI, NONE),
+ MFP_XWAY(GPIO35, GPIO, SIN, SSI, NONE),
+ MFP_XWAY(GPIO36, GPIO, SIN, SSI, NONE),
+ MFP_XWAY(GPIO37, GPIO, PCI, NONE, NONE),
+ MFP_XWAY(GPIO38, GPIO, PCI, NONE, NONE),
+ MFP_XWAY(GPIO39, GPIO, NONE, EXIN, NONE),
+ MFP_XWAY(GPIO40, GPIO, MII, TDM, NONE),
+ MFP_XWAY(GPIO41, GPIO, MII, TDM, NONE),
+ MFP_XWAY(GPIO42, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO43, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO44, GPIO, MII, SIN, NONE),
+ MFP_XWAY(GPIO45, GPIO, MII, NONE, SIN),
+ MFP_XWAY(GPIO46, GPIO, MII, NONE, EXIN),
+ MFP_XWAY(GPIO47, GPIO, MII, NONE, SIN),
+ MFP_XWAY(GPIO48, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO49, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO50, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO51, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO52, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO53, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO54, GPIO, NONE, NONE, NONE),
+ MFP_XWAY(GPIO55, GPIO, NONE, NONE, NONE),
+};
+
+static const unsigned xrx100_exin_pin_map[] = {GPIO0, GPIO1, GPIO2, GPIO39, GPIO10, GPIO9};
+
+static const unsigned xrx100_pins_exin0[] = {GPIO0};
+static const unsigned xrx100_pins_exin1[] = {GPIO1};
+static const unsigned xrx100_pins_exin2[] = {GPIO2};
+static const unsigned xrx100_pins_exin3[] = {GPIO39};
+static const unsigned xrx100_pins_exin4[] = {GPIO10};
+static const unsigned xrx100_pins_exin5[] = {GPIO9};
+
+static const unsigned xrx100_pins_asc0[] = {GPIO11, GPIO12};
+static const unsigned xrx100_pins_asc0_cts_rts[] = {GPIO9, GPIO10};
+static const unsigned xrx100_pins_stp[] = {GPIO4, GPIO5, GPIO6};
+static const unsigned xrx100_pins_nmi[] = {GPIO8};
+static const unsigned xrx100_pins_mdio[] = {GPIO42, GPIO43};
+
+static const unsigned xrx100_pins_dfe_led0[] = {GPIO4};
+static const unsigned xrx100_pins_dfe_led1[] = {GPIO5};
+
+static const unsigned xrx100_pins_ebu_a24[] = {GPIO13};
+static const unsigned xrx100_pins_ebu_clk[] = {GPIO21};
+static const unsigned xrx100_pins_ebu_cs1[] = {GPIO23};
+static const unsigned xrx100_pins_ebu_a23[] = {GPIO24};
+static const unsigned xrx100_pins_ebu_wait[] = {GPIO26};
+static const unsigned xrx100_pins_ebu_a25[] = {GPIO31};
+
+static const unsigned xrx100_pins_nand_ale[] = {GPIO13};
+static const unsigned xrx100_pins_nand_cs1[] = {GPIO23};
+static const unsigned xrx100_pins_nand_cle[] = {GPIO24};
+static const unsigned xrx100_pins_nand_rdy[] = {GPIO48};
+static const unsigned xrx100_pins_nand_rd[] = {GPIO49};
+
+static const unsigned xrx100_pins_spi_di[] = {GPIO16};
+static const unsigned xrx100_pins_spi_do[] = {GPIO17};
+static const unsigned xrx100_pins_spi_clk[] = {GPIO18};
+static const unsigned xrx100_pins_spi_cs1[] = {GPIO15};
+static const unsigned xrx100_pins_spi_cs2[] = {GPIO22};
+static const unsigned xrx100_pins_spi_cs3[] = {GPIO13};
+static const unsigned xrx100_pins_spi_cs4[] = {GPIO10};
+static const unsigned xrx100_pins_spi_cs5[] = {GPIO9};
+static const unsigned xrx100_pins_spi_cs6[] = {GPIO11};
+
+static const unsigned xrx100_pins_gpt1[] = {GPIO28};
+static const unsigned xrx100_pins_gpt2[] = {GPIO21};
+static const unsigned xrx100_pins_gpt3[] = {GPIO6};
+
+static const unsigned xrx100_pins_clkout0[] = {GPIO8};
+static const unsigned xrx100_pins_clkout1[] = {GPIO7};
+static const unsigned xrx100_pins_clkout2[] = {GPIO3};
+static const unsigned xrx100_pins_clkout3[] = {GPIO2};
+
+static const unsigned xrx100_pins_pci_gnt1[] = {GPIO30};
+static const unsigned xrx100_pins_pci_gnt2[] = {GPIO23};
+static const unsigned xrx100_pins_pci_gnt3[] = {GPIO19};
+static const unsigned xrx100_pins_pci_gnt4[] = {GPIO38};
+static const unsigned xrx100_pins_pci_req1[] = {GPIO29};
+static const unsigned xrx100_pins_pci_req2[] = {GPIO31};
+static const unsigned xrx100_pins_pci_req3[] = {GPIO3};
+static const unsigned xrx100_pins_pci_req4[] = {GPIO37};
+
+static const struct ltq_pin_group xrx100_grps[] = {
+ GRP_MUX("exin0", EXIN, xrx100_pins_exin0),
+ GRP_MUX("exin1", EXIN, xrx100_pins_exin1),
+ GRP_MUX("exin2", EXIN, xrx100_pins_exin2),
+ GRP_MUX("exin3", EXIN, xrx100_pins_exin3),
+ GRP_MUX("exin4", EXIN, xrx100_pins_exin4),
+ GRP_MUX("exin5", EXIN, xrx100_pins_exin5),
+ GRP_MUX("ebu a23", EBU, xrx100_pins_ebu_a23),
+ GRP_MUX("ebu a24", EBU, xrx100_pins_ebu_a24),
+ GRP_MUX("ebu a25", EBU, xrx100_pins_ebu_a25),
+ GRP_MUX("ebu clk", EBU, xrx100_pins_ebu_clk),
+ GRP_MUX("ebu cs1", EBU, xrx100_pins_ebu_cs1),
+ GRP_MUX("ebu wait", EBU, xrx100_pins_ebu_wait),
+ GRP_MUX("nand ale", EBU, xrx100_pins_nand_ale),
+ GRP_MUX("nand cs1", EBU, xrx100_pins_nand_cs1),
+ GRP_MUX("nand cle", EBU, xrx100_pins_nand_cle),
+ GRP_MUX("nand rdy", EBU, xrx100_pins_nand_rdy),
+ GRP_MUX("nand rd", EBU, xrx100_pins_nand_rd),
+ GRP_MUX("spi_di", SPI, xrx100_pins_spi_di),
+ GRP_MUX("spi_do", SPI, xrx100_pins_spi_do),
+ GRP_MUX("spi_clk", SPI, xrx100_pins_spi_clk),
+ GRP_MUX("spi_cs1", SPI, xrx100_pins_spi_cs1),
+ GRP_MUX("spi_cs2", SPI, xrx100_pins_spi_cs2),
+ GRP_MUX("spi_cs3", SPI, xrx100_pins_spi_cs3),
+ GRP_MUX("spi_cs4", SPI, xrx100_pins_spi_cs4),
+ GRP_MUX("spi_cs5", SPI, xrx100_pins_spi_cs5),
+ GRP_MUX("spi_cs6", SPI, xrx100_pins_spi_cs6),
+ GRP_MUX("asc0", ASC, xrx100_pins_asc0),
+ GRP_MUX("asc0 cts rts", ASC, xrx100_pins_asc0_cts_rts),
+ GRP_MUX("stp", STP, xrx100_pins_stp),
+ GRP_MUX("nmi", NMI, xrx100_pins_nmi),
+ GRP_MUX("gpt1", GPT, xrx100_pins_gpt1),
+ GRP_MUX("gpt2", GPT, xrx100_pins_gpt2),
+ GRP_MUX("gpt3", GPT, xrx100_pins_gpt3),
+ GRP_MUX("clkout0", CGU, xrx100_pins_clkout0),
+ GRP_MUX("clkout1", CGU, xrx100_pins_clkout1),
+ GRP_MUX("clkout2", CGU, xrx100_pins_clkout2),
+ GRP_MUX("clkout3", CGU, xrx100_pins_clkout3),
+ GRP_MUX("gnt1", PCI, xrx100_pins_pci_gnt1),
+ GRP_MUX("gnt2", PCI, xrx100_pins_pci_gnt2),
+ GRP_MUX("gnt3", PCI, xrx100_pins_pci_gnt3),
+ GRP_MUX("gnt4", PCI, xrx100_pins_pci_gnt4),
+ GRP_MUX("req1", PCI, xrx100_pins_pci_req1),
+ GRP_MUX("req2", PCI, xrx100_pins_pci_req2),
+ GRP_MUX("req3", PCI, xrx100_pins_pci_req3),
+ GRP_MUX("req4", PCI, xrx100_pins_pci_req4),
+ GRP_MUX("mdio", MDIO, xrx100_pins_mdio),
+ GRP_MUX("dfe led0", DFE, xrx100_pins_dfe_led0),
+ GRP_MUX("dfe led1", DFE, xrx100_pins_dfe_led1),
+};
+
+static const char * const xrx100_pci_grps[] = {"gnt1", "gnt2",
+ "gnt3", "gnt4",
+ "req1", "req2",
+ "req3", "req4"};
+static const char * const xrx100_spi_grps[] = {"spi_di", "spi_do",
+ "spi_clk", "spi_cs1",
+ "spi_cs2", "spi_cs3",
+ "spi_cs4", "spi_cs5",
+ "spi_cs6"};
+static const char * const xrx100_cgu_grps[] = {"clkout0", "clkout1",
+ "clkout2", "clkout3"};
+static const char * const xrx100_ebu_grps[] = {"ebu a23", "ebu a24",
+ "ebu a25", "ebu cs1",
+ "ebu wait", "ebu clk",
+ "nand ale", "nand cs1",
+ "nand cle", "nand rdy",
+ "nand rd"};
+static const char * const xrx100_exin_grps[] = {"exin0", "exin1", "exin2",
+ "exin3", "exin4", "exin5"};
+static const char * const xrx100_gpt_grps[] = {"gpt1", "gpt2", "gpt3"};
+static const char * const xrx100_asc_grps[] = {"asc0", "asc0 cts rts"};
+static const char * const xrx100_stp_grps[] = {"stp"};
+static const char * const xrx100_nmi_grps[] = {"nmi"};
+static const char * const xrx100_mdio_grps[] = {"mdio"};
+static const char * const xrx100_dfe_grps[] = {"dfe led0", "dfe led1"};
+
+static const struct ltq_pmx_func xrx100_funcs[] = {
+ {"spi", ARRAY_AND_SIZE(xrx100_spi_grps)},
+ {"asc", ARRAY_AND_SIZE(xrx100_asc_grps)},
+ {"cgu", ARRAY_AND_SIZE(xrx100_cgu_grps)},
+ {"exin", ARRAY_AND_SIZE(xrx100_exin_grps)},
+ {"stp", ARRAY_AND_SIZE(xrx100_stp_grps)},
+ {"gpt", ARRAY_AND_SIZE(xrx100_gpt_grps)},
+ {"nmi", ARRAY_AND_SIZE(xrx100_nmi_grps)},
+ {"pci", ARRAY_AND_SIZE(xrx100_pci_grps)},
+ {"ebu", ARRAY_AND_SIZE(xrx100_ebu_grps)},
+ {"mdio", ARRAY_AND_SIZE(xrx100_mdio_grps)},
+ {"dfe", ARRAY_AND_SIZE(xrx100_dfe_grps)},
+};
+
+/* --------- xrx200 related code --------- */
+#define XRX200_MAX_PIN 50
+
+static const struct ltq_mfp_pin xrx200_mfp[] = {
+ /* pin f0 f1 f2 f3 */
+ MFP_XWAY(GPIO0, GPIO, EXIN, SDIO, TDM),
+ MFP_XWAY(GPIO1, GPIO, EXIN, CBUS, SIN),
+ MFP_XWAY(GPIO2, GPIO, CGU, EXIN, GPHY),
+ MFP_XWAY(GPIO3, GPIO, CGU, SDIO, PCI),
+ MFP_XWAY(GPIO4, GPIO, STP, DFE, USIF),
+ MFP_XWAY(GPIO5, GPIO, STP, GPHY, DFE),
+ MFP_XWAY(GPIO6, GPIO, STP, GPT, USIF),
+ MFP_XWAY(GPIO7, GPIO, CGU, CBUS, GPHY),
+ MFP_XWAY(GPIO8, GPIO, CGU, NMI, NONE),
+ MFP_XWAY(GPIO9, GPIO, USIF, SPI, EXIN),
+ MFP_XWAY(GPIO10, GPIO, USIF, SPI, EXIN),
+ MFP_XWAY(GPIO11, GPIO, USIF, CBUS, SPI),
+ MFP_XWAY(GPIO12, GPIO, USIF, CBUS, MCD),
+ MFP_XWAY(GPIO13, GPIO, EBU, SPI, NONE),
+ MFP_XWAY(GPIO14, GPIO, CGU, CBUS, USIF),
+ MFP_XWAY(GPIO15, GPIO, SPI, SDIO, MCD),
+ MFP_XWAY(GPIO16, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO17, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO18, GPIO, SPI, SDIO, NONE),
+ MFP_XWAY(GPIO19, GPIO, PCI, SDIO, CGU),
+ MFP_XWAY(GPIO20, GPIO, NONE, SDIO, EBU),
+ MFP_XWAY(GPIO21, GPIO, PCI, EBU, GPT),
+ MFP_XWAY(GPIO22, GPIO, SPI, CGU, EBU),
+ MFP_XWAY(GPIO23, GPIO, EBU, PCI, STP),
+ MFP_XWAY(GPIO24, GPIO, EBU, TDM, PCI),
+ MFP_XWAY(GPIO25, GPIO, TDM, SDIO, USIF),
+ MFP_XWAY(GPIO26, GPIO, EBU, TDM, SDIO),
+ MFP_XWAY(GPIO27, GPIO, TDM, SDIO, USIF),
+ MFP_XWAY(GPIO28, GPIO, GPT, PCI, SDIO),
+ MFP_XWAY(GPIO29, GPIO, PCI, CBUS, EXIN),
+ MFP_XWAY(GPIO30, GPIO, PCI, CBUS, NONE),
+ MFP_XWAY(GPIO31, GPIO, EBU, PCI, NONE),
+ MFP_XWAY(GPIO32, GPIO, MII, NONE, EBU),
+ MFP_XWAY(GPIO33, GPIO, MII, NONE, EBU),
+ MFP_XWAY(GPIO34, GPIO, SIN, SSI, NONE),
+ MFP_XWAY(GPIO35, GPIO, SIN, SSI, NONE),
+ MFP_XWAY(GPIO36, GPIO, SIN, SSI, EXIN),
+ MFP_XWAY(GPIO37, GPIO, USIF, NONE, PCI),
+ MFP_XWAY(GPIO38, GPIO, PCI, USIF, NONE),
+ MFP_XWAY(GPIO39, GPIO, USIF, EXIN, NONE),
+ MFP_XWAY(GPIO40, GPIO, MII, TDM, NONE),
+ MFP_XWAY(GPIO41, GPIO, MII, TDM, NONE),
+ MFP_XWAY(GPIO42, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO43, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO44, GPIO, MII, SIN, GPHY),
+ MFP_XWAY(GPIO45, GPIO, MII, GPHY, SIN),
+ MFP_XWAY(GPIO46, GPIO, MII, NONE, EXIN),
+ MFP_XWAY(GPIO47, GPIO, MII, GPHY, SIN),
+ MFP_XWAY(GPIO48, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO49, GPIO, EBU, NONE, NONE),
+};
+
+static const unsigned xrx200_exin_pin_map[] = {GPIO0, GPIO1, GPIO2, GPIO39, GPIO10, GPIO9};
+
+static const unsigned xrx200_pins_exin0[] = {GPIO0};
+static const unsigned xrx200_pins_exin1[] = {GPIO1};
+static const unsigned xrx200_pins_exin2[] = {GPIO2};
+static const unsigned xrx200_pins_exin3[] = {GPIO39};
+static const unsigned xrx200_pins_exin4[] = {GPIO10};
+static const unsigned xrx200_pins_exin5[] = {GPIO9};
+
+static const unsigned xrx200_pins_usif_uart_rx[] = {GPIO11};
+static const unsigned xrx200_pins_usif_uart_tx[] = {GPIO12};
+static const unsigned xrx200_pins_usif_uart_rts[] = {GPIO9};
+static const unsigned xrx200_pins_usif_uart_cts[] = {GPIO10};
+static const unsigned xrx200_pins_usif_uart_dtr[] = {GPIO4};
+static const unsigned xrx200_pins_usif_uart_dsr[] = {GPIO6};
+static const unsigned xrx200_pins_usif_uart_dcd[] = {GPIO25};
+static const unsigned xrx200_pins_usif_uart_ri[] = {GPIO27};
+
+static const unsigned xrx200_pins_usif_spi_di[] = {GPIO11};
+static const unsigned xrx200_pins_usif_spi_do[] = {GPIO12};
+static const unsigned xrx200_pins_usif_spi_clk[] = {GPIO38};
+static const unsigned xrx200_pins_usif_spi_cs0[] = {GPIO37};
+static const unsigned xrx200_pins_usif_spi_cs1[] = {GPIO39};
+static const unsigned xrx200_pins_usif_spi_cs2[] = {GPIO14};
+
+static const unsigned xrx200_pins_stp[] = {GPIO4, GPIO5, GPIO6};
+static const unsigned xrx200_pins_nmi[] = {GPIO8};
+static const unsigned xrx200_pins_mdio[] = {GPIO42, GPIO43};
+
+static const unsigned xrx200_pins_dfe_led0[] = {GPIO4};
+static const unsigned xrx200_pins_dfe_led1[] = {GPIO5};
+
+static const unsigned xrx200_pins_gphy0_led0[] = {GPIO5};
+static const unsigned xrx200_pins_gphy0_led1[] = {GPIO7};
+static const unsigned xrx200_pins_gphy0_led2[] = {GPIO2};
+static const unsigned xrx200_pins_gphy1_led0[] = {GPIO44};
+static const unsigned xrx200_pins_gphy1_led1[] = {GPIO45};
+static const unsigned xrx200_pins_gphy1_led2[] = {GPIO47};
+
+static const unsigned xrx200_pins_ebu_a24[] = {GPIO13};
+static const unsigned xrx200_pins_ebu_clk[] = {GPIO21};
+static const unsigned xrx200_pins_ebu_cs1[] = {GPIO23};
+static const unsigned xrx200_pins_ebu_a23[] = {GPIO24};
+static const unsigned xrx200_pins_ebu_wait[] = {GPIO26};
+static const unsigned xrx200_pins_ebu_a25[] = {GPIO31};
+
+static const unsigned xrx200_pins_nand_ale[] = {GPIO13};
+static const unsigned xrx200_pins_nand_cs1[] = {GPIO23};
+static const unsigned xrx200_pins_nand_cle[] = {GPIO24};
+static const unsigned xrx200_pins_nand_rdy[] = {GPIO48};
+static const unsigned xrx200_pins_nand_rd[] = {GPIO49};
+
+static const unsigned xrx200_pins_spi_di[] = {GPIO16};
+static const unsigned xrx200_pins_spi_do[] = {GPIO17};
+static const unsigned xrx200_pins_spi_clk[] = {GPIO18};
+static const unsigned xrx200_pins_spi_cs1[] = {GPIO15};
+static const unsigned xrx200_pins_spi_cs2[] = {GPIO22};
+static const unsigned xrx200_pins_spi_cs3[] = {GPIO13};
+static const unsigned xrx200_pins_spi_cs4[] = {GPIO10};
+static const unsigned xrx200_pins_spi_cs5[] = {GPIO9};
+static const unsigned xrx200_pins_spi_cs6[] = {GPIO11};
+
+static const unsigned xrx200_pins_gpt1[] = {GPIO28};
+static const unsigned xrx200_pins_gpt2[] = {GPIO21};
+static const unsigned xrx200_pins_gpt3[] = {GPIO6};
+
+static const unsigned xrx200_pins_clkout0[] = {GPIO8};
+static const unsigned xrx200_pins_clkout1[] = {GPIO7};
+static const unsigned xrx200_pins_clkout2[] = {GPIO3};
+static const unsigned xrx200_pins_clkout3[] = {GPIO2};
+
+static const unsigned xrx200_pins_pci_gnt1[] = {GPIO28};
+static const unsigned xrx200_pins_pci_gnt2[] = {GPIO23};
+static const unsigned xrx200_pins_pci_gnt3[] = {GPIO19};
+static const unsigned xrx200_pins_pci_gnt4[] = {GPIO38};
+static const unsigned xrx200_pins_pci_req1[] = {GPIO29};
+static const unsigned xrx200_pins_pci_req2[] = {GPIO31};
+static const unsigned xrx200_pins_pci_req3[] = {GPIO3};
+static const unsigned xrx200_pins_pci_req4[] = {GPIO37};
+
+static const struct ltq_pin_group xrx200_grps[] = {
+ GRP_MUX("exin0", EXIN, xrx200_pins_exin0),
+ GRP_MUX("exin1", EXIN, xrx200_pins_exin1),
+ GRP_MUX("exin2", EXIN, xrx200_pins_exin2),
+ GRP_MUX("exin3", EXIN, xrx200_pins_exin3),
+ GRP_MUX("exin4", EXIN, xrx200_pins_exin4),
+ GRP_MUX("exin5", EXIN, xrx200_pins_exin5),
+ GRP_MUX("ebu a23", EBU, xrx200_pins_ebu_a23),
+ GRP_MUX("ebu a24", EBU, xrx200_pins_ebu_a24),
+ GRP_MUX("ebu a25", EBU, xrx200_pins_ebu_a25),
+ GRP_MUX("ebu clk", EBU, xrx200_pins_ebu_clk),
+ GRP_MUX("ebu cs1", EBU, xrx200_pins_ebu_cs1),
+ GRP_MUX("ebu wait", EBU, xrx200_pins_ebu_wait),
+ GRP_MUX("nand ale", EBU, xrx200_pins_nand_ale),
+ GRP_MUX("nand cs1", EBU, xrx200_pins_nand_cs1),
+ GRP_MUX("nand cle", EBU, xrx200_pins_nand_cle),
+ GRP_MUX("nand rdy", EBU, xrx200_pins_nand_rdy),
+ GRP_MUX("nand rd", EBU, xrx200_pins_nand_rd),
+ GRP_MUX("spi_di", SPI, xrx200_pins_spi_di),
+ GRP_MUX("spi_do", SPI, xrx200_pins_spi_do),
+ GRP_MUX("spi_clk", SPI, xrx200_pins_spi_clk),
+ GRP_MUX("spi_cs1", SPI, xrx200_pins_spi_cs1),
+ GRP_MUX("spi_cs2", SPI, xrx200_pins_spi_cs2),
+ GRP_MUX("spi_cs3", SPI, xrx200_pins_spi_cs3),
+ GRP_MUX("spi_cs4", SPI, xrx200_pins_spi_cs4),
+ GRP_MUX("spi_cs5", SPI, xrx200_pins_spi_cs5),
+ GRP_MUX("spi_cs6", SPI, xrx200_pins_spi_cs6),
+ GRP_MUX("usif uart_rx", USIF, xrx200_pins_usif_uart_rx),
+ GRP_MUX("usif uart_rx", USIF, xrx200_pins_usif_uart_tx),
+ GRP_MUX("usif uart_rts", USIF, xrx200_pins_usif_uart_rts),
+ GRP_MUX("usif uart_cts", USIF, xrx200_pins_usif_uart_cts),
+ GRP_MUX("usif uart_dtr", USIF, xrx200_pins_usif_uart_dtr),
+ GRP_MUX("usif uart_dsr", USIF, xrx200_pins_usif_uart_dsr),
+ GRP_MUX("usif uart_dcd", USIF, xrx200_pins_usif_uart_dcd),
+ GRP_MUX("usif uart_ri", USIF, xrx200_pins_usif_uart_ri),
+ GRP_MUX("usif spi_di", USIF, xrx200_pins_usif_spi_di),
+ GRP_MUX("usif spi_do", USIF, xrx200_pins_usif_spi_do),
+ GRP_MUX("usif spi_clk", USIF, xrx200_pins_usif_spi_clk),
+ GRP_MUX("usif spi_cs0", USIF, xrx200_pins_usif_spi_cs0),
+ GRP_MUX("usif spi_cs1", USIF, xrx200_pins_usif_spi_cs1),
+ GRP_MUX("usif spi_cs2", USIF, xrx200_pins_usif_spi_cs2),
+ GRP_MUX("stp", STP, xrx200_pins_stp),
+ GRP_MUX("nmi", NMI, xrx200_pins_nmi),
+ GRP_MUX("gpt1", GPT, xrx200_pins_gpt1),
+ GRP_MUX("gpt2", GPT, xrx200_pins_gpt2),
+ GRP_MUX("gpt3", GPT, xrx200_pins_gpt3),
+ GRP_MUX("clkout0", CGU, xrx200_pins_clkout0),
+ GRP_MUX("clkout1", CGU, xrx200_pins_clkout1),
+ GRP_MUX("clkout2", CGU, xrx200_pins_clkout2),
+ GRP_MUX("clkout3", CGU, xrx200_pins_clkout3),
+ GRP_MUX("gnt1", PCI, xrx200_pins_pci_gnt1),
+ GRP_MUX("gnt2", PCI, xrx200_pins_pci_gnt2),
+ GRP_MUX("gnt3", PCI, xrx200_pins_pci_gnt3),
+ GRP_MUX("gnt4", PCI, xrx200_pins_pci_gnt4),
+ GRP_MUX("req1", PCI, xrx200_pins_pci_req1),
+ GRP_MUX("req2", PCI, xrx200_pins_pci_req2),
+ GRP_MUX("req3", PCI, xrx200_pins_pci_req3),
+ GRP_MUX("req4", PCI, xrx200_pins_pci_req4),
+ GRP_MUX("mdio", MDIO, xrx200_pins_mdio),
+ GRP_MUX("dfe led0", DFE, xrx200_pins_dfe_led0),
+ GRP_MUX("dfe led1", DFE, xrx200_pins_dfe_led1),
+ GRP_MUX("gphy0 led0", GPHY, xrx200_pins_gphy0_led0),
+ GRP_MUX("gphy0 led1", GPHY, xrx200_pins_gphy0_led1),
+ GRP_MUX("gphy0 led2", GPHY, xrx200_pins_gphy0_led2),
+ GRP_MUX("gphy1 led0", GPHY, xrx200_pins_gphy1_led0),
+ GRP_MUX("gphy1 led1", GPHY, xrx200_pins_gphy1_led1),
+ GRP_MUX("gphy1 led2", GPHY, xrx200_pins_gphy1_led2),
+};
+
+static const char * const xrx200_pci_grps[] = {"gnt1", "gnt2",
+ "gnt3", "gnt4",
+ "req1", "req2",
+ "req3", "req4"};
+static const char * const xrx200_spi_grps[] = {"spi_di", "spi_do",
+ "spi_clk", "spi_cs1",
+ "spi_cs2", "spi_cs3",
+ "spi_cs4", "spi_cs5",
+ "spi_cs6"};
+static const char * const xrx200_cgu_grps[] = {"clkout0", "clkout1",
+ "clkout2", "clkout3"};
+static const char * const xrx200_ebu_grps[] = {"ebu a23", "ebu a24",
+ "ebu a25", "ebu cs1",
+ "ebu wait", "ebu clk",
+ "nand ale", "nand cs1",
+ "nand cle", "nand rdy",
+ "nand rd"};
+static const char * const xrx200_exin_grps[] = {"exin0", "exin1", "exin2",
+ "exin3", "exin4", "exin5"};
+static const char * const xrx200_gpt_grps[] = {"gpt1", "gpt2", "gpt3"};
+static const char * const xrx200_usif_grps[] = {"usif uart_rx", "usif uart_tx",
+ "usif uart_rts", "usif uart_cts",
+ "usif uart_dtr", "usif uart_dsr",
+ "usif uart_dcd", "usif uart_ri",
+ "usif spi_di", "usif spi_do",
+ "usif spi_clk", "usif spi_cs0",
+ "usif spi_cs1", "usif spi_cs2"};
+static const char * const xrx200_stp_grps[] = {"stp"};
+static const char * const xrx200_nmi_grps[] = {"nmi"};
+static const char * const xrx200_mdio_grps[] = {"mdio"};
+static const char * const xrx200_dfe_grps[] = {"dfe led0", "dfe led1"};
+static const char * const xrx200_gphy_grps[] = {"gphy0 led0", "gphy0 led1",
+ "gphy0 led2", "gphy1 led0",
+ "gphy1 led1", "gphy1 led2"};
+
+static const struct ltq_pmx_func xrx200_funcs[] = {
+ {"spi", ARRAY_AND_SIZE(xrx200_spi_grps)},
+ {"usif", ARRAY_AND_SIZE(xrx200_usif_grps)},
+ {"cgu", ARRAY_AND_SIZE(xrx200_cgu_grps)},
+ {"exin", ARRAY_AND_SIZE(xrx200_exin_grps)},
+ {"stp", ARRAY_AND_SIZE(xrx200_stp_grps)},
+ {"gpt", ARRAY_AND_SIZE(xrx200_gpt_grps)},
+ {"nmi", ARRAY_AND_SIZE(xrx200_nmi_grps)},
+ {"pci", ARRAY_AND_SIZE(xrx200_pci_grps)},
+ {"ebu", ARRAY_AND_SIZE(xrx200_ebu_grps)},
+ {"mdio", ARRAY_AND_SIZE(xrx200_mdio_grps)},
+ {"dfe", ARRAY_AND_SIZE(xrx200_dfe_grps)},
+ {"gphy", ARRAY_AND_SIZE(xrx200_gphy_grps)},
+};
+
+/* --------- xrx300 related code --------- */
+#define XRX300_MAX_PIN 64
+
+static const struct ltq_mfp_pin xrx300_mfp[] = {
+ /* pin f0 f1 f2 f3 */
+ MFP_XWAY(GPIO0, GPIO, EXIN, EPHY, NONE),
+ MFP_XWAY(GPIO1, GPIO, NONE, EXIN, NONE),
+ MFP_XWAY(GPIO2, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO3, GPIO, CGU, NONE, NONE),
+ MFP_XWAY(GPIO4, GPIO, STP, DFE, NONE),
+ MFP_XWAY(GPIO5, GPIO, STP, EPHY, DFE),
+ MFP_XWAY(GPIO6, GPIO, STP, NONE, NONE),
+ MFP_XWAY(GPIO7, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO8, GPIO, CGU, GPHY, EPHY),
+ MFP_XWAY(GPIO9, GPIO, WIFI, NONE, EXIN),
+ MFP_XWAY(GPIO10, GPIO, USIF, SPI, EXIN),
+ MFP_XWAY(GPIO11, GPIO, USIF, WIFI, SPI),
+ MFP_XWAY(GPIO12, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO13, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO14, GPIO, CGU, USIF, EPHY),
+ MFP_XWAY(GPIO15, GPIO, SPI, NONE, MCD),
+ MFP_XWAY(GPIO16, GPIO, SPI, EXIN, NONE),
+ MFP_XWAY(GPIO17, GPIO, SPI, NONE, NONE),
+ MFP_XWAY(GPIO18, GPIO, SPI, NONE, NONE),
+ MFP_XWAY(GPIO19, GPIO, USIF, NONE, EPHY),
+ MFP_XWAY(GPIO20, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO21, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO22, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO23, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO24, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO25, GPIO, TDM, NONE, NONE),
+ MFP_XWAY(GPIO26, GPIO, TDM, NONE, NONE),
+ MFP_XWAY(GPIO27, GPIO, TDM, NONE, NONE),
+ MFP_XWAY(GPIO28, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO29, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO30, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO31, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO32, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO33, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO34, GPIO, NONE, SSI, NONE),
+ MFP_XWAY(GPIO35, GPIO, NONE, SSI, NONE),
+ MFP_XWAY(GPIO36, GPIO, NONE, SSI, NONE),
+ MFP_XWAY(GPIO37, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO38, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO39, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO40, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO41, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO42, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO43, GPIO, MDIO, NONE, NONE),
+ MFP_XWAY(GPIO44, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO45, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO46, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO47, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO48, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO49, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO50, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO51, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO52, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO53, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO54, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO55, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO56, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO57, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO58, GPIO, EBU, TDM, NONE),
+ MFP_XWAY(GPIO59, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO60, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO61, GPIO, EBU, NONE, NONE),
+ MFP_XWAY(GPIO62, NONE, NONE, NONE, NONE),
+ MFP_XWAY(GPIO63, NONE, NONE, NONE, NONE),
+};
+
+static const unsigned xrx300_exin_pin_map[] = {GPIO0, GPIO1, GPIO16, GPIO10, GPIO9};
+
+static const unsigned xrx300_pins_exin0[] = {GPIO0};
+static const unsigned xrx300_pins_exin1[] = {GPIO1};
+static const unsigned xrx300_pins_exin2[] = {GPIO16};
+/* EXIN3 is not available on xrX300 */
+static const unsigned xrx300_pins_exin4[] = {GPIO10};
+static const unsigned xrx300_pins_exin5[] = {GPIO9};
+
+static const unsigned xrx300_pins_usif_uart_rx[] = {GPIO11};
+static const unsigned xrx300_pins_usif_uart_tx[] = {GPIO10};
+
+static const unsigned xrx300_pins_usif_spi_di[] = {GPIO11};
+static const unsigned xrx300_pins_usif_spi_do[] = {GPIO10};
+static const unsigned xrx300_pins_usif_spi_clk[] = {GPIO19};
+static const unsigned xrx300_pins_usif_spi_cs0[] = {GPIO14};
+
+static const unsigned xrx300_pins_stp[] = {GPIO4, GPIO5, GPIO6};
+static const unsigned xrx300_pins_mdio[] = {GPIO42, GPIO43};
+
+static const unsigned xrx300_pins_dfe_led0[] = {GPIO4};
+static const unsigned xrx300_pins_dfe_led1[] = {GPIO5};
+
+static const unsigned xrx300_pins_ephy0_led0[] = {GPIO5};
+static const unsigned xrx300_pins_ephy0_led1[] = {GPIO8};
+static const unsigned xrx300_pins_ephy1_led0[] = {GPIO14};
+static const unsigned xrx300_pins_ephy1_led1[] = {GPIO19};
+
+static const unsigned xrx300_pins_nand_ale[] = {GPIO13};
+static const unsigned xrx300_pins_nand_cs1[] = {GPIO23};
+static const unsigned xrx300_pins_nand_cle[] = {GPIO24};
+static const unsigned xrx300_pins_nand_rdy[] = {GPIO48};
+static const unsigned xrx300_pins_nand_rd[] = {GPIO49};
+static const unsigned xrx300_pins_nand_d1[] = {GPIO50};
+static const unsigned xrx300_pins_nand_d0[] = {GPIO51};
+static const unsigned xrx300_pins_nand_d2[] = {GPIO52};
+static const unsigned xrx300_pins_nand_d7[] = {GPIO53};
+static const unsigned xrx300_pins_nand_d6[] = {GPIO54};
+static const unsigned xrx300_pins_nand_d5[] = {GPIO55};
+static const unsigned xrx300_pins_nand_d4[] = {GPIO56};
+static const unsigned xrx300_pins_nand_d3[] = {GPIO57};
+static const unsigned xrx300_pins_nand_cs0[] = {GPIO58};
+static const unsigned xrx300_pins_nand_wr[] = {GPIO59};
+static const unsigned xrx300_pins_nand_wp[] = {GPIO60};
+static const unsigned xrx300_pins_nand_se[] = {GPIO61};
+
+static const unsigned xrx300_pins_spi_di[] = {GPIO16};
+static const unsigned xrx300_pins_spi_do[] = {GPIO17};
+static const unsigned xrx300_pins_spi_clk[] = {GPIO18};
+static const unsigned xrx300_pins_spi_cs1[] = {GPIO15};
+/* SPI_CS2 is not available on xrX300 */
+/* SPI_CS3 is not available on xrX300 */
+static const unsigned xrx300_pins_spi_cs4[] = {GPIO10};
+/* SPI_CS5 is not available on xrX300 */
+static const unsigned xrx300_pins_spi_cs6[] = {GPIO11};
+
+/* CLKOUT0 is not available on xrX300 */
+/* CLKOUT1 is not available on xrX300 */
+static const unsigned xrx300_pins_clkout2[] = {GPIO3};
+
+static const struct ltq_pin_group xrx300_grps[] = {
+ GRP_MUX("exin0", EXIN, xrx300_pins_exin0),
+ GRP_MUX("exin1", EXIN, xrx300_pins_exin1),
+ GRP_MUX("exin2", EXIN, xrx300_pins_exin2),
+ GRP_MUX("exin4", EXIN, xrx300_pins_exin4),
+ GRP_MUX("exin5", EXIN, xrx300_pins_exin5),
+ GRP_MUX("nand ale", EBU, xrx300_pins_nand_ale),
+ GRP_MUX("nand cs1", EBU, xrx300_pins_nand_cs1),
+ GRP_MUX("nand cle", EBU, xrx300_pins_nand_cle),
+ GRP_MUX("nand rdy", EBU, xrx300_pins_nand_rdy),
+ GRP_MUX("nand rd", EBU, xrx300_pins_nand_rd),
+ GRP_MUX("nand d1", EBU, xrx300_pins_nand_d1),
+ GRP_MUX("nand d0", EBU, xrx300_pins_nand_d0),
+ GRP_MUX("nand d2", EBU, xrx300_pins_nand_d2),
+ GRP_MUX("nand d7", EBU, xrx300_pins_nand_d7),
+ GRP_MUX("nand d6", EBU, xrx300_pins_nand_d6),
+ GRP_MUX("nand d5", EBU, xrx300_pins_nand_d5),
+ GRP_MUX("nand d4", EBU, xrx300_pins_nand_d4),
+ GRP_MUX("nand d3", EBU, xrx300_pins_nand_d3),
+ GRP_MUX("nand cs0", EBU, xrx300_pins_nand_cs0),
+ GRP_MUX("nand wr", EBU, xrx300_pins_nand_wr),
+ GRP_MUX("nand wp", EBU, xrx300_pins_nand_wp),
+ GRP_MUX("nand se", EBU, xrx300_pins_nand_se),
+ GRP_MUX("spi_di", SPI, xrx300_pins_spi_di),
+ GRP_MUX("spi_do", SPI, xrx300_pins_spi_do),
+ GRP_MUX("spi_clk", SPI, xrx300_pins_spi_clk),
+ GRP_MUX("spi_cs1", SPI, xrx300_pins_spi_cs1),
+ GRP_MUX("spi_cs4", SPI, xrx300_pins_spi_cs4),
+ GRP_MUX("spi_cs6", SPI, xrx300_pins_spi_cs6),
+ GRP_MUX("usif uart_rx", USIF, xrx300_pins_usif_uart_rx),
+ GRP_MUX("usif uart_tx", USIF, xrx300_pins_usif_uart_tx),
+ GRP_MUX("usif spi_di", USIF, xrx300_pins_usif_spi_di),
+ GRP_MUX("usif spi_do", USIF, xrx300_pins_usif_spi_do),
+ GRP_MUX("usif spi_clk", USIF, xrx300_pins_usif_spi_clk),
+ GRP_MUX("usif spi_cs0", USIF, xrx300_pins_usif_spi_cs0),
+ GRP_MUX("stp", STP, xrx300_pins_stp),
+ GRP_MUX("clkout2", CGU, xrx300_pins_clkout2),
+ GRP_MUX("mdio", MDIO, xrx300_pins_mdio),
+ GRP_MUX("dfe led0", DFE, xrx300_pins_dfe_led0),
+ GRP_MUX("dfe led1", DFE, xrx300_pins_dfe_led1),
+ GRP_MUX("ephy0 led0", GPHY, xrx300_pins_ephy0_led0),
+ GRP_MUX("ephy0 led1", GPHY, xrx300_pins_ephy0_led1),
+ GRP_MUX("ephy1 led0", GPHY, xrx300_pins_ephy1_led0),
+ GRP_MUX("ephy1 led1", GPHY, xrx300_pins_ephy1_led1),
+};
+
+static const char * const xrx300_spi_grps[] = {"spi_di", "spi_do",
+ "spi_clk", "spi_cs1",
+ "spi_cs4", "spi_cs6"};
+static const char * const xrx300_cgu_grps[] = {"clkout2"};
+static const char * const xrx300_ebu_grps[] = {"nand ale", "nand cs1",
+ "nand cle", "nand rdy",
+ "nand rd", "nand d1",
+ "nand d0", "nand d2",
+ "nand d7", "nand d6",
+ "nand d5", "nand d4",
+ "nand d3", "nand cs0",
+ "nand wr", "nand wp",
+ "nand se"};
+static const char * const xrx300_exin_grps[] = {"exin0", "exin1", "exin2",
+ "exin4", "exin5"};
+static const char * const xrx300_usif_grps[] = {"usif uart_rx", "usif uart_tx",
+ "usif spi_di", "usif spi_do",
+ "usif spi_clk", "usif spi_cs0"};
+static const char * const xrx300_stp_grps[] = {"stp"};
+static const char * const xrx300_mdio_grps[] = {"mdio"};
+static const char * const xrx300_dfe_grps[] = {"dfe led0", "dfe led1"};
+static const char * const xrx300_gphy_grps[] = {"ephy0 led0", "ephy0 led1",
+ "ephy1 led0", "ephy1 led1"};
+
+static const struct ltq_pmx_func xrx300_funcs[] = {
+ {"spi", ARRAY_AND_SIZE(xrx300_spi_grps)},
+ {"usif", ARRAY_AND_SIZE(xrx300_usif_grps)},
+ {"cgu", ARRAY_AND_SIZE(xrx300_cgu_grps)},
+ {"exin", ARRAY_AND_SIZE(xrx300_exin_grps)},
+ {"stp", ARRAY_AND_SIZE(xrx300_stp_grps)},
+ {"ebu", ARRAY_AND_SIZE(xrx300_ebu_grps)},
+ {"mdio", ARRAY_AND_SIZE(xrx300_mdio_grps)},
+ {"dfe", ARRAY_AND_SIZE(xrx300_dfe_grps)},
+ {"ephy", ARRAY_AND_SIZE(xrx300_gphy_grps)},
+};
+
/* --------- pinconf related code --------- */
static int xway_pinconf_get(struct pinctrl_dev *pctldev,
unsigned pin,
@@ -676,6 +1563,10 @@ static int xway_gpio_dir_out(struct gpio_chip *chip, unsigned int pin, int val)
{
struct ltq_pinmux_info *info = dev_get_drvdata(chip->dev);
+ if (PORT(pin) == PORT3)
+ gpio_setbit(info->membase[0], GPIO3_OD, PORT_PIN(pin));
+ else
+ gpio_setbit(info->membase[0], GPIO_OD(pin), PORT_PIN(pin));
gpio_setbit(info->membase[0], GPIO_DIR(pin), PORT_PIN(pin));
xway_gpio_set(chip, pin, val);
@@ -695,10 +1586,7 @@ static struct gpio_chip xway_chip = {
/* --------- register the pinctrl layer --------- */
-static const unsigned xway_exin_pin_map[] = {GPIO0, GPIO1, GPIO2, GPIO39, GPIO46, GPIO9};
-static const unsigned ase_exin_pins_map[] = {GPIO6, GPIO29, GPIO0};
-
-static struct pinctrl_xway_soc {
+struct pinctrl_xway_soc {
int pin_count;
const struct ltq_mfp_pin *mfp;
const struct ltq_pin_group *grps;
@@ -707,22 +1595,54 @@ static struct pinctrl_xway_soc {
unsigned int num_funcs;
const unsigned *exin;
unsigned int num_exin;
-} soc_cfg[] = {
- /* legacy xway */
- {XWAY_MAX_PIN, xway_mfp,
- xway_grps, ARRAY_SIZE(xway_grps),
- danube_funcs, ARRAY_SIZE(danube_funcs),
- xway_exin_pin_map, 3},
- /* xway xr9 series */
- {XR9_MAX_PIN, xway_mfp,
- xway_grps, ARRAY_SIZE(xway_grps),
- xrx_funcs, ARRAY_SIZE(xrx_funcs),
- xway_exin_pin_map, 6},
- /* xway ase series */
- {XWAY_MAX_PIN, ase_mfp,
- ase_grps, ARRAY_SIZE(ase_grps),
- ase_funcs, ARRAY_SIZE(ase_funcs),
- ase_exin_pins_map, 3},
+};
+
+/* xway xr9 series (DEPRECATED: Use XWAY xRX100/xRX200 Family) */
+static struct pinctrl_xway_soc xr9_pinctrl = {
+ XR9_MAX_PIN, xway_mfp,
+ xway_grps, ARRAY_SIZE(xway_grps),
+ xrx_funcs, ARRAY_SIZE(xrx_funcs),
+ xway_exin_pin_map, 6
+};
+
+/* XWAY AMAZON Family */
+static struct pinctrl_xway_soc ase_pinctrl = {
+ ASE_MAX_PIN, ase_mfp,
+ ase_grps, ARRAY_SIZE(ase_grps),
+ ase_funcs, ARRAY_SIZE(ase_funcs),
+ ase_exin_pin_map, 3
+};
+
+/* XWAY DANUBE Family */
+static struct pinctrl_xway_soc danube_pinctrl = {
+ DANUBE_MAX_PIN, danube_mfp,
+ danube_grps, ARRAY_SIZE(danube_grps),
+ danube_funcs, ARRAY_SIZE(danube_funcs),
+ danube_exin_pin_map, 3
+};
+
+/* XWAY xRX100 Family */
+static struct pinctrl_xway_soc xrx100_pinctrl = {
+ XRX100_MAX_PIN, xrx100_mfp,
+ xrx100_grps, ARRAY_SIZE(xrx100_grps),
+ xrx100_funcs, ARRAY_SIZE(xrx100_funcs),
+ xrx100_exin_pin_map, 6
+};
+
+/* XWAY xRX200 Family */
+static struct pinctrl_xway_soc xrx200_pinctrl = {
+ XRX200_MAX_PIN, xrx200_mfp,
+ xrx200_grps, ARRAY_SIZE(xrx200_grps),
+ xrx200_funcs, ARRAY_SIZE(xrx200_funcs),
+ xrx200_exin_pin_map, 6
+};
+
+/* XWAY xRX300 Family */
+static struct pinctrl_xway_soc xrx300_pinctrl = {
+ XRX300_MAX_PIN, xrx300_mfp,
+ xrx300_grps, ARRAY_SIZE(xrx300_grps),
+ xrx300_funcs, ARRAY_SIZE(xrx300_funcs),
+ xrx300_exin_pin_map, 5
};
static struct pinctrl_gpio_range xway_gpio_range = {
@@ -731,9 +1651,14 @@ static struct pinctrl_gpio_range xway_gpio_range = {
};
static const struct of_device_id xway_match[] = {
- { .compatible = "lantiq,pinctrl-xway", .data = &soc_cfg[0]},
- { .compatible = "lantiq,pinctrl-xr9", .data = &soc_cfg[1]},
- { .compatible = "lantiq,pinctrl-ase", .data = &soc_cfg[2]},
+ { .compatible = "lantiq,pinctrl-xway", .data = &danube_pinctrl}, /*DEPRECATED*/
+ { .compatible = "lantiq,pinctrl-xr9", .data = &xr9_pinctrl}, /*DEPRECATED*/
+ { .compatible = "lantiq,pinctrl-ase", .data = &ase_pinctrl}, /*DEPRECATED*/
+ { .compatible = "lantiq,ase-pinctrl", .data = &ase_pinctrl},
+ { .compatible = "lantiq,danube-pinctrl", .data = &danube_pinctrl},
+ { .compatible = "lantiq,xrx100-pinctrl", .data = &xrx100_pinctrl},
+ { .compatible = "lantiq,xrx200-pinctrl", .data = &xrx200_pinctrl},
+ { .compatible = "lantiq,xrx300-pinctrl", .data = &xrx300_pinctrl},
{},
};
MODULE_DEVICE_TABLE(of, xway_match);
@@ -755,7 +1680,7 @@ static int pinmux_xway_probe(struct platform_device *pdev)
if (match)
xway_soc = (const struct pinctrl_xway_soc *) match->data;
else
- xway_soc = &soc_cfg[0];
+ xway_soc = &danube_pinctrl;
/* find out how many pads we have */
xway_chip.ngpio = xway_soc->pin_count;
diff --git a/drivers/pinctrl/pxa/Kconfig b/drivers/pinctrl/pxa/Kconfig
new file mode 100644
index 000000000000..990667ff772c
--- /dev/null
+++ b/drivers/pinctrl/pxa/Kconfig
@@ -0,0 +1,17 @@
+if (ARCH_PXA || COMPILE_TEST)
+
+config PINCTRL_PXA
+ bool
+ select PINMUX
+ select PINCONF
+ select GENERIC_PINCONF
+
+config PINCTRL_PXA27X
+ tristate "Marvell PXA27x pin controller driver"
+ select PINCTRL_PXA
+ default y if PXA27x
+ help
+ This is the pinctrl, pinmux, pinconf driver for the Marvell
+ PXA2xx block found in the pxa25x and pxa27x platforms.
+
+endif
diff --git a/drivers/pinctrl/pxa/Makefile b/drivers/pinctrl/pxa/Makefile
new file mode 100644
index 000000000000..f1d56af2bfc0
--- /dev/null
+++ b/drivers/pinctrl/pxa/Makefile
@@ -0,0 +1,2 @@
+# Marvell PXA pin control drivers
+obj-$(CONFIG_PINCTRL_PXA27X) += pinctrl-pxa2xx.o pinctrl-pxa27x.o
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa27x.c b/drivers/pinctrl/pxa/pinctrl-pxa27x.c
new file mode 100644
index 000000000000..2e2c3709ef05
--- /dev/null
+++ b/drivers/pinctrl/pxa/pinctrl-pxa27x.c
@@ -0,0 +1,566 @@
+/*
+ * Marvell PXA27x family pin control
+ *
+ * Copyright (C) 2015 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-pxa2xx.h"
+
+static const struct pxa_desc_pin pxa27x_pins[] = {
+ PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(0)),
+ PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(1)),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(9),
+ PXA_FUNCTION(0, 3, "FFCTS"),
+ PXA_FUNCTION(1, 1, "HZ_CLK"),
+ PXA_FUNCTION(1, 3, "CHOUT<0>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(10),
+ PXA_FUNCTION(0, 1, "FFDCD"),
+ PXA_FUNCTION(0, 3, "USB_P3_5"),
+ PXA_FUNCTION(1, 1, "HZ_CLK"),
+ PXA_FUNCTION(1, 3, "CHOUT<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(11),
+ PXA_FUNCTION(0, 1, "EXT_SYNC<0>"),
+ PXA_FUNCTION(0, 2, "SSPRXD2"),
+ PXA_FUNCTION(0, 3, "USB_P3_1"),
+ PXA_FUNCTION(1, 1, "CHOUT<0>"),
+ PXA_FUNCTION(1, 1, "PWM_OUT<2>"),
+ PXA_FUNCTION(1, 3, "48_MHz")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(12),
+ PXA_FUNCTION(0, 1, "EXT_SYNC<1>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<7>"),
+ PXA_FUNCTION(1, 1, "CHOUT<1>"),
+ PXA_FUNCTION(1, 1, "PWM_OUT<3>"),
+ PXA_FUNCTION(1, 3, "48_MHz")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(13),
+ PXA_FUNCTION(0, 1, "CLK_EXT"),
+ PXA_FUNCTION(0, 2, "KP_DKIN<7>"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<7>"),
+ PXA_FUNCTION(1, 1, "SSPTXD2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(14),
+ PXA_FUNCTION(0, 1, "L_VSYNC"),
+ PXA_FUNCTION(0, 2, "SSPSFRM2"),
+ PXA_FUNCTION(1, 1, "SSPSFRM2"),
+ PXA_FUNCTION(1, 3, "UCLK")),
+ PXA_GPIO_ONLY_PIN(PXA_PINCTRL_PIN(15)),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(16),
+ PXA_FUNCTION(0, 1, "KP_MKIN<5>"),
+ PXA_FUNCTION(1, 2, "PWM_OUT<0>"),
+ PXA_FUNCTION(1, 3, "FFTXD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(17),
+ PXA_FUNCTION(0, 1, "KP_MKIN<6>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<6>"),
+ PXA_FUNCTION(1, 2, "PWM_OUT<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(18),
+ PXA_FUNCTION(0, 1, "RDY")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(19),
+ PXA_FUNCTION(0, 1, "SSPSCLK2"),
+ PXA_FUNCTION(0, 3, "FFRXD"),
+ PXA_FUNCTION(1, 1, "SSPSCLK2"),
+ PXA_FUNCTION(1, 2, "L_CS"),
+ PXA_FUNCTION(1, 3, "nURST")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(20),
+ PXA_FUNCTION(0, 1, "DREQ<0>"),
+ PXA_FUNCTION(0, 2, "MBREQ"),
+ PXA_FUNCTION(1, 1, "nSDCS<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(21),
+ PXA_FUNCTION(1, 1, "nSDCS<3>"),
+ PXA_FUNCTION(1, 2, "DVAL<0>"),
+ PXA_FUNCTION(1, 3, "MBGNT")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(22),
+ PXA_FUNCTION(0, 1, "SSPEXTCLK2"),
+ PXA_FUNCTION(0, 2, "SSPSCLKEN2"),
+ PXA_FUNCTION(0, 3, "SSPSCLK2"),
+ PXA_FUNCTION(1, 1, "KP_MKOUT<7>"),
+ PXA_FUNCTION(1, 2, "SSPSYSCLK2"),
+ PXA_FUNCTION(1, 3, "SSPSCLK2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(23),
+ PXA_FUNCTION(0, 2, "SSPSCLK"),
+ PXA_FUNCTION(1, 1, "CIF_MCLK"),
+ PXA_FUNCTION(1, 1, "SSPSCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(24),
+ PXA_FUNCTION(0, 1, "CIF_FV"),
+ PXA_FUNCTION(0, 2, "SSPSFRM"),
+ PXA_FUNCTION(1, 1, "CIF_FV"),
+ PXA_FUNCTION(1, 2, "SSPSFRM")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(25),
+ PXA_FUNCTION(0, 1, "CIF_LV"),
+ PXA_FUNCTION(1, 1, "CIF_LV"),
+ PXA_FUNCTION(1, 2, "SSPTXD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(26),
+ PXA_FUNCTION(0, 1, "SSPRXD"),
+ PXA_FUNCTION(0, 2, "CIF_PCLK"),
+ PXA_FUNCTION(0, 3, "FFCTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(27),
+ PXA_FUNCTION(0, 1, "SSPEXTCLK"),
+ PXA_FUNCTION(0, 2, "SSPSCLKEN"),
+ PXA_FUNCTION(0, 3, "CIF_DD<0>"),
+ PXA_FUNCTION(1, 1, "SSPSYSCLK"),
+ PXA_FUNCTION(1, 3, "FFRTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(28),
+ PXA_FUNCTION(0, 1, "AC97_BITCLK"),
+ PXA_FUNCTION(0, 2, "I2S_BITCLK"),
+ PXA_FUNCTION(0, 3, "SSPSFRM"),
+ PXA_FUNCTION(1, 1, "I2S_BITCLK"),
+ PXA_FUNCTION(1, 3, "SSPSFRM")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(29),
+ PXA_FUNCTION(0, 1, "AC97_SDATA_IN_0"),
+ PXA_FUNCTION(0, 2, "I2S_SDATA_IN"),
+ PXA_FUNCTION(0, 3, "SSPSCLK"),
+ PXA_FUNCTION(1, 1, "SSPRXD2"),
+ PXA_FUNCTION(1, 3, "SSPSCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(30),
+ PXA_FUNCTION(1, 1, "I2S_SDATA_OUT"),
+ PXA_FUNCTION(1, 2, "AC97_SDATA_OUT"),
+ PXA_FUNCTION(1, 3, "USB_P3_2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(31),
+ PXA_FUNCTION(1, 1, "I2S_SYNC"),
+ PXA_FUNCTION(1, 2, "AC97_SYNC"),
+ PXA_FUNCTION(1, 3, "USB_P3_6")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(32),
+ PXA_FUNCTION(1, 1, "MSSCLK"),
+ PXA_FUNCTION(1, 2, "MMCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(33),
+ PXA_FUNCTION(0, 1, "FFRXD"),
+ PXA_FUNCTION(0, 2, "FFDSR"),
+ PXA_FUNCTION(1, 1, "DVAL<1>"),
+ PXA_FUNCTION(1, 2, "nCS<5>"),
+ PXA_FUNCTION(1, 3, "MBGNT")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(34),
+ PXA_FUNCTION(0, 1, "FFRXD"),
+ PXA_FUNCTION(0, 2, "KP_MKIN<3>"),
+ PXA_FUNCTION(0, 3, "SSPSCLK3"),
+ PXA_FUNCTION(1, 1, "USB_P2_2"),
+ PXA_FUNCTION(1, 3, "SSPSCLK3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(35),
+ PXA_FUNCTION(0, 1, "FFCTS"),
+ PXA_FUNCTION(0, 2, "USB_P2_1"),
+ PXA_FUNCTION(0, 3, "SSPSFRM3"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<6>"),
+ PXA_FUNCTION(1, 3, "SSPTXD3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(36),
+ PXA_FUNCTION(0, 1, "FFDCD"),
+ PXA_FUNCTION(0, 2, "SSPSCLK2"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<7>"),
+ PXA_FUNCTION(1, 1, "USB_P2_4"),
+ PXA_FUNCTION(1, 2, "SSPSCLK2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(37),
+ PXA_FUNCTION(0, 1, "FFDSR"),
+ PXA_FUNCTION(0, 2, "SSPSFRM2"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<3>"),
+ PXA_FUNCTION(1, 1, "USB_P2_8"),
+ PXA_FUNCTION(1, 2, "SSPSFRM2"),
+ PXA_FUNCTION(1, 3, "FFTXD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(38),
+ PXA_FUNCTION(0, 1, "FFRI"),
+ PXA_FUNCTION(0, 2, "KP_MKIN<4>"),
+ PXA_FUNCTION(0, 3, "USB_P2_3"),
+ PXA_FUNCTION(1, 1, "SSPTXD3"),
+ PXA_FUNCTION(1, 2, "SSPTXD2"),
+ PXA_FUNCTION(1, 3, "PWM_OUT<0>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(39),
+ PXA_FUNCTION(0, 1, "KP_MKIN<4>"),
+ PXA_FUNCTION(0, 3, "SSPSFRM3"),
+ PXA_FUNCTION(1, 1, "USB_P2_6"),
+ PXA_FUNCTION(1, 2, "FFTXD"),
+ PXA_FUNCTION(1, 3, "SSPSFRM3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(40),
+ PXA_FUNCTION(0, 1, "SSPRXD2"),
+ PXA_FUNCTION(0, 3, "USB_P2_5"),
+ PXA_FUNCTION(1, 1, "KP_MKOUT<6>"),
+ PXA_FUNCTION(1, 2, "FFDTR"),
+ PXA_FUNCTION(1, 3, "SSPSCLK3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(41),
+ PXA_FUNCTION(0, 1, "FFRXD"),
+ PXA_FUNCTION(0, 2, "USB_P2_7"),
+ PXA_FUNCTION(0, 3, "SSPRXD3"),
+ PXA_FUNCTION(1, 1, "KP_MKOUT<7>"),
+ PXA_FUNCTION(1, 2, "FFRTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(42),
+ PXA_FUNCTION(0, 1, "BTRXD"),
+ PXA_FUNCTION(0, 2, "ICP_RXD"),
+ PXA_FUNCTION(1, 3, "CIF_MCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(43),
+ PXA_FUNCTION(0, 3, "CIF_FV"),
+ PXA_FUNCTION(1, 1, "ICP_TXD"),
+ PXA_FUNCTION(1, 2, "BTTXD"),
+ PXA_FUNCTION(1, 3, "CIF_FV")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(44),
+ PXA_FUNCTION(0, 1, "BTCTS"),
+ PXA_FUNCTION(0, 3, "CIF_LV"),
+ PXA_FUNCTION(1, 3, "CIF_LV")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(45),
+ PXA_FUNCTION(0, 3, "CIF_PCLK"),
+ PXA_FUNCTION(1, 1, "AC97_SYSCLK"),
+ PXA_FUNCTION(1, 2, "BTRTS"),
+ PXA_FUNCTION(1, 3, "SSPSYSCLK3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(46),
+ PXA_FUNCTION(0, 1, "ICP_RXD"),
+ PXA_FUNCTION(0, 2, "STD_RXD"),
+ PXA_FUNCTION(1, 2, "PWM_OUT<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(47),
+ PXA_FUNCTION(0, 1, "CIF_DD<0>"),
+ PXA_FUNCTION(1, 1, "STD_TXD"),
+ PXA_FUNCTION(1, 2, "ICP_TXD"),
+ PXA_FUNCTION(1, 3, "PWM_OUT<3>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(48),
+ PXA_FUNCTION(0, 1, "CIF_DD<5>"),
+ PXA_FUNCTION(1, 1, "BB_OB_DAT<1>"),
+ PXA_FUNCTION(1, 2, "nPOE")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(49),
+ PXA_FUNCTION(1, 2, "nPWE")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(50),
+ PXA_FUNCTION(0, 1, "CIF_DD<3>"),
+ PXA_FUNCTION(0, 3, "SSPSCLK2"),
+ PXA_FUNCTION(1, 1, "BB_OB_DAT<2>"),
+ PXA_FUNCTION(1, 2, "nPIOR"),
+ PXA_FUNCTION(1, 3, "SSPSCLK2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(51),
+ PXA_FUNCTION(0, 1, "CIF_DD<2>"),
+ PXA_FUNCTION(1, 1, "BB_OB_DAT<3>"),
+ PXA_FUNCTION(1, 2, "nPIOW")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(52),
+ PXA_FUNCTION(0, 1, "CIF_DD<4>"),
+ PXA_FUNCTION(0, 2, "SSPSCLK3"),
+ PXA_FUNCTION(1, 1, "BB_OB_CLK"),
+ PXA_FUNCTION(1, 2, "SSPSCLK3")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(53),
+ PXA_FUNCTION(0, 1, "FFRXD"),
+ PXA_FUNCTION(0, 2, "USB_P2_3"),
+ PXA_FUNCTION(1, 1, "BB_OB_STB"),
+ PXA_FUNCTION(1, 2, "CIF_MCLK"),
+ PXA_FUNCTION(1, 3, "SSPSYSCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(54),
+ PXA_FUNCTION(0, 2, "BB_OB_WAIT"),
+ PXA_FUNCTION(0, 3, "CIF_PCLK"),
+ PXA_FUNCTION(1, 2, "nPCE<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(55),
+ PXA_FUNCTION(0, 1, "CIF_DD<1>"),
+ PXA_FUNCTION(0, 2, "BB_IB_DAT<1>"),
+ PXA_FUNCTION(1, 2, "nPREG")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(56),
+ PXA_FUNCTION(0, 1, "nPWAIT"),
+ PXA_FUNCTION(0, 2, "BB_IB_DAT<2>"),
+ PXA_FUNCTION(1, 1, "USB_P3_4")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(57),
+ PXA_FUNCTION(0, 1, "nIOS16"),
+ PXA_FUNCTION(0, 2, "BB_IB_DAT<3>"),
+ PXA_FUNCTION(1, 3, "SSPTXD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(58),
+ PXA_FUNCTION(0, 2, "LDD<0>"),
+ PXA_FUNCTION(1, 2, "LDD<0>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(59),
+ PXA_FUNCTION(0, 2, "LDD<1>"),
+ PXA_FUNCTION(1, 2, "LDD<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(60),
+ PXA_FUNCTION(0, 2, "LDD<2>"),
+ PXA_FUNCTION(1, 2, "LDD<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(61),
+ PXA_FUNCTION(0, 2, "LDD<3>"),
+ PXA_FUNCTION(1, 2, "LDD<3>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(62),
+ PXA_FUNCTION(0, 2, "LDD<4>"),
+ PXA_FUNCTION(1, 2, "LDD<4>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(63),
+ PXA_FUNCTION(0, 2, "LDD<5>"),
+ PXA_FUNCTION(1, 2, "LDD<5>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(64),
+ PXA_FUNCTION(0, 2, "LDD<6>"),
+ PXA_FUNCTION(1, 2, "LDD<6>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(65),
+ PXA_FUNCTION(0, 2, "LDD<7>"),
+ PXA_FUNCTION(1, 2, "LDD<7>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(66),
+ PXA_FUNCTION(0, 2, "LDD<8>"),
+ PXA_FUNCTION(1, 2, "LDD<8>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(67),
+ PXA_FUNCTION(0, 2, "LDD<9>"),
+ PXA_FUNCTION(1, 2, "LDD<9>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(68),
+ PXA_FUNCTION(0, 2, "LDD<10>"),
+ PXA_FUNCTION(1, 2, "LDD<10>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(69),
+ PXA_FUNCTION(0, 2, "LDD<11>"),
+ PXA_FUNCTION(1, 2, "LDD<11>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(70),
+ PXA_FUNCTION(0, 2, "LDD<12>"),
+ PXA_FUNCTION(1, 2, "LDD<12>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(71),
+ PXA_FUNCTION(0, 2, "LDD<13>"),
+ PXA_FUNCTION(1, 2, "LDD<13>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(72),
+ PXA_FUNCTION(0, 2, "LDD<14>"),
+ PXA_FUNCTION(1, 2, "LDD<14>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(73),
+ PXA_FUNCTION(0, 2, "LDD<15>"),
+ PXA_FUNCTION(1, 2, "LDD<15>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(74),
+ PXA_FUNCTION(1, 2, "L_FCLK_RD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(75),
+ PXA_FUNCTION(1, 2, "L_LCLK_A0")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(76),
+ PXA_FUNCTION(1, 2, "L_PCLK_WR")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(77),
+ PXA_FUNCTION(1, 2, "L_BIAS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(78),
+ PXA_FUNCTION(1, 1, "nPCE<2>"),
+ PXA_FUNCTION(1, 2, "nCS<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(79),
+ PXA_FUNCTION(1, 1, "PSKTSEL"),
+ PXA_FUNCTION(1, 2, "nCS<3>"),
+ PXA_FUNCTION(1, 3, "PWM_OUT<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(80),
+ PXA_FUNCTION(0, 1, "DREQ<1>"),
+ PXA_FUNCTION(0, 2, "MBREQ"),
+ PXA_FUNCTION(1, 2, "nCS<4>"),
+ PXA_FUNCTION(1, 3, "PWM_OUT<3>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(81),
+ PXA_FUNCTION(0, 2, "CIF_DD<0>"),
+ PXA_FUNCTION(1, 1, "SSPTXD3"),
+ PXA_FUNCTION(1, 2, "BB_OB_DAT<0>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(82),
+ PXA_FUNCTION(0, 1, "SSPRXD3"),
+ PXA_FUNCTION(0, 2, "BB_IB_DAT<0>"),
+ PXA_FUNCTION(0, 3, "CIF_DD<5>"),
+ PXA_FUNCTION(1, 3, "FFDTR")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(83),
+ PXA_FUNCTION(0, 1, "SSPSFRM3"),
+ PXA_FUNCTION(0, 2, "BB_IB_CLK"),
+ PXA_FUNCTION(0, 3, "CIF_DD<5>"),
+ PXA_FUNCTION(1, 1, "SSPSFRM3"),
+ PXA_FUNCTION(1, 2, "FFTXD"),
+ PXA_FUNCTION(1, 3, "FFRTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(84),
+ PXA_FUNCTION(0, 1, "SSPCLK3"),
+ PXA_FUNCTION(0, 2, "BB_IB_STB"),
+ PXA_FUNCTION(0, 3, "CIF_FV"),
+ PXA_FUNCTION(1, 1, "SSPCLK3"),
+ PXA_FUNCTION(1, 3, "CIF_FV")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(85),
+ PXA_FUNCTION(0, 1, "FFRXD"),
+ PXA_FUNCTION(0, 2, "DREQ<2>"),
+ PXA_FUNCTION(0, 3, "CIF_LV"),
+ PXA_FUNCTION(1, 1, "nPCE<1>"),
+ PXA_FUNCTION(1, 2, "BB_IB_WAIT"),
+ PXA_FUNCTION(1, 3, "CIF_LV")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(86),
+ PXA_FUNCTION(0, 1, "SSPRXD2"),
+ PXA_FUNCTION(0, 2, "LDD<16>"),
+ PXA_FUNCTION(0, 3, "USB_P3_5"),
+ PXA_FUNCTION(1, 1, "nPCE<1>"),
+ PXA_FUNCTION(1, 2, "LDD<16>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(87),
+ PXA_FUNCTION(0, 1, "nPCE<2>"),
+ PXA_FUNCTION(0, 2, "LDD<17>"),
+ PXA_FUNCTION(0, 3, "USB_P3_1"),
+ PXA_FUNCTION(1, 1, "SSPTXD2"),
+ PXA_FUNCTION(1, 2, "LDD<17>"),
+ PXA_FUNCTION(1, 3, "SSPSFRM2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(88),
+ PXA_FUNCTION(0, 1, "USBHPWR<1>"),
+ PXA_FUNCTION(0, 2, "SSPRXD2"),
+ PXA_FUNCTION(0, 3, "SSPSFRM2"),
+ PXA_FUNCTION(1, 2, "SSPTXD2"),
+ PXA_FUNCTION(1, 3, "SSPSFRM2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(89),
+ PXA_FUNCTION(0, 1, "SSPRXD3"),
+ PXA_FUNCTION(0, 3, "FFRI"),
+ PXA_FUNCTION(1, 1, "AC97_SYSCLK"),
+ PXA_FUNCTION(1, 2, "USBHPEN<1>"),
+ PXA_FUNCTION(1, 3, "SSPTXD2")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(90),
+ PXA_FUNCTION(0, 1, "KP_MKIN<5>"),
+ PXA_FUNCTION(0, 3, "USB_P3_5"),
+ PXA_FUNCTION(1, 1, "CIF_DD<4>"),
+ PXA_FUNCTION(1, 2, "nURST")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(91),
+ PXA_FUNCTION(0, 1, "KP_MKIN<6>"),
+ PXA_FUNCTION(0, 3, "USB_P3_1"),
+ PXA_FUNCTION(1, 1, "CIF_DD<5>"),
+ PXA_FUNCTION(1, 2, "UCLK")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(92),
+ PXA_FUNCTION(0, 1, "MMDAT<0>"),
+ PXA_FUNCTION(1, 1, "MMDAT<0>"),
+ PXA_FUNCTION(1, 2, "MSBS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(93),
+ PXA_FUNCTION(0, 1, "KP_DKIN<0>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<6>"),
+ PXA_FUNCTION(1, 1, "AC97_SDATA_OUT")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(94),
+ PXA_FUNCTION(0, 1, "KP_DKIN<1>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<5>"),
+ PXA_FUNCTION(1, 1, "AC97_SYNC")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(95),
+ PXA_FUNCTION(0, 1, "KP_DKIN<2>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<4>"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<6>"),
+ PXA_FUNCTION(1, 1, "AC97_RESET_n")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(96),
+ PXA_FUNCTION(0, 1, "KP_DKIN<3>"),
+ PXA_FUNCTION(0, 2, "MBREQ"),
+ PXA_FUNCTION(0, 3, "FFRXD"),
+ PXA_FUNCTION(1, 2, "DVAL<1>"),
+ PXA_FUNCTION(1, 3, "KP_MKOUT<6>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(97),
+ PXA_FUNCTION(0, 1, "KP_DKIN<4>"),
+ PXA_FUNCTION(0, 2, "DREQ<1>"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<3>"),
+ PXA_FUNCTION(1, 2, "MBGNT")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(98),
+ PXA_FUNCTION(0, 1, "KP_DKIN<5>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<0>"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<4>"),
+ PXA_FUNCTION(1, 1, "AC97_SYSCLK"),
+ PXA_FUNCTION(1, 3, "FFRTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(99),
+ PXA_FUNCTION(0, 1, "KP_DKIN<6>"),
+ PXA_FUNCTION(0, 2, "AC97_SDATA_IN_1"),
+ PXA_FUNCTION(0, 3, "KP_MKIN<5>"),
+ PXA_FUNCTION(1, 3, "FFTXD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(100),
+ PXA_FUNCTION(0, 1, "KP_MKIN<0>"),
+ PXA_FUNCTION(0, 2, "DREQ<2>"),
+ PXA_FUNCTION(0, 3, "FFCTS")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(101),
+ PXA_FUNCTION(0, 1, "KP_MKIN<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(102),
+ PXA_FUNCTION(0, 1, "KP_MKIN<2>"),
+ PXA_FUNCTION(0, 3, "FFRXD"),
+ PXA_FUNCTION(1, 1, "nPCE<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(103),
+ PXA_FUNCTION(0, 1, "CIF_DD<3>"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<0>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(104),
+ PXA_FUNCTION(0, 1, "CIF_DD<2>"),
+ PXA_FUNCTION(1, 1, "PSKTSEL"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(105),
+ PXA_FUNCTION(0, 1, "CIF_DD<1>"),
+ PXA_FUNCTION(1, 1, "nPCE<2>"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(106),
+ PXA_FUNCTION(0, 1, "CIF_DD<9>"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<3>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(107),
+ PXA_FUNCTION(0, 1, "CIF_DD<8>"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<4>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(108),
+ PXA_FUNCTION(0, 1, "CIF_DD<7>"),
+ PXA_FUNCTION(1, 1, "CHOUT<0>"),
+ PXA_FUNCTION(1, 2, "KP_MKOUT<5>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(109),
+ PXA_FUNCTION(0, 1, "MMDAT<1>"),
+ PXA_FUNCTION(0, 2, "MSSDIO"),
+ PXA_FUNCTION(1, 1, "MMDAT<1>"),
+ PXA_FUNCTION(1, 2, "MSSDIO")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(110),
+ PXA_FUNCTION(0, 1, "MMDAT<2>"),
+ PXA_FUNCTION(1, 1, "MMDAT<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(111),
+ PXA_FUNCTION(0, 1, "MMDAT<3>"),
+ PXA_FUNCTION(1, 1, "MMDAT<3>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(112),
+ PXA_FUNCTION(0, 1, "MMCMD"),
+ PXA_FUNCTION(0, 2, "nMSINS"),
+ PXA_FUNCTION(1, 1, "MMCMD")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(113),
+ PXA_FUNCTION(0, 3, "USB_P3_3"),
+ PXA_FUNCTION(1, 1, "I2S_SYSCLK"),
+ PXA_FUNCTION(1, 2, "AC97_RESET_n")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(114),
+ PXA_FUNCTION(0, 1, "CIF_DD<1>"),
+ PXA_FUNCTION(1, 1, "UEN"),
+ PXA_FUNCTION(1, 2, "UVS0")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(115),
+ PXA_FUNCTION(0, 1, "DREQ<0>"),
+ PXA_FUNCTION(0, 2, "CIF_DD<3>"),
+ PXA_FUNCTION(0, 3, "MBREQ"),
+ PXA_FUNCTION(1, 1, "UEN"),
+ PXA_FUNCTION(1, 2, "nUVS1"),
+ PXA_FUNCTION(1, 3, "PWM_OUT<1>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(116),
+ PXA_FUNCTION(0, 1, "CIF_DD<2>"),
+ PXA_FUNCTION(0, 2, "AC97_SDATA_IN_0"),
+ PXA_FUNCTION(0, 3, "UDET"),
+ PXA_FUNCTION(1, 1, "DVAL<0>"),
+ PXA_FUNCTION(1, 2, "nUVS2"),
+ PXA_FUNCTION(1, 3, "MBGNT")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(117),
+ PXA_FUNCTION(0, 1, "SCL"),
+ PXA_FUNCTION(1, 1, "SCL")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(118),
+ PXA_FUNCTION(0, 1, "SDA"),
+ PXA_FUNCTION(1, 1, "SDA")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(119),
+ PXA_FUNCTION(0, 1, "USBHPWR<2>")),
+ PXA_GPIO_PIN(PXA_PINCTRL_PIN(120),
+ PXA_FUNCTION(1, 2, "USBHPEN<2>")),
+};
+
+static int pxa27x_pinctrl_probe(struct platform_device *pdev)
+{
+ int ret, i;
+ void __iomem *base_af[8];
+ void __iomem *base_dir[4];
+ void __iomem *base_sleep[4];
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base_af[0] = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base_af[0]))
+ return PTR_ERR(base_af[0]);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ base_dir[0] = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base_dir[0]))
+ return PTR_ERR(base_dir[0]);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ base_dir[3] = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base_dir[3]))
+ return PTR_ERR(base_dir[3]);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+ base_sleep[0] = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base_sleep[0]))
+ return PTR_ERR(base_sleep[0]);
+
+ for (i = 0; i < ARRAY_SIZE(base_af); i++)
+ base_af[i] = base_af[0] + sizeof(base_af[0]) * i;
+ for (i = 0; i < 3; i++)
+ base_dir[i] = base_dir[0] + sizeof(base_dir[0]) * i;
+ for (i = 0; i < ARRAY_SIZE(base_sleep); i++)
+ base_sleep[i] = base_sleep[0] + sizeof(base_af[0]) * i;
+
+ ret = pxa2xx_pinctrl_init(pdev, pxa27x_pins, ARRAY_SIZE(pxa27x_pins),
+ base_af, base_dir, base_sleep);
+ return ret;
+}
+
+static const struct of_device_id pxa27x_pinctrl_match[] = {
+ { .compatible = "marvell,pxa27x-pinctrl", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, pxa27x_pinctrl_match);
+
+static struct platform_driver pxa27x_pinctrl_driver = {
+ .probe = pxa27x_pinctrl_probe,
+ .driver = {
+ .name = "pxa27x-pinctrl",
+ .of_match_table = pxa27x_pinctrl_match,
+ },
+};
+module_platform_driver(pxa27x_pinctrl_driver);
+
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_DESCRIPTION("Marvell PXA27x pinctrl driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
new file mode 100644
index 000000000000..d90e205cf809
--- /dev/null
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
@@ -0,0 +1,436 @@
+/*
+ * Marvell PXA2xx family pin control
+ *
+ * Copyright (C) 2015 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/module.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "../pinctrl-utils.h"
+#include "pinctrl-pxa2xx.h"
+
+static int pxa2xx_pctrl_get_groups_count(struct pinctrl_dev *pctldev)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+
+ return pctl->ngroups;
+}
+
+static const char *pxa2xx_pctrl_get_group_name(struct pinctrl_dev *pctldev,
+ unsigned tgroup)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_group *group = pctl->groups + tgroup;
+
+ return group->name;
+}
+
+static int pxa2xx_pctrl_get_group_pins(struct pinctrl_dev *pctldev,
+ unsigned tgroup,
+ const unsigned **pins,
+ unsigned *num_pins)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_group *group = pctl->groups + tgroup;
+
+ *pins = (unsigned *)&group->pin;
+ *num_pins = 1;
+
+ return 0;
+}
+
+static const struct pinctrl_ops pxa2xx_pctl_ops = {
+#ifdef CONFIG_OF
+ .dt_node_to_map = pinconf_generic_dt_node_to_map_all,
+ .dt_free_map = pinctrl_utils_dt_free_map,
+#endif
+ .get_groups_count = pxa2xx_pctrl_get_groups_count,
+ .get_group_name = pxa2xx_pctrl_get_group_name,
+ .get_group_pins = pxa2xx_pctrl_get_group_pins,
+};
+
+static struct pxa_desc_function *
+pxa_desc_by_func_group(struct pxa_pinctrl *pctl, const char *pin_name,
+ const char *func_name)
+{
+ int i;
+ struct pxa_desc_function *df;
+
+ for (i = 0; i < pctl->npins; i++) {
+ const struct pxa_desc_pin *pin = pctl->ppins + i;
+
+ if (!strcmp(pin->pin.name, pin_name))
+ for (df = pin->functions; df->name; df++)
+ if (!strcmp(df->name, func_name))
+ return df;
+ }
+
+ return NULL;
+}
+
+static int pxa2xx_pmx_gpio_set_direction(struct pinctrl_dev *pctldev,
+ struct pinctrl_gpio_range *range,
+ unsigned pin,
+ bool input)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ unsigned long flags;
+ uint32_t val;
+ void __iomem *gpdr;
+
+ gpdr = pctl->base_gpdr[pin / 32];
+ dev_dbg(pctl->dev, "set_direction(pin=%d): dir=%d\n",
+ pin, !input);
+
+ spin_lock_irqsave(&pctl->lock, flags);
+
+ val = readl_relaxed(gpdr);
+ val = (val & ~BIT(pin % 32)) | (input ? 0 : BIT(pin % 32));
+ writel_relaxed(val, gpdr);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
+
+ return 0;
+}
+
+static const char *pxa2xx_pmx_get_func_name(struct pinctrl_dev *pctldev,
+ unsigned function)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_function *pf = pctl->functions + function;
+
+ return pf->name;
+}
+
+static int pxa2xx_get_functions_count(struct pinctrl_dev *pctldev)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+
+ return pctl->nfuncs;
+}
+
+static int pxa2xx_pmx_get_func_groups(struct pinctrl_dev *pctldev,
+ unsigned function,
+ const char * const **groups,
+ unsigned * const num_groups)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_function *pf = pctl->functions + function;
+
+ *groups = pf->groups;
+ *num_groups = pf->ngroups;
+
+ return 0;
+}
+
+static int pxa2xx_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned function,
+ unsigned tgroup)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_group *group = pctl->groups + tgroup;
+ struct pxa_desc_function *df;
+ int pin, shift;
+ unsigned long flags;
+ void __iomem *gafr, *gpdr;
+ u32 val;
+
+
+ df = pxa_desc_by_func_group(pctl, group->name,
+ (pctl->functions + function)->name);
+ if (!df)
+ return -EINVAL;
+
+ pin = group->pin;
+ gafr = pctl->base_gafr[pin / 16];
+ gpdr = pctl->base_gpdr[pin / 32];
+ shift = (pin % 16) << 1;
+ dev_dbg(pctl->dev, "set_mux(pin=%d): af=%d dir=%d\n",
+ pin, df->muxval >> 1, df->muxval & 0x1);
+
+ spin_lock_irqsave(&pctl->lock, flags);
+
+ val = readl_relaxed(gafr);
+ val = (val & ~(0x3 << shift)) | ((df->muxval >> 1) << shift);
+ writel_relaxed(val, gafr);
+
+ val = readl_relaxed(gpdr);
+ val = (val & ~BIT(pin % 32)) | ((df->muxval & 1) ? BIT(pin % 32) : 0);
+ writel_relaxed(val, gpdr);
+
+ spin_unlock_irqrestore(&pctl->lock, flags);
+
+ return 0;
+}
+static const struct pinmux_ops pxa2xx_pinmux_ops = {
+ .get_functions_count = pxa2xx_get_functions_count,
+ .get_function_name = pxa2xx_pmx_get_func_name,
+ .get_function_groups = pxa2xx_pmx_get_func_groups,
+ .set_mux = pxa2xx_pmx_set_mux,
+ .gpio_set_direction = pxa2xx_pmx_gpio_set_direction,
+};
+
+static int pxa2xx_pconf_group_get(struct pinctrl_dev *pctldev,
+ unsigned group,
+ unsigned long *config)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_group *g = pctl->groups + group;
+ unsigned long flags;
+ unsigned pin = g->pin;
+ void __iomem *pgsr = pctl->base_pgsr[pin / 32];
+ u32 val;
+
+ spin_lock_irqsave(&pctl->lock, flags);
+ val = readl_relaxed(pgsr) & BIT(pin % 32);
+ *config = val ? PIN_CONFIG_LOW_POWER_MODE : 0;
+ spin_unlock_irqrestore(&pctl->lock, flags);
+
+ dev_dbg(pctl->dev, "get sleep gpio state(pin=%d) %d\n",
+ pin, !!val);
+ return 0;
+}
+
+static int pxa2xx_pconf_group_set(struct pinctrl_dev *pctldev,
+ unsigned group,
+ unsigned long *configs,
+ unsigned num_configs)
+{
+ struct pxa_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+ struct pxa_pinctrl_group *g = pctl->groups + group;
+ unsigned long flags;
+ unsigned pin = g->pin;
+ void __iomem *pgsr = pctl->base_pgsr[pin / 32];
+ int i, is_set = 0;
+ u32 val;
+
+ for (i = 0; i < num_configs; i++) {
+ switch (pinconf_to_config_param(configs[i])) {
+ case PIN_CONFIG_LOW_POWER_MODE:
+ is_set = pinconf_to_config_argument(configs[i]);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ dev_dbg(pctl->dev, "set sleep gpio state(pin=%d) %d\n",
+ pin, is_set);
+
+ spin_lock_irqsave(&pctl->lock, flags);
+ val = readl_relaxed(pgsr);
+ val = (val & ~BIT(pin % 32)) | (is_set ? BIT(pin % 32) : 0);
+ writel_relaxed(val, pgsr);
+ spin_unlock_irqrestore(&pctl->lock, flags);
+
+ return 0;
+}
+
+static const struct pinconf_ops pxa2xx_pconf_ops = {
+ .pin_config_group_get = pxa2xx_pconf_group_get,
+ .pin_config_group_set = pxa2xx_pconf_group_set,
+ .is_generic = true,
+};
+
+static struct pinctrl_desc pxa2xx_pinctrl_desc = {
+ .confops = &pxa2xx_pconf_ops,
+ .pctlops = &pxa2xx_pctl_ops,
+ .pmxops = &pxa2xx_pinmux_ops,
+};
+
+static const struct pxa_pinctrl_function *
+pxa2xx_find_function(struct pxa_pinctrl *pctl, const char *fname,
+ const struct pxa_pinctrl_function *functions)
+{
+ const struct pxa_pinctrl_function *func;
+
+ for (func = functions; func->name; func++)
+ if (!strcmp(fname, func->name))
+ return func;
+
+ return NULL;
+}
+
+static int pxa2xx_build_functions(struct pxa_pinctrl *pctl)
+{
+ int i;
+ struct pxa_pinctrl_function *functions;
+ struct pxa_desc_function *df;
+
+ /*
+ * Each pin can have at most 6 alternate functions, and 2 gpio functions
+ * which are common to each pin. As there are more than 2 pins without
+ * alternate function, 6 * npins is an absolute high limit of the number
+ * of functions.
+ */
+ functions = devm_kcalloc(pctl->dev, pctl->npins * 6,
+ sizeof(*functions), GFP_KERNEL);
+ if (!functions)
+ return -ENOMEM;
+
+ for (i = 0; i < pctl->npins; i++)
+ for (df = pctl->ppins[i].functions; df->name; df++)
+ if (!pxa2xx_find_function(pctl, df->name, functions))
+ (functions + pctl->nfuncs++)->name = df->name;
+ pctl->functions = devm_kmemdup(pctl->dev, functions,
+ pctl->nfuncs * sizeof(*functions),
+ GFP_KERNEL);
+ if (!pctl->functions)
+ return -ENOMEM;
+
+ devm_kfree(pctl->dev, functions);
+ return 0;
+}
+
+static int pxa2xx_build_groups(struct pxa_pinctrl *pctl)
+{
+ int i, j, ngroups;
+ struct pxa_pinctrl_function *func;
+ struct pxa_desc_function *df;
+ char **gtmp;
+
+ gtmp = devm_kmalloc_array(pctl->dev, pctl->npins, sizeof(*gtmp),
+ GFP_KERNEL);
+ if (!gtmp)
+ return -ENOMEM;
+
+ for (i = 0; i < pctl->nfuncs; i++) {
+ ngroups = 0;
+ for (j = 0; j < pctl->npins; j++)
+ for (df = pctl->ppins[j].functions; df->name;
+ df++)
+ if (!strcmp(pctl->functions[i].name,
+ df->name))
+ gtmp[ngroups++] = (char *)
+ pctl->ppins[j].pin.name;
+ func = pctl->functions + i;
+ func->ngroups = ngroups;
+ func->groups =
+ devm_kmalloc_array(pctl->dev, ngroups,
+ sizeof(char *), GFP_KERNEL);
+ if (!func->groups)
+ return -ENOMEM;
+
+ memcpy(func->groups, gtmp, ngroups * sizeof(*gtmp));
+ }
+
+ devm_kfree(pctl->dev, gtmp);
+ return 0;
+}
+
+static int pxa2xx_build_state(struct pxa_pinctrl *pctl,
+ const struct pxa_desc_pin *ppins, int npins)
+{
+ struct pxa_pinctrl_group *group;
+ struct pinctrl_pin_desc *pins;
+ int ret, i;
+
+ pctl->npins = npins;
+ pctl->ppins = ppins;
+ pctl->ngroups = npins;
+
+ pctl->desc.npins = npins;
+ pins = devm_kcalloc(pctl->dev, npins, sizeof(*pins), GFP_KERNEL);
+ if (!pins)
+ return -ENOMEM;
+
+ pctl->desc.pins = pins;
+ for (i = 0; i < npins; i++)
+ pins[i] = ppins[i].pin;
+
+ pctl->groups = devm_kmalloc_array(pctl->dev, pctl->ngroups,
+ sizeof(*pctl->groups), GFP_KERNEL);
+ if (!pctl->groups)
+ return -ENOMEM;
+
+ for (i = 0; i < npins; i++) {
+ group = pctl->groups + i;
+ group->name = ppins[i].pin.name;
+ group->pin = ppins[i].pin.number;
+ }
+
+ ret = pxa2xx_build_functions(pctl);
+ if (ret)
+ return ret;
+
+ ret = pxa2xx_build_groups(pctl);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int pxa2xx_pinctrl_init(struct platform_device *pdev,
+ const struct pxa_desc_pin *ppins, int npins,
+ void __iomem *base_gafr[], void __iomem *base_gpdr[],
+ void __iomem *base_pgsr[])
+{
+ struct pxa_pinctrl *pctl;
+ int ret, i, maxpin = 0;
+
+ for (i = 0; i < npins; i++)
+ maxpin = max_t(int, ppins[i].pin.number, maxpin);
+
+ pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL);
+ if (!pctl)
+ return -ENOMEM;
+ pctl->base_gafr = devm_kcalloc(&pdev->dev, roundup(maxpin, 16),
+ sizeof(*pctl->base_gafr), GFP_KERNEL);
+ pctl->base_gpdr = devm_kcalloc(&pdev->dev, roundup(maxpin, 32),
+ sizeof(*pctl->base_gpdr), GFP_KERNEL);
+ pctl->base_pgsr = devm_kcalloc(&pdev->dev, roundup(maxpin, 32),
+ sizeof(*pctl->base_pgsr), GFP_KERNEL);
+ if (!pctl->base_gafr || !pctl->base_gpdr || !pctl->base_pgsr)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, pctl);
+ spin_lock_init(&pctl->lock);
+
+ pctl->dev = &pdev->dev;
+ pctl->desc = pxa2xx_pinctrl_desc;
+ pctl->desc.name = dev_name(&pdev->dev);
+ pctl->desc.owner = THIS_MODULE;
+
+ for (i = 0; i < roundup(maxpin, 16); i += 16)
+ pctl->base_gafr[i / 16] = base_gafr[i / 16];
+ for (i = 0; i < roundup(maxpin, 32); i += 32) {
+ pctl->base_gpdr[i / 32] = base_gpdr[i / 32];
+ pctl->base_pgsr[i / 32] = base_pgsr[i / 32];
+ }
+
+ ret = pxa2xx_build_state(pctl, ppins, npins);
+ if (ret)
+ return ret;
+
+ pctl->pctl_dev = pinctrl_register(&pctl->desc, &pdev->dev, pctl);
+ if (IS_ERR(pctl->pctl_dev)) {
+ dev_err(&pdev->dev, "couldn't register pinctrl driver\n");
+ return PTR_ERR(pctl->pctl_dev);
+ }
+
+ dev_info(&pdev->dev, "initialized pxa2xx pinctrl driver\n");
+
+ return 0;
+}
+
+int pxa2xx_pinctrl_exit(struct platform_device *pdev)
+{
+ struct pxa_pinctrl *pctl = platform_get_drvdata(pdev);
+
+ pinctrl_unregister(pctl->pctl_dev);
+ return 0;
+}
diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.h b/drivers/pinctrl/pxa/pinctrl-pxa2xx.h
new file mode 100644
index 000000000000..8be1e0b79751
--- /dev/null
+++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.h
@@ -0,0 +1,92 @@
+/*
+ * Marvell PXA2xx family pin control
+ *
+ * Copyright (C) 2015 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ */
+
+#ifndef __PINCTRL_PXA_H
+#define __PINCTRL_PXA_H
+
+#define PXA_FUNCTION(_dir, _af, _name) \
+ { \
+ .name = _name, \
+ .muxval = (_dir | (_af << 1)), \
+ }
+
+#define PXA_PIN(_pin, funcs...) \
+ { \
+ .pin = _pin, \
+ .functions = (struct pxa_desc_function[]){ \
+ funcs, { } }, \
+ }
+
+#define PXA_GPIO_PIN(_pin, funcs...) \
+ { \
+ .pin = _pin, \
+ .functions = (struct pxa_desc_function[]){ \
+ PXA_FUNCTION(0, 0, "gpio_in"), \
+ PXA_FUNCTION(1, 0, "gpio_out"), \
+ funcs, { } }, \
+ }
+
+#define PXA_GPIO_ONLY_PIN(_pin) \
+ { \
+ .pin = _pin, \
+ .functions = (struct pxa_desc_function[]){ \
+ PXA_FUNCTION(0, 0, "gpio_in"), \
+ PXA_FUNCTION(1, 0, "gpio_out"), \
+ { } }, \
+ }
+
+#define PXA_PINCTRL_PIN(pin) \
+ PINCTRL_PIN(pin, "P" #pin)
+
+struct pxa_desc_function {
+ const char *name;
+ u8 muxval;
+};
+
+struct pxa_desc_pin {
+ struct pinctrl_pin_desc pin;
+ struct pxa_desc_function *functions;
+};
+
+struct pxa_pinctrl_group {
+ const char *name;
+ unsigned pin;
+};
+
+struct pxa_pinctrl_function {
+ const char *name;
+ const char **groups;
+ unsigned ngroups;
+};
+
+struct pxa_pinctrl {
+ spinlock_t lock;
+ void __iomem **base_gafr;
+ void __iomem **base_gpdr;
+ void __iomem **base_pgsr;
+ struct device *dev;
+ struct pinctrl_desc desc;
+ struct pinctrl_dev *pctl_dev;
+ unsigned npins;
+ const struct pxa_desc_pin *ppins;
+ unsigned ngroups;
+ struct pxa_pinctrl_group *groups;
+ unsigned nfuncs;
+ struct pxa_pinctrl_function *functions;
+ char *name;
+};
+
+int pxa2xx_pinctrl_init(struct platform_device *pdev,
+ const struct pxa_desc_pin *ppins, int npins,
+ void __iomem *base_gafr[], void __iomem *base_gpdr[],
+ void __iomem *base_gpsr[]);
+
+#endif /* __PINCTRL_PXA_H */
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 383263a92e59..eeac8cba8a21 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -63,6 +63,14 @@ config PINCTRL_MSM8916
This is the pinctrl, pinmux, pinconf and gpiolib driver for the
Qualcomm TLMM block found on the Qualcomm 8916 platform.
+config PINCTRL_MSM8996
+ tristate "Qualcomm MSM8996 pin controller driver"
+ depends on GPIOLIB && OF
+ select PINCTRL_MSM
+ help
+ This is the pinctrl, pinmux, pinconf and gpiolib driver for the
+ Qualcomm TLMM block found in the Qualcomm MSM8996 platform.
+
config PINCTRL_QDF2XXX
tristate "Qualcomm Technologies QDF2xxx pin controller driver"
depends on GPIOLIB && ACPI
diff --git a/drivers/pinctrl/qcom/Makefile b/drivers/pinctrl/qcom/Makefile
index 13b190e72c21..dfb50a9fe04a 100644
--- a/drivers/pinctrl/qcom/Makefile
+++ b/drivers/pinctrl/qcom/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_PINCTRL_MSM8660) += pinctrl-msm8660.o
obj-$(CONFIG_PINCTRL_MSM8960) += pinctrl-msm8960.o
obj-$(CONFIG_PINCTRL_MSM8X74) += pinctrl-msm8x74.o
obj-$(CONFIG_PINCTRL_MSM8916) += pinctrl-msm8916.o
+obj-$(CONFIG_PINCTRL_MSM8996) += pinctrl-msm8996.o
obj-$(CONFIG_PINCTRL_QDF2XXX) += pinctrl-qdf2xxx.o
obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-gpio.o
obj-$(CONFIG_PINCTRL_QCOM_SPMI_PMIC) += pinctrl-spmi-mpp.o
diff --git a/drivers/pinctrl/qcom/pinctrl-msm8996.c b/drivers/pinctrl/qcom/pinctrl-msm8996.c
new file mode 100644
index 000000000000..c257927bea05
--- /dev/null
+++ b/drivers/pinctrl/qcom/pinctrl-msm8996.c
@@ -0,0 +1,1942 @@
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-msm.h"
+
+#define FUNCTION(fname) \
+ [msm_mux_##fname] = { \
+ .name = #fname, \
+ .groups = fname##_groups, \
+ .ngroups = ARRAY_SIZE(fname##_groups), \
+ }
+
+#define REG_BASE 0x0
+#define REG_SIZE 0x1000
+#define PINGROUP(id, f1, f2, f3, f4, f5, f6, f7, f8, f9) \
+ { \
+ .name = "gpio" #id, \
+ .pins = gpio##id##_pins, \
+ .npins = (unsigned)ARRAY_SIZE(gpio##id##_pins), \
+ .funcs = (int[]){ \
+ msm_mux_gpio, /* gpio mode */ \
+ msm_mux_##f1, \
+ msm_mux_##f2, \
+ msm_mux_##f3, \
+ msm_mux_##f4, \
+ msm_mux_##f5, \
+ msm_mux_##f6, \
+ msm_mux_##f7, \
+ msm_mux_##f8, \
+ msm_mux_##f9 \
+ }, \
+ .nfuncs = 10, \
+ .ctl_reg = REG_BASE + REG_SIZE * id, \
+ .io_reg = REG_BASE + 0x4 + REG_SIZE * id, \
+ .intr_cfg_reg = REG_BASE + 0x8 + REG_SIZE * id, \
+ .intr_status_reg = REG_BASE + 0xc + REG_SIZE * id, \
+ .intr_target_reg = REG_BASE + 0x8 + REG_SIZE * id, \
+ .mux_bit = 2, \
+ .pull_bit = 0, \
+ .drv_bit = 6, \
+ .oe_bit = 9, \
+ .in_bit = 0, \
+ .out_bit = 1, \
+ .intr_enable_bit = 0, \
+ .intr_status_bit = 0, \
+ .intr_target_bit = 5, \
+ .intr_target_kpss_val = 3, \
+ .intr_raw_status_bit = 4, \
+ .intr_polarity_bit = 1, \
+ .intr_detection_bit = 2, \
+ .intr_detection_width = 2, \
+ }
+
+#define SDC_QDSD_PINGROUP(pg_name, ctl, pull, drv) \
+ { \
+ .name = #pg_name, \
+ .pins = pg_name##_pins, \
+ .npins = (unsigned)ARRAY_SIZE(pg_name##_pins), \
+ .ctl_reg = ctl, \
+ .io_reg = 0, \
+ .intr_cfg_reg = 0, \
+ .intr_status_reg = 0, \
+ .intr_target_reg = 0, \
+ .mux_bit = -1, \
+ .pull_bit = pull, \
+ .drv_bit = drv, \
+ .oe_bit = -1, \
+ .in_bit = -1, \
+ .out_bit = -1, \
+ .intr_enable_bit = -1, \
+ .intr_status_bit = -1, \
+ .intr_target_bit = -1, \
+ .intr_raw_status_bit = -1, \
+ .intr_polarity_bit = -1, \
+ .intr_detection_bit = -1, \
+ .intr_detection_width = -1, \
+ }
+static const struct pinctrl_pin_desc msm8996_pins[] = {
+ PINCTRL_PIN(0, "GPIO_0"),
+ PINCTRL_PIN(1, "GPIO_1"),
+ PINCTRL_PIN(2, "GPIO_2"),
+ PINCTRL_PIN(3, "GPIO_3"),
+ PINCTRL_PIN(4, "GPIO_4"),
+ PINCTRL_PIN(5, "GPIO_5"),
+ PINCTRL_PIN(6, "GPIO_6"),
+ PINCTRL_PIN(7, "GPIO_7"),
+ PINCTRL_PIN(8, "GPIO_8"),
+ PINCTRL_PIN(9, "GPIO_9"),
+ PINCTRL_PIN(10, "GPIO_10"),
+ PINCTRL_PIN(11, "GPIO_11"),
+ PINCTRL_PIN(12, "GPIO_12"),
+ PINCTRL_PIN(13, "GPIO_13"),
+ PINCTRL_PIN(14, "GPIO_14"),
+ PINCTRL_PIN(15, "GPIO_15"),
+ PINCTRL_PIN(16, "GPIO_16"),
+ PINCTRL_PIN(17, "GPIO_17"),
+ PINCTRL_PIN(18, "GPIO_18"),
+ PINCTRL_PIN(19, "GPIO_19"),
+ PINCTRL_PIN(20, "GPIO_20"),
+ PINCTRL_PIN(21, "GPIO_21"),
+ PINCTRL_PIN(22, "GPIO_22"),
+ PINCTRL_PIN(23, "GPIO_23"),
+ PINCTRL_PIN(24, "GPIO_24"),
+ PINCTRL_PIN(25, "GPIO_25"),
+ PINCTRL_PIN(26, "GPIO_26"),
+ PINCTRL_PIN(27, "GPIO_27"),
+ PINCTRL_PIN(28, "GPIO_28"),
+ PINCTRL_PIN(29, "GPIO_29"),
+ PINCTRL_PIN(30, "GPIO_30"),
+ PINCTRL_PIN(31, "GPIO_31"),
+ PINCTRL_PIN(32, "GPIO_32"),
+ PINCTRL_PIN(33, "GPIO_33"),
+ PINCTRL_PIN(34, "GPIO_34"),
+ PINCTRL_PIN(35, "GPIO_35"),
+ PINCTRL_PIN(36, "GPIO_36"),
+ PINCTRL_PIN(37, "GPIO_37"),
+ PINCTRL_PIN(38, "GPIO_38"),
+ PINCTRL_PIN(39, "GPIO_39"),
+ PINCTRL_PIN(40, "GPIO_40"),
+ PINCTRL_PIN(41, "GPIO_41"),
+ PINCTRL_PIN(42, "GPIO_42"),
+ PINCTRL_PIN(43, "GPIO_43"),
+ PINCTRL_PIN(44, "GPIO_44"),
+ PINCTRL_PIN(45, "GPIO_45"),
+ PINCTRL_PIN(46, "GPIO_46"),
+ PINCTRL_PIN(47, "GPIO_47"),
+ PINCTRL_PIN(48, "GPIO_48"),
+ PINCTRL_PIN(49, "GPIO_49"),
+ PINCTRL_PIN(50, "GPIO_50"),
+ PINCTRL_PIN(51, "GPIO_51"),
+ PINCTRL_PIN(52, "GPIO_52"),
+ PINCTRL_PIN(53, "GPIO_53"),
+ PINCTRL_PIN(54, "GPIO_54"),
+ PINCTRL_PIN(55, "GPIO_55"),
+ PINCTRL_PIN(56, "GPIO_56"),
+ PINCTRL_PIN(57, "GPIO_57"),
+ PINCTRL_PIN(58, "GPIO_58"),
+ PINCTRL_PIN(59, "GPIO_59"),
+ PINCTRL_PIN(60, "GPIO_60"),
+ PINCTRL_PIN(61, "GPIO_61"),
+ PINCTRL_PIN(62, "GPIO_62"),
+ PINCTRL_PIN(63, "GPIO_63"),
+ PINCTRL_PIN(64, "GPIO_64"),
+ PINCTRL_PIN(65, "GPIO_65"),
+ PINCTRL_PIN(66, "GPIO_66"),
+ PINCTRL_PIN(67, "GPIO_67"),
+ PINCTRL_PIN(68, "GPIO_68"),
+ PINCTRL_PIN(69, "GPIO_69"),
+ PINCTRL_PIN(70, "GPIO_70"),
+ PINCTRL_PIN(71, "GPIO_71"),
+ PINCTRL_PIN(72, "GPIO_72"),
+ PINCTRL_PIN(73, "GPIO_73"),
+ PINCTRL_PIN(74, "GPIO_74"),
+ PINCTRL_PIN(75, "GPIO_75"),
+ PINCTRL_PIN(76, "GPIO_76"),
+ PINCTRL_PIN(77, "GPIO_77"),
+ PINCTRL_PIN(78, "GPIO_78"),
+ PINCTRL_PIN(79, "GPIO_79"),
+ PINCTRL_PIN(80, "GPIO_80"),
+ PINCTRL_PIN(81, "GPIO_81"),
+ PINCTRL_PIN(82, "GPIO_82"),
+ PINCTRL_PIN(83, "GPIO_83"),
+ PINCTRL_PIN(84, "GPIO_84"),
+ PINCTRL_PIN(85, "GPIO_85"),
+ PINCTRL_PIN(86, "GPIO_86"),
+ PINCTRL_PIN(87, "GPIO_87"),
+ PINCTRL_PIN(88, "GPIO_88"),
+ PINCTRL_PIN(89, "GPIO_89"),
+ PINCTRL_PIN(90, "GPIO_90"),
+ PINCTRL_PIN(91, "GPIO_91"),
+ PINCTRL_PIN(92, "GPIO_92"),
+ PINCTRL_PIN(93, "GPIO_93"),
+ PINCTRL_PIN(94, "GPIO_94"),
+ PINCTRL_PIN(95, "GPIO_95"),
+ PINCTRL_PIN(96, "GPIO_96"),
+ PINCTRL_PIN(97, "GPIO_97"),
+ PINCTRL_PIN(98, "GPIO_98"),
+ PINCTRL_PIN(99, "GPIO_99"),
+ PINCTRL_PIN(100, "GPIO_100"),
+ PINCTRL_PIN(101, "GPIO_101"),
+ PINCTRL_PIN(102, "GPIO_102"),
+ PINCTRL_PIN(103, "GPIO_103"),
+ PINCTRL_PIN(104, "GPIO_104"),
+ PINCTRL_PIN(105, "GPIO_105"),
+ PINCTRL_PIN(106, "GPIO_106"),
+ PINCTRL_PIN(107, "GPIO_107"),
+ PINCTRL_PIN(108, "GPIO_108"),
+ PINCTRL_PIN(109, "GPIO_109"),
+ PINCTRL_PIN(110, "GPIO_110"),
+ PINCTRL_PIN(111, "GPIO_111"),
+ PINCTRL_PIN(112, "GPIO_112"),
+ PINCTRL_PIN(113, "GPIO_113"),
+ PINCTRL_PIN(114, "GPIO_114"),
+ PINCTRL_PIN(115, "GPIO_115"),
+ PINCTRL_PIN(116, "GPIO_116"),
+ PINCTRL_PIN(117, "GPIO_117"),
+ PINCTRL_PIN(118, "GPIO_118"),
+ PINCTRL_PIN(119, "GPIO_119"),
+ PINCTRL_PIN(120, "GPIO_120"),
+ PINCTRL_PIN(121, "GPIO_121"),
+ PINCTRL_PIN(122, "GPIO_122"),
+ PINCTRL_PIN(123, "GPIO_123"),
+ PINCTRL_PIN(124, "GPIO_124"),
+ PINCTRL_PIN(125, "GPIO_125"),
+ PINCTRL_PIN(126, "GPIO_126"),
+ PINCTRL_PIN(127, "GPIO_127"),
+ PINCTRL_PIN(128, "GPIO_128"),
+ PINCTRL_PIN(129, "GPIO_129"),
+ PINCTRL_PIN(130, "GPIO_130"),
+ PINCTRL_PIN(131, "GPIO_131"),
+ PINCTRL_PIN(132, "GPIO_132"),
+ PINCTRL_PIN(133, "GPIO_133"),
+ PINCTRL_PIN(134, "GPIO_134"),
+ PINCTRL_PIN(135, "GPIO_135"),
+ PINCTRL_PIN(136, "GPIO_136"),
+ PINCTRL_PIN(137, "GPIO_137"),
+ PINCTRL_PIN(138, "GPIO_138"),
+ PINCTRL_PIN(139, "GPIO_139"),
+ PINCTRL_PIN(140, "GPIO_140"),
+ PINCTRL_PIN(141, "GPIO_141"),
+ PINCTRL_PIN(142, "GPIO_142"),
+ PINCTRL_PIN(143, "GPIO_143"),
+ PINCTRL_PIN(144, "GPIO_144"),
+ PINCTRL_PIN(145, "GPIO_145"),
+ PINCTRL_PIN(146, "GPIO_146"),
+ PINCTRL_PIN(147, "GPIO_147"),
+ PINCTRL_PIN(148, "GPIO_148"),
+ PINCTRL_PIN(149, "GPIO_149"),
+ PINCTRL_PIN(150, "SDC1_CLK"),
+ PINCTRL_PIN(151, "SDC1_CMD"),
+ PINCTRL_PIN(152, "SDC1_DATA"),
+ PINCTRL_PIN(153, "SDC2_CLK"),
+ PINCTRL_PIN(154, "SDC2_CMD"),
+ PINCTRL_PIN(155, "SDC2_DATA"),
+ PINCTRL_PIN(156, "SDC1_RCLK"),
+};
+
+#define DECLARE_MSM_GPIO_PINS(pin) \
+ static const unsigned int gpio##pin##_pins[] = { pin }
+DECLARE_MSM_GPIO_PINS(0);
+DECLARE_MSM_GPIO_PINS(1);
+DECLARE_MSM_GPIO_PINS(2);
+DECLARE_MSM_GPIO_PINS(3);
+DECLARE_MSM_GPIO_PINS(4);
+DECLARE_MSM_GPIO_PINS(5);
+DECLARE_MSM_GPIO_PINS(6);
+DECLARE_MSM_GPIO_PINS(7);
+DECLARE_MSM_GPIO_PINS(8);
+DECLARE_MSM_GPIO_PINS(9);
+DECLARE_MSM_GPIO_PINS(10);
+DECLARE_MSM_GPIO_PINS(11);
+DECLARE_MSM_GPIO_PINS(12);
+DECLARE_MSM_GPIO_PINS(13);
+DECLARE_MSM_GPIO_PINS(14);
+DECLARE_MSM_GPIO_PINS(15);
+DECLARE_MSM_GPIO_PINS(16);
+DECLARE_MSM_GPIO_PINS(17);
+DECLARE_MSM_GPIO_PINS(18);
+DECLARE_MSM_GPIO_PINS(19);
+DECLARE_MSM_GPIO_PINS(20);
+DECLARE_MSM_GPIO_PINS(21);
+DECLARE_MSM_GPIO_PINS(22);
+DECLARE_MSM_GPIO_PINS(23);
+DECLARE_MSM_GPIO_PINS(24);
+DECLARE_MSM_GPIO_PINS(25);
+DECLARE_MSM_GPIO_PINS(26);
+DECLARE_MSM_GPIO_PINS(27);
+DECLARE_MSM_GPIO_PINS(28);
+DECLARE_MSM_GPIO_PINS(29);
+DECLARE_MSM_GPIO_PINS(30);
+DECLARE_MSM_GPIO_PINS(31);
+DECLARE_MSM_GPIO_PINS(32);
+DECLARE_MSM_GPIO_PINS(33);
+DECLARE_MSM_GPIO_PINS(34);
+DECLARE_MSM_GPIO_PINS(35);
+DECLARE_MSM_GPIO_PINS(36);
+DECLARE_MSM_GPIO_PINS(37);
+DECLARE_MSM_GPIO_PINS(38);
+DECLARE_MSM_GPIO_PINS(39);
+DECLARE_MSM_GPIO_PINS(40);
+DECLARE_MSM_GPIO_PINS(41);
+DECLARE_MSM_GPIO_PINS(42);
+DECLARE_MSM_GPIO_PINS(43);
+DECLARE_MSM_GPIO_PINS(44);
+DECLARE_MSM_GPIO_PINS(45);
+DECLARE_MSM_GPIO_PINS(46);
+DECLARE_MSM_GPIO_PINS(47);
+DECLARE_MSM_GPIO_PINS(48);
+DECLARE_MSM_GPIO_PINS(49);
+DECLARE_MSM_GPIO_PINS(50);
+DECLARE_MSM_GPIO_PINS(51);
+DECLARE_MSM_GPIO_PINS(52);
+DECLARE_MSM_GPIO_PINS(53);
+DECLARE_MSM_GPIO_PINS(54);
+DECLARE_MSM_GPIO_PINS(55);
+DECLARE_MSM_GPIO_PINS(56);
+DECLARE_MSM_GPIO_PINS(57);
+DECLARE_MSM_GPIO_PINS(58);
+DECLARE_MSM_GPIO_PINS(59);
+DECLARE_MSM_GPIO_PINS(60);
+DECLARE_MSM_GPIO_PINS(61);
+DECLARE_MSM_GPIO_PINS(62);
+DECLARE_MSM_GPIO_PINS(63);
+DECLARE_MSM_GPIO_PINS(64);
+DECLARE_MSM_GPIO_PINS(65);
+DECLARE_MSM_GPIO_PINS(66);
+DECLARE_MSM_GPIO_PINS(67);
+DECLARE_MSM_GPIO_PINS(68);
+DECLARE_MSM_GPIO_PINS(69);
+DECLARE_MSM_GPIO_PINS(70);
+DECLARE_MSM_GPIO_PINS(71);
+DECLARE_MSM_GPIO_PINS(72);
+DECLARE_MSM_GPIO_PINS(73);
+DECLARE_MSM_GPIO_PINS(74);
+DECLARE_MSM_GPIO_PINS(75);
+DECLARE_MSM_GPIO_PINS(76);
+DECLARE_MSM_GPIO_PINS(77);
+DECLARE_MSM_GPIO_PINS(78);
+DECLARE_MSM_GPIO_PINS(79);
+DECLARE_MSM_GPIO_PINS(80);
+DECLARE_MSM_GPIO_PINS(81);
+DECLARE_MSM_GPIO_PINS(82);
+DECLARE_MSM_GPIO_PINS(83);
+DECLARE_MSM_GPIO_PINS(84);
+DECLARE_MSM_GPIO_PINS(85);
+DECLARE_MSM_GPIO_PINS(86);
+DECLARE_MSM_GPIO_PINS(87);
+DECLARE_MSM_GPIO_PINS(88);
+DECLARE_MSM_GPIO_PINS(89);
+DECLARE_MSM_GPIO_PINS(90);
+DECLARE_MSM_GPIO_PINS(91);
+DECLARE_MSM_GPIO_PINS(92);
+DECLARE_MSM_GPIO_PINS(93);
+DECLARE_MSM_GPIO_PINS(94);
+DECLARE_MSM_GPIO_PINS(95);
+DECLARE_MSM_GPIO_PINS(96);
+DECLARE_MSM_GPIO_PINS(97);
+DECLARE_MSM_GPIO_PINS(98);
+DECLARE_MSM_GPIO_PINS(99);
+DECLARE_MSM_GPIO_PINS(100);
+DECLARE_MSM_GPIO_PINS(101);
+DECLARE_MSM_GPIO_PINS(102);
+DECLARE_MSM_GPIO_PINS(103);
+DECLARE_MSM_GPIO_PINS(104);
+DECLARE_MSM_GPIO_PINS(105);
+DECLARE_MSM_GPIO_PINS(106);
+DECLARE_MSM_GPIO_PINS(107);
+DECLARE_MSM_GPIO_PINS(108);
+DECLARE_MSM_GPIO_PINS(109);
+DECLARE_MSM_GPIO_PINS(110);
+DECLARE_MSM_GPIO_PINS(111);
+DECLARE_MSM_GPIO_PINS(112);
+DECLARE_MSM_GPIO_PINS(113);
+DECLARE_MSM_GPIO_PINS(114);
+DECLARE_MSM_GPIO_PINS(115);
+DECLARE_MSM_GPIO_PINS(116);
+DECLARE_MSM_GPIO_PINS(117);
+DECLARE_MSM_GPIO_PINS(118);
+DECLARE_MSM_GPIO_PINS(119);
+DECLARE_MSM_GPIO_PINS(120);
+DECLARE_MSM_GPIO_PINS(121);
+DECLARE_MSM_GPIO_PINS(122);
+DECLARE_MSM_GPIO_PINS(123);
+DECLARE_MSM_GPIO_PINS(124);
+DECLARE_MSM_GPIO_PINS(125);
+DECLARE_MSM_GPIO_PINS(126);
+DECLARE_MSM_GPIO_PINS(127);
+DECLARE_MSM_GPIO_PINS(128);
+DECLARE_MSM_GPIO_PINS(129);
+DECLARE_MSM_GPIO_PINS(130);
+DECLARE_MSM_GPIO_PINS(131);
+DECLARE_MSM_GPIO_PINS(132);
+DECLARE_MSM_GPIO_PINS(133);
+DECLARE_MSM_GPIO_PINS(134);
+DECLARE_MSM_GPIO_PINS(135);
+DECLARE_MSM_GPIO_PINS(136);
+DECLARE_MSM_GPIO_PINS(137);
+DECLARE_MSM_GPIO_PINS(138);
+DECLARE_MSM_GPIO_PINS(139);
+DECLARE_MSM_GPIO_PINS(140);
+DECLARE_MSM_GPIO_PINS(141);
+DECLARE_MSM_GPIO_PINS(142);
+DECLARE_MSM_GPIO_PINS(143);
+DECLARE_MSM_GPIO_PINS(144);
+DECLARE_MSM_GPIO_PINS(145);
+DECLARE_MSM_GPIO_PINS(146);
+DECLARE_MSM_GPIO_PINS(147);
+DECLARE_MSM_GPIO_PINS(148);
+DECLARE_MSM_GPIO_PINS(149);
+
+static const unsigned int sdc1_clk_pins[] = { 150 };
+static const unsigned int sdc1_cmd_pins[] = { 151 };
+static const unsigned int sdc1_data_pins[] = { 152 };
+static const unsigned int sdc2_clk_pins[] = { 153 };
+static const unsigned int sdc2_cmd_pins[] = { 154 };
+static const unsigned int sdc2_data_pins[] = { 155 };
+static const unsigned int sdc1_rclk_pins[] = { 156 };
+
+enum msm8996_functions {
+ msm_mux_adsp_ext,
+ msm_mux_atest_bbrx0,
+ msm_mux_atest_bbrx1,
+ msm_mux_atest_char,
+ msm_mux_atest_char0,
+ msm_mux_atest_char1,
+ msm_mux_atest_char2,
+ msm_mux_atest_char3,
+ msm_mux_atest_gpsadc0,
+ msm_mux_atest_gpsadc1,
+ msm_mux_atest_tsens,
+ msm_mux_atest_tsens2,
+ msm_mux_atest_usb1,
+ msm_mux_atest_usb10,
+ msm_mux_atest_usb11,
+ msm_mux_atest_usb12,
+ msm_mux_atest_usb13,
+ msm_mux_atest_usb2,
+ msm_mux_atest_usb20,
+ msm_mux_atest_usb21,
+ msm_mux_atest_usb22,
+ msm_mux_atest_usb23,
+ msm_mux_audio_ref,
+ msm_mux_bimc_dte0,
+ msm_mux_bimc_dte1,
+ msm_mux_blsp10_spi,
+ msm_mux_blsp11_i2c_scl_b,
+ msm_mux_blsp11_i2c_sda_b,
+ msm_mux_blsp11_uart_rx_b,
+ msm_mux_blsp11_uart_tx_b,
+ msm_mux_blsp1_spi,
+ msm_mux_blsp2_spi,
+ msm_mux_blsp_i2c1,
+ msm_mux_blsp_i2c10,
+ msm_mux_blsp_i2c11,
+ msm_mux_blsp_i2c12,
+ msm_mux_blsp_i2c2,
+ msm_mux_blsp_i2c3,
+ msm_mux_blsp_i2c4,
+ msm_mux_blsp_i2c5,
+ msm_mux_blsp_i2c6,
+ msm_mux_blsp_i2c7,
+ msm_mux_blsp_i2c8,
+ msm_mux_blsp_i2c9,
+ msm_mux_blsp_spi1,
+ msm_mux_blsp_spi10,
+ msm_mux_blsp_spi11,
+ msm_mux_blsp_spi12,
+ msm_mux_blsp_spi2,
+ msm_mux_blsp_spi3,
+ msm_mux_blsp_spi4,
+ msm_mux_blsp_spi5,
+ msm_mux_blsp_spi6,
+ msm_mux_blsp_spi7,
+ msm_mux_blsp_spi8,
+ msm_mux_blsp_spi9,
+ msm_mux_blsp_uart1,
+ msm_mux_blsp_uart10,
+ msm_mux_blsp_uart11,
+ msm_mux_blsp_uart12,
+ msm_mux_blsp_uart2,
+ msm_mux_blsp_uart3,
+ msm_mux_blsp_uart4,
+ msm_mux_blsp_uart5,
+ msm_mux_blsp_uart6,
+ msm_mux_blsp_uart7,
+ msm_mux_blsp_uart8,
+ msm_mux_blsp_uart9,
+ msm_mux_blsp_uim1,
+ msm_mux_blsp_uim10,
+ msm_mux_blsp_uim11,
+ msm_mux_blsp_uim12,
+ msm_mux_blsp_uim2,
+ msm_mux_blsp_uim3,
+ msm_mux_blsp_uim4,
+ msm_mux_blsp_uim5,
+ msm_mux_blsp_uim6,
+ msm_mux_blsp_uim7,
+ msm_mux_blsp_uim8,
+ msm_mux_blsp_uim9,
+ msm_mux_btfm_slimbus,
+ msm_mux_cam_mclk,
+ msm_mux_cci_async,
+ msm_mux_cci_i2c,
+ msm_mux_cci_timer0,
+ msm_mux_cci_timer1,
+ msm_mux_cci_timer2,
+ msm_mux_cci_timer3,
+ msm_mux_cci_timer4,
+ msm_mux_cri_trng,
+ msm_mux_cri_trng0,
+ msm_mux_cri_trng1,
+ msm_mux_dac_calib0,
+ msm_mux_dac_calib1,
+ msm_mux_dac_calib10,
+ msm_mux_dac_calib11,
+ msm_mux_dac_calib12,
+ msm_mux_dac_calib13,
+ msm_mux_dac_calib14,
+ msm_mux_dac_calib15,
+ msm_mux_dac_calib16,
+ msm_mux_dac_calib17,
+ msm_mux_dac_calib18,
+ msm_mux_dac_calib19,
+ msm_mux_dac_calib2,
+ msm_mux_dac_calib20,
+ msm_mux_dac_calib21,
+ msm_mux_dac_calib22,
+ msm_mux_dac_calib23,
+ msm_mux_dac_calib24,
+ msm_mux_dac_calib25,
+ msm_mux_dac_calib26,
+ msm_mux_dac_calib3,
+ msm_mux_dac_calib4,
+ msm_mux_dac_calib5,
+ msm_mux_dac_calib6,
+ msm_mux_dac_calib7,
+ msm_mux_dac_calib8,
+ msm_mux_dac_calib9,
+ msm_mux_dac_gpio,
+ msm_mux_dbg_out,
+ msm_mux_ddr_bist,
+ msm_mux_edp_hot,
+ msm_mux_edp_lcd,
+ msm_mux_gcc_gp1_clk_a,
+ msm_mux_gcc_gp1_clk_b,
+ msm_mux_gcc_gp2_clk_a,
+ msm_mux_gcc_gp2_clk_b,
+ msm_mux_gcc_gp3_clk_a,
+ msm_mux_gcc_gp3_clk_b,
+ msm_mux_gsm_tx,
+ msm_mux_hdmi_cec,
+ msm_mux_hdmi_ddc,
+ msm_mux_hdmi_hot,
+ msm_mux_hdmi_rcv,
+ msm_mux_isense_dbg,
+ msm_mux_ldo_en,
+ msm_mux_ldo_update,
+ msm_mux_lpass_slimbus,
+ msm_mux_m_voc,
+ msm_mux_mdp_vsync,
+ msm_mux_mdp_vsync_p_b,
+ msm_mux_mdp_vsync_s_b,
+ msm_mux_modem_tsync,
+ msm_mux_mss_lte,
+ msm_mux_nav_dr,
+ msm_mux_nav_pps,
+ msm_mux_pa_indicator,
+ msm_mux_pci_e0,
+ msm_mux_pci_e1,
+ msm_mux_pci_e2,
+ msm_mux_pll_bypassnl,
+ msm_mux_pll_reset,
+ msm_mux_pri_mi2s,
+ msm_mux_prng_rosc,
+ msm_mux_pwr_crypto,
+ msm_mux_pwr_modem,
+ msm_mux_pwr_nav,
+ msm_mux_qdss_cti,
+ msm_mux_qdss_cti_trig_in_a,
+ msm_mux_qdss_cti_trig_in_b,
+ msm_mux_qdss_cti_trig_out_a,
+ msm_mux_qdss_cti_trig_out_b,
+ msm_mux_qdss_stm0,
+ msm_mux_qdss_stm1,
+ msm_mux_qdss_stm10,
+ msm_mux_qdss_stm11,
+ msm_mux_qdss_stm12,
+ msm_mux_qdss_stm13,
+ msm_mux_qdss_stm14,
+ msm_mux_qdss_stm15,
+ msm_mux_qdss_stm16,
+ msm_mux_qdss_stm17,
+ msm_mux_qdss_stm18,
+ msm_mux_qdss_stm19,
+ msm_mux_qdss_stm2,
+ msm_mux_qdss_stm20,
+ msm_mux_qdss_stm21,
+ msm_mux_qdss_stm22,
+ msm_mux_qdss_stm23,
+ msm_mux_qdss_stm24,
+ msm_mux_qdss_stm25,
+ msm_mux_qdss_stm26,
+ msm_mux_qdss_stm27,
+ msm_mux_qdss_stm28,
+ msm_mux_qdss_stm29,
+ msm_mux_qdss_stm3,
+ msm_mux_qdss_stm30,
+ msm_mux_qdss_stm31,
+ msm_mux_qdss_stm4,
+ msm_mux_qdss_stm5,
+ msm_mux_qdss_stm6,
+ msm_mux_qdss_stm7,
+ msm_mux_qdss_stm8,
+ msm_mux_qdss_stm9,
+ msm_mux_qdss_traceclk_a,
+ msm_mux_qdss_traceclk_b,
+ msm_mux_qdss_tracectl_a,
+ msm_mux_qdss_tracectl_b,
+ msm_mux_qdss_tracedata_11,
+ msm_mux_qdss_tracedata_12,
+ msm_mux_qdss_tracedata_a,
+ msm_mux_qdss_tracedata_b,
+ msm_mux_qspi0,
+ msm_mux_qspi1,
+ msm_mux_qspi2,
+ msm_mux_qspi3,
+ msm_mux_qspi_clk,
+ msm_mux_qspi_cs,
+ msm_mux_qua_mi2s,
+ msm_mux_sd_card,
+ msm_mux_sd_write,
+ msm_mux_sdc40,
+ msm_mux_sdc41,
+ msm_mux_sdc42,
+ msm_mux_sdc43,
+ msm_mux_sdc4_clk,
+ msm_mux_sdc4_cmd,
+ msm_mux_sec_mi2s,
+ msm_mux_spkr_i2s,
+ msm_mux_ssbi1,
+ msm_mux_ssbi2,
+ msm_mux_ssc_irq,
+ msm_mux_ter_mi2s,
+ msm_mux_tsense_pwm1,
+ msm_mux_tsense_pwm2,
+ msm_mux_tsif1_clk,
+ msm_mux_tsif1_data,
+ msm_mux_tsif1_en,
+ msm_mux_tsif1_error,
+ msm_mux_tsif1_sync,
+ msm_mux_tsif2_clk,
+ msm_mux_tsif2_data,
+ msm_mux_tsif2_en,
+ msm_mux_tsif2_error,
+ msm_mux_tsif2_sync,
+ msm_mux_uim1,
+ msm_mux_uim2,
+ msm_mux_uim3,
+ msm_mux_uim4,
+ msm_mux_uim_batt,
+ msm_mux_vfr_1,
+ msm_mux_gpio,
+ msm_mux_NA,
+};
+
+static const char * const gpio_groups[] = {
+ "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6", "gpio7",
+ "gpio8", "gpio9", "gpio10", "gpio11", "gpio12", "gpio13", "gpio14",
+ "gpio15", "gpio16", "gpio17", "gpio18", "gpio19", "gpio20", "gpio21",
+ "gpio22", "gpio23", "gpio24", "gpio25", "gpio26", "gpio27", "gpio28",
+ "gpio29", "gpio30", "gpio31", "gpio32", "gpio33", "gpio34", "gpio35",
+ "gpio36", "gpio37", "gpio38", "gpio39", "gpio40", "gpio41", "gpio42",
+ "gpio43", "gpio44", "gpio45", "gpio46", "gpio47", "gpio48", "gpio49",
+ "gpio50", "gpio51", "gpio52", "gpio53", "gpio54", "gpio55", "gpio56",
+ "gpio57", "gpio58", "gpio59", "gpio60", "gpio61", "gpio62", "gpio63",
+ "gpio64", "gpio65", "gpio66", "gpio67", "gpio68", "gpio69", "gpio70",
+ "gpio71", "gpio72", "gpio73", "gpio74", "gpio75", "gpio76", "gpio77",
+ "gpio78", "gpio79", "gpio80", "gpio81", "gpio82", "gpio83", "gpio84",
+ "gpio85", "gpio86", "gpio87", "gpio88", "gpio89", "gpio90", "gpio91",
+ "gpio92", "gpio93", "gpio94", "gpio95", "gpio96", "gpio97", "gpio98",
+ "gpio99", "gpio100", "gpio101", "gpio102", "gpio103", "gpio104",
+ "gpio105", "gpio106", "gpio107", "gpio108", "gpio109", "gpio110",
+ "gpio111", "gpio112", "gpio113", "gpio114", "gpio115", "gpio116",
+ "gpio117", "gpio118", "gpio119", "gpio120", "gpio121", "gpio122",
+ "gpio123", "gpio124", "gpio125", "gpio126", "gpio127", "gpio128",
+ "gpio129", "gpio130", "gpio131", "gpio132", "gpio133", "gpio134",
+ "gpio135", "gpio136", "gpio137", "gpio138", "gpio139", "gpio140",
+ "gpio141", "gpio142", "gpio143", "gpio144", "gpio145", "gpio146",
+ "gpio147", "gpio148", "gpio149"
+};
+
+
+static const char * const blsp_uart1_groups[] = {
+ "gpio0", "gpio1", "gpio2", "gpio3",
+};
+static const char * const blsp_spi1_groups[] = {
+ "gpio0", "gpio1", "gpio2", "gpio3",
+};
+static const char * const blsp_i2c1_groups[] = {
+ "gpio2", "gpio3",
+};
+static const char * const blsp_uim1_groups[] = {
+ "gpio0", "gpio1",
+};
+static const char * const atest_tsens_groups[] = {
+ "gpio3",
+};
+static const char * const bimc_dte1_groups[] = {
+ "gpio3", "gpio5",
+};
+static const char * const blsp_spi8_groups[] = {
+ "gpio4", "gpio5", "gpio6", "gpio7",
+};
+static const char * const blsp_uart8_groups[] = {
+ "gpio4", "gpio5", "gpio6", "gpio7",
+};
+static const char * const blsp_uim8_groups[] = {
+ "gpio4", "gpio5",
+};
+static const char * const qdss_cti_trig_out_b_groups[] = {
+ "gpio4",
+};
+static const char * const dac_calib0_groups[] = {
+ "gpio4", "gpio41",
+};
+static const char * const bimc_dte0_groups[] = {
+ "gpio4", "gpio6",
+};
+static const char * const qdss_cti_trig_in_b_groups[] = {
+ "gpio5",
+};
+static const char * const dac_calib1_groups[] = {
+ "gpio5", "gpio42",
+};
+static const char * const dac_calib2_groups[] = {
+ "gpio6", "gpio43",
+};
+static const char * const atest_tsens2_groups[] = {
+ "gpio7",
+};
+static const char * const blsp_spi10_groups[] = {
+ "gpio8", "gpio9", "gpio10", "gpio11",
+};
+static const char * const blsp_uart10_groups[] = {
+ "gpio8", "gpio9", "gpio10", "gpio11",
+};
+static const char * const blsp_uim10_groups[] = {
+ "gpio8", "gpio9",
+};
+static const char * const atest_bbrx1_groups[] = {
+ "gpio8",
+};
+static const char * const atest_usb12_groups[] = {
+ "gpio9",
+};
+static const char * const mdp_vsync_groups[] = {
+ "gpio10", "gpio11", "gpio12",
+};
+static const char * const edp_lcd_groups[] = {
+ "gpio10",
+};
+static const char * const blsp_i2c10_groups[] = {
+ "gpio10", "gpio11",
+};
+static const char * const atest_usb11_groups[] = {
+ "gpio10",
+};
+static const char * const atest_gpsadc0_groups[] = {
+ "gpio11",
+};
+static const char * const edp_hot_groups[] = {
+ "gpio11",
+};
+static const char * const atest_usb10_groups[] = {
+ "gpio11",
+};
+static const char * const m_voc_groups[] = {
+ "gpio12",
+};
+static const char * const dac_gpio_groups[] = {
+ "gpio12",
+};
+static const char * const atest_char_groups[] = {
+ "gpio12",
+};
+static const char * const cam_mclk_groups[] = {
+ "gpio13", "gpio14", "gpio15", "gpio16",
+};
+static const char * const pll_bypassnl_groups[] = {
+ "gpio13",
+};
+static const char * const qdss_stm7_groups[] = {
+ "gpio13",
+};
+static const char * const blsp_i2c8_groups[] = {
+ "gpio6", "gpio7",
+};
+static const char * const atest_usb1_groups[] = {
+ "gpio7",
+};
+static const char * const atest_usb13_groups[] = {
+ "gpio8",
+};
+static const char * const atest_bbrx0_groups[] = {
+ "gpio9",
+};
+static const char * const atest_gpsadc1_groups[] = {
+ "gpio10",
+};
+static const char * const qdss_tracedata_b_groups[] = {
+ "gpio13", "gpio14", "gpio15", "gpio16", "gpio17", "gpio18", "gpio19",
+ "gpio21", "gpio22", "gpio23", "gpio26", "gpio29", "gpio57", "gpio58",
+ "gpio92", "gpio93",
+};
+static const char * const pll_reset_groups[] = {
+ "gpio14",
+};
+static const char * const qdss_stm6_groups[] = {
+ "gpio14",
+};
+static const char * const qdss_stm5_groups[] = {
+ "gpio15",
+};
+static const char * const qdss_stm4_groups[] = {
+ "gpio16",
+};
+static const char * const atest_usb2_groups[] = {
+ "gpio16",
+};
+static const char * const dac_calib3_groups[] = {
+ "gpio17", "gpio44",
+};
+static const char * const cci_i2c_groups[] = {
+ "gpio17", "gpio18", "gpio19", "gpio20",
+};
+static const char * const qdss_stm3_groups[] = {
+ "gpio17",
+};
+static const char * const atest_usb23_groups[] = {
+ "gpio17",
+};
+static const char * const atest_char3_groups[] = {
+ "gpio17",
+};
+static const char * const dac_calib4_groups[] = {
+ "gpio18", "gpio45",
+};
+static const char * const qdss_stm2_groups[] = {
+ "gpio18",
+};
+static const char * const atest_usb22_groups[] = {
+ "gpio18",
+};
+static const char * const atest_char2_groups[] = {
+ "gpio18",
+};
+static const char * const dac_calib5_groups[] = {
+ "gpio19", "gpio46",
+};
+static const char * const qdss_stm1_groups[] = {
+ "gpio19",
+};
+static const char * const atest_usb21_groups[] = {
+ "gpio19",
+};
+static const char * const atest_char1_groups[] = {
+ "gpio19",
+};
+static const char * const dac_calib6_groups[] = {
+ "gpio20", "gpio47",
+};
+static const char * const dbg_out_groups[] = {
+ "gpio20",
+};
+static const char * const qdss_stm0_groups[] = {
+ "gpio20",
+};
+static const char * const atest_usb20_groups[] = {
+ "gpio20",
+};
+static const char * const atest_char0_groups[] = {
+ "gpio20",
+};
+static const char * const dac_calib7_groups[] = {
+ "gpio21", "gpio48",
+};
+static const char * const cci_timer0_groups[] = {
+ "gpio21",
+};
+static const char * const qdss_stm13_groups[] = {
+ "gpio21",
+};
+static const char * const dac_calib8_groups[] = {
+ "gpio22", "gpio49",
+};
+static const char * const cci_timer1_groups[] = {
+ "gpio22",
+};
+static const char * const qdss_stm12_groups[] = {
+ "gpio22",
+};
+static const char * const dac_calib9_groups[] = {
+ "gpio23", "gpio50",
+};
+static const char * const cci_timer2_groups[] = {
+ "gpio23",
+};
+static const char * const qdss_stm11_groups[] = {
+ "gpio23",
+};
+static const char * const dac_calib10_groups[] = {
+ "gpio24", "gpio51",
+};
+static const char * const cci_timer3_groups[] = {
+ "gpio24",
+};
+static const char * const cci_async_groups[] = {
+ "gpio24", "gpio25", "gpio26",
+};
+static const char * const blsp1_spi_groups[] = {
+ "gpio24", "gpio27", "gpio28", "gpio90",
+};
+static const char * const qdss_stm10_groups[] = {
+ "gpio24",
+};
+static const char * const qdss_cti_trig_in_a_groups[] = {
+ "gpio24",
+};
+static const char * const dac_calib11_groups[] = {
+ "gpio25", "gpio52",
+};
+static const char * const cci_timer4_groups[] = {
+ "gpio25",
+};
+static const char * const blsp_spi6_groups[] = {
+ "gpio25", "gpio26", "gpio27", "gpio28",
+};
+static const char * const blsp_uart6_groups[] = {
+ "gpio25", "gpio26", "gpio27", "gpio28",
+};
+static const char * const blsp_uim6_groups[] = {
+ "gpio25", "gpio26",
+};
+static const char * const blsp2_spi_groups[] = {
+ "gpio25", "gpio29", "gpio30",
+};
+static const char * const qdss_stm9_groups[] = {
+ "gpio25",
+};
+static const char * const qdss_cti_trig_out_a_groups[] = {
+ "gpio25",
+};
+static const char * const dac_calib12_groups[] = {
+ "gpio26", "gpio53",
+};
+static const char * const qdss_stm8_groups[] = {
+ "gpio26",
+};
+static const char * const dac_calib13_groups[] = {
+ "gpio27", "gpio54",
+};
+static const char * const blsp_i2c6_groups[] = {
+ "gpio27", "gpio28",
+};
+static const char * const qdss_tracectl_a_groups[] = {
+ "gpio27",
+};
+static const char * const dac_calib14_groups[] = {
+ "gpio28", "gpio55",
+};
+static const char * const qdss_traceclk_a_groups[] = {
+ "gpio28",
+};
+static const char * const dac_calib15_groups[] = {
+ "gpio29", "gpio56",
+};
+static const char * const dac_calib16_groups[] = {
+ "gpio30", "gpio57",
+};
+static const char * const hdmi_rcv_groups[] = {
+ "gpio30",
+};
+static const char * const dac_calib17_groups[] = {
+ "gpio31", "gpio58",
+};
+static const char * const pwr_modem_groups[] = {
+ "gpio31",
+};
+static const char * const hdmi_cec_groups[] = {
+ "gpio31",
+};
+static const char * const pwr_nav_groups[] = {
+ "gpio32",
+};
+static const char * const dac_calib18_groups[] = {
+ "gpio32", "gpio59",
+};
+static const char * const hdmi_ddc_groups[] = {
+ "gpio32", "gpio33",
+};
+static const char * const pwr_crypto_groups[] = {
+ "gpio33",
+};
+static const char * const dac_calib19_groups[] = {
+ "gpio33", "gpio60",
+};
+static const char * const dac_calib20_groups[] = {
+ "gpio34", "gpio61",
+};
+static const char * const hdmi_hot_groups[] = {
+ "gpio34",
+};
+static const char * const dac_calib21_groups[] = {
+ "gpio35", "gpio62",
+};
+static const char * const pci_e0_groups[] = {
+ "gpio35", "gpio36",
+};
+static const char * const dac_calib22_groups[] = {
+ "gpio36", "gpio63",
+};
+static const char * const dac_calib23_groups[] = {
+ "gpio37", "gpio64",
+};
+static const char * const blsp_i2c2_groups[] = {
+ "gpio43", "gpio44",
+};
+static const char * const blsp_spi3_groups[] = {
+ "gpio45", "gpio46", "gpio47", "gpio48",
+};
+static const char * const blsp_uart3_groups[] = {
+ "gpio45", "gpio46", "gpio47", "gpio48",
+};
+static const char * const blsp_uim3_groups[] = {
+ "gpio45", "gpio46",
+};
+static const char * const blsp_i2c3_groups[] = {
+ "gpio47", "gpio48",
+};
+static const char * const dac_calib24_groups[] = {
+ "gpio38", "gpio65",
+};
+static const char * const dac_calib25_groups[] = {
+ "gpio39", "gpio66",
+};
+static const char * const tsif1_sync_groups[] = {
+ "gpio39",
+};
+static const char * const sd_write_groups[] = {
+ "gpio40",
+};
+static const char * const tsif1_error_groups[] = {
+ "gpio40",
+};
+static const char * const blsp_spi2_groups[] = {
+ "gpio41", "gpio42", "gpio43", "gpio44",
+};
+static const char * const blsp_uart2_groups[] = {
+ "gpio41", "gpio42", "gpio43", "gpio44",
+};
+static const char * const blsp_uim2_groups[] = {
+ "gpio41", "gpio42",
+};
+static const char * const qdss_cti_groups[] = {
+ "gpio41", "gpio42", "gpio100", "gpio101",
+};
+static const char * const uim3_groups[] = {
+ "gpio49", "gpio50", "gpio51", "gpio52",
+};
+static const char * const blsp_spi9_groups[] = {
+ "gpio49", "gpio50", "gpio51", "gpio52",
+};
+static const char * const blsp_uart9_groups[] = {
+ "gpio49", "gpio50", "gpio51", "gpio52",
+};
+static const char * const blsp_uim9_groups[] = {
+ "gpio49", "gpio50",
+};
+static const char * const blsp10_spi_groups[] = {
+ "gpio49", "gpio50", "gpio51", "gpio52", "gpio88",
+};
+static const char * const blsp_i2c9_groups[] = {
+ "gpio51", "gpio52",
+};
+static const char * const blsp_spi7_groups[] = {
+ "gpio53", "gpio54", "gpio55", "gpio56",
+};
+static const char * const blsp_uart7_groups[] = {
+ "gpio53", "gpio54", "gpio55", "gpio56",
+};
+static const char * const blsp_uim7_groups[] = {
+ "gpio53", "gpio54",
+};
+static const char * const qdss_tracedata_a_groups[] = {
+ "gpio53", "gpio54", "gpio63", "gpio64", "gpio65", "gpio66", "gpio67",
+ "gpio74", "gpio75", "gpio76", "gpio77", "gpio85", "gpio86", "gpio87",
+ "gpio89", "gpio90",
+};
+static const char * const blsp_i2c7_groups[] = {
+ "gpio55", "gpio56",
+};
+static const char * const qua_mi2s_groups[] = {
+ "gpio57", "gpio58", "gpio59", "gpio60", "gpio61", "gpio62", "gpio63",
+};
+static const char * const gcc_gp1_clk_a_groups[] = {
+ "gpio57",
+};
+static const char * const uim4_groups[] = {
+ "gpio58", "gpio59", "gpio60", "gpio61",
+};
+static const char * const blsp_spi11_groups[] = {
+ "gpio58", "gpio59", "gpio60", "gpio61",
+};
+static const char * const blsp_uart11_groups[] = {
+ "gpio58", "gpio59", "gpio60", "gpio61",
+};
+static const char * const blsp_uim11_groups[] = {
+ "gpio58", "gpio59",
+};
+static const char * const gcc_gp2_clk_a_groups[] = {
+ "gpio58",
+};
+static const char * const gcc_gp3_clk_a_groups[] = {
+ "gpio59",
+};
+static const char * const blsp_i2c11_groups[] = {
+ "gpio60", "gpio61",
+};
+static const char * const cri_trng0_groups[] = {
+ "gpio60",
+};
+static const char * const cri_trng1_groups[] = {
+ "gpio61",
+};
+static const char * const cri_trng_groups[] = {
+ "gpio62",
+};
+static const char * const qdss_stm18_groups[] = {
+ "gpio63",
+};
+static const char * const pri_mi2s_groups[] = {
+ "gpio64", "gpio65", "gpio66", "gpio67", "gpio68",
+};
+static const char * const qdss_stm17_groups[] = {
+ "gpio64",
+};
+static const char * const blsp_spi4_groups[] = {
+ "gpio65", "gpio66", "gpio67", "gpio68",
+};
+static const char * const blsp_uart4_groups[] = {
+ "gpio65", "gpio66", "gpio67", "gpio68",
+};
+static const char * const blsp_uim4_groups[] = {
+ "gpio65", "gpio66",
+};
+static const char * const qdss_stm16_groups[] = {
+ "gpio65",
+};
+static const char * const qdss_stm15_groups[] = {
+ "gpio66",
+};
+static const char * const dac_calib26_groups[] = {
+ "gpio67",
+};
+static const char * const blsp_i2c4_groups[] = {
+ "gpio67", "gpio68",
+};
+static const char * const qdss_stm14_groups[] = {
+ "gpio67",
+};
+static const char * const spkr_i2s_groups[] = {
+ "gpio69", "gpio70", "gpio71", "gpio72",
+};
+static const char * const audio_ref_groups[] = {
+ "gpio69",
+};
+static const char * const lpass_slimbus_groups[] = {
+ "gpio70", "gpio71", "gpio72",
+};
+static const char * const isense_dbg_groups[] = {
+ "gpio70",
+};
+static const char * const tsense_pwm1_groups[] = {
+ "gpio71",
+};
+static const char * const tsense_pwm2_groups[] = {
+ "gpio71",
+};
+static const char * const btfm_slimbus_groups[] = {
+ "gpio73", "gpio74",
+};
+static const char * const ter_mi2s_groups[] = {
+ "gpio74", "gpio75", "gpio76", "gpio77", "gpio78",
+};
+static const char * const qdss_stm22_groups[] = {
+ "gpio74",
+};
+static const char * const qdss_stm21_groups[] = {
+ "gpio75",
+};
+static const char * const qdss_stm20_groups[] = {
+ "gpio76",
+};
+static const char * const qdss_stm19_groups[] = {
+ "gpio77",
+};
+static const char * const ssc_irq_groups[] = {
+ "gpio78", "gpio79", "gpio80", "gpio117", "gpio118", "gpio119",
+ "gpio120", "gpio121", "gpio122", "gpio123", "gpio124", "gpio125",
+};
+static const char * const gcc_gp1_clk_b_groups[] = {
+ "gpio78",
+};
+static const char * const sec_mi2s_groups[] = {
+ "gpio79", "gpio80", "gpio81", "gpio82", "gpio83",
+};
+static const char * const blsp_spi5_groups[] = {
+ "gpio81", "gpio82", "gpio83", "gpio84",
+};
+static const char * const blsp_uart5_groups[] = {
+ "gpio81", "gpio82", "gpio83", "gpio84",
+};
+static const char * const blsp_uim5_groups[] = {
+ "gpio81", "gpio82",
+};
+static const char * const gcc_gp2_clk_b_groups[] = {
+ "gpio81",
+};
+static const char * const gcc_gp3_clk_b_groups[] = {
+ "gpio82",
+};
+static const char * const blsp_i2c5_groups[] = {
+ "gpio83", "gpio84",
+};
+static const char * const blsp_spi12_groups[] = {
+ "gpio85", "gpio86", "gpio87", "gpio88",
+};
+static const char * const blsp_uart12_groups[] = {
+ "gpio85", "gpio86", "gpio87", "gpio88",
+};
+static const char * const blsp_uim12_groups[] = {
+ "gpio85", "gpio86",
+};
+static const char * const qdss_stm25_groups[] = {
+ "gpio85",
+};
+static const char * const qdss_stm31_groups[] = {
+ "gpio86",
+};
+static const char * const blsp_i2c12_groups[] = {
+ "gpio87", "gpio88",
+};
+static const char * const qdss_stm30_groups[] = {
+ "gpio87",
+};
+static const char * const qdss_stm29_groups[] = {
+ "gpio88",
+};
+static const char * const tsif1_clk_groups[] = {
+ "gpio89",
+};
+static const char * const qdss_stm28_groups[] = {
+ "gpio89",
+};
+static const char * const tsif1_en_groups[] = {
+ "gpio90",
+};
+static const char * const tsif1_data_groups[] = {
+ "gpio91",
+};
+static const char * const sdc4_cmd_groups[] = {
+ "gpio91",
+};
+static const char * const qdss_stm27_groups[] = {
+ "gpio91",
+};
+static const char * const qdss_traceclk_b_groups[] = {
+ "gpio91",
+};
+static const char * const tsif2_error_groups[] = {
+ "gpio92",
+};
+static const char * const sdc43_groups[] = {
+ "gpio92",
+};
+static const char * const vfr_1_groups[] = {
+ "gpio92",
+};
+static const char * const qdss_stm26_groups[] = {
+ "gpio92",
+};
+static const char * const tsif2_clk_groups[] = {
+ "gpio93",
+};
+static const char * const sdc4_clk_groups[] = {
+ "gpio93",
+};
+static const char * const qdss_stm24_groups[] = {
+ "gpio93",
+};
+static const char * const tsif2_en_groups[] = {
+ "gpio94",
+};
+static const char * const sdc42_groups[] = {
+ "gpio94",
+};
+static const char * const qdss_stm23_groups[] = {
+ "gpio94",
+};
+static const char * const qdss_tracectl_b_groups[] = {
+ "gpio94",
+};
+static const char * const sd_card_groups[] = {
+ "gpio95",
+};
+static const char * const tsif2_data_groups[] = {
+ "gpio95",
+};
+static const char * const sdc41_groups[] = {
+ "gpio95",
+};
+static const char * const tsif2_sync_groups[] = {
+ "gpio96",
+};
+static const char * const sdc40_groups[] = {
+ "gpio96",
+};
+static const char * const mdp_vsync_p_b_groups[] = {
+ "gpio97",
+};
+static const char * const ldo_en_groups[] = {
+ "gpio97",
+};
+static const char * const mdp_vsync_s_b_groups[] = {
+ "gpio98",
+};
+static const char * const ldo_update_groups[] = {
+ "gpio98",
+};
+static const char * const blsp11_uart_tx_b_groups[] = {
+ "gpio100",
+};
+static const char * const blsp11_uart_rx_b_groups[] = {
+ "gpio101",
+};
+static const char * const blsp11_i2c_sda_b_groups[] = {
+ "gpio102",
+};
+static const char * const prng_rosc_groups[] = {
+ "gpio102",
+};
+static const char * const blsp11_i2c_scl_b_groups[] = {
+ "gpio103",
+};
+static const char * const uim2_groups[] = {
+ "gpio105", "gpio106", "gpio107", "gpio108",
+};
+static const char * const uim1_groups[] = {
+ "gpio109", "gpio110", "gpio111", "gpio112",
+};
+static const char * const uim_batt_groups[] = {
+ "gpio113",
+};
+static const char * const pci_e2_groups[] = {
+ "gpio114", "gpio115", "gpio116",
+};
+static const char * const pa_indicator_groups[] = {
+ "gpio116",
+};
+static const char * const adsp_ext_groups[] = {
+ "gpio118",
+};
+static const char * const ddr_bist_groups[] = {
+ "gpio121", "gpio122", "gpio123", "gpio124",
+};
+static const char * const qdss_tracedata_11_groups[] = {
+ "gpio123",
+};
+static const char * const qdss_tracedata_12_groups[] = {
+ "gpio124",
+};
+static const char * const modem_tsync_groups[] = {
+ "gpio128",
+};
+static const char * const nav_dr_groups[] = {
+ "gpio128",
+};
+static const char * const nav_pps_groups[] = {
+ "gpio128",
+};
+static const char * const pci_e1_groups[] = {
+ "gpio130", "gpio131", "gpio132",
+};
+static const char * const gsm_tx_groups[] = {
+ "gpio134", "gpio135",
+};
+static const char * const qspi_cs_groups[] = {
+ "gpio138", "gpio141",
+};
+static const char * const ssbi2_groups[] = {
+ "gpio139",
+};
+static const char * const ssbi1_groups[] = {
+ "gpio140",
+};
+static const char * const mss_lte_groups[] = {
+ "gpio144", "gpio145",
+};
+static const char * const qspi_clk_groups[] = {
+ "gpio145",
+};
+static const char * const qspi0_groups[] = {
+ "gpio146",
+};
+static const char * const qspi1_groups[] = {
+ "gpio147",
+};
+static const char * const qspi2_groups[] = {
+ "gpio148",
+};
+static const char * const qspi3_groups[] = {
+ "gpio149",
+};
+
+static const struct msm_function msm8996_functions[] = {
+ FUNCTION(adsp_ext),
+ FUNCTION(atest_bbrx0),
+ FUNCTION(atest_bbrx1),
+ FUNCTION(atest_char),
+ FUNCTION(atest_char0),
+ FUNCTION(atest_char1),
+ FUNCTION(atest_char2),
+ FUNCTION(atest_char3),
+ FUNCTION(atest_gpsadc0),
+ FUNCTION(atest_gpsadc1),
+ FUNCTION(atest_tsens),
+ FUNCTION(atest_tsens2),
+ FUNCTION(atest_usb1),
+ FUNCTION(atest_usb10),
+ FUNCTION(atest_usb11),
+ FUNCTION(atest_usb12),
+ FUNCTION(atest_usb13),
+ FUNCTION(atest_usb2),
+ FUNCTION(atest_usb20),
+ FUNCTION(atest_usb21),
+ FUNCTION(atest_usb22),
+ FUNCTION(atest_usb23),
+ FUNCTION(audio_ref),
+ FUNCTION(bimc_dte0),
+ FUNCTION(bimc_dte1),
+ FUNCTION(blsp10_spi),
+ FUNCTION(blsp11_i2c_scl_b),
+ FUNCTION(blsp11_i2c_sda_b),
+ FUNCTION(blsp11_uart_rx_b),
+ FUNCTION(blsp11_uart_tx_b),
+ FUNCTION(blsp1_spi),
+ FUNCTION(blsp2_spi),
+ FUNCTION(blsp_i2c1),
+ FUNCTION(blsp_i2c10),
+ FUNCTION(blsp_i2c11),
+ FUNCTION(blsp_i2c12),
+ FUNCTION(blsp_i2c2),
+ FUNCTION(blsp_i2c3),
+ FUNCTION(blsp_i2c4),
+ FUNCTION(blsp_i2c5),
+ FUNCTION(blsp_i2c6),
+ FUNCTION(blsp_i2c7),
+ FUNCTION(blsp_i2c8),
+ FUNCTION(blsp_i2c9),
+ FUNCTION(blsp_spi1),
+ FUNCTION(blsp_spi10),
+ FUNCTION(blsp_spi11),
+ FUNCTION(blsp_spi12),
+ FUNCTION(blsp_spi2),
+ FUNCTION(blsp_spi3),
+ FUNCTION(blsp_spi4),
+ FUNCTION(blsp_spi5),
+ FUNCTION(blsp_spi6),
+ FUNCTION(blsp_spi7),
+ FUNCTION(blsp_spi8),
+ FUNCTION(blsp_spi9),
+ FUNCTION(blsp_uart1),
+ FUNCTION(blsp_uart10),
+ FUNCTION(blsp_uart11),
+ FUNCTION(blsp_uart12),
+ FUNCTION(blsp_uart2),
+ FUNCTION(blsp_uart3),
+ FUNCTION(blsp_uart4),
+ FUNCTION(blsp_uart5),
+ FUNCTION(blsp_uart6),
+ FUNCTION(blsp_uart7),
+ FUNCTION(blsp_uart8),
+ FUNCTION(blsp_uart9),
+ FUNCTION(blsp_uim1),
+ FUNCTION(blsp_uim10),
+ FUNCTION(blsp_uim11),
+ FUNCTION(blsp_uim12),
+ FUNCTION(blsp_uim2),
+ FUNCTION(blsp_uim3),
+ FUNCTION(blsp_uim4),
+ FUNCTION(blsp_uim5),
+ FUNCTION(blsp_uim6),
+ FUNCTION(blsp_uim7),
+ FUNCTION(blsp_uim8),
+ FUNCTION(blsp_uim9),
+ FUNCTION(btfm_slimbus),
+ FUNCTION(cam_mclk),
+ FUNCTION(cci_async),
+ FUNCTION(cci_i2c),
+ FUNCTION(cci_timer0),
+ FUNCTION(cci_timer1),
+ FUNCTION(cci_timer2),
+ FUNCTION(cci_timer3),
+ FUNCTION(cci_timer4),
+ FUNCTION(cri_trng),
+ FUNCTION(cri_trng0),
+ FUNCTION(cri_trng1),
+ FUNCTION(dac_calib0),
+ FUNCTION(dac_calib1),
+ FUNCTION(dac_calib10),
+ FUNCTION(dac_calib11),
+ FUNCTION(dac_calib12),
+ FUNCTION(dac_calib13),
+ FUNCTION(dac_calib14),
+ FUNCTION(dac_calib15),
+ FUNCTION(dac_calib16),
+ FUNCTION(dac_calib17),
+ FUNCTION(dac_calib18),
+ FUNCTION(dac_calib19),
+ FUNCTION(dac_calib2),
+ FUNCTION(dac_calib20),
+ FUNCTION(dac_calib21),
+ FUNCTION(dac_calib22),
+ FUNCTION(dac_calib23),
+ FUNCTION(dac_calib24),
+ FUNCTION(dac_calib25),
+ FUNCTION(dac_calib26),
+ FUNCTION(dac_calib3),
+ FUNCTION(dac_calib4),
+ FUNCTION(dac_calib5),
+ FUNCTION(dac_calib6),
+ FUNCTION(dac_calib7),
+ FUNCTION(dac_calib8),
+ FUNCTION(dac_calib9),
+ FUNCTION(dac_gpio),
+ FUNCTION(dbg_out),
+ FUNCTION(ddr_bist),
+ FUNCTION(edp_hot),
+ FUNCTION(edp_lcd),
+ FUNCTION(gcc_gp1_clk_a),
+ FUNCTION(gcc_gp1_clk_b),
+ FUNCTION(gcc_gp2_clk_a),
+ FUNCTION(gcc_gp2_clk_b),
+ FUNCTION(gcc_gp3_clk_a),
+ FUNCTION(gcc_gp3_clk_b),
+ FUNCTION(gpio),
+ FUNCTION(gsm_tx),
+ FUNCTION(hdmi_cec),
+ FUNCTION(hdmi_ddc),
+ FUNCTION(hdmi_hot),
+ FUNCTION(hdmi_rcv),
+ FUNCTION(isense_dbg),
+ FUNCTION(ldo_en),
+ FUNCTION(ldo_update),
+ FUNCTION(lpass_slimbus),
+ FUNCTION(m_voc),
+ FUNCTION(mdp_vsync),
+ FUNCTION(mdp_vsync_p_b),
+ FUNCTION(mdp_vsync_s_b),
+ FUNCTION(modem_tsync),
+ FUNCTION(mss_lte),
+ FUNCTION(nav_dr),
+ FUNCTION(nav_pps),
+ FUNCTION(pa_indicator),
+ FUNCTION(pci_e0),
+ FUNCTION(pci_e1),
+ FUNCTION(pci_e2),
+ FUNCTION(pll_bypassnl),
+ FUNCTION(pll_reset),
+ FUNCTION(pri_mi2s),
+ FUNCTION(prng_rosc),
+ FUNCTION(pwr_crypto),
+ FUNCTION(pwr_modem),
+ FUNCTION(pwr_nav),
+ FUNCTION(qdss_cti),
+ FUNCTION(qdss_cti_trig_in_a),
+ FUNCTION(qdss_cti_trig_in_b),
+ FUNCTION(qdss_cti_trig_out_a),
+ FUNCTION(qdss_cti_trig_out_b),
+ FUNCTION(qdss_stm0),
+ FUNCTION(qdss_stm1),
+ FUNCTION(qdss_stm10),
+ FUNCTION(qdss_stm11),
+ FUNCTION(qdss_stm12),
+ FUNCTION(qdss_stm13),
+ FUNCTION(qdss_stm14),
+ FUNCTION(qdss_stm15),
+ FUNCTION(qdss_stm16),
+ FUNCTION(qdss_stm17),
+ FUNCTION(qdss_stm18),
+ FUNCTION(qdss_stm19),
+ FUNCTION(qdss_stm2),
+ FUNCTION(qdss_stm20),
+ FUNCTION(qdss_stm21),
+ FUNCTION(qdss_stm22),
+ FUNCTION(qdss_stm23),
+ FUNCTION(qdss_stm24),
+ FUNCTION(qdss_stm25),
+ FUNCTION(qdss_stm26),
+ FUNCTION(qdss_stm27),
+ FUNCTION(qdss_stm28),
+ FUNCTION(qdss_stm29),
+ FUNCTION(qdss_stm3),
+ FUNCTION(qdss_stm30),
+ FUNCTION(qdss_stm31),
+ FUNCTION(qdss_stm4),
+ FUNCTION(qdss_stm5),
+ FUNCTION(qdss_stm6),
+ FUNCTION(qdss_stm7),
+ FUNCTION(qdss_stm8),
+ FUNCTION(qdss_stm9),
+ FUNCTION(qdss_traceclk_a),
+ FUNCTION(qdss_traceclk_b),
+ FUNCTION(qdss_tracectl_a),
+ FUNCTION(qdss_tracectl_b),
+ FUNCTION(qdss_tracedata_11),
+ FUNCTION(qdss_tracedata_12),
+ FUNCTION(qdss_tracedata_a),
+ FUNCTION(qdss_tracedata_b),
+ FUNCTION(qspi0),
+ FUNCTION(qspi1),
+ FUNCTION(qspi2),
+ FUNCTION(qspi3),
+ FUNCTION(qspi_clk),
+ FUNCTION(qspi_cs),
+ FUNCTION(qua_mi2s),
+ FUNCTION(sd_card),
+ FUNCTION(sd_write),
+ FUNCTION(sdc40),
+ FUNCTION(sdc41),
+ FUNCTION(sdc42),
+ FUNCTION(sdc43),
+ FUNCTION(sdc4_clk),
+ FUNCTION(sdc4_cmd),
+ FUNCTION(sec_mi2s),
+ FUNCTION(spkr_i2s),
+ FUNCTION(ssbi1),
+ FUNCTION(ssbi2),
+ FUNCTION(ssc_irq),
+ FUNCTION(ter_mi2s),
+ FUNCTION(tsense_pwm1),
+ FUNCTION(tsense_pwm2),
+ FUNCTION(tsif1_clk),
+ FUNCTION(tsif1_data),
+ FUNCTION(tsif1_en),
+ FUNCTION(tsif1_error),
+ FUNCTION(tsif1_sync),
+ FUNCTION(tsif2_clk),
+ FUNCTION(tsif2_data),
+ FUNCTION(tsif2_en),
+ FUNCTION(tsif2_error),
+ FUNCTION(tsif2_sync),
+ FUNCTION(uim1),
+ FUNCTION(uim2),
+ FUNCTION(uim3),
+ FUNCTION(uim4),
+ FUNCTION(uim_batt),
+ FUNCTION(vfr_1),
+};
+
+static const struct msm_pingroup msm8996_groups[] = {
+ PINGROUP(0, blsp_spi1, blsp_uart1, blsp_uim1, NA, NA, NA, NA, NA, NA),
+ PINGROUP(1, blsp_spi1, blsp_uart1, blsp_uim1, NA, NA, NA, NA, NA, NA),
+ PINGROUP(2, blsp_spi1, blsp_uart1, blsp_i2c1, NA, NA, NA, NA, NA, NA),
+ PINGROUP(3, blsp_spi1, blsp_uart1, blsp_i2c1, NA, atest_tsens,
+ bimc_dte1, NA, NA, NA),
+ PINGROUP(4, blsp_spi8, blsp_uart8, blsp_uim8, NA, qdss_cti_trig_out_b,
+ dac_calib0, bimc_dte0, NA, NA),
+ PINGROUP(5, blsp_spi8, blsp_uart8, blsp_uim8, NA, qdss_cti_trig_in_b,
+ dac_calib1, bimc_dte1, NA, NA),
+ PINGROUP(6, blsp_spi8, blsp_uart8, blsp_i2c8, NA, dac_calib2,
+ bimc_dte0, NA, NA, NA),
+ PINGROUP(7, blsp_spi8, blsp_uart8, blsp_i2c8, NA, atest_tsens2,
+ atest_usb1, NA, NA, NA),
+ PINGROUP(8, blsp_spi10, blsp_uart10, blsp_uim10, NA, atest_bbrx1,
+ atest_usb13, NA, NA, NA),
+ PINGROUP(9, blsp_spi10, blsp_uart10, blsp_uim10, atest_bbrx0,
+ atest_usb12, NA, NA, NA, NA),
+ PINGROUP(10, mdp_vsync, blsp_spi10, blsp_uart10, blsp_i2c10,
+ atest_gpsadc1, atest_usb11, NA, NA, NA),
+ PINGROUP(11, mdp_vsync, blsp_spi10, blsp_uart10, blsp_i2c10,
+ atest_gpsadc0, atest_usb10, NA, NA, NA),
+ PINGROUP(12, mdp_vsync, m_voc, dac_gpio, atest_char, NA, NA, NA, NA,
+ NA),
+ PINGROUP(13, cam_mclk, pll_bypassnl, qdss_stm7, qdss_tracedata_b, NA,
+ NA, NA, NA, NA),
+ PINGROUP(14, cam_mclk, pll_reset, qdss_stm6, qdss_tracedata_b, NA, NA,
+ NA, NA, NA),
+ PINGROUP(15, cam_mclk, qdss_stm5, qdss_tracedata_b, NA, NA, NA, NA, NA,
+ NA),
+ PINGROUP(16, cam_mclk, qdss_stm4, qdss_tracedata_b, NA, atest_usb2, NA,
+ NA, NA, NA),
+ PINGROUP(17, cci_i2c, qdss_stm3, qdss_tracedata_b, dac_calib3,
+ atest_usb23, atest_char3, NA, NA, NA),
+ PINGROUP(18, cci_i2c, qdss_stm2, qdss_tracedata_b, dac_calib4,
+ atest_usb22, atest_char2, NA, NA, NA),
+ PINGROUP(19, cci_i2c, qdss_stm1, qdss_tracedata_b, dac_calib5,
+ atest_usb21, atest_char1, NA, NA, NA),
+ PINGROUP(20, cci_i2c, dbg_out, qdss_stm0, dac_calib6, atest_usb20,
+ atest_char0, NA, NA, NA),
+ PINGROUP(21, cci_timer0, qdss_stm13, qdss_tracedata_b, dac_calib7, NA,
+ NA, NA, NA, NA),
+ PINGROUP(22, cci_timer1, qdss_stm12, qdss_tracedata_b, dac_calib8, NA,
+ NA, NA, NA, NA),
+ PINGROUP(23, cci_timer2, blsp1_spi, qdss_stm11, qdss_tracedata_b,
+ dac_calib9, NA, NA, NA, NA),
+ PINGROUP(24, cci_timer3, cci_async, blsp1_spi, qdss_stm10,
+ qdss_cti_trig_in_a, dac_calib10, NA, NA, NA),
+ PINGROUP(25, cci_timer4, cci_async, blsp_spi6, blsp_uart6, blsp_uim6,
+ blsp2_spi, qdss_stm9, qdss_cti_trig_out_a, dac_calib11),
+ PINGROUP(26, cci_async, blsp_spi6, blsp_uart6, blsp_uim6, qdss_stm8,
+ qdss_tracedata_b, dac_calib12, NA, NA),
+ PINGROUP(27, blsp_spi6, blsp_uart6, blsp_i2c6, blsp1_spi,
+ qdss_tracectl_a, dac_calib13, NA, NA, NA),
+ PINGROUP(28, blsp_spi6, blsp_uart6, blsp_i2c6, blsp1_spi,
+ qdss_traceclk_a, dac_calib14, NA, NA, NA),
+ PINGROUP(29, blsp2_spi, NA, qdss_tracedata_b, dac_calib15, NA, NA, NA,
+ NA, NA),
+ PINGROUP(30, hdmi_rcv, blsp2_spi, dac_calib16, NA, NA, NA, NA, NA, NA),
+ PINGROUP(31, hdmi_cec, pwr_modem, dac_calib17, NA, NA, NA, NA, NA, NA),
+ PINGROUP(32, hdmi_ddc, pwr_nav, NA, dac_calib18, NA, NA, NA, NA, NA),
+ PINGROUP(33, hdmi_ddc, pwr_crypto, NA, dac_calib19, NA, NA, NA, NA, NA),
+ PINGROUP(34, hdmi_hot, NA, dac_calib20, NA, NA, NA, NA, NA, NA),
+ PINGROUP(35, pci_e0, NA, dac_calib21, NA, NA, NA, NA, NA, NA),
+ PINGROUP(36, pci_e0, NA, dac_calib22, NA, NA, NA, NA, NA, NA),
+ PINGROUP(37, NA, dac_calib23, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(38, NA, dac_calib24, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(39, tsif1_sync, NA, dac_calib25, NA, NA, NA, NA, NA, NA),
+ PINGROUP(40, sd_write, tsif1_error, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(41, blsp_spi2, blsp_uart2, blsp_uim2, NA, qdss_cti,
+ dac_calib0, NA, NA, NA),
+ PINGROUP(42, blsp_spi2, blsp_uart2, blsp_uim2, NA, qdss_cti,
+ dac_calib1, NA, NA, NA),
+ PINGROUP(43, blsp_spi2, blsp_uart2, blsp_i2c2, NA, dac_calib2, NA, NA,
+ NA, NA),
+ PINGROUP(44, blsp_spi2, blsp_uart2, blsp_i2c2, NA, dac_calib3, NA, NA,
+ NA, NA),
+ PINGROUP(45, blsp_spi3, blsp_uart3, blsp_uim3, NA, dac_calib4, NA, NA,
+ NA, NA),
+ PINGROUP(46, blsp_spi3, blsp_uart3, blsp_uim3, NA, dac_calib5, NA, NA,
+ NA, NA),
+ PINGROUP(47, blsp_spi3, blsp_uart3, blsp_i2c3, dac_calib6, NA, NA, NA,
+ NA, NA),
+ PINGROUP(48, blsp_spi3, blsp_uart3, blsp_i2c3, dac_calib7, NA, NA, NA,
+ NA, NA),
+ PINGROUP(49, uim3, blsp_spi9, blsp_uart9, blsp_uim9, blsp10_spi,
+ dac_calib8, NA, NA, NA),
+ PINGROUP(50, uim3, blsp_spi9, blsp_uart9, blsp_uim9, blsp10_spi,
+ dac_calib9, NA, NA, NA),
+ PINGROUP(51, uim3, blsp_spi9, blsp_uart9, blsp_i2c9, blsp10_spi,
+ dac_calib10, NA, NA, NA),
+ PINGROUP(52, uim3, blsp_spi9, blsp_uart9, blsp_i2c9,
+ blsp10_spi, dac_calib11, NA, NA, NA),
+ PINGROUP(53, blsp_spi7, blsp_uart7, blsp_uim7, NA, qdss_tracedata_a,
+ dac_calib12, NA, NA, NA),
+ PINGROUP(54, blsp_spi7, blsp_uart7, blsp_uim7, NA, NA,
+ qdss_tracedata_a, dac_calib13, NA, NA),
+ PINGROUP(55, blsp_spi7, blsp_uart7, blsp_i2c7, NA, dac_calib14, NA, NA,
+ NA, NA),
+ PINGROUP(56, blsp_spi7, blsp_uart7, blsp_i2c7, NA, dac_calib15, NA, NA,
+ NA, NA),
+ PINGROUP(57, qua_mi2s, gcc_gp1_clk_a, NA, qdss_tracedata_b,
+ dac_calib16, NA, NA, NA, NA),
+ PINGROUP(58, qua_mi2s, uim4, blsp_spi11, blsp_uart11, blsp_uim11,
+ gcc_gp2_clk_a, NA, qdss_tracedata_b, dac_calib17),
+ PINGROUP(59, qua_mi2s, uim4, blsp_spi11, blsp_uart11, blsp_uim11,
+ gcc_gp3_clk_a, NA, dac_calib18, NA),
+ PINGROUP(60, qua_mi2s, uim4, blsp_spi11, blsp_uart11, blsp_i2c11,
+ cri_trng0, NA, dac_calib19, NA),
+ PINGROUP(61, qua_mi2s, uim4, blsp_spi11, blsp_uart11,
+ blsp_i2c11, cri_trng1, NA, dac_calib20, NA),
+ PINGROUP(62, qua_mi2s, cri_trng, NA, dac_calib21, NA, NA, NA, NA, NA),
+ PINGROUP(63, qua_mi2s, NA, NA, qdss_stm18, qdss_tracedata_a,
+ dac_calib22, NA, NA, NA),
+ PINGROUP(64, pri_mi2s, NA, qdss_stm17, qdss_tracedata_a, dac_calib23,
+ NA, NA, NA, NA),
+ PINGROUP(65, pri_mi2s, blsp_spi4, blsp_uart4, blsp_uim4, NA,
+ qdss_stm16, qdss_tracedata_a, dac_calib24, NA),
+ PINGROUP(66, pri_mi2s, blsp_spi4, blsp_uart4, blsp_uim4, NA,
+ qdss_stm15, qdss_tracedata_a, dac_calib25, NA),
+ PINGROUP(67, pri_mi2s, blsp_spi4, blsp_uart4, blsp_i2c4, qdss_stm14,
+ qdss_tracedata_a, dac_calib26, NA, NA),
+ PINGROUP(68, pri_mi2s, blsp_spi4, blsp_uart4, blsp_i2c4, NA, NA, NA,
+ NA, NA),
+ PINGROUP(69, spkr_i2s, audio_ref, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(70, lpass_slimbus, spkr_i2s, isense_dbg, NA, NA, NA, NA, NA,
+ NA),
+ PINGROUP(71, lpass_slimbus, spkr_i2s, tsense_pwm1, tsense_pwm2, NA, NA,
+ NA, NA, NA),
+ PINGROUP(72, lpass_slimbus, spkr_i2s, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(73, btfm_slimbus, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(74, btfm_slimbus, ter_mi2s, qdss_stm22, qdss_tracedata_a, NA,
+ NA, NA, NA, NA),
+ PINGROUP(75, ter_mi2s, qdss_stm21, qdss_tracedata_a, NA, NA, NA, NA,
+ NA, NA),
+ PINGROUP(76, ter_mi2s, qdss_stm20, qdss_tracedata_a, NA, NA, NA, NA,
+ NA, NA),
+ PINGROUP(77, ter_mi2s, qdss_stm19, qdss_tracedata_a, NA, NA, NA, NA,
+ NA, NA),
+ PINGROUP(78, ter_mi2s, gcc_gp1_clk_b, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(79, sec_mi2s, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(80, sec_mi2s, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(81, sec_mi2s, blsp_spi5, blsp_uart5, blsp_uim5, gcc_gp2_clk_b,
+ NA, NA, NA, NA),
+ PINGROUP(82, sec_mi2s, blsp_spi5, blsp_uart5, blsp_uim5, gcc_gp3_clk_b,
+ NA, NA, NA, NA),
+ PINGROUP(83, sec_mi2s, blsp_spi5, blsp_uart5, blsp_i2c5, NA, NA, NA,
+ NA, NA),
+ PINGROUP(84, blsp_spi5, blsp_uart5, blsp_i2c5, NA, NA, NA, NA, NA, NA),
+ PINGROUP(85, blsp_spi12, blsp_uart12, blsp_uim12, NA, qdss_stm25,
+ qdss_tracedata_a, NA, NA, NA),
+ PINGROUP(86, blsp_spi12, blsp_uart12, blsp_uim12, NA, NA, qdss_stm31,
+ qdss_tracedata_a, NA, NA),
+ PINGROUP(87, blsp_spi12, blsp_uart12, blsp_i2c12, NA, qdss_stm30,
+ qdss_tracedata_a, NA, NA, NA),
+ PINGROUP(88, blsp_spi12, blsp_uart12, blsp_i2c12, blsp10_spi, NA,
+ qdss_stm29, NA, NA, NA),
+ PINGROUP(89, tsif1_clk, qdss_stm28, qdss_tracedata_a, NA, NA, NA, NA,
+ NA, NA),
+ PINGROUP(90, tsif1_en, blsp1_spi, qdss_tracedata_a, NA, NA, NA, NA, NA,
+ NA),
+ PINGROUP(91, tsif1_data, sdc4_cmd, qdss_stm27, qdss_traceclk_b, NA, NA,
+ NA, NA, NA),
+ PINGROUP(92, tsif2_error, sdc43, vfr_1, qdss_stm26, qdss_tracedata_b,
+ NA, NA, NA, NA),
+ PINGROUP(93, tsif2_clk, sdc4_clk, NA, qdss_stm24, qdss_tracedata_b, NA,
+ NA, NA, NA),
+ PINGROUP(94, tsif2_en, sdc42, NA, qdss_stm23, qdss_tracectl_b, NA, NA,
+ NA, NA),
+ PINGROUP(95, tsif2_data, sdc41, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(96, tsif2_sync, sdc40, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(97, NA, NA, mdp_vsync_p_b, ldo_en, NA, NA, NA, NA, NA),
+ PINGROUP(98, NA, NA, mdp_vsync_s_b, ldo_update, NA, NA, NA, NA, NA),
+ PINGROUP(99, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(100, NA, NA, blsp11_uart_tx_b, qdss_cti, NA, NA, NA, NA, NA),
+ PINGROUP(101, NA, blsp11_uart_rx_b, qdss_cti, NA, NA, NA, NA, NA, NA),
+ PINGROUP(102, NA, blsp11_i2c_sda_b, prng_rosc, NA, NA, NA, NA, NA, NA),
+ PINGROUP(103, NA, blsp11_i2c_scl_b, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(104, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(105, uim2, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(106, uim2, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(107, uim2, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(108, uim2, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(109, uim1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(110, uim1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(111, uim1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(112, uim1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(113, uim_batt, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(114, NA, pci_e2, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(115, NA, pci_e2, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(116, NA, pa_indicator, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(117, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(118, adsp_ext, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(119, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(120, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(121, ddr_bist, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(122, ddr_bist, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(123, ddr_bist, qdss_tracedata_11, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(124, ddr_bist, qdss_tracedata_12, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(125, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(126, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(127, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(128, NA, modem_tsync, nav_dr, nav_pps, NA, NA, NA, NA, NA),
+ PINGROUP(129, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(130, pci_e1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(131, pci_e1, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(132, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(133, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(134, gsm_tx, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(135, gsm_tx, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(136, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(137, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(138, NA, qspi_cs, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(139, NA, ssbi2, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(140, NA, ssbi1, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(141, NA, qspi_cs, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(142, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(143, NA, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(144, mss_lte, NA, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(145, mss_lte, qspi_clk, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(146, NA, qspi0, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(147, NA, qspi1, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(148, NA, qspi2, NA, NA, NA, NA, NA, NA, NA),
+ PINGROUP(149, NA, qspi3, NA, NA, NA, NA, NA, NA, NA),
+ SDC_QDSD_PINGROUP(sdc1_clk, 0x12c000, 13, 6),
+ SDC_QDSD_PINGROUP(sdc1_cmd, 0x12c000, 11, 3),
+ SDC_QDSD_PINGROUP(sdc1_data, 0x12c000, 9, 0),
+ SDC_QDSD_PINGROUP(sdc2_clk, 0x12d000, 14, 6),
+ SDC_QDSD_PINGROUP(sdc2_cmd, 0x12d000, 11, 3),
+ SDC_QDSD_PINGROUP(sdc2_data, 0x12d000, 9, 0),
+ SDC_QDSD_PINGROUP(sdc1_rclk, 0x12c000, 15, 0),
+};
+
+static const struct msm_pinctrl_soc_data msm8996_pinctrl = {
+ .pins = msm8996_pins,
+ .npins = ARRAY_SIZE(msm8996_pins),
+ .functions = msm8996_functions,
+ .nfunctions = ARRAY_SIZE(msm8996_functions),
+ .groups = msm8996_groups,
+ .ngroups = ARRAY_SIZE(msm8996_groups),
+ .ngpios = 150,
+};
+
+static int msm8996_pinctrl_probe(struct platform_device *pdev)
+{
+ return msm_pinctrl_probe(pdev, &msm8996_pinctrl);
+}
+
+static const struct of_device_id msm8996_pinctrl_of_match[] = {
+ { .compatible = "qcom,msm8996-pinctrl", },
+ { }
+};
+
+static struct platform_driver msm8996_pinctrl_driver = {
+ .driver = {
+ .name = "msm8996-pinctrl",
+ .of_match_table = msm8996_pinctrl_of_match,
+ },
+ .probe = msm8996_pinctrl_probe,
+ .remove = msm_pinctrl_remove,
+};
+
+static int __init msm8996_pinctrl_init(void)
+{
+ return platform_driver_register(&msm8996_pinctrl_driver);
+}
+arch_initcall(msm8996_pinctrl_init);
+
+static void __exit msm8996_pinctrl_exit(void)
+{
+ platform_driver_unregister(&msm8996_pinctrl_driver);
+}
+module_exit(msm8996_pinctrl_exit);
+
+MODULE_DESCRIPTION("Qualcomm msm8996 pinctrl driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(of, msm8996_pinctrl_of_match);
diff --git a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
index e9ff3bc150bb..f448534edf46 100644
--- a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
+++ b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c
@@ -32,6 +32,9 @@
static struct msm_pinctrl_soc_data qdf2xxx_pinctrl;
+/* A reasonable limit to the number of GPIOS */
+#define MAX_GPIOS 256
+
static int qdf2xxx_pinctrl_probe(struct platform_device *pdev)
{
struct pinctrl_pin_desc *pins;
@@ -42,11 +45,13 @@ static int qdf2xxx_pinctrl_probe(struct platform_device *pdev)
/* Query the number of GPIOs from ACPI */
ret = device_property_read_u32(&pdev->dev, "num-gpios", &num_gpios);
- if (ret < 0)
+ if (ret < 0) {
+ dev_warn(&pdev->dev, "missing num-gpios property\n");
return ret;
+ }
- if (!num_gpios) {
- dev_warn(&pdev->dev, "missing num-gpios property\n");
+ if (!num_gpios || num_gpios > MAX_GPIOS) {
+ dev_warn(&pdev->dev, "invalid num-gpios property\n");
return -ENODEV;
}
@@ -55,6 +60,9 @@ static int qdf2xxx_pinctrl_probe(struct platform_device *pdev)
groups = devm_kcalloc(&pdev->dev, num_gpios,
sizeof(struct msm_pingroup), GFP_KERNEL);
+ if (!pins || !groups)
+ return -ENOMEM;
+
for (i = 0; i < num_gpios; i++) {
pins[i].number = i;
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
index 6c42ca14d2fd..77f6a5cb1008 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c
@@ -14,6 +14,7 @@
#include <linux/gpio.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinmux.h>
@@ -693,18 +694,19 @@ static int pmic_gpio_probe(struct platform_device *pdev)
struct pmic_gpio_pad *pad, *pads;
struct pmic_gpio_state *state;
int ret, npins, i;
- u32 res[2];
+ u32 reg;
- ret = of_property_read_u32_array(dev->of_node, "reg", res, 2);
+ ret = of_property_read_u32(dev->of_node, "reg", &reg);
if (ret < 0) {
- dev_err(dev, "missing base address and/or range");
+ dev_err(dev, "missing base address");
return ret;
}
- npins = res[1] / PMIC_GPIO_ADDRESS_RANGE;
-
+ npins = platform_irq_count(pdev);
if (!npins)
return -EINVAL;
+ if (npins < 0)
+ return npins;
BUG_ON(npins > ARRAY_SIZE(pmic_gpio_groups));
@@ -752,7 +754,7 @@ static int pmic_gpio_probe(struct platform_device *pdev)
if (pad->irq < 0)
return pad->irq;
- pad->base = res[0] + i * PMIC_GPIO_ADDRESS_RANGE;
+ pad->base = reg + i * PMIC_GPIO_ADDRESS_RANGE;
ret = pmic_gpio_populate(state, pad);
if (ret < 0)
@@ -804,6 +806,7 @@ static int pmic_gpio_remove(struct platform_device *pdev)
static const struct of_device_id pmic_gpio_of_match[] = {
{ .compatible = "qcom,pm8916-gpio" }, /* 4 GPIO's */
{ .compatible = "qcom,pm8941-gpio" }, /* 36 GPIO's */
+ { .compatible = "qcom,pm8994-gpio" }, /* 22 GPIO's */
{ .compatible = "qcom,pma8084-gpio" }, /* 22 GPIO's */
{ },
};
diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
index 9ce0e30e33e8..2df4f29175ae 100644
--- a/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-spmi-mpp.c
@@ -14,6 +14,7 @@
#include <linux/gpio.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinmux.h>
@@ -795,17 +796,19 @@ static int pmic_mpp_probe(struct platform_device *pdev)
struct pmic_mpp_pad *pad, *pads;
struct pmic_mpp_state *state;
int ret, npins, i;
- u32 res[2];
+ u32 reg;
- ret = of_property_read_u32_array(dev->of_node, "reg", res, 2);
+ ret = of_property_read_u32(dev->of_node, "reg", &reg);
if (ret < 0) {
- dev_err(dev, "missing base address and/or range");
+ dev_err(dev, "missing base address");
return ret;
}
- npins = res[1] / PMIC_MPP_ADDRESS_RANGE;
+ npins = platform_irq_count(pdev);
if (!npins)
return -EINVAL;
+ if (npins < 0)
+ return npins;
BUG_ON(npins > ARRAY_SIZE(pmic_mpp_groups));
@@ -854,7 +857,7 @@ static int pmic_mpp_probe(struct platform_device *pdev)
if (pad->irq < 0)
return pad->irq;
- pad->base = res[0] + i * PMIC_MPP_ADDRESS_RANGE;
+ pad->base = reg + i * PMIC_MPP_ADDRESS_RANGE;
ret = pmic_mpp_populate(state, pad);
if (ret < 0)
@@ -907,6 +910,7 @@ static const struct of_device_id pmic_mpp_of_match[] = {
{ .compatible = "qcom,pm8841-mpp" }, /* 4 MPP's */
{ .compatible = "qcom,pm8916-mpp" }, /* 4 MPP's */
{ .compatible = "qcom,pm8941-mpp" }, /* 8 MPP's */
+ { .compatible = "qcom,pm8994-mpp" }, /* 8 MPP's */
{ .compatible = "qcom,pma8084-mpp" }, /* 8 MPP's */
{ },
};
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index 19a3c3bc2f1f..e51176ec83d2 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -23,6 +23,7 @@
#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <dt-bindings/pinctrl/qcom,pmic-gpio.h>
@@ -650,11 +651,12 @@ static int pm8xxx_pin_populate(struct pm8xxx_gpio *pctrl,
}
static const struct of_device_id pm8xxx_gpio_of_match[] = {
- { .compatible = "qcom,pm8018-gpio", .data = (void *)6 },
- { .compatible = "qcom,pm8038-gpio", .data = (void *)12 },
- { .compatible = "qcom,pm8058-gpio", .data = (void *)40 },
- { .compatible = "qcom,pm8917-gpio", .data = (void *)38 },
- { .compatible = "qcom,pm8921-gpio", .data = (void *)44 },
+ { .compatible = "qcom,pm8018-gpio" },
+ { .compatible = "qcom,pm8038-gpio" },
+ { .compatible = "qcom,pm8058-gpio" },
+ { .compatible = "qcom,pm8917-gpio" },
+ { .compatible = "qcom,pm8921-gpio" },
+ { .compatible = "qcom,ssbi-gpio" },
{ },
};
MODULE_DEVICE_TABLE(of, pm8xxx_gpio_of_match);
@@ -665,14 +667,19 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev)
struct pinctrl_pin_desc *pins;
struct pm8xxx_gpio *pctrl;
int ret;
- int i;
+ int i, npins;
pctrl = devm_kzalloc(&pdev->dev, sizeof(*pctrl), GFP_KERNEL);
if (!pctrl)
return -ENOMEM;
pctrl->dev = &pdev->dev;
- pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
+ npins = platform_irq_count(pdev);
+ if (!npins)
+ return -EINVAL;
+ if (npins < 0)
+ return npins;
+ pctrl->npins = npins;
pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
if (!pctrl->regmap) {
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index b868ef1766a0..e9f01de51e18 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -23,6 +23,7 @@
#include <linux/gpio.h>
#include <linux/interrupt.h>
#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <dt-bindings/pinctrl/qcom,pmic-mpp.h>
@@ -741,11 +742,12 @@ static int pm8xxx_pin_populate(struct pm8xxx_mpp *pctrl,
}
static const struct of_device_id pm8xxx_mpp_of_match[] = {
- { .compatible = "qcom,pm8018-mpp", .data = (void *)6 },
- { .compatible = "qcom,pm8038-mpp", .data = (void *)6 },
- { .compatible = "qcom,pm8917-mpp", .data = (void *)10 },
- { .compatible = "qcom,pm8821-mpp", .data = (void *)4 },
- { .compatible = "qcom,pm8921-mpp", .data = (void *)12 },
+ { .compatible = "qcom,pm8018-mpp" },
+ { .compatible = "qcom,pm8038-mpp" },
+ { .compatible = "qcom,pm8917-mpp" },
+ { .compatible = "qcom,pm8821-mpp" },
+ { .compatible = "qcom,pm8921-mpp" },
+ { .compatible = "qcom,ssbi-mpp" },
{ },
};
MODULE_DEVICE_TABLE(of, pm8xxx_mpp_of_match);
@@ -756,14 +758,19 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev)
struct pinctrl_pin_desc *pins;
struct pm8xxx_mpp *pctrl;
int ret;
- int i;
+ int i, npins;
pctrl = devm_kzalloc(&pdev->dev, sizeof(*pctrl), GFP_KERNEL);
if (!pctrl)
return -ENOMEM;
pctrl->dev = &pdev->dev;
- pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
+ npins = platform_irq_count(pdev);
+ if (!npins)
+ return -EINVAL;
+ if (npins < 0)
+ return npins;
+ pctrl->npins = npins;
pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
if (!pctrl->regmap) {
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 71ccf6a90b22..16e2293cc2bc 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -1150,6 +1150,109 @@ const struct samsung_pin_ctrl exynos5260_pin_ctrl[] __initconst = {
},
};
+/* pin banks of exynos5410 pin-controller 0 */
+static const struct samsung_pin_bank_data exynos5410_pin_banks0[] __initconst = {
+ EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpa0", 0x00),
+ EXYNOS_PIN_BANK_EINTG(6, 0x020, "gpa1", 0x04),
+ EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpa2", 0x08),
+ EXYNOS_PIN_BANK_EINTG(5, 0x060, "gpb0", 0x0c),
+ EXYNOS_PIN_BANK_EINTG(5, 0x080, "gpb1", 0x10),
+ EXYNOS_PIN_BANK_EINTG(4, 0x0A0, "gpb2", 0x14),
+ EXYNOS_PIN_BANK_EINTG(4, 0x0C0, "gpb3", 0x18),
+ EXYNOS_PIN_BANK_EINTG(7, 0x0E0, "gpc0", 0x1c),
+ EXYNOS_PIN_BANK_EINTG(4, 0x100, "gpc3", 0x20),
+ EXYNOS_PIN_BANK_EINTG(7, 0x120, "gpc1", 0x24),
+ EXYNOS_PIN_BANK_EINTG(7, 0x140, "gpc2", 0x28),
+ EXYNOS_PIN_BANK_EINTN(2, 0x160, "gpm5"),
+ EXYNOS_PIN_BANK_EINTG(8, 0x180, "gpd1", 0x2c),
+ EXYNOS_PIN_BANK_EINTG(8, 0x1A0, "gpe0", 0x30),
+ EXYNOS_PIN_BANK_EINTG(2, 0x1C0, "gpe1", 0x34),
+ EXYNOS_PIN_BANK_EINTG(6, 0x1E0, "gpf0", 0x38),
+ EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpf1", 0x3c),
+ EXYNOS_PIN_BANK_EINTG(8, 0x220, "gpg0", 0x40),
+ EXYNOS_PIN_BANK_EINTG(8, 0x240, "gpg1", 0x44),
+ EXYNOS_PIN_BANK_EINTG(2, 0x260, "gpg2", 0x48),
+ EXYNOS_PIN_BANK_EINTG(4, 0x280, "gph0", 0x4c),
+ EXYNOS_PIN_BANK_EINTG(8, 0x2A0, "gph1", 0x50),
+ EXYNOS_PIN_BANK_EINTN(8, 0x2C0, "gpm7"),
+ EXYNOS_PIN_BANK_EINTN(6, 0x2E0, "gpy0"),
+ EXYNOS_PIN_BANK_EINTN(4, 0x300, "gpy1"),
+ EXYNOS_PIN_BANK_EINTN(6, 0x320, "gpy2"),
+ EXYNOS_PIN_BANK_EINTN(8, 0x340, "gpy3"),
+ EXYNOS_PIN_BANK_EINTN(8, 0x360, "gpy4"),
+ EXYNOS_PIN_BANK_EINTN(8, 0x380, "gpy5"),
+ EXYNOS_PIN_BANK_EINTN(8, 0x3A0, "gpy6"),
+ EXYNOS_PIN_BANK_EINTN(8, 0x3C0, "gpy7"),
+ EXYNOS_PIN_BANK_EINTW(8, 0xC00, "gpx0", 0x00),
+ EXYNOS_PIN_BANK_EINTW(8, 0xC20, "gpx1", 0x04),
+ EXYNOS_PIN_BANK_EINTW(8, 0xC40, "gpx2", 0x08),
+ EXYNOS_PIN_BANK_EINTW(8, 0xC60, "gpx3", 0x0c),
+};
+
+/* pin banks of exynos5410 pin-controller 1 */
+static const struct samsung_pin_bank_data exynos5410_pin_banks1[] __initconst = {
+ EXYNOS_PIN_BANK_EINTG(5, 0x000, "gpj0", 0x00),
+ EXYNOS_PIN_BANK_EINTG(8, 0x020, "gpj1", 0x04),
+ EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpj2", 0x08),
+ EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpj3", 0x0c),
+ EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpj4", 0x10),
+ EXYNOS_PIN_BANK_EINTG(8, 0x0A0, "gpk0", 0x14),
+ EXYNOS_PIN_BANK_EINTG(8, 0x0C0, "gpk1", 0x18),
+ EXYNOS_PIN_BANK_EINTG(8, 0x0E0, "gpk2", 0x1c),
+ EXYNOS_PIN_BANK_EINTG(7, 0x100, "gpk3", 0x20),
+};
+
+/* pin banks of exynos5410 pin-controller 2 */
+static const struct samsung_pin_bank_data exynos5410_pin_banks2[] __initconst = {
+ EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpv0", 0x00),
+ EXYNOS_PIN_BANK_EINTG(8, 0x020, "gpv1", 0x04),
+ EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpv2", 0x08),
+ EXYNOS_PIN_BANK_EINTG(8, 0x080, "gpv3", 0x0c),
+ EXYNOS_PIN_BANK_EINTG(2, 0x0C0, "gpv4", 0x10),
+};
+
+/* pin banks of exynos5410 pin-controller 3 */
+static const struct samsung_pin_bank_data exynos5410_pin_banks3[] __initconst = {
+ EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz", 0x00),
+};
+
+/*
+ * Samsung pinctrl driver data for Exynos5410 SoC. Exynos5410 SoC includes
+ * four gpio/pin-mux/pinconfig controllers.
+ */
+const struct samsung_pin_ctrl exynos5410_pin_ctrl[] __initconst = {
+ {
+ /* pin-controller instance 0 data */
+ .pin_banks = exynos5410_pin_banks0,
+ .nr_banks = ARRAY_SIZE(exynos5410_pin_banks0),
+ .eint_gpio_init = exynos_eint_gpio_init,
+ .eint_wkup_init = exynos_eint_wkup_init,
+ .suspend = exynos_pinctrl_suspend,
+ .resume = exynos_pinctrl_resume,
+ }, {
+ /* pin-controller instance 1 data */
+ .pin_banks = exynos5410_pin_banks1,
+ .nr_banks = ARRAY_SIZE(exynos5410_pin_banks1),
+ .eint_gpio_init = exynos_eint_gpio_init,
+ .suspend = exynos_pinctrl_suspend,
+ .resume = exynos_pinctrl_resume,
+ }, {
+ /* pin-controller instance 2 data */
+ .pin_banks = exynos5410_pin_banks2,
+ .nr_banks = ARRAY_SIZE(exynos5410_pin_banks2),
+ .eint_gpio_init = exynos_eint_gpio_init,
+ .suspend = exynos_pinctrl_suspend,
+ .resume = exynos_pinctrl_resume,
+ }, {
+ /* pin-controller instance 3 data */
+ .pin_banks = exynos5410_pin_banks3,
+ .nr_banks = ARRAY_SIZE(exynos5410_pin_banks3),
+ .eint_gpio_init = exynos_eint_gpio_init,
+ .suspend = exynos_pinctrl_suspend,
+ .resume = exynos_pinctrl_resume,
+ },
+};
+
/* pin banks of exynos5420 pin-controller 0 */
static const struct samsung_pin_bank_data exynos5420_pin_banks0[] __initconst = {
EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpy7", 0x00),
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c
index 3f622ccd8eab..48294e7449a4 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.c
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.c
@@ -1222,6 +1222,8 @@ static const struct of_device_id samsung_pinctrl_dt_match[] = {
.data = (void *)exynos5250_pin_ctrl },
{ .compatible = "samsung,exynos5260-pinctrl",
.data = (void *)exynos5260_pin_ctrl },
+ { .compatible = "samsung,exynos5410-pinctrl",
+ .data = (void *)exynos5410_pin_ctrl },
{ .compatible = "samsung,exynos5420-pinctrl",
.data = (void *)exynos5420_pin_ctrl },
{ .compatible = "samsung,exynos5433-pinctrl",
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h
index c1239ff6157d..cd31bfaf62cb 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.h
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.h
@@ -270,6 +270,7 @@ extern const struct samsung_pin_ctrl exynos4x12_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos4415_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos5250_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos5260_pin_ctrl[];
+extern const struct samsung_pin_ctrl exynos5410_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos5420_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos5433_pin_ctrl[];
extern const struct samsung_pin_ctrl exynos7_pin_ctrl[];
diff --git a/drivers/pinctrl/sh-pfc/pfc-emev2.c b/drivers/pinctrl/sh-pfc/pfc-emev2.c
index 02118ab336fc..1cbbe04d7df6 100644
--- a/drivers/pinctrl/sh-pfc/pfc-emev2.c
+++ b/drivers/pinctrl/sh-pfc/pfc-emev2.c
@@ -258,18 +258,18 @@ static const u16 pinmux_data[] = {
/* GPSR0 */
/* V9 */
- PINMUX_DATA(JT_SEL_MARK, FN_JT_SEL),
+ PINMUX_SINGLE(JT_SEL),
/* U9 */
- PINMUX_DATA(ERR_RST_REQB_MARK, FN_ERR_RST_REQB),
+ PINMUX_SINGLE(ERR_RST_REQB),
/* V8 */
- PINMUX_DATA(REF_CLKO_MARK, FN_REF_CLKO),
+ PINMUX_SINGLE(REF_CLKO),
/* U8 */
- PINMUX_DATA(EXT_CLKI_MARK, FN_EXT_CLKI),
+ PINMUX_SINGLE(EXT_CLKI),
/* B22*/
PINMUX_IPSR_NOFN(LCD3_1_0_PORT18, LCD3_PXCLK, SEL_LCD3_1_0_00),
PINMUX_IPSR_NOFN(LCD3_1_0_PORT18, YUV3_CLK_O, SEL_LCD3_1_0_01),
/* C21 */
- PINMUX_DATA(LCD3_PXCLKB_MARK, FN_LCD3_PXCLKB),
+ PINMUX_SINGLE(LCD3_PXCLKB),
/* A21 */
PINMUX_IPSR_NOFN(LCD3_1_0_PORT20, LCD3_CLK_I, SEL_LCD3_1_0_00),
PINMUX_IPSR_NOFN(LCD3_1_0_PORT20, YUV3_CLK_I, SEL_LCD3_1_0_01),
@@ -285,17 +285,17 @@ static const u16 pinmux_data[] = {
/* GPSR1 */
/* A20 */
- PINMUX_DATA(LCD3_R0_MARK, FN_LCD3_R0),
+ PINMUX_SINGLE(LCD3_R0),
/* B20 */
- PINMUX_DATA(LCD3_R1_MARK, FN_LCD3_R1),
+ PINMUX_SINGLE(LCD3_R1),
/* A19 */
- PINMUX_DATA(LCD3_R2_MARK, FN_LCD3_R2),
+ PINMUX_SINGLE(LCD3_R2),
/* B19 */
- PINMUX_DATA(LCD3_R3_MARK, FN_LCD3_R3),
+ PINMUX_SINGLE(LCD3_R3),
/* C19 */
- PINMUX_DATA(LCD3_R4_MARK, FN_LCD3_R4),
+ PINMUX_SINGLE(LCD3_R4),
/* B18 */
- PINMUX_DATA(LCD3_R5_MARK, FN_LCD3_R5),
+ PINMUX_SINGLE(LCD3_R5),
/* C18 */
PINMUX_IPSR_NOFN(LCD3_9_8_PORT38, LCD3_R6, SEL_LCD3_9_8_00),
PINMUX_IPSR_NOFN(LCD3_9_8_PORT38, TP33_CLK, SEL_LCD3_9_8_10),
@@ -367,9 +367,9 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_NOFN(LCD3_11_10_PORT43, YUV3_D15, SEL_LCD3_11_10_01),
PINMUX_IPSR_NOFN(LCD3_11_10_PORT43, TP33_DATA15, SEL_LCD3_11_10_10),
/* AA9 */
- PINMUX_DATA(IIC0_SCL_MARK, FN_IIC0_SCL),
+ PINMUX_SINGLE(IIC0_SCL),
/* AA8 */
- PINMUX_DATA(IIC0_SDA_MARK, FN_IIC0_SDA),
+ PINMUX_SINGLE(IIC0_SDA),
/* Y9 */
PINMUX_IPSR_NOFN(IIC_1_0_PORT46, IIC1_SCL, SEL_IIC_1_0_00),
PINMUX_IPSR_NOFN(IIC_1_0_PORT46, UART3_RX, SEL_IIC_1_0_01),
@@ -377,51 +377,51 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_NOFN(IIC_1_0_PORT47, IIC1_SDA, SEL_IIC_1_0_00),
PINMUX_IPSR_NOFN(IIC_1_0_PORT47, UART3_TX, SEL_IIC_1_0_01),
/* AC19 */
- PINMUX_DATA(SD_CKI_MARK, FN_SD_CKI),
+ PINMUX_SINGLE(SD_CKI),
/* AB18 */
- PINMUX_DATA(SDI0_CKO_MARK, FN_SDI0_CKO),
+ PINMUX_SINGLE(SDI0_CKO),
/* AC18 */
- PINMUX_DATA(SDI0_CKI_MARK, FN_SDI0_CKI),
+ PINMUX_SINGLE(SDI0_CKI),
/* Y12 */
- PINMUX_DATA(SDI0_CMD_MARK, FN_SDI0_CMD),
+ PINMUX_SINGLE(SDI0_CMD),
/* AA13 */
- PINMUX_DATA(SDI0_DATA0_MARK, FN_SDI0_DATA0),
+ PINMUX_SINGLE(SDI0_DATA0),
/* Y13 */
- PINMUX_DATA(SDI0_DATA1_MARK, FN_SDI0_DATA1),
+ PINMUX_SINGLE(SDI0_DATA1),
/* AA14 */
- PINMUX_DATA(SDI0_DATA2_MARK, FN_SDI0_DATA2),
+ PINMUX_SINGLE(SDI0_DATA2),
/* Y14 */
- PINMUX_DATA(SDI0_DATA3_MARK, FN_SDI0_DATA3),
+ PINMUX_SINGLE(SDI0_DATA3),
/* AA15 */
- PINMUX_DATA(SDI0_DATA4_MARK, FN_SDI0_DATA4),
+ PINMUX_SINGLE(SDI0_DATA4),
/* Y15 */
- PINMUX_DATA(SDI0_DATA5_MARK, FN_SDI0_DATA5),
+ PINMUX_SINGLE(SDI0_DATA5),
/* AA16 */
- PINMUX_DATA(SDI0_DATA6_MARK, FN_SDI0_DATA6),
+ PINMUX_SINGLE(SDI0_DATA6),
/* Y16 */
- PINMUX_DATA(SDI0_DATA7_MARK, FN_SDI0_DATA7),
+ PINMUX_SINGLE(SDI0_DATA7),
/* AB22 */
- PINMUX_DATA(SDI1_CKO_MARK, FN_SDI1_CKO),
+ PINMUX_SINGLE(SDI1_CKO),
/* AA23 */
- PINMUX_DATA(SDI1_CKI_MARK, FN_SDI1_CKI),
+ PINMUX_SINGLE(SDI1_CKI),
/* AC21 */
- PINMUX_DATA(SDI1_CMD_MARK, FN_SDI1_CMD),
+ PINMUX_SINGLE(SDI1_CMD),
/* GPSR2 */
/* AB21 */
- PINMUX_DATA(SDI1_DATA0_MARK, FN_SDI1_DATA0),
+ PINMUX_SINGLE(SDI1_DATA0),
/* AB20 */
- PINMUX_DATA(SDI1_DATA1_MARK, FN_SDI1_DATA1),
+ PINMUX_SINGLE(SDI1_DATA1),
/* AB19 */
- PINMUX_DATA(SDI1_DATA2_MARK, FN_SDI1_DATA2),
+ PINMUX_SINGLE(SDI1_DATA2),
/* AA19 */
- PINMUX_DATA(SDI1_DATA3_MARK, FN_SDI1_DATA3),
+ PINMUX_SINGLE(SDI1_DATA3),
/* J23 */
- PINMUX_DATA(AB_CLK_MARK, FN_AB_CLK),
+ PINMUX_SINGLE(AB_CLK),
/* D21 */
- PINMUX_DATA(AB_CSB0_MARK, FN_AB_CSB0),
+ PINMUX_SINGLE(AB_CSB0),
/* E21 */
- PINMUX_DATA(AB_CSB1_MARK, FN_AB_CSB1),
+ PINMUX_SINGLE(AB_CSB1),
/* F20 */
PINMUX_IPSR_NOFN(AB_1_0_PORT71, AB_CSB2, SEL_AB_1_0_00),
PINMUX_IPSR_NOFN(AB_1_0_PORT71, CF_CSB0, SEL_AB_1_0_10),
@@ -514,7 +514,7 @@ static const u16 pinmux_data[] = {
/* GPSR3 */
/* M21 */
- PINMUX_DATA(AB_A20_MARK, FN_AB_A20),
+ PINMUX_SINGLE(AB_A20),
/* N21 */
PINMUX_IPSR_NOFN(AB_9_8_PORT97, AB_A21, SEL_AB_9_8_00),
PINMUX_IPSR_NOFN(AB_9_8_PORT97, SDI2_CKO, SEL_AB_9_8_01),
@@ -541,13 +541,13 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_NOFN(AB_13_12_PORT104, AB_A28, SEL_AB_13_12_00),
PINMUX_IPSR_NOFN(AB_13_12_PORT104, AB_BEN1, SEL_AB_13_12_10),
/* B8 */
- PINMUX_DATA(USI0_CS1_MARK, FN_USI0_CS1),
+ PINMUX_SINGLE(USI0_CS1),
/* B9 */
- PINMUX_DATA(USI0_CS2_MARK, FN_USI0_CS2),
+ PINMUX_SINGLE(USI0_CS2),
/* C10 */
- PINMUX_DATA(USI1_DI_MARK, FN_USI1_DI),
+ PINMUX_SINGLE(USI1_DI),
/* D10 */
- PINMUX_DATA(USI1_DO_MARK, FN_USI1_DO),
+ PINMUX_SINGLE(USI1_DO),
/* AB5 */
PINMUX_IPSR_NOFN(USI_1_0_PORT109, USI2_CLK, SEL_USI_1_0_00),
PINMUX_IPSR_NOFN(USI_1_0_PORT109, DTV_BCLK_B, SEL_USI_1_0_01),
@@ -587,49 +587,49 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_NOFN(USI_9_8_PORT121, PWM1, SEL_USI_9_8_00),
PINMUX_IPSR_NOFN(USI_9_8_PORT121, USI4_DO, SEL_USI_9_8_01),
/* V20 */
- PINMUX_DATA(NTSC_CLK_MARK, FN_NTSC_CLK),
+ PINMUX_SINGLE(NTSC_CLK),
/* P20 */
- PINMUX_DATA(NTSC_DATA0_MARK, FN_NTSC_DATA0),
+ PINMUX_SINGLE(NTSC_DATA0),
/* P18 */
- PINMUX_DATA(NTSC_DATA1_MARK, FN_NTSC_DATA1),
+ PINMUX_SINGLE(NTSC_DATA1),
/* R20 */
- PINMUX_DATA(NTSC_DATA2_MARK, FN_NTSC_DATA2),
+ PINMUX_SINGLE(NTSC_DATA2),
/* R18 */
- PINMUX_DATA(NTSC_DATA3_MARK, FN_NTSC_DATA3),
+ PINMUX_SINGLE(NTSC_DATA3),
/* T20 */
- PINMUX_DATA(NTSC_DATA4_MARK, FN_NTSC_DATA4),
+ PINMUX_SINGLE(NTSC_DATA4),
/* GPRS3 */
/* T18 */
- PINMUX_DATA(NTSC_DATA5_MARK, FN_NTSC_DATA5),
+ PINMUX_SINGLE(NTSC_DATA5),
/* U20 */
- PINMUX_DATA(NTSC_DATA6_MARK, FN_NTSC_DATA6),
+ PINMUX_SINGLE(NTSC_DATA6),
/* U18 */
- PINMUX_DATA(NTSC_DATA7_MARK, FN_NTSC_DATA7),
+ PINMUX_SINGLE(NTSC_DATA7),
/* W23 */
- PINMUX_DATA(CAM_CLKO_MARK, FN_CAM_CLKO),
+ PINMUX_SINGLE(CAM_CLKO),
/* Y23 */
- PINMUX_DATA(CAM_CLKI_MARK, FN_CAM_CLKI),
+ PINMUX_SINGLE(CAM_CLKI),
/* W22 */
- PINMUX_DATA(CAM_VS_MARK, FN_CAM_VS),
+ PINMUX_SINGLE(CAM_VS),
/* V21 */
- PINMUX_DATA(CAM_HS_MARK, FN_CAM_HS),
+ PINMUX_SINGLE(CAM_HS),
/* T21 */
- PINMUX_DATA(CAM_YUV0_MARK, FN_CAM_YUV0),
+ PINMUX_SINGLE(CAM_YUV0),
/* T22 */
- PINMUX_DATA(CAM_YUV1_MARK, FN_CAM_YUV1),
+ PINMUX_SINGLE(CAM_YUV1),
/* T23 */
- PINMUX_DATA(CAM_YUV2_MARK, FN_CAM_YUV2),
+ PINMUX_SINGLE(CAM_YUV2),
/* U21 */
- PINMUX_DATA(CAM_YUV3_MARK, FN_CAM_YUV3),
+ PINMUX_SINGLE(CAM_YUV3),
/* U22 */
- PINMUX_DATA(CAM_YUV4_MARK, FN_CAM_YUV4),
+ PINMUX_SINGLE(CAM_YUV4),
/* U23 */
- PINMUX_DATA(CAM_YUV5_MARK, FN_CAM_YUV5),
+ PINMUX_SINGLE(CAM_YUV5),
/* V22 */
- PINMUX_DATA(CAM_YUV6_MARK, FN_CAM_YUV6),
+ PINMUX_SINGLE(CAM_YUV6),
/* V23 */
- PINMUX_DATA(CAM_YUV7_MARK, FN_CAM_YUV7),
+ PINMUX_SINGLE(CAM_YUV7),
/* K22 */
PINMUX_IPSR_NOFN(HSI_1_0_PORT143, USI5_CLK_B, SEL_HSI_1_0_01),
/* K23 */
@@ -647,17 +647,17 @@ static const u16 pinmux_data[] = {
/* M22 */
PINMUX_IPSR_NOFN(HSI_1_0_PORT150, USI5_DI_B, SEL_HSI_1_0_01),
/* D13 */
- PINMUX_DATA(JT_TDO_MARK, FN_JT_TDO),
+ PINMUX_SINGLE(JT_TDO),
/* F13 */
- PINMUX_DATA(JT_TDOEN_MARK, FN_JT_TDOEN),
+ PINMUX_SINGLE(JT_TDOEN),
/* AA12 */
- PINMUX_DATA(USB_VBUS_MARK, FN_USB_VBUS),
+ PINMUX_SINGLE(USB_VBUS),
/* A12 */
- PINMUX_DATA(LOWPWR_MARK, FN_LOWPWR),
+ PINMUX_SINGLE(LOWPWR),
/* Y11 */
- PINMUX_DATA(UART1_RX_MARK, FN_UART1_RX),
+ PINMUX_SINGLE(UART1_RX),
/* Y10 */
- PINMUX_DATA(UART1_TX_MARK, FN_UART1_TX),
+ PINMUX_SINGLE(UART1_TX),
/* AA10 */
PINMUX_IPSR_NOFN(UART_1_0_PORT157, UART1_CTSB, SEL_UART_1_0_00),
PINMUX_IPSR_NOFN(UART_1_0_PORT157, UART2_RX, SEL_UART_1_0_01),
@@ -749,7 +749,7 @@ static const unsigned int cf_ctrl_mux[] = {
};
static const unsigned int cf_data8_pins[] = {
- /* CF_D[0:8] */
+ /* CF_D[0:7] */
77, 78, 79, 80,
81, 82, 83, 84,
};
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
index 279e9dd442e4..7f7c8a6e76e8 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
@@ -2214,7 +2214,7 @@ static const unsigned int lcd1_data9_mux[] = {
LCD1_D8_MARK,
};
static const unsigned int lcd1_data12_pins[] = {
- /* D[0:12] */
+ /* D[0:11] */
4, 3, 2, 1, 0, 91, 92, 23,
93, 94, 21, 201,
};
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c
index bbd35dc1a0c4..ad09a670c2ff 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c
@@ -548,17 +548,17 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(PENC0_MARK, FN_PENC0),
- PINMUX_DATA(PENC1_MARK, FN_PENC1),
- PINMUX_DATA(A1_MARK, FN_A1),
- PINMUX_DATA(A2_MARK, FN_A2),
- PINMUX_DATA(A3_MARK, FN_A3),
- PINMUX_DATA(WE0_MARK, FN_WE0),
- PINMUX_DATA(AUDIO_CLKA_MARK, FN_AUDIO_CLKA),
- PINMUX_DATA(AUDIO_CLKB_MARK, FN_AUDIO_CLKB),
- PINMUX_DATA(SSI_SCK34_MARK, FN_SSI_SCK34),
- PINMUX_DATA(AVS1_MARK, FN_AVS1),
- PINMUX_DATA(AVS2_MARK, FN_AVS2),
+ PINMUX_SINGLE(PENC0),
+ PINMUX_SINGLE(PENC1),
+ PINMUX_SINGLE(A1),
+ PINMUX_SINGLE(A2),
+ PINMUX_SINGLE(A3),
+ PINMUX_SINGLE(WE0),
+ PINMUX_SINGLE(AUDIO_CLKA),
+ PINMUX_SINGLE(AUDIO_CLKB),
+ PINMUX_SINGLE(SSI_SCK34),
+ PINMUX_SINGLE(AVS1),
+ PINMUX_SINGLE(AVS2),
/* IPSR0 */
PINMUX_IPSR_DATA(IP0_1_0, PRESETOUT),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
index ed4e0788035c..bd17eccb6a89 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
@@ -23,13 +23,6 @@
#include "sh_pfc.h"
-#define PORT_GP_9(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
PORT_GP_32(0, fn, sfx), \
PORT_GP_32(1, fn, sfx), \
@@ -609,14 +602,14 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(AVS1_MARK, FN_AVS1),
- PINMUX_DATA(AVS1_MARK, FN_AVS1),
- PINMUX_DATA(A17_MARK, FN_A17),
- PINMUX_DATA(A18_MARK, FN_A18),
- PINMUX_DATA(A19_MARK, FN_A19),
+ PINMUX_SINGLE(AVS1),
+ PINMUX_SINGLE(AVS1),
+ PINMUX_SINGLE(A17),
+ PINMUX_SINGLE(A18),
+ PINMUX_SINGLE(A19),
- PINMUX_DATA(USB_PENC0_MARK, FN_USB_PENC0),
- PINMUX_DATA(USB_PENC1_MARK, FN_USB_PENC1),
+ PINMUX_SINGLE(USB_PENC0),
+ PINMUX_SINGLE(USB_PENC1),
PINMUX_IPSR_DATA(IP0_2_0, USB_PENC2),
PINMUX_IPSR_MSEL(IP0_2_0, SCK0, SEL_SCIF0_0),
@@ -2289,6 +2282,35 @@ static const unsigned int scif5_clk_d_pins[] = {
static const unsigned int scif5_clk_d_mux[] = {
SCK5_D_MARK,
};
+/* - SCIF Clock ------------------------------------------------------------- */
+static const unsigned int scif_clk_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(4, 28),
+};
+static const unsigned int scif_clk_mux[] = {
+ SCIF_CLK_MARK,
+};
+static const unsigned int scif_clk_b_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(4, 5),
+};
+static const unsigned int scif_clk_b_mux[] = {
+ SCIF_CLK_B_MARK,
+};
+static const unsigned int scif_clk_c_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(4, 18),
+};
+static const unsigned int scif_clk_c_mux[] = {
+ SCIF_CLK_C_MARK,
+};
+static const unsigned int scif_clk_d_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(2, 29),
+};
+static const unsigned int scif_clk_d_mux[] = {
+ SCIF_CLK_D_MARK,
+};
/* - SDHI0 ------------------------------------------------------------------ */
static const unsigned int sdhi0_data1_pins[] = {
/* D0 */
@@ -2700,6 +2722,10 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(scif5_clk_c),
SH_PFC_PIN_GROUP(scif5_data_d),
SH_PFC_PIN_GROUP(scif5_clk_d),
+ SH_PFC_PIN_GROUP(scif_clk),
+ SH_PFC_PIN_GROUP(scif_clk_b),
+ SH_PFC_PIN_GROUP(scif_clk_c),
+ SH_PFC_PIN_GROUP(scif_clk_d),
SH_PFC_PIN_GROUP(sdhi0_data1),
SH_PFC_PIN_GROUP(sdhi0_data4),
SH_PFC_PIN_GROUP(sdhi0_ctrl),
@@ -2909,6 +2935,13 @@ static const char * const scif5_groups[] = {
"scif5_clk_d",
};
+static const char * const scif_clk_groups[] = {
+ "scif_clk",
+ "scif_clk_b",
+ "scif_clk_c",
+ "scif_clk_d",
+};
+
static const char * const sdhi0_groups[] = {
"sdhi0_data1",
"sdhi0_data4",
@@ -3004,6 +3037,7 @@ static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(scif3),
SH_PFC_FUNCTION(scif4),
SH_PFC_FUNCTION(scif5),
+ SH_PFC_FUNCTION(scif_clk),
SH_PFC_FUNCTION(usb0),
SH_PFC_FUNCTION(usb1),
SH_PFC_FUNCTION(usb2),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
index d9924b0d53b7..a8b629bc7a55 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7790.c
@@ -26,23 +26,6 @@
#include "core.h"
#include "sh_pfc.h"
-#define PORT_GP_30(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \
- PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx), \
- PORT_GP_1(bank, 12, fn, sfx), PORT_GP_1(bank, 13, fn, sfx), \
- PORT_GP_1(bank, 14, fn, sfx), PORT_GP_1(bank, 15, fn, sfx), \
- PORT_GP_1(bank, 16, fn, sfx), PORT_GP_1(bank, 17, fn, sfx), \
- PORT_GP_1(bank, 18, fn, sfx), PORT_GP_1(bank, 19, fn, sfx), \
- PORT_GP_1(bank, 20, fn, sfx), PORT_GP_1(bank, 21, fn, sfx), \
- PORT_GP_1(bank, 22, fn, sfx), PORT_GP_1(bank, 23, fn, sfx), \
- PORT_GP_1(bank, 24, fn, sfx), PORT_GP_1(bank, 25, fn, sfx), \
- PORT_GP_1(bank, 26, fn, sfx), PORT_GP_1(bank, 27, fn, sfx), \
- PORT_GP_1(bank, 28, fn, sfx), PORT_GP_1(bank, 29, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
PORT_GP_32(0, fn, sfx), \
PORT_GP_30(1, fn, sfx), \
@@ -806,15 +789,15 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(VI1_DATA7_VI1_B7_MARK, FN_VI1_DATA7_VI1_B7),
- PINMUX_DATA(USB0_PWEN_MARK, FN_USB0_PWEN),
- PINMUX_DATA(USB0_OVC_VBUS_MARK, FN_USB0_OVC_VBUS),
- PINMUX_DATA(USB2_PWEN_MARK, FN_USB2_PWEN),
- PINMUX_DATA(USB2_OVC_MARK, FN_USB2_OVC),
- PINMUX_DATA(AVS1_MARK, FN_AVS1),
- PINMUX_DATA(AVS2_MARK, FN_AVS2),
- PINMUX_DATA(DU_DOTCLKIN0_MARK, FN_DU_DOTCLKIN0),
- PINMUX_DATA(DU_DOTCLKIN2_MARK, FN_DU_DOTCLKIN2),
+ PINMUX_SINGLE(VI1_DATA7_VI1_B7),
+ PINMUX_SINGLE(USB0_PWEN),
+ PINMUX_SINGLE(USB0_OVC_VBUS),
+ PINMUX_SINGLE(USB2_PWEN),
+ PINMUX_SINGLE(USB2_OVC),
+ PINMUX_SINGLE(AVS1),
+ PINMUX_SINGLE(AVS2),
+ PINMUX_SINGLE(DU_DOTCLKIN0),
+ PINMUX_SINGLE(DU_DOTCLKIN2),
PINMUX_IPSR_DATA(IP0_2_0, D0),
PINMUX_IPSR_MSEL(IP0_2_0, MSIOF3_SCK_B, SEL_SOF3_1),
@@ -3236,6 +3219,21 @@ static const unsigned int scifb2_data_c_pins[] = {
static const unsigned int scifb2_data_c_mux[] = {
SCIFB2_RXD_C_MARK, SCIFB2_TXD_C_MARK,
};
+/* - SCIF Clock ------------------------------------------------------------- */
+static const unsigned int scif_clk_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(4, 26),
+};
+static const unsigned int scif_clk_mux[] = {
+ SCIF_CLK_MARK,
+};
+static const unsigned int scif_clk_b_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(5, 4),
+};
+static const unsigned int scif_clk_b_mux[] = {
+ SCIF_CLK_B_MARK,
+};
/* - SDHI0 ------------------------------------------------------------------ */
static const unsigned int sdhi0_data1_pins[] = {
/* D0 */
@@ -4139,6 +4137,8 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(scifb2_clk_b),
SH_PFC_PIN_GROUP(scifb2_ctrl_b),
SH_PFC_PIN_GROUP(scifb2_data_c),
+ SH_PFC_PIN_GROUP(scif_clk),
+ SH_PFC_PIN_GROUP(scif_clk_b),
SH_PFC_PIN_GROUP(sdhi0_data1),
SH_PFC_PIN_GROUP(sdhi0_data4),
SH_PFC_PIN_GROUP(sdhi0_ctrl),
@@ -4555,6 +4555,11 @@ static const char * const scifb2_groups[] = {
"scifb2_data_c",
};
+static const char * const scif_clk_groups[] = {
+ "scif_clk",
+ "scif_clk_b",
+};
+
static const char * const sdhi0_groups[] = {
"sdhi0_data1",
"sdhi0_data4",
@@ -4729,6 +4734,7 @@ static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(scifb0),
SH_PFC_FUNCTION(scifb1),
SH_PFC_FUNCTION(scifb2),
+ SH_PFC_FUNCTION(scif_clk),
SH_PFC_FUNCTION(sdhi0),
SH_PFC_FUNCTION(sdhi1),
SH_PFC_FUNCTION(sdhi2),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c
index 87a4f44147c1..4cfbb94ad5d0 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c
@@ -2,6 +2,7 @@
* r8a7791 processor support - PFC hardware block.
*
* Copyright (C) 2013 Renesas Electronics Corporation
+ * Copyright (C) 2014-2015 Cogent Embedded, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
@@ -13,21 +14,6 @@
#include "core.h"
#include "sh_pfc.h"
-#define PORT_GP_26(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \
- PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx), \
- PORT_GP_1(bank, 12, fn, sfx), PORT_GP_1(bank, 13, fn, sfx), \
- PORT_GP_1(bank, 14, fn, sfx), PORT_GP_1(bank, 15, fn, sfx), \
- PORT_GP_1(bank, 16, fn, sfx), PORT_GP_1(bank, 17, fn, sfx), \
- PORT_GP_1(bank, 18, fn, sfx), PORT_GP_1(bank, 19, fn, sfx), \
- PORT_GP_1(bank, 20, fn, sfx), PORT_GP_1(bank, 21, fn, sfx), \
- PORT_GP_1(bank, 22, fn, sfx), PORT_GP_1(bank, 23, fn, sfx), \
- PORT_GP_1(bank, 24, fn, sfx), PORT_GP_1(bank, 25, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
PORT_GP_32(0, fn, sfx), \
PORT_GP_26(1, fn, sfx), \
@@ -787,23 +773,23 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(EX_CS0_N_MARK, FN_EX_CS0_N),
- PINMUX_DATA(RD_N_MARK, FN_RD_N),
- PINMUX_DATA(AUDIO_CLKA_MARK, FN_AUDIO_CLKA),
- PINMUX_DATA(VI0_CLK_MARK, FN_VI0_CLK),
- PINMUX_DATA(VI0_DATA0_VI0_B0_MARK, FN_VI0_DATA0_VI0_B0),
- PINMUX_DATA(VI0_DATA1_VI0_B1_MARK, FN_VI0_DATA1_VI0_B1),
- PINMUX_DATA(VI0_DATA2_VI0_B2_MARK, FN_VI0_DATA2_VI0_B2),
- PINMUX_DATA(VI0_DATA4_VI0_B4_MARK, FN_VI0_DATA4_VI0_B4),
- PINMUX_DATA(VI0_DATA5_VI0_B5_MARK, FN_VI0_DATA5_VI0_B5),
- PINMUX_DATA(VI0_DATA6_VI0_B6_MARK, FN_VI0_DATA6_VI0_B6),
- PINMUX_DATA(VI0_DATA7_VI0_B7_MARK, FN_VI0_DATA7_VI0_B7),
- PINMUX_DATA(USB0_PWEN_MARK, FN_USB0_PWEN),
- PINMUX_DATA(USB0_OVC_MARK, FN_USB0_OVC),
- PINMUX_DATA(USB1_PWEN_MARK, FN_USB1_PWEN),
- PINMUX_DATA(USB1_OVC_MARK, FN_USB1_OVC),
- PINMUX_DATA(DU0_DOTCLKIN_MARK, FN_DU0_DOTCLKIN),
- PINMUX_DATA(SD1_CLK_MARK, FN_SD1_CLK),
+ PINMUX_SINGLE(EX_CS0_N),
+ PINMUX_SINGLE(RD_N),
+ PINMUX_SINGLE(AUDIO_CLKA),
+ PINMUX_SINGLE(VI0_CLK),
+ PINMUX_SINGLE(VI0_DATA0_VI0_B0),
+ PINMUX_SINGLE(VI0_DATA1_VI0_B1),
+ PINMUX_SINGLE(VI0_DATA2_VI0_B2),
+ PINMUX_SINGLE(VI0_DATA4_VI0_B4),
+ PINMUX_SINGLE(VI0_DATA5_VI0_B5),
+ PINMUX_SINGLE(VI0_DATA6_VI0_B6),
+ PINMUX_SINGLE(VI0_DATA7_VI0_B7),
+ PINMUX_SINGLE(USB0_PWEN),
+ PINMUX_SINGLE(USB0_OVC),
+ PINMUX_SINGLE(USB1_PWEN),
+ PINMUX_SINGLE(USB1_OVC),
+ PINMUX_SINGLE(DU0_DOTCLKIN),
+ PINMUX_SINGLE(SD1_CLK),
/* IPSR0 */
PINMUX_IPSR_DATA(IP0_0, D0),
@@ -1740,6 +1726,82 @@ static const unsigned int audio_clkout_mux[] = {
AUDIO_CLKOUT_MARK,
};
+/* - AVB -------------------------------------------------------------------- */
+static const unsigned int avb_link_pins[] = {
+ RCAR_GP_PIN(5, 14),
+};
+static const unsigned int avb_link_mux[] = {
+ AVB_LINK_MARK,
+};
+static const unsigned int avb_magic_pins[] = {
+ RCAR_GP_PIN(5, 11),
+};
+static const unsigned int avb_magic_mux[] = {
+ AVB_MAGIC_MARK,
+};
+static const unsigned int avb_phy_int_pins[] = {
+ RCAR_GP_PIN(5, 16),
+};
+static const unsigned int avb_phy_int_mux[] = {
+ AVB_PHY_INT_MARK,
+};
+static const unsigned int avb_mdio_pins[] = {
+ RCAR_GP_PIN(5, 12), RCAR_GP_PIN(5, 9),
+};
+static const unsigned int avb_mdio_mux[] = {
+ AVB_MDC_MARK, AVB_MDIO_MARK,
+};
+static const unsigned int avb_mii_pins[] = {
+ RCAR_GP_PIN(5, 18), RCAR_GP_PIN(5, 19), RCAR_GP_PIN(5, 20),
+ RCAR_GP_PIN(5, 21),
+
+ RCAR_GP_PIN(5, 0), RCAR_GP_PIN(5, 1), RCAR_GP_PIN(5, 2),
+ RCAR_GP_PIN(5, 3),
+
+ RCAR_GP_PIN(5, 8), RCAR_GP_PIN(5, 13), RCAR_GP_PIN(5, 10),
+ RCAR_GP_PIN(5, 15), RCAR_GP_PIN(5, 26), RCAR_GP_PIN(5, 27),
+ RCAR_GP_PIN(5, 28), RCAR_GP_PIN(5, 29),
+};
+static const unsigned int avb_mii_mux[] = {
+ AVB_TXD0_MARK, AVB_TXD1_MARK, AVB_TXD2_MARK,
+ AVB_TXD3_MARK,
+
+ AVB_RXD0_MARK, AVB_RXD1_MARK, AVB_RXD2_MARK,
+ AVB_RXD3_MARK,
+
+ AVB_RX_ER_MARK, AVB_RX_CLK_MARK, AVB_RX_DV_MARK,
+ AVB_CRS_MARK, AVB_TX_EN_MARK, AVB_TX_ER_MARK,
+ AVB_TX_CLK_MARK, AVB_COL_MARK,
+};
+static const unsigned int avb_gmii_pins[] = {
+ RCAR_GP_PIN(5, 18), RCAR_GP_PIN(5, 19), RCAR_GP_PIN(5, 20),
+ RCAR_GP_PIN(5, 21), RCAR_GP_PIN(5, 22), RCAR_GP_PIN(5, 23),
+ RCAR_GP_PIN(5, 24), RCAR_GP_PIN(5, 25),
+
+ RCAR_GP_PIN(5, 0), RCAR_GP_PIN(5, 1), RCAR_GP_PIN(5, 2),
+ RCAR_GP_PIN(5, 3), RCAR_GP_PIN(5, 4), RCAR_GP_PIN(5, 5),
+ RCAR_GP_PIN(5, 6), RCAR_GP_PIN(5, 7),
+
+ RCAR_GP_PIN(5, 8), RCAR_GP_PIN(5, 13), RCAR_GP_PIN(5, 10),
+ RCAR_GP_PIN(5, 15), RCAR_GP_PIN(5, 30), RCAR_GP_PIN(5, 17),
+ RCAR_GP_PIN(5, 26), RCAR_GP_PIN(5, 27), RCAR_GP_PIN(5, 28),
+ RCAR_GP_PIN(5, 29),
+};
+static const unsigned int avb_gmii_mux[] = {
+ AVB_TXD0_MARK, AVB_TXD1_MARK, AVB_TXD2_MARK,
+ AVB_TXD3_MARK, AVB_TXD4_MARK, AVB_TXD5_MARK,
+ AVB_TXD6_MARK, AVB_TXD7_MARK,
+
+ AVB_RXD0_MARK, AVB_RXD1_MARK, AVB_RXD2_MARK,
+ AVB_RXD3_MARK, AVB_RXD4_MARK, AVB_RXD5_MARK,
+ AVB_RXD6_MARK, AVB_RXD7_MARK,
+
+ AVB_RX_ER_MARK, AVB_RX_CLK_MARK, AVB_RX_DV_MARK,
+ AVB_CRS_MARK, AVB_GTX_CLK_MARK, AVB_GTXREFCLK_MARK,
+ AVB_TX_EN_MARK, AVB_TX_ER_MARK, AVB_TX_CLK_MARK,
+ AVB_COL_MARK,
+};
+
/* - CAN -------------------------------------------------------------------- */
static const unsigned int can0_data_pins[] = {
@@ -3602,6 +3664,23 @@ static const unsigned int scifb2_data_d_pins[] = {
static const unsigned int scifb2_data_d_mux[] = {
SCIFB2_RXD_D_MARK, SCIFB2_TXD_D_MARK,
};
+
+/* - SCIF Clock ------------------------------------------------------------- */
+static const unsigned int scif_clk_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(2, 29),
+};
+static const unsigned int scif_clk_mux[] = {
+ SCIF_CLK_MARK,
+};
+static const unsigned int scif_clk_b_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(7, 19),
+};
+static const unsigned int scif_clk_b_mux[] = {
+ SCIF_CLK_B_MARK,
+};
+
/* - SDHI0 ------------------------------------------------------------------ */
static const unsigned int sdhi0_data1_pins[] = {
/* D0 */
@@ -4258,6 +4337,12 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(audio_clk_b_b),
SH_PFC_PIN_GROUP(audio_clk_c),
SH_PFC_PIN_GROUP(audio_clkout),
+ SH_PFC_PIN_GROUP(avb_link),
+ SH_PFC_PIN_GROUP(avb_magic),
+ SH_PFC_PIN_GROUP(avb_phy_int),
+ SH_PFC_PIN_GROUP(avb_mdio),
+ SH_PFC_PIN_GROUP(avb_mii),
+ SH_PFC_PIN_GROUP(avb_gmii),
SH_PFC_PIN_GROUP(can0_data),
SH_PFC_PIN_GROUP(can0_data_b),
SH_PFC_PIN_GROUP(can0_data_c),
@@ -4510,6 +4595,8 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(scifb2_data_c),
SH_PFC_PIN_GROUP(scifb2_clk_c),
SH_PFC_PIN_GROUP(scifb2_data_d),
+ SH_PFC_PIN_GROUP(scif_clk),
+ SH_PFC_PIN_GROUP(scif_clk_b),
SH_PFC_PIN_GROUP(sdhi0_data1),
SH_PFC_PIN_GROUP(sdhi0_data4),
SH_PFC_PIN_GROUP(sdhi0_ctrl),
@@ -4597,6 +4684,15 @@ static const char * const audio_clk_groups[] = {
"audio_clkout",
};
+static const char * const avb_groups[] = {
+ "avb_link",
+ "avb_magic",
+ "avb_phy_int",
+ "avb_mdio",
+ "avb_mii",
+ "avb_gmii",
+};
+
static const char * const can0_groups[] = {
"can0_data",
"can0_data_b",
@@ -4976,6 +5072,11 @@ static const char * const scifb2_groups[] = {
"scifb2_data_d",
};
+static const char * const scif_clk_groups[] = {
+ "scif_clk",
+ "scif_clk_b",
+};
+
static const char * const sdhi0_groups[] = {
"sdhi0_data1",
"sdhi0_data4",
@@ -5081,6 +5182,7 @@ static const char * const vin2_groups[] = {
static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(audio_clk),
+ SH_PFC_FUNCTION(avb),
SH_PFC_FUNCTION(can0),
SH_PFC_FUNCTION(can1),
SH_PFC_FUNCTION(du),
@@ -5126,6 +5228,7 @@ static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(scifb0),
SH_PFC_FUNCTION(scifb1),
SH_PFC_FUNCTION(scifb2),
+ SH_PFC_FUNCTION(scif_clk),
SH_PFC_FUNCTION(sdhi0),
SH_PFC_FUNCTION(sdhi1),
SH_PFC_FUNCTION(sdhi2),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
index 086f6798b129..3718c7846bfd 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c
@@ -15,25 +15,6 @@
#include "core.h"
#include "sh_pfc.h"
-#define PORT_GP_26(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \
- PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx), \
- PORT_GP_1(bank, 12, fn, sfx), PORT_GP_1(bank, 13, fn, sfx), \
- PORT_GP_1(bank, 14, fn, sfx), PORT_GP_1(bank, 15, fn, sfx), \
- PORT_GP_1(bank, 16, fn, sfx), PORT_GP_1(bank, 17, fn, sfx), \
- PORT_GP_1(bank, 18, fn, sfx), PORT_GP_1(bank, 19, fn, sfx), \
- PORT_GP_1(bank, 20, fn, sfx), PORT_GP_1(bank, 21, fn, sfx), \
- PORT_GP_1(bank, 22, fn, sfx), PORT_GP_1(bank, 23, fn, sfx), \
- PORT_GP_1(bank, 24, fn, sfx), PORT_GP_1(bank, 25, fn, sfx)
-
-#define PORT_GP_28(bank, fn, sfx) \
- PORT_GP_26(bank, fn, sfx), \
- PORT_GP_1(bank, 26, fn, sfx), PORT_GP_1(bank, 27, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
PORT_GP_32(0, fn, sfx), \
PORT_GP_26(1, fn, sfx), \
@@ -618,28 +599,28 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(A2_MARK, FN_A2),
- PINMUX_DATA(WE0_N_MARK, FN_WE0_N),
- PINMUX_DATA(WE1_N_MARK, FN_WE1_N),
- PINMUX_DATA(DACK0_MARK, FN_DACK0),
- PINMUX_DATA(USB0_PWEN_MARK, FN_USB0_PWEN),
- PINMUX_DATA(USB0_OVC_MARK, FN_USB0_OVC),
- PINMUX_DATA(USB1_PWEN_MARK, FN_USB1_PWEN),
- PINMUX_DATA(USB1_OVC_MARK, FN_USB1_OVC),
- PINMUX_DATA(SD0_CLK_MARK, FN_SD0_CLK),
- PINMUX_DATA(SD0_CMD_MARK, FN_SD0_CMD),
- PINMUX_DATA(SD0_DATA0_MARK, FN_SD0_DATA0),
- PINMUX_DATA(SD0_DATA1_MARK, FN_SD0_DATA1),
- PINMUX_DATA(SD0_DATA2_MARK, FN_SD0_DATA2),
- PINMUX_DATA(SD0_DATA3_MARK, FN_SD0_DATA3),
- PINMUX_DATA(SD0_CD_MARK, FN_SD0_CD),
- PINMUX_DATA(SD0_WP_MARK, FN_SD0_WP),
- PINMUX_DATA(SD1_CLK_MARK, FN_SD1_CLK),
- PINMUX_DATA(SD1_CMD_MARK, FN_SD1_CMD),
- PINMUX_DATA(SD1_DATA0_MARK, FN_SD1_DATA0),
- PINMUX_DATA(SD1_DATA1_MARK, FN_SD1_DATA1),
- PINMUX_DATA(SD1_DATA2_MARK, FN_SD1_DATA2),
- PINMUX_DATA(SD1_DATA3_MARK, FN_SD1_DATA3),
+ PINMUX_SINGLE(A2),
+ PINMUX_SINGLE(WE0_N),
+ PINMUX_SINGLE(WE1_N),
+ PINMUX_SINGLE(DACK0),
+ PINMUX_SINGLE(USB0_PWEN),
+ PINMUX_SINGLE(USB0_OVC),
+ PINMUX_SINGLE(USB1_PWEN),
+ PINMUX_SINGLE(USB1_OVC),
+ PINMUX_SINGLE(SD0_CLK),
+ PINMUX_SINGLE(SD0_CMD),
+ PINMUX_SINGLE(SD0_DATA0),
+ PINMUX_SINGLE(SD0_DATA1),
+ PINMUX_SINGLE(SD0_DATA2),
+ PINMUX_SINGLE(SD0_DATA3),
+ PINMUX_SINGLE(SD0_CD),
+ PINMUX_SINGLE(SD0_WP),
+ PINMUX_SINGLE(SD1_CLK),
+ PINMUX_SINGLE(SD1_CMD),
+ PINMUX_SINGLE(SD1_DATA0),
+ PINMUX_SINGLE(SD1_DATA1),
+ PINMUX_SINGLE(SD1_DATA2),
+ PINMUX_SINGLE(SD1_DATA3),
/* IPSR0 */
PINMUX_IPSR_DATA(IP0_0, SD1_CD),
@@ -2644,6 +2625,21 @@ static const unsigned int scifb2_ctrl_pins[] = {
static const unsigned int scifb2_ctrl_mux[] = {
SCIFB2_RTS_N_MARK, SCIFB2_CTS_N_MARK,
};
+/* - SCIF Clock ------------------------------------------------------------- */
+static const unsigned int scif_clk_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(1, 23),
+};
+static const unsigned int scif_clk_mux[] = {
+ SCIF_CLK_MARK,
+};
+static const unsigned int scif_clk_b_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(3, 29),
+};
+static const unsigned int scif_clk_b_mux[] = {
+ SCIF_CLK_B_MARK,
+};
/* - SDHI0 ------------------------------------------------------------------ */
static const unsigned int sdhi0_data1_pins[] = {
/* D0 */
@@ -3071,6 +3067,8 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(scifb2_data),
SH_PFC_PIN_GROUP(scifb2_clk),
SH_PFC_PIN_GROUP(scifb2_ctrl),
+ SH_PFC_PIN_GROUP(scif_clk),
+ SH_PFC_PIN_GROUP(scif_clk_b),
SH_PFC_PIN_GROUP(sdhi0_data1),
SH_PFC_PIN_GROUP(sdhi0_data4),
SH_PFC_PIN_GROUP(sdhi0_ctrl),
@@ -3354,6 +3352,11 @@ static const char * const scifb2_groups[] = {
"scifb2_ctrl",
};
+static const char * const scif_clk_groups[] = {
+ "scif_clk",
+ "scif_clk_b",
+};
+
static const char * const sdhi0_groups[] = {
"sdhi0_data1",
"sdhi0_data4",
@@ -3441,6 +3444,7 @@ static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(scifb0),
SH_PFC_FUNCTION(scifb1),
SH_PFC_FUNCTION(scifb2),
+ SH_PFC_FUNCTION(scif_clk),
SH_PFC_FUNCTION(sdhi0),
SH_PFC_FUNCTION(sdhi1),
SH_PFC_FUNCTION(sdhi2),
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
index 7ddb2adfc5a5..ce4f5cdb0579 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7795.c
@@ -13,46 +13,15 @@
#include "core.h"
#include "sh_pfc.h"
-#define PORT_GP_3(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx)
-
-#define PORT_GP_14(bank, fn, sfx) \
- PORT_GP_3(bank, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \
- PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx), \
- PORT_GP_1(bank, 12, fn, sfx), PORT_GP_1(bank, 13, fn, sfx), \
- PORT_GP_1(bank, 14, fn, sfx)
-
-#define PORT_GP_15(bank, fn, sfx) \
- PORT_GP_14(bank, fn, sfx), PORT_GP_1(bank, 15, fn, sfx)
-
-#define PORT_GP_17(bank, fn, sfx) \
- PORT_GP_15(bank, fn, sfx), \
- PORT_GP_1(bank, 16, fn, sfx), PORT_GP_1(bank, 17, fn, sfx)
-
-#define PORT_GP_25(bank, fn, sfx) \
- PORT_GP_17(bank, fn, sfx), \
- PORT_GP_1(bank, 18, fn, sfx), PORT_GP_1(bank, 19, fn, sfx), \
- PORT_GP_1(bank, 20, fn, sfx), PORT_GP_1(bank, 21, fn, sfx), \
- PORT_GP_1(bank, 22, fn, sfx), PORT_GP_1(bank, 23, fn, sfx), \
- PORT_GP_1(bank, 24, fn, sfx), PORT_GP_1(bank, 25, fn, sfx)
-
-#define PORT_GP_27(bank, fn, sfx) \
- PORT_GP_25(bank, fn, sfx), \
- PORT_GP_1(bank, 26, fn, sfx), PORT_GP_1(bank, 27, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
- PORT_GP_15(0, fn, sfx), \
- PORT_GP_27(1, fn, sfx), \
- PORT_GP_14(2, fn, sfx), \
- PORT_GP_15(3, fn, sfx), \
- PORT_GP_17(4, fn, sfx), \
- PORT_GP_25(5, fn, sfx), \
+ PORT_GP_16(0, fn, sfx), \
+ PORT_GP_28(1, fn, sfx), \
+ PORT_GP_15(2, fn, sfx), \
+ PORT_GP_16(3, fn, sfx), \
+ PORT_GP_18(4, fn, sfx), \
+ PORT_GP_26(5, fn, sfx), \
PORT_GP_32(6, fn, sfx), \
- PORT_GP_3(7, fn, sfx)
+ PORT_GP_4(7, fn, sfx)
/*
* F_() : just information
* FM() : macro for FN_xxx / xxx_MARK
@@ -495,7 +464,7 @@ FM(IP16_31_28) IP16_31_28
#define MOD_SEL1_13 FM(SEL_SCIF3_0) FM(SEL_SCIF3_1)
#define MOD_SEL1_12 FM(SEL_SCIF2_0) FM(SEL_SCIF2_1)
#define MOD_SEL1_11 FM(SEL_SCIF1_0) FM(SEL_SCIF1_1)
-#define MOD_SEL1_10 FM(SEL_SCIF_0) FM(SEL_SCIF_1)
+#define MOD_SEL1_10 FM(SEL_SATA_0) FM(SEL_SATA_1)
#define MOD_SEL1_9 FM(SEL_REMOCON_0) FM(SEL_REMOCON_1)
#define MOD_SEL1_6 FM(SEL_RCAN0_0) FM(SEL_RCAN0_1)
#define MOD_SEL1_5 FM(SEL_PWM6_0) FM(SEL_PWM6_1)
@@ -580,6 +549,25 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(),
+ PINMUX_SINGLE(AVS1),
+ PINMUX_SINGLE(AVS2),
+ PINMUX_SINGLE(HDMI0_CEC),
+ PINMUX_SINGLE(HDMI1_CEC),
+ PINMUX_SINGLE(MSIOF0_RXD),
+ PINMUX_SINGLE(MSIOF0_SCK),
+ PINMUX_SINGLE(MSIOF0_TXD),
+ PINMUX_SINGLE(SD2_CMD),
+ PINMUX_SINGLE(SD3_CLK),
+ PINMUX_SINGLE(SD3_CMD),
+ PINMUX_SINGLE(SD3_DAT0),
+ PINMUX_SINGLE(SD3_DAT1),
+ PINMUX_SINGLE(SD3_DAT2),
+ PINMUX_SINGLE(SD3_DAT3),
+ PINMUX_SINGLE(SD3_DS),
+ PINMUX_SINGLE(SSI_SCK5),
+ PINMUX_SINGLE(SSI_SDATA5),
+ PINMUX_SINGLE(SSI_WS5),
+
/* IPSR0 */
PINMUX_IPSR_DATA(IP0_3_0, AVB_MDC),
PINMUX_IPSR_MSEL(IP0_3_0, MSIOF2_SS2_C, SEL_MSIOF2_2),
@@ -1033,7 +1021,7 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_DATA(IP9_19_16, SD2_DAT3),
PINMUX_IPSR_DATA(IP9_23_20, SD2_DS),
- PINMUX_IPSR_MSEL(IP9_23_20, SATA_DEVSLP_B, SEL_SCIF_1),
+ PINMUX_IPSR_MSEL(IP9_23_20, SATA_DEVSLP_B, SEL_SATA_1),
PINMUX_IPSR_DATA(IP9_27_24, SD3_DAT4),
PINMUX_IPSR_MSEL(IP9_27_24, SD2_CD_A, SEL_SDHI2_0),
@@ -1293,7 +1281,7 @@ static const u16 pinmux_data[] = {
PINMUX_IPSR_DATA(IP15_11_8, SSI_SDATA6),
PINMUX_IPSR_MSEL(IP15_11_8, SIM0_CLK_D, SEL_SIMCARD_3),
- PINMUX_IPSR_MSEL(IP15_11_8, SATA_DEVSLP_A, SEL_SCIF_0),
+ PINMUX_IPSR_MSEL(IP15_11_8, SATA_DEVSLP_A, SEL_SATA_0),
PINMUX_IPSR_DATA(IP15_15_12, SSI_SCK78),
PINMUX_IPSR_MSEL(IP15_15_12, HRX2_B, SEL_HSCIF2_1),
@@ -1612,6 +1600,191 @@ static const unsigned int avb_avtp_capture_b_mux[] = {
AVB_AVTP_CAPTURE_B_MARK,
};
+/* - HSCIF0 ----------------------------------------------------------------- */
+static const unsigned int hscif0_data_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(5, 13), RCAR_GP_PIN(5, 14),
+};
+static const unsigned int hscif0_data_mux[] = {
+ HRX0_MARK, HTX0_MARK,
+};
+static const unsigned int hscif0_clk_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 12),
+};
+static const unsigned int hscif0_clk_mux[] = {
+ HSCK0_MARK,
+};
+static const unsigned int hscif0_ctrl_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(5, 16), RCAR_GP_PIN(5, 15),
+};
+static const unsigned int hscif0_ctrl_mux[] = {
+ HRTS0_N_MARK, HCTS0_N_MARK,
+};
+/* - HSCIF1 ----------------------------------------------------------------- */
+static const unsigned int hscif1_data_a_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(5, 5), RCAR_GP_PIN(5, 6),
+};
+static const unsigned int hscif1_data_a_mux[] = {
+ HRX1_A_MARK, HTX1_A_MARK,
+};
+static const unsigned int hscif1_clk_a_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(6, 21),
+};
+static const unsigned int hscif1_clk_a_mux[] = {
+ HSCK1_A_MARK,
+};
+static const unsigned int hscif1_ctrl_a_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(5, 8), RCAR_GP_PIN(5, 7),
+};
+static const unsigned int hscif1_ctrl_a_mux[] = {
+ HRTS1_N_A_MARK, HCTS1_N_A_MARK,
+};
+
+static const unsigned int hscif1_data_b_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(5, 1), RCAR_GP_PIN(5, 2),
+};
+static const unsigned int hscif1_data_b_mux[] = {
+ HRX1_B_MARK, HTX1_B_MARK,
+};
+static const unsigned int hscif1_clk_b_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 0),
+};
+static const unsigned int hscif1_clk_b_mux[] = {
+ HSCK1_B_MARK,
+};
+static const unsigned int hscif1_ctrl_b_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(5, 4), RCAR_GP_PIN(5, 3),
+};
+static const unsigned int hscif1_ctrl_b_mux[] = {
+ HRTS1_N_B_MARK, HCTS1_N_B_MARK,
+};
+/* - HSCIF2 ----------------------------------------------------------------- */
+static const unsigned int hscif2_data_a_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(6, 8), RCAR_GP_PIN(6, 9),
+};
+static const unsigned int hscif2_data_a_mux[] = {
+ HRX2_A_MARK, HTX2_A_MARK,
+};
+static const unsigned int hscif2_clk_a_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(6, 10),
+};
+static const unsigned int hscif2_clk_a_mux[] = {
+ HSCK2_A_MARK,
+};
+static const unsigned int hscif2_ctrl_a_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(6, 7), RCAR_GP_PIN(6, 6),
+};
+static const unsigned int hscif2_ctrl_a_mux[] = {
+ HRTS2_N_A_MARK, HCTS2_N_A_MARK,
+};
+
+static const unsigned int hscif2_data_b_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(6, 17), RCAR_GP_PIN(6, 18),
+};
+static const unsigned int hscif2_data_b_mux[] = {
+ HRX2_B_MARK, HTX2_B_MARK,
+};
+static const unsigned int hscif2_clk_b_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(6, 21),
+};
+static const unsigned int hscif2_clk_b_mux[] = {
+ HSCK1_B_MARK,
+};
+static const unsigned int hscif2_ctrl_b_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(6, 20), RCAR_GP_PIN(6, 19),
+};
+static const unsigned int hscif2_ctrl_b_mux[] = {
+ HRTS2_N_B_MARK, HCTS2_N_B_MARK,
+};
+/* - HSCIF3 ----------------------------------------------------------------- */
+static const unsigned int hscif3_data_a_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(1, 23), RCAR_GP_PIN(1, 24),
+};
+static const unsigned int hscif3_data_a_mux[] = {
+ HRX3_A_MARK, HTX3_A_MARK,
+};
+static const unsigned int hscif3_clk_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 22),
+};
+static const unsigned int hscif3_clk_mux[] = {
+ HSCK3_MARK,
+};
+static const unsigned int hscif3_ctrl_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(1, 26), RCAR_GP_PIN(1, 25),
+};
+static const unsigned int hscif3_ctrl_mux[] = {
+ HRTS3_N_MARK, HCTS3_N_MARK,
+};
+
+static const unsigned int hscif3_data_b_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(0, 10), RCAR_GP_PIN(0, 11),
+};
+static const unsigned int hscif3_data_b_mux[] = {
+ HRX3_B_MARK, HTX3_B_MARK,
+};
+static const unsigned int hscif3_data_c_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(0, 14), RCAR_GP_PIN(0, 15),
+};
+static const unsigned int hscif3_data_c_mux[] = {
+ HRX3_C_MARK, HTX3_C_MARK,
+};
+static const unsigned int hscif3_data_d_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(2, 7), RCAR_GP_PIN(2, 8),
+};
+static const unsigned int hscif3_data_d_mux[] = {
+ HRX3_D_MARK, HTX3_D_MARK,
+};
+/* - HSCIF4 ----------------------------------------------------------------- */
+static const unsigned int hscif4_data_a_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(1, 12), RCAR_GP_PIN(1, 13),
+};
+static const unsigned int hscif4_data_a_mux[] = {
+ HRX4_A_MARK, HTX4_A_MARK,
+};
+static const unsigned int hscif4_clk_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 11),
+};
+static const unsigned int hscif4_clk_mux[] = {
+ HSCK4_MARK,
+};
+static const unsigned int hscif4_ctrl_pins[] = {
+ /* RTS, CTS */
+ RCAR_GP_PIN(1, 15), RCAR_GP_PIN(1, 14),
+};
+static const unsigned int hscif4_ctrl_mux[] = {
+ HRTS4_N_MARK, HCTS3_N_MARK,
+};
+
+static const unsigned int hscif4_data_b_pins[] = {
+ /* RX, TX */
+ RCAR_GP_PIN(1, 8), RCAR_GP_PIN(1, 11),
+};
+static const unsigned int hscif4_data_b_mux[] = {
+ HRX4_B_MARK, HTX4_B_MARK,
+};
+
/* - I2C -------------------------------------------------------------------- */
static const unsigned int i2c1_a_pins[] = {
/* SDA, SCL */
@@ -1663,6 +1836,678 @@ static const unsigned int i2c6_c_mux[] = {
SDA6_C_MARK, SCL6_C_MARK,
};
+/* - MSIOF0 ----------------------------------------------------------------- */
+static const unsigned int msiof0_clk_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 17),
+};
+static const unsigned int msiof0_clk_mux[] = {
+ MSIOF0_SCK_MARK,
+};
+static const unsigned int msiof0_sync_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(5, 18),
+};
+static const unsigned int msiof0_sync_mux[] = {
+ MSIOF0_SYNC_MARK,
+};
+static const unsigned int msiof0_ss1_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(5, 19),
+};
+static const unsigned int msiof0_ss1_mux[] = {
+ MSIOF0_SS1_MARK,
+};
+static const unsigned int msiof0_ss2_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(5, 21),
+};
+static const unsigned int msiof0_ss2_mux[] = {
+ MSIOF0_SS2_MARK,
+};
+static const unsigned int msiof0_txd_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(5, 20),
+};
+static const unsigned int msiof0_txd_mux[] = {
+ MSIOF0_TXD_MARK,
+};
+static const unsigned int msiof0_rxd_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(5, 22),
+};
+static const unsigned int msiof0_rxd_mux[] = {
+ MSIOF0_RXD_MARK,
+};
+/* - MSIOF1 ----------------------------------------------------------------- */
+static const unsigned int msiof1_clk_a_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(6, 8),
+};
+static const unsigned int msiof1_clk_a_mux[] = {
+ MSIOF1_SCK_A_MARK,
+};
+static const unsigned int msiof1_sync_a_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(6, 9),
+};
+static const unsigned int msiof1_sync_a_mux[] = {
+ MSIOF1_SYNC_A_MARK,
+};
+static const unsigned int msiof1_ss1_a_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(6, 5),
+};
+static const unsigned int msiof1_ss1_a_mux[] = {
+ MSIOF1_SS1_A_MARK,
+};
+static const unsigned int msiof1_ss2_a_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(6, 6),
+};
+static const unsigned int msiof1_ss2_a_mux[] = {
+ MSIOF1_SS2_A_MARK,
+};
+static const unsigned int msiof1_txd_a_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(6, 7),
+};
+static const unsigned int msiof1_txd_a_mux[] = {
+ MSIOF1_TXD_A_MARK,
+};
+static const unsigned int msiof1_rxd_a_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(6, 10),
+};
+static const unsigned int msiof1_rxd_a_mux[] = {
+ MSIOF1_RXD_A_MARK,
+};
+static const unsigned int msiof1_clk_b_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 9),
+};
+static const unsigned int msiof1_clk_b_mux[] = {
+ MSIOF1_SCK_B_MARK,
+};
+static const unsigned int msiof1_sync_b_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(5, 3),
+};
+static const unsigned int msiof1_sync_b_mux[] = {
+ MSIOF1_SYNC_B_MARK,
+};
+static const unsigned int msiof1_ss1_b_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(5, 4),
+};
+static const unsigned int msiof1_ss1_b_mux[] = {
+ MSIOF1_SS1_B_MARK,
+};
+static const unsigned int msiof1_ss2_b_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(5, 0),
+};
+static const unsigned int msiof1_ss2_b_mux[] = {
+ MSIOF1_SS2_B_MARK,
+};
+static const unsigned int msiof1_txd_b_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(5, 8),
+};
+static const unsigned int msiof1_txd_b_mux[] = {
+ MSIOF1_TXD_B_MARK,
+};
+static const unsigned int msiof1_rxd_b_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(5, 7),
+};
+static const unsigned int msiof1_rxd_b_mux[] = {
+ MSIOF1_RXD_B_MARK,
+};
+static const unsigned int msiof1_clk_c_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(6, 17),
+};
+static const unsigned int msiof1_clk_c_mux[] = {
+ MSIOF1_SCK_C_MARK,
+};
+static const unsigned int msiof1_sync_c_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(6, 18),
+};
+static const unsigned int msiof1_sync_c_mux[] = {
+ MSIOF1_SYNC_C_MARK,
+};
+static const unsigned int msiof1_ss1_c_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(6, 21),
+};
+static const unsigned int msiof1_ss1_c_mux[] = {
+ MSIOF1_SS1_C_MARK,
+};
+static const unsigned int msiof1_ss2_c_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(6, 27),
+};
+static const unsigned int msiof1_ss2_c_mux[] = {
+ MSIOF1_SS2_C_MARK,
+};
+static const unsigned int msiof1_txd_c_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(6, 20),
+};
+static const unsigned int msiof1_txd_c_mux[] = {
+ MSIOF1_TXD_C_MARK,
+};
+static const unsigned int msiof1_rxd_c_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(6, 19),
+};
+static const unsigned int msiof1_rxd_c_mux[] = {
+ MSIOF1_RXD_C_MARK,
+};
+static const unsigned int msiof1_clk_d_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 12),
+};
+static const unsigned int msiof1_clk_d_mux[] = {
+ MSIOF1_SCK_D_MARK,
+};
+static const unsigned int msiof1_sync_d_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(5, 15),
+};
+static const unsigned int msiof1_sync_d_mux[] = {
+ MSIOF1_SYNC_D_MARK,
+};
+static const unsigned int msiof1_ss1_d_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(5, 16),
+};
+static const unsigned int msiof1_ss1_d_mux[] = {
+ MSIOF1_SS1_D_MARK,
+};
+static const unsigned int msiof1_ss2_d_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(5, 21),
+};
+static const unsigned int msiof1_ss2_d_mux[] = {
+ MSIOF1_SS2_D_MARK,
+};
+static const unsigned int msiof1_txd_d_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(5, 14),
+};
+static const unsigned int msiof1_txd_d_mux[] = {
+ MSIOF1_TXD_D_MARK,
+};
+static const unsigned int msiof1_rxd_d_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(5, 13),
+};
+static const unsigned int msiof1_rxd_d_mux[] = {
+ MSIOF1_RXD_D_MARK,
+};
+static const unsigned int msiof1_clk_e_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(3, 0),
+};
+static const unsigned int msiof1_clk_e_mux[] = {
+ MSIOF1_SCK_E_MARK,
+};
+static const unsigned int msiof1_sync_e_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(3, 1),
+};
+static const unsigned int msiof1_sync_e_mux[] = {
+ MSIOF1_SYNC_E_MARK,
+};
+static const unsigned int msiof1_ss1_e_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(3, 4),
+};
+static const unsigned int msiof1_ss1_e_mux[] = {
+ MSIOF1_SS1_E_MARK,
+};
+static const unsigned int msiof1_ss2_e_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(3, 5),
+};
+static const unsigned int msiof1_ss2_e_mux[] = {
+ MSIOF1_SS2_E_MARK,
+};
+static const unsigned int msiof1_txd_e_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(3, 3),
+};
+static const unsigned int msiof1_txd_e_mux[] = {
+ MSIOF1_TXD_E_MARK,
+};
+static const unsigned int msiof1_rxd_e_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(3, 2),
+};
+static const unsigned int msiof1_rxd_e_mux[] = {
+ MSIOF1_RXD_E_MARK,
+};
+static const unsigned int msiof1_clk_f_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(5, 23),
+};
+static const unsigned int msiof1_clk_f_mux[] = {
+ MSIOF1_SCK_F_MARK,
+};
+static const unsigned int msiof1_sync_f_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(5, 24),
+};
+static const unsigned int msiof1_sync_f_mux[] = {
+ MSIOF1_SYNC_F_MARK,
+};
+static const unsigned int msiof1_ss1_f_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(6, 1),
+};
+static const unsigned int msiof1_ss1_f_mux[] = {
+ MSIOF1_SS1_F_MARK,
+};
+static const unsigned int msiof1_ss2_f_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(6, 2),
+};
+static const unsigned int msiof1_ss2_f_mux[] = {
+ MSIOF1_SS2_F_MARK,
+};
+static const unsigned int msiof1_txd_f_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(6, 0),
+};
+static const unsigned int msiof1_txd_f_mux[] = {
+ MSIOF1_TXD_F_MARK,
+};
+static const unsigned int msiof1_rxd_f_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(5, 25),
+};
+static const unsigned int msiof1_rxd_f_mux[] = {
+ MSIOF1_RXD_F_MARK,
+};
+static const unsigned int msiof1_clk_g_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(3, 6),
+};
+static const unsigned int msiof1_clk_g_mux[] = {
+ MSIOF1_SCK_G_MARK,
+};
+static const unsigned int msiof1_sync_g_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(3, 7),
+};
+static const unsigned int msiof1_sync_g_mux[] = {
+ MSIOF1_SYNC_G_MARK,
+};
+static const unsigned int msiof1_ss1_g_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(3, 10),
+};
+static const unsigned int msiof1_ss1_g_mux[] = {
+ MSIOF1_SS1_G_MARK,
+};
+static const unsigned int msiof1_ss2_g_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(3, 11),
+};
+static const unsigned int msiof1_ss2_g_mux[] = {
+ MSIOF1_SS2_G_MARK,
+};
+static const unsigned int msiof1_txd_g_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(3, 9),
+};
+static const unsigned int msiof1_txd_g_mux[] = {
+ MSIOF1_TXD_G_MARK,
+};
+static const unsigned int msiof1_rxd_g_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(3, 8),
+};
+static const unsigned int msiof1_rxd_g_mux[] = {
+ MSIOF1_RXD_G_MARK,
+};
+/* - MSIOF2 ----------------------------------------------------------------- */
+static const unsigned int msiof2_clk_a_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 9),
+};
+static const unsigned int msiof2_clk_a_mux[] = {
+ MSIOF2_SCK_A_MARK,
+};
+static const unsigned int msiof2_sync_a_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(1, 8),
+};
+static const unsigned int msiof2_sync_a_mux[] = {
+ MSIOF2_SYNC_A_MARK,
+};
+static const unsigned int msiof2_ss1_a_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(1, 6),
+};
+static const unsigned int msiof2_ss1_a_mux[] = {
+ MSIOF2_SS1_A_MARK,
+};
+static const unsigned int msiof2_ss2_a_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(1, 7),
+};
+static const unsigned int msiof2_ss2_a_mux[] = {
+ MSIOF2_SS2_A_MARK,
+};
+static const unsigned int msiof2_txd_a_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(1, 11),
+};
+static const unsigned int msiof2_txd_a_mux[] = {
+ MSIOF2_TXD_A_MARK,
+};
+static const unsigned int msiof2_rxd_a_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(1, 10),
+};
+static const unsigned int msiof2_rxd_a_mux[] = {
+ MSIOF2_RXD_A_MARK,
+};
+static const unsigned int msiof2_clk_b_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(0, 4),
+};
+static const unsigned int msiof2_clk_b_mux[] = {
+ MSIOF2_SCK_B_MARK,
+};
+static const unsigned int msiof2_sync_b_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(0, 5),
+};
+static const unsigned int msiof2_sync_b_mux[] = {
+ MSIOF2_SYNC_B_MARK,
+};
+static const unsigned int msiof2_ss1_b_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(0, 0),
+};
+static const unsigned int msiof2_ss1_b_mux[] = {
+ MSIOF2_SS1_B_MARK,
+};
+static const unsigned int msiof2_ss2_b_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(0, 1),
+};
+static const unsigned int msiof2_ss2_b_mux[] = {
+ MSIOF2_SS2_B_MARK,
+};
+static const unsigned int msiof2_txd_b_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(0, 7),
+};
+static const unsigned int msiof2_txd_b_mux[] = {
+ MSIOF2_TXD_B_MARK,
+};
+static const unsigned int msiof2_rxd_b_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(0, 6),
+};
+static const unsigned int msiof2_rxd_b_mux[] = {
+ MSIOF2_RXD_B_MARK,
+};
+static const unsigned int msiof2_clk_c_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(2, 12),
+};
+static const unsigned int msiof2_clk_c_mux[] = {
+ MSIOF2_SCK_C_MARK,
+};
+static const unsigned int msiof2_sync_c_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(2, 11),
+};
+static const unsigned int msiof2_sync_c_mux[] = {
+ MSIOF2_SYNC_C_MARK,
+};
+static const unsigned int msiof2_ss1_c_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(2, 10),
+};
+static const unsigned int msiof2_ss1_c_mux[] = {
+ MSIOF2_SS1_C_MARK,
+};
+static const unsigned int msiof2_ss2_c_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(2, 9),
+};
+static const unsigned int msiof2_ss2_c_mux[] = {
+ MSIOF2_SS2_C_MARK,
+};
+static const unsigned int msiof2_txd_c_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(2, 14),
+};
+static const unsigned int msiof2_txd_c_mux[] = {
+ MSIOF2_TXD_C_MARK,
+};
+static const unsigned int msiof2_rxd_c_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(2, 13),
+};
+static const unsigned int msiof2_rxd_c_mux[] = {
+ MSIOF2_RXD_C_MARK,
+};
+static const unsigned int msiof2_clk_d_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(0, 8),
+};
+static const unsigned int msiof2_clk_d_mux[] = {
+ MSIOF2_SCK_D_MARK,
+};
+static const unsigned int msiof2_sync_d_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(0, 9),
+};
+static const unsigned int msiof2_sync_d_mux[] = {
+ MSIOF2_SYNC_D_MARK,
+};
+static const unsigned int msiof2_ss1_d_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(0, 12),
+};
+static const unsigned int msiof2_ss1_d_mux[] = {
+ MSIOF2_SS1_D_MARK,
+};
+static const unsigned int msiof2_ss2_d_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(0, 13),
+};
+static const unsigned int msiof2_ss2_d_mux[] = {
+ MSIOF2_SS2_D_MARK,
+};
+static const unsigned int msiof2_txd_d_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(0, 11),
+};
+static const unsigned int msiof2_txd_d_mux[] = {
+ MSIOF2_TXD_D_MARK,
+};
+static const unsigned int msiof2_rxd_d_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(0, 10),
+};
+static const unsigned int msiof2_rxd_d_mux[] = {
+ MSIOF2_RXD_D_MARK,
+};
+/* - MSIOF3 ----------------------------------------------------------------- */
+static const unsigned int msiof3_clk_a_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(0, 0),
+};
+static const unsigned int msiof3_clk_a_mux[] = {
+ MSIOF3_SCK_A_MARK,
+};
+static const unsigned int msiof3_sync_a_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(0, 1),
+};
+static const unsigned int msiof3_sync_a_mux[] = {
+ MSIOF3_SYNC_A_MARK,
+};
+static const unsigned int msiof3_ss1_a_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(0, 14),
+};
+static const unsigned int msiof3_ss1_a_mux[] = {
+ MSIOF3_SS1_A_MARK,
+};
+static const unsigned int msiof3_ss2_a_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(0, 15),
+};
+static const unsigned int msiof3_ss2_a_mux[] = {
+ MSIOF3_SS2_A_MARK,
+};
+static const unsigned int msiof3_txd_a_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(0, 3),
+};
+static const unsigned int msiof3_txd_a_mux[] = {
+ MSIOF3_TXD_A_MARK,
+};
+static const unsigned int msiof3_rxd_a_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(0, 2),
+};
+static const unsigned int msiof3_rxd_a_mux[] = {
+ MSIOF3_RXD_A_MARK,
+};
+static const unsigned int msiof3_clk_b_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 2),
+};
+static const unsigned int msiof3_clk_b_mux[] = {
+ MSIOF3_SCK_B_MARK,
+};
+static const unsigned int msiof3_sync_b_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(1, 0),
+};
+static const unsigned int msiof3_sync_b_mux[] = {
+ MSIOF3_SYNC_B_MARK,
+};
+static const unsigned int msiof3_ss1_b_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(1, 4),
+};
+static const unsigned int msiof3_ss1_b_mux[] = {
+ MSIOF3_SS1_B_MARK,
+};
+static const unsigned int msiof3_ss2_b_pins[] = {
+ /* SS2 */
+ RCAR_GP_PIN(1, 5),
+};
+static const unsigned int msiof3_ss2_b_mux[] = {
+ MSIOF3_SS2_B_MARK,
+};
+static const unsigned int msiof3_txd_b_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(1, 1),
+};
+static const unsigned int msiof3_txd_b_mux[] = {
+ MSIOF3_TXD_B_MARK,
+};
+static const unsigned int msiof3_rxd_b_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(1, 3),
+};
+static const unsigned int msiof3_rxd_b_mux[] = {
+ MSIOF3_RXD_B_MARK,
+};
+static const unsigned int msiof3_clk_c_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 12),
+};
+static const unsigned int msiof3_clk_c_mux[] = {
+ MSIOF3_SCK_C_MARK,
+};
+static const unsigned int msiof3_sync_c_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(1, 13),
+};
+static const unsigned int msiof3_sync_c_mux[] = {
+ MSIOF3_SYNC_C_MARK,
+};
+static const unsigned int msiof3_txd_c_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(1, 15),
+};
+static const unsigned int msiof3_txd_c_mux[] = {
+ MSIOF3_TXD_C_MARK,
+};
+static const unsigned int msiof3_rxd_c_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(1, 14),
+};
+static const unsigned int msiof3_rxd_c_mux[] = {
+ MSIOF3_RXD_C_MARK,
+};
+static const unsigned int msiof3_clk_d_pins[] = {
+ /* SCK */
+ RCAR_GP_PIN(1, 22),
+};
+static const unsigned int msiof3_clk_d_mux[] = {
+ MSIOF3_SCK_D_MARK,
+};
+static const unsigned int msiof3_sync_d_pins[] = {
+ /* SYNC */
+ RCAR_GP_PIN(1, 23),
+};
+static const unsigned int msiof3_sync_d_mux[] = {
+ MSIOF3_SYNC_D_MARK,
+};
+static const unsigned int msiof3_ss1_d_pins[] = {
+ /* SS1 */
+ RCAR_GP_PIN(1, 26),
+};
+static const unsigned int msiof3_ss1_d_mux[] = {
+ MSIOF3_SS1_D_MARK,
+};
+static const unsigned int msiof3_txd_d_pins[] = {
+ /* TXD */
+ RCAR_GP_PIN(1, 25),
+};
+static const unsigned int msiof3_txd_d_mux[] = {
+ MSIOF3_TXD_D_MARK,
+};
+static const unsigned int msiof3_rxd_d_pins[] = {
+ /* RXD */
+ RCAR_GP_PIN(1, 24),
+};
+static const unsigned int msiof3_rxd_d_mux[] = {
+ MSIOF3_RXD_D_MARK,
+};
+
+/* - SATA --------------------------------------------------------------------*/
+static const unsigned int sata0_devslp_a_pins[] = {
+ /* DEVSLP */
+ RCAR_GP_PIN(6, 16),
+};
+static const unsigned int sata0_devslp_a_mux[] = {
+ SATA_DEVSLP_A_MARK,
+};
+static const unsigned int sata0_devslp_b_pins[] = {
+ /* DEVSLP */
+ RCAR_GP_PIN(4, 6),
+};
+static const unsigned int sata0_devslp_b_mux[] = {
+ SATA_DEVSLP_B_MARK,
+};
+
/* - SCIF0 ------------------------------------------------------------------ */
static const unsigned int scif0_data_pins[] = {
/* RX, TX */
@@ -1845,6 +2690,228 @@ static const unsigned int scif5_clk_pins[] = {
static const unsigned int scif5_clk_mux[] = {
SCK5_MARK,
};
+/* - SDHI0 ------------------------------------------------------------------ */
+static const unsigned int sdhi0_data1_pins[] = {
+ /* D0 */
+ RCAR_GP_PIN(3, 2),
+};
+static const unsigned int sdhi0_data1_mux[] = {
+ SD0_DAT0_MARK,
+};
+static const unsigned int sdhi0_data4_pins[] = {
+ /* D[0:3] */
+ RCAR_GP_PIN(3, 2), RCAR_GP_PIN(3, 3),
+ RCAR_GP_PIN(3, 4), RCAR_GP_PIN(3, 5),
+};
+static const unsigned int sdhi0_data4_mux[] = {
+ SD0_DAT0_MARK, SD0_DAT1_MARK,
+ SD0_DAT2_MARK, SD0_DAT3_MARK,
+};
+static const unsigned int sdhi0_ctrl_pins[] = {
+ /* CLK, CMD */
+ RCAR_GP_PIN(3, 0), RCAR_GP_PIN(3, 1),
+};
+static const unsigned int sdhi0_ctrl_mux[] = {
+ SD0_CLK_MARK, SD0_CMD_MARK,
+};
+static const unsigned int sdhi0_cd_pins[] = {
+ /* CD */
+ RCAR_GP_PIN(3, 12),
+};
+static const unsigned int sdhi0_cd_mux[] = {
+ SD0_CD_MARK,
+};
+static const unsigned int sdhi0_wp_pins[] = {
+ /* WP */
+ RCAR_GP_PIN(3, 13),
+};
+static const unsigned int sdhi0_wp_mux[] = {
+ SD0_WP_MARK,
+};
+/* - SDHI1 ------------------------------------------------------------------ */
+static const unsigned int sdhi1_data1_pins[] = {
+ /* D0 */
+ RCAR_GP_PIN(3, 8),
+};
+static const unsigned int sdhi1_data1_mux[] = {
+ SD1_DAT0_MARK,
+};
+static const unsigned int sdhi1_data4_pins[] = {
+ /* D[0:3] */
+ RCAR_GP_PIN(3, 8), RCAR_GP_PIN(3, 9),
+ RCAR_GP_PIN(3, 10), RCAR_GP_PIN(3, 11),
+};
+static const unsigned int sdhi1_data4_mux[] = {
+ SD1_DAT0_MARK, SD1_DAT1_MARK,
+ SD1_DAT2_MARK, SD1_DAT3_MARK,
+};
+static const unsigned int sdhi1_ctrl_pins[] = {
+ /* CLK, CMD */
+ RCAR_GP_PIN(3, 6), RCAR_GP_PIN(3, 7),
+};
+static const unsigned int sdhi1_ctrl_mux[] = {
+ SD1_CLK_MARK, SD1_CMD_MARK,
+};
+static const unsigned int sdhi1_cd_pins[] = {
+ /* CD */
+ RCAR_GP_PIN(3, 14),
+};
+static const unsigned int sdhi1_cd_mux[] = {
+ SD1_CD_MARK,
+};
+static const unsigned int sdhi1_wp_pins[] = {
+ /* WP */
+ RCAR_GP_PIN(3, 15),
+};
+static const unsigned int sdhi1_wp_mux[] = {
+ SD1_WP_MARK,
+};
+/* - SDHI2 ------------------------------------------------------------------ */
+static const unsigned int sdhi2_data1_pins[] = {
+ /* D0 */
+ RCAR_GP_PIN(4, 2),
+};
+static const unsigned int sdhi2_data1_mux[] = {
+ SD2_DAT0_MARK,
+};
+static const unsigned int sdhi2_data4_pins[] = {
+ /* D[0:3] */
+ RCAR_GP_PIN(4, 2), RCAR_GP_PIN(4, 3),
+ RCAR_GP_PIN(4, 4), RCAR_GP_PIN(4, 5),
+};
+static const unsigned int sdhi2_data4_mux[] = {
+ SD2_DAT0_MARK, SD2_DAT1_MARK,
+ SD2_DAT2_MARK, SD2_DAT3_MARK,
+};
+static const unsigned int sdhi2_data8_pins[] = {
+ /* D[0:7] */
+ RCAR_GP_PIN(4, 2), RCAR_GP_PIN(4, 3),
+ RCAR_GP_PIN(4, 4), RCAR_GP_PIN(4, 5),
+ RCAR_GP_PIN(3, 8), RCAR_GP_PIN(3, 9),
+ RCAR_GP_PIN(3, 10), RCAR_GP_PIN(3, 11),
+};
+static const unsigned int sdhi2_data8_mux[] = {
+ SD2_DAT0_MARK, SD2_DAT1_MARK,
+ SD2_DAT2_MARK, SD2_DAT3_MARK,
+ SD2_DAT4_MARK, SD2_DAT5_MARK,
+ SD2_DAT6_MARK, SD2_DAT7_MARK,
+};
+static const unsigned int sdhi2_ctrl_pins[] = {
+ /* CLK, CMD */
+ RCAR_GP_PIN(4, 0), RCAR_GP_PIN(4, 1),
+};
+static const unsigned int sdhi2_ctrl_mux[] = {
+ SD2_CLK_MARK, SD2_CMD_MARK,
+};
+static const unsigned int sdhi2_cd_a_pins[] = {
+ /* CD */
+ RCAR_GP_PIN(4, 13),
+};
+static const unsigned int sdhi2_cd_a_mux[] = {
+ SD2_CD_A_MARK,
+};
+static const unsigned int sdhi2_cd_b_pins[] = {
+ /* CD */
+ RCAR_GP_PIN(5, 10),
+};
+static const unsigned int sdhi2_cd_b_mux[] = {
+ SD2_CD_B_MARK,
+};
+static const unsigned int sdhi2_wp_a_pins[] = {
+ /* WP */
+ RCAR_GP_PIN(4, 14),
+};
+static const unsigned int sdhi2_wp_a_mux[] = {
+ SD2_WP_A_MARK,
+};
+static const unsigned int sdhi2_wp_b_pins[] = {
+ /* WP */
+ RCAR_GP_PIN(5, 11),
+};
+static const unsigned int sdhi2_wp_b_mux[] = {
+ SD2_WP_B_MARK,
+};
+static const unsigned int sdhi2_ds_pins[] = {
+ /* DS */
+ RCAR_GP_PIN(4, 6),
+};
+static const unsigned int sdhi2_ds_mux[] = {
+ SD2_DS_MARK,
+};
+/* - SDHI3 ------------------------------------------------------------------ */
+static const unsigned int sdhi3_data1_pins[] = {
+ /* D0 */
+ RCAR_GP_PIN(4, 9),
+};
+static const unsigned int sdhi3_data1_mux[] = {
+ SD3_DAT0_MARK,
+};
+static const unsigned int sdhi3_data4_pins[] = {
+ /* D[0:3] */
+ RCAR_GP_PIN(4, 9), RCAR_GP_PIN(4, 10),
+ RCAR_GP_PIN(4, 11), RCAR_GP_PIN(4, 12),
+};
+static const unsigned int sdhi3_data4_mux[] = {
+ SD3_DAT0_MARK, SD3_DAT1_MARK,
+ SD3_DAT2_MARK, SD3_DAT3_MARK,
+};
+static const unsigned int sdhi3_data8_pins[] = {
+ /* D[0:7] */
+ RCAR_GP_PIN(4, 9), RCAR_GP_PIN(4, 10),
+ RCAR_GP_PIN(4, 11), RCAR_GP_PIN(4, 12),
+ RCAR_GP_PIN(4, 13), RCAR_GP_PIN(4, 14),
+ RCAR_GP_PIN(4, 15), RCAR_GP_PIN(4, 16),
+};
+static const unsigned int sdhi3_data8_mux[] = {
+ SD3_DAT0_MARK, SD3_DAT1_MARK,
+ SD3_DAT2_MARK, SD3_DAT3_MARK,
+ SD3_DAT4_MARK, SD3_DAT5_MARK,
+ SD3_DAT6_MARK, SD3_DAT7_MARK,
+};
+static const unsigned int sdhi3_ctrl_pins[] = {
+ /* CLK, CMD */
+ RCAR_GP_PIN(4, 7), RCAR_GP_PIN(4, 8),
+};
+static const unsigned int sdhi3_ctrl_mux[] = {
+ SD3_CLK_MARK, SD3_CMD_MARK,
+};
+static const unsigned int sdhi3_cd_pins[] = {
+ /* CD */
+ RCAR_GP_PIN(4, 15),
+};
+static const unsigned int sdhi3_cd_mux[] = {
+ SD3_CD_MARK,
+};
+static const unsigned int sdhi3_wp_pins[] = {
+ /* WP */
+ RCAR_GP_PIN(4, 16),
+};
+static const unsigned int sdhi3_wp_mux[] = {
+ SD3_WP_MARK,
+};
+static const unsigned int sdhi3_ds_pins[] = {
+ /* DS */
+ RCAR_GP_PIN(4, 17),
+};
+static const unsigned int sdhi3_ds_mux[] = {
+ SD3_DS_MARK,
+};
+
+/* - SCIF Clock ------------------------------------------------------------- */
+static const unsigned int scif_clk_a_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(6, 23),
+};
+static const unsigned int scif_clk_a_mux[] = {
+ SCIF_CLK_A_MARK,
+};
+static const unsigned int scif_clk_b_pins[] = {
+ /* SCIF_CLK */
+ RCAR_GP_PIN(5, 9),
+};
+static const unsigned int scif_clk_b_mux[] = {
+ SCIF_CLK_B_MARK,
+};
/* - SSI -------------------------------------------------------------------- */
static const unsigned int ssi0_data_pins[] = {
@@ -2050,6 +3117,31 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(avb_avtp_capture_a),
SH_PFC_PIN_GROUP(avb_avtp_match_b),
SH_PFC_PIN_GROUP(avb_avtp_capture_b),
+ SH_PFC_PIN_GROUP(hscif0_data),
+ SH_PFC_PIN_GROUP(hscif0_clk),
+ SH_PFC_PIN_GROUP(hscif0_ctrl),
+ SH_PFC_PIN_GROUP(hscif1_data_a),
+ SH_PFC_PIN_GROUP(hscif1_clk_a),
+ SH_PFC_PIN_GROUP(hscif1_ctrl_a),
+ SH_PFC_PIN_GROUP(hscif1_data_b),
+ SH_PFC_PIN_GROUP(hscif1_clk_b),
+ SH_PFC_PIN_GROUP(hscif1_ctrl_b),
+ SH_PFC_PIN_GROUP(hscif2_data_a),
+ SH_PFC_PIN_GROUP(hscif2_clk_a),
+ SH_PFC_PIN_GROUP(hscif2_ctrl_a),
+ SH_PFC_PIN_GROUP(hscif2_data_b),
+ SH_PFC_PIN_GROUP(hscif2_clk_b),
+ SH_PFC_PIN_GROUP(hscif2_ctrl_b),
+ SH_PFC_PIN_GROUP(hscif3_data_a),
+ SH_PFC_PIN_GROUP(hscif3_clk),
+ SH_PFC_PIN_GROUP(hscif3_ctrl),
+ SH_PFC_PIN_GROUP(hscif3_data_b),
+ SH_PFC_PIN_GROUP(hscif3_data_c),
+ SH_PFC_PIN_GROUP(hscif3_data_d),
+ SH_PFC_PIN_GROUP(hscif4_data_a),
+ SH_PFC_PIN_GROUP(hscif4_clk),
+ SH_PFC_PIN_GROUP(hscif4_ctrl),
+ SH_PFC_PIN_GROUP(hscif4_data_b),
SH_PFC_PIN_GROUP(i2c1_a),
SH_PFC_PIN_GROUP(i2c1_b),
SH_PFC_PIN_GROUP(i2c2_a),
@@ -2057,6 +3149,101 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(i2c6_a),
SH_PFC_PIN_GROUP(i2c6_b),
SH_PFC_PIN_GROUP(i2c6_c),
+ SH_PFC_PIN_GROUP(msiof0_clk),
+ SH_PFC_PIN_GROUP(msiof0_sync),
+ SH_PFC_PIN_GROUP(msiof0_ss1),
+ SH_PFC_PIN_GROUP(msiof0_ss2),
+ SH_PFC_PIN_GROUP(msiof0_txd),
+ SH_PFC_PIN_GROUP(msiof0_rxd),
+ SH_PFC_PIN_GROUP(msiof1_clk_a),
+ SH_PFC_PIN_GROUP(msiof1_sync_a),
+ SH_PFC_PIN_GROUP(msiof1_ss1_a),
+ SH_PFC_PIN_GROUP(msiof1_ss2_a),
+ SH_PFC_PIN_GROUP(msiof1_txd_a),
+ SH_PFC_PIN_GROUP(msiof1_rxd_a),
+ SH_PFC_PIN_GROUP(msiof1_clk_b),
+ SH_PFC_PIN_GROUP(msiof1_sync_b),
+ SH_PFC_PIN_GROUP(msiof1_ss1_b),
+ SH_PFC_PIN_GROUP(msiof1_ss2_b),
+ SH_PFC_PIN_GROUP(msiof1_txd_b),
+ SH_PFC_PIN_GROUP(msiof1_rxd_b),
+ SH_PFC_PIN_GROUP(msiof1_clk_c),
+ SH_PFC_PIN_GROUP(msiof1_sync_c),
+ SH_PFC_PIN_GROUP(msiof1_ss1_c),
+ SH_PFC_PIN_GROUP(msiof1_ss2_c),
+ SH_PFC_PIN_GROUP(msiof1_txd_c),
+ SH_PFC_PIN_GROUP(msiof1_rxd_c),
+ SH_PFC_PIN_GROUP(msiof1_clk_d),
+ SH_PFC_PIN_GROUP(msiof1_sync_d),
+ SH_PFC_PIN_GROUP(msiof1_ss1_d),
+ SH_PFC_PIN_GROUP(msiof1_ss2_d),
+ SH_PFC_PIN_GROUP(msiof1_txd_d),
+ SH_PFC_PIN_GROUP(msiof1_rxd_d),
+ SH_PFC_PIN_GROUP(msiof1_clk_e),
+ SH_PFC_PIN_GROUP(msiof1_sync_e),
+ SH_PFC_PIN_GROUP(msiof1_ss1_e),
+ SH_PFC_PIN_GROUP(msiof1_ss2_e),
+ SH_PFC_PIN_GROUP(msiof1_txd_e),
+ SH_PFC_PIN_GROUP(msiof1_rxd_e),
+ SH_PFC_PIN_GROUP(msiof1_clk_f),
+ SH_PFC_PIN_GROUP(msiof1_sync_f),
+ SH_PFC_PIN_GROUP(msiof1_ss1_f),
+ SH_PFC_PIN_GROUP(msiof1_ss2_f),
+ SH_PFC_PIN_GROUP(msiof1_txd_f),
+ SH_PFC_PIN_GROUP(msiof1_rxd_f),
+ SH_PFC_PIN_GROUP(msiof1_clk_g),
+ SH_PFC_PIN_GROUP(msiof1_sync_g),
+ SH_PFC_PIN_GROUP(msiof1_ss1_g),
+ SH_PFC_PIN_GROUP(msiof1_ss2_g),
+ SH_PFC_PIN_GROUP(msiof1_txd_g),
+ SH_PFC_PIN_GROUP(msiof1_rxd_g),
+ SH_PFC_PIN_GROUP(msiof2_clk_a),
+ SH_PFC_PIN_GROUP(msiof2_sync_a),
+ SH_PFC_PIN_GROUP(msiof2_ss1_a),
+ SH_PFC_PIN_GROUP(msiof2_ss2_a),
+ SH_PFC_PIN_GROUP(msiof2_txd_a),
+ SH_PFC_PIN_GROUP(msiof2_rxd_a),
+ SH_PFC_PIN_GROUP(msiof2_clk_b),
+ SH_PFC_PIN_GROUP(msiof2_sync_b),
+ SH_PFC_PIN_GROUP(msiof2_ss1_b),
+ SH_PFC_PIN_GROUP(msiof2_ss2_b),
+ SH_PFC_PIN_GROUP(msiof2_txd_b),
+ SH_PFC_PIN_GROUP(msiof2_rxd_b),
+ SH_PFC_PIN_GROUP(msiof2_clk_c),
+ SH_PFC_PIN_GROUP(msiof2_sync_c),
+ SH_PFC_PIN_GROUP(msiof2_ss1_c),
+ SH_PFC_PIN_GROUP(msiof2_ss2_c),
+ SH_PFC_PIN_GROUP(msiof2_txd_c),
+ SH_PFC_PIN_GROUP(msiof2_rxd_c),
+ SH_PFC_PIN_GROUP(msiof2_clk_d),
+ SH_PFC_PIN_GROUP(msiof2_sync_d),
+ SH_PFC_PIN_GROUP(msiof2_ss1_d),
+ SH_PFC_PIN_GROUP(msiof2_ss2_d),
+ SH_PFC_PIN_GROUP(msiof2_txd_d),
+ SH_PFC_PIN_GROUP(msiof2_rxd_d),
+ SH_PFC_PIN_GROUP(msiof3_clk_a),
+ SH_PFC_PIN_GROUP(msiof3_sync_a),
+ SH_PFC_PIN_GROUP(msiof3_ss1_a),
+ SH_PFC_PIN_GROUP(msiof3_ss2_a),
+ SH_PFC_PIN_GROUP(msiof3_txd_a),
+ SH_PFC_PIN_GROUP(msiof3_rxd_a),
+ SH_PFC_PIN_GROUP(msiof3_clk_b),
+ SH_PFC_PIN_GROUP(msiof3_sync_b),
+ SH_PFC_PIN_GROUP(msiof3_ss1_b),
+ SH_PFC_PIN_GROUP(msiof3_ss2_b),
+ SH_PFC_PIN_GROUP(msiof3_txd_b),
+ SH_PFC_PIN_GROUP(msiof3_rxd_b),
+ SH_PFC_PIN_GROUP(msiof3_clk_c),
+ SH_PFC_PIN_GROUP(msiof3_sync_c),
+ SH_PFC_PIN_GROUP(msiof3_txd_c),
+ SH_PFC_PIN_GROUP(msiof3_rxd_c),
+ SH_PFC_PIN_GROUP(msiof3_clk_d),
+ SH_PFC_PIN_GROUP(msiof3_sync_d),
+ SH_PFC_PIN_GROUP(msiof3_ss1_d),
+ SH_PFC_PIN_GROUP(msiof3_txd_d),
+ SH_PFC_PIN_GROUP(msiof3_rxd_d),
+ SH_PFC_PIN_GROUP(sata0_devslp_a),
+ SH_PFC_PIN_GROUP(sata0_devslp_b),
SH_PFC_PIN_GROUP(scif0_data),
SH_PFC_PIN_GROUP(scif0_clk),
SH_PFC_PIN_GROUP(scif0_ctrl),
@@ -2082,6 +3269,34 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(scif4_ctrl_c),
SH_PFC_PIN_GROUP(scif5_data),
SH_PFC_PIN_GROUP(scif5_clk),
+ SH_PFC_PIN_GROUP(scif_clk_a),
+ SH_PFC_PIN_GROUP(scif_clk_b),
+ SH_PFC_PIN_GROUP(sdhi0_data1),
+ SH_PFC_PIN_GROUP(sdhi0_data4),
+ SH_PFC_PIN_GROUP(sdhi0_ctrl),
+ SH_PFC_PIN_GROUP(sdhi0_cd),
+ SH_PFC_PIN_GROUP(sdhi0_wp),
+ SH_PFC_PIN_GROUP(sdhi1_data1),
+ SH_PFC_PIN_GROUP(sdhi1_data4),
+ SH_PFC_PIN_GROUP(sdhi1_ctrl),
+ SH_PFC_PIN_GROUP(sdhi1_cd),
+ SH_PFC_PIN_GROUP(sdhi1_wp),
+ SH_PFC_PIN_GROUP(sdhi2_data1),
+ SH_PFC_PIN_GROUP(sdhi2_data4),
+ SH_PFC_PIN_GROUP(sdhi2_data8),
+ SH_PFC_PIN_GROUP(sdhi2_ctrl),
+ SH_PFC_PIN_GROUP(sdhi2_cd_a),
+ SH_PFC_PIN_GROUP(sdhi2_wp_a),
+ SH_PFC_PIN_GROUP(sdhi2_cd_b),
+ SH_PFC_PIN_GROUP(sdhi2_wp_b),
+ SH_PFC_PIN_GROUP(sdhi2_ds),
+ SH_PFC_PIN_GROUP(sdhi3_data1),
+ SH_PFC_PIN_GROUP(sdhi3_data4),
+ SH_PFC_PIN_GROUP(sdhi3_data8),
+ SH_PFC_PIN_GROUP(sdhi3_ctrl),
+ SH_PFC_PIN_GROUP(sdhi3_cd),
+ SH_PFC_PIN_GROUP(sdhi3_wp),
+ SH_PFC_PIN_GROUP(sdhi3_ds),
SH_PFC_PIN_GROUP(ssi0_data),
SH_PFC_PIN_GROUP(ssi01239_ctrl),
SH_PFC_PIN_GROUP(ssi1_data_a),
@@ -2141,6 +3356,46 @@ static const char * const avb_groups[] = {
"avb_avtp_capture_b",
};
+static const char * const hscif0_groups[] = {
+ "hscif0_data",
+ "hscif0_clk",
+ "hscif0_ctrl",
+};
+
+static const char * const hscif1_groups[] = {
+ "hscif1_data_a",
+ "hscif1_clk_a",
+ "hscif1_ctrl_a",
+ "hscif1_data_b",
+ "hscif1_clk_b",
+ "hscif1_ctrl_b",
+};
+
+static const char * const hscif2_groups[] = {
+ "hscif2_data_a",
+ "hscif2_clk_a",
+ "hscif2_ctrl_a",
+ "hscif2_data_b",
+ "hscif2_clk_b",
+ "hscif2_ctrl_b",
+};
+
+static const char * const hscif3_groups[] = {
+ "hscif3_data_a",
+ "hscif3_clk",
+ "hscif3_ctrl",
+ "hscif3_data_b",
+ "hscif3_data_c",
+ "hscif3_data_d",
+};
+
+static const char * const hscif4_groups[] = {
+ "hscif4_data_a",
+ "hscif4_clk",
+ "hscif4_ctrl",
+ "hscif4_data_b",
+};
+
static const char * const i2c1_groups[] = {
"i2c1_a",
"i2c1_b",
@@ -2157,6 +3412,116 @@ static const char * const i2c6_groups[] = {
"i2c6_c",
};
+static const char * const msiof0_groups[] = {
+ "msiof0_clk",
+ "msiof0_sync",
+ "msiof0_ss1",
+ "msiof0_ss2",
+ "msiof0_txd",
+ "msiof0_rxd",
+};
+
+static const char * const msiof1_groups[] = {
+ "msiof1_clk_a",
+ "msiof1_sync_a",
+ "msiof1_ss1_a",
+ "msiof1_ss2_a",
+ "msiof1_txd_a",
+ "msiof1_rxd_a",
+ "msiof1_clk_b",
+ "msiof1_sync_b",
+ "msiof1_ss1_b",
+ "msiof1_ss2_b",
+ "msiof1_txd_b",
+ "msiof1_rxd_b",
+ "msiof1_clk_c",
+ "msiof1_sync_c",
+ "msiof1_ss1_c",
+ "msiof1_ss2_c",
+ "msiof1_txd_c",
+ "msiof1_rxd_c",
+ "msiof1_clk_d",
+ "msiof1_sync_d",
+ "msiof1_ss1_d",
+ "msiof1_ss2_d",
+ "msiof1_txd_d",
+ "msiof1_rxd_d",
+ "msiof1_clk_e",
+ "msiof1_sync_e",
+ "msiof1_ss1_e",
+ "msiof1_ss2_e",
+ "msiof1_txd_e",
+ "msiof1_rxd_e",
+ "msiof1_clk_f",
+ "msiof1_sync_f",
+ "msiof1_ss1_f",
+ "msiof1_ss2_f",
+ "msiof1_txd_f",
+ "msiof1_rxd_f",
+ "msiof1_clk_g",
+ "msiof1_sync_g",
+ "msiof1_ss1_g",
+ "msiof1_ss2_g",
+ "msiof1_txd_g",
+ "msiof1_rxd_g",
+};
+
+static const char * const msiof2_groups[] = {
+ "msiof2_clk_a",
+ "msiof2_sync_a",
+ "msiof2_ss1_a",
+ "msiof2_ss2_a",
+ "msiof2_txd_a",
+ "msiof2_rxd_a",
+ "msiof2_clk_b",
+ "msiof2_sync_b",
+ "msiof2_ss1_b",
+ "msiof2_ss2_b",
+ "msiof2_txd_b",
+ "msiof2_rxd_b",
+ "msiof2_clk_c",
+ "msiof2_sync_c",
+ "msiof2_ss1_c",
+ "msiof2_ss2_c",
+ "msiof2_txd_c",
+ "msiof2_rxd_c",
+ "msiof2_clk_d",
+ "msiof2_sync_d",
+ "msiof2_ss1_d",
+ "msiof2_ss2_d",
+ "msiof2_txd_d",
+ "msiof2_rxd_d",
+};
+
+static const char * const msiof3_groups[] = {
+ "msiof3_clk_a",
+ "msiof3_sync_a",
+ "msiof3_ss1_a",
+ "msiof3_ss2_a",
+ "msiof3_txd_a",
+ "msiof3_rxd_a",
+ "msiof3_clk_b",
+ "msiof3_sync_b",
+ "msiof3_ss1_b",
+ "msiof3_ss2_b",
+ "msiof3_txd_b",
+ "msiof3_rxd_b",
+ "msiof3_clk_c",
+ "msiof3_sync_c",
+ "msiof3_txd_c",
+ "msiof3_rxd_c",
+ "msiof3_clk_d",
+ "msiof3_sync_d",
+ "msiof3_ss1_d",
+ "msiof3_txd_d",
+ "msiof3_rxd_d",
+};
+
+static const char * const sata0_groups[] = {
+ "sata0_devslp_a",
+ "sata0_devslp_b",
+};
+
static const char * const scif0_groups[] = {
"scif0_data",
"scif0_clk",
@@ -2200,6 +3565,49 @@ static const char * const scif5_groups[] = {
"scif5_clk",
};
+static const char * const scif_clk_groups[] = {
+ "scif_clk_a",
+ "scif_clk_b",
+};
+
+static const char * const sdhi0_groups[] = {
+ "sdhi0_data1",
+ "sdhi0_data4",
+ "sdhi0_ctrl",
+ "sdhi0_cd",
+ "sdhi0_wp",
+};
+
+static const char * const sdhi1_groups[] = {
+ "sdhi1_data1",
+ "sdhi1_data4",
+ "sdhi1_ctrl",
+ "sdhi1_cd",
+ "sdhi1_wp",
+};
+
+static const char * const sdhi2_groups[] = {
+ "sdhi2_data1",
+ "sdhi2_data4",
+ "sdhi2_data8",
+ "sdhi2_ctrl",
+ "sdhi2_cd_a",
+ "sdhi2_wp_a",
+ "sdhi2_cd_b",
+ "sdhi2_wp_b",
+ "sdhi2_ds",
+};
+
+static const char * const sdhi3_groups[] = {
+ "sdhi3_data1",
+ "sdhi3_data4",
+ "sdhi3_data8",
+ "sdhi3_ctrl",
+ "sdhi3_cd",
+ "sdhi3_wp",
+ "sdhi3_ds",
+};
+
static const char * const ssi_groups[] = {
"ssi0_data",
"ssi01239_ctrl",
@@ -2231,15 +3639,30 @@ static const char * const ssi_groups[] = {
static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(audio_clk),
SH_PFC_FUNCTION(avb),
+ SH_PFC_FUNCTION(hscif0),
+ SH_PFC_FUNCTION(hscif1),
+ SH_PFC_FUNCTION(hscif2),
+ SH_PFC_FUNCTION(hscif3),
+ SH_PFC_FUNCTION(hscif4),
SH_PFC_FUNCTION(i2c1),
SH_PFC_FUNCTION(i2c2),
SH_PFC_FUNCTION(i2c6),
+ SH_PFC_FUNCTION(msiof0),
+ SH_PFC_FUNCTION(msiof1),
+ SH_PFC_FUNCTION(msiof2),
+ SH_PFC_FUNCTION(msiof3),
+ SH_PFC_FUNCTION(sata0),
SH_PFC_FUNCTION(scif0),
SH_PFC_FUNCTION(scif1),
SH_PFC_FUNCTION(scif2),
SH_PFC_FUNCTION(scif3),
SH_PFC_FUNCTION(scif4),
SH_PFC_FUNCTION(scif5),
+ SH_PFC_FUNCTION(scif_clk),
+ SH_PFC_FUNCTION(sdhi0),
+ SH_PFC_FUNCTION(sdhi1),
+ SH_PFC_FUNCTION(sdhi2),
+ SH_PFC_FUNCTION(sdhi3),
SH_PFC_FUNCTION(ssi),
};
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
index 6a69c8c5d943..d25e6f674d0a 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
@@ -2059,7 +2059,7 @@ static const unsigned int lcd2_data9_mux[] = {
LCD2D8_MARK,
};
static const unsigned int lcd2_data12_pins[] = {
- /* D[0:12] */
+ /* D[0:11] */
128, 129, 142, 143, 144, 145, 138, 139,
140, 141, 130, 131,
};
@@ -2198,6 +2198,420 @@ static const unsigned int mmc0_ctrl_1_pins[] = {
static const unsigned int mmc0_ctrl_1_mux[] = {
MMCCMD1_MARK, MMCCLK1_MARK,
};
+/* - MSIOF0 ----------------------------------------------------------------- */
+static const unsigned int msiof0_rsck_pins[] = {
+ /* RSCK */
+ 66,
+};
+static const unsigned int msiof0_rsck_mux[] = {
+ MSIOF0_RSCK_MARK,
+};
+static const unsigned int msiof0_tsck_pins[] = {
+ /* TSCK */
+ 64,
+};
+static const unsigned int msiof0_tsck_mux[] = {
+ MSIOF0_TSCK_MARK,
+};
+static const unsigned int msiof0_rsync_pins[] = {
+ /* RSYNC */
+ 67,
+};
+static const unsigned int msiof0_rsync_mux[] = {
+ MSIOF0_RSYNC_MARK,
+};
+static const unsigned int msiof0_tsync_pins[] = {
+ /* TSYNC */
+ 63,
+};
+static const unsigned int msiof0_tsync_mux[] = {
+ MSIOF0_TSYNC_MARK,
+};
+static const unsigned int msiof0_ss1_pins[] = {
+ /* SS1 */
+ 62,
+};
+static const unsigned int msiof0_ss1_mux[] = {
+ MSIOF0_SS1_MARK,
+};
+static const unsigned int msiof0_ss2_pins[] = {
+ /* SS2 */
+ 71,
+};
+static const unsigned int msiof0_ss2_mux[] = {
+ MSIOF0_SS2_MARK,
+};
+static const unsigned int msiof0_rxd_pins[] = {
+ /* RXD */
+ 70,
+};
+static const unsigned int msiof0_rxd_mux[] = {
+ MSIOF0_RXD_MARK,
+};
+static const unsigned int msiof0_txd_pins[] = {
+ /* TXD */
+ 65,
+};
+static const unsigned int msiof0_txd_mux[] = {
+ MSIOF0_TXD_MARK,
+};
+static const unsigned int msiof0_mck0_pins[] = {
+ /* MSCK0 */
+ 68,
+};
+static const unsigned int msiof0_mck0_mux[] = {
+ MSIOF0_MCK0_MARK,
+};
+
+static const unsigned int msiof0_mck1_pins[] = {
+ /* MSCK1 */
+ 69,
+};
+static const unsigned int msiof0_mck1_mux[] = {
+ MSIOF0_MCK1_MARK,
+};
+
+static const unsigned int msiof0l_rsck_pins[] = {
+ /* RSCK */
+ 214,
+};
+static const unsigned int msiof0l_rsck_mux[] = {
+ MSIOF0L_RSCK_MARK,
+};
+static const unsigned int msiof0l_tsck_pins[] = {
+ /* TSCK */
+ 219,
+};
+static const unsigned int msiof0l_tsck_mux[] = {
+ MSIOF0L_TSCK_MARK,
+};
+static const unsigned int msiof0l_rsync_pins[] = {
+ /* RSYNC */
+ 215,
+};
+static const unsigned int msiof0l_rsync_mux[] = {
+ MSIOF0L_RSYNC_MARK,
+};
+static const unsigned int msiof0l_tsync_pins[] = {
+ /* TSYNC */
+ 217,
+};
+static const unsigned int msiof0l_tsync_mux[] = {
+ MSIOF0L_TSYNC_MARK,
+};
+static const unsigned int msiof0l_ss1_a_pins[] = {
+ /* SS1 */
+ 207,
+};
+static const unsigned int msiof0l_ss1_a_mux[] = {
+ PORT207_MSIOF0L_SS1_MARK,
+};
+static const unsigned int msiof0l_ss1_b_pins[] = {
+ /* SS1 */
+ 210,
+};
+static const unsigned int msiof0l_ss1_b_mux[] = {
+ PORT210_MSIOF0L_SS1_MARK,
+};
+static const unsigned int msiof0l_ss2_a_pins[] = {
+ /* SS2 */
+ 208,
+};
+static const unsigned int msiof0l_ss2_a_mux[] = {
+ PORT208_MSIOF0L_SS2_MARK,
+};
+static const unsigned int msiof0l_ss2_b_pins[] = {
+ /* SS2 */
+ 211,
+};
+static const unsigned int msiof0l_ss2_b_mux[] = {
+ PORT211_MSIOF0L_SS2_MARK,
+};
+static const unsigned int msiof0l_rxd_pins[] = {
+ /* RXD */
+ 221,
+};
+static const unsigned int msiof0l_rxd_mux[] = {
+ MSIOF0L_RXD_MARK,
+};
+static const unsigned int msiof0l_txd_pins[] = {
+ /* TXD */
+ 222,
+};
+static const unsigned int msiof0l_txd_mux[] = {
+ MSIOF0L_TXD_MARK,
+};
+static const unsigned int msiof0l_mck0_pins[] = {
+ /* MSCK0 */
+ 212,
+};
+static const unsigned int msiof0l_mck0_mux[] = {
+ MSIOF0L_MCK0_MARK,
+};
+static const unsigned int msiof0l_mck1_pins[] = {
+ /* MSCK1 */
+ 213,
+};
+static const unsigned int msiof0l_mck1_mux[] = {
+ MSIOF0L_MCK1_MARK,
+};
+/* - MSIOF1 ----------------------------------------------------------------- */
+static const unsigned int msiof1_rsck_pins[] = {
+ /* RSCK */
+ 234,
+};
+static const unsigned int msiof1_rsck_mux[] = {
+ MSIOF1_RSCK_MARK,
+};
+static const unsigned int msiof1_tsck_pins[] = {
+ /* TSCK */
+ 232,
+};
+static const unsigned int msiof1_tsck_mux[] = {
+ MSIOF1_TSCK_MARK,
+};
+static const unsigned int msiof1_rsync_pins[] = {
+ /* RSYNC */
+ 235,
+};
+static const unsigned int msiof1_rsync_mux[] = {
+ MSIOF1_RSYNC_MARK,
+};
+static const unsigned int msiof1_tsync_pins[] = {
+ /* TSYNC */
+ 231,
+};
+static const unsigned int msiof1_tsync_mux[] = {
+ MSIOF1_TSYNC_MARK,
+};
+static const unsigned int msiof1_ss1_pins[] = {
+ /* SS1 */
+ 238,
+};
+static const unsigned int msiof1_ss1_mux[] = {
+ MSIOF1_SS1_MARK,
+};
+static const unsigned int msiof1_ss2_pins[] = {
+ /* SS2 */
+ 239,
+};
+static const unsigned int msiof1_ss2_mux[] = {
+ MSIOF1_SS2_MARK,
+};
+static const unsigned int msiof1_rxd_pins[] = {
+ /* RXD */
+ 233,
+};
+static const unsigned int msiof1_rxd_mux[] = {
+ MSIOF1_RXD_MARK,
+};
+static const unsigned int msiof1_txd_pins[] = {
+ /* TXD */
+ 230,
+};
+static const unsigned int msiof1_txd_mux[] = {
+ MSIOF1_TXD_MARK,
+};
+static const unsigned int msiof1_mck0_pins[] = {
+ /* MSCK0 */
+ 236,
+};
+static const unsigned int msiof1_mck0_mux[] = {
+ MSIOF1_MCK0_MARK,
+};
+static const unsigned int msiof1_mck1_pins[] = {
+ /* MSCK1 */
+ 237,
+};
+static const unsigned int msiof1_mck1_mux[] = {
+ MSIOF1_MCK1_MARK,
+};
+/* - MSIOF2 ----------------------------------------------------------------- */
+static const unsigned int msiof2_rsck_pins[] = {
+ /* RSCK */
+ 151,
+};
+static const unsigned int msiof2_rsck_mux[] = {
+ MSIOF2_RSCK_MARK,
+};
+static const unsigned int msiof2_tsck_pins[] = {
+ /* TSCK */
+ 135,
+};
+static const unsigned int msiof2_tsck_mux[] = {
+ MSIOF2_TSCK_MARK,
+};
+static const unsigned int msiof2_rsync_pins[] = {
+ /* RSYNC */
+ 152,
+};
+static const unsigned int msiof2_rsync_mux[] = {
+ MSIOF2_RSYNC_MARK,
+};
+static const unsigned int msiof2_tsync_pins[] = {
+ /* TSYNC */
+ 133,
+};
+static const unsigned int msiof2_tsync_mux[] = {
+ MSIOF2_TSYNC_MARK,
+};
+static const unsigned int msiof2_ss1_a_pins[] = {
+ /* SS1 */
+ 131,
+};
+static const unsigned int msiof2_ss1_a_mux[] = {
+ PORT131_MSIOF2_SS1_MARK,
+};
+static const unsigned int msiof2_ss1_b_pins[] = {
+ /* SS1 */
+ 153,
+};
+static const unsigned int msiof2_ss1_b_mux[] = {
+ PORT153_MSIOF2_SS1_MARK,
+};
+static const unsigned int msiof2_ss2_a_pins[] = {
+ /* SS2 */
+ 132,
+};
+static const unsigned int msiof2_ss2_a_mux[] = {
+ PORT132_MSIOF2_SS2_MARK,
+};
+static const unsigned int msiof2_ss2_b_pins[] = {
+ /* SS2 */
+ 156,
+};
+static const unsigned int msiof2_ss2_b_mux[] = {
+ PORT156_MSIOF2_SS2_MARK,
+};
+static const unsigned int msiof2_rxd_a_pins[] = {
+ /* RXD */
+ 130,
+};
+static const unsigned int msiof2_rxd_a_mux[] = {
+ PORT130_MSIOF2_RXD_MARK,
+};
+static const unsigned int msiof2_rxd_b_pins[] = {
+ /* RXD */
+ 157,
+};
+static const unsigned int msiof2_rxd_b_mux[] = {
+ PORT157_MSIOF2_RXD_MARK,
+};
+static const unsigned int msiof2_txd_pins[] = {
+ /* TXD */
+ 134,
+};
+static const unsigned int msiof2_txd_mux[] = {
+ MSIOF2_TXD_MARK,
+};
+static const unsigned int msiof2_mck0_pins[] = {
+ /* MSCK0 */
+ 154,
+};
+static const unsigned int msiof2_mck0_mux[] = {
+ MSIOF2_MCK0_MARK,
+};
+static const unsigned int msiof2_mck1_pins[] = {
+ /* MSCK1 */
+ 155,
+};
+static const unsigned int msiof2_mck1_mux[] = {
+ MSIOF2_MCK1_MARK,
+};
+
+static const unsigned int msiof2r_tsck_pins[] = {
+ /* TSCK */
+ 248,
+};
+static const unsigned int msiof2r_tsck_mux[] = {
+ MSIOF2R_TSCK_MARK,
+};
+static const unsigned int msiof2r_tsync_pins[] = {
+ /* TSYNC */
+ 249,
+};
+static const unsigned int msiof2r_tsync_mux[] = {
+ MSIOF2R_TSYNC_MARK,
+};
+static const unsigned int msiof2r_rxd_pins[] = {
+ /* RXD */
+ 244,
+};
+static const unsigned int msiof2r_rxd_mux[] = {
+ MSIOF2R_RXD_MARK,
+};
+static const unsigned int msiof2r_txd_pins[] = {
+ /* TXD */
+ 245,
+};
+static const unsigned int msiof2r_txd_mux[] = {
+ MSIOF2R_TXD_MARK,
+};
+/* - MSIOF3 (Pin function name of MSIOF3 is named BBIF1) -------------------- */
+static const unsigned int msiof3_rsck_pins[] = {
+ /* RSCK */
+ 115,
+};
+static const unsigned int msiof3_rsck_mux[] = {
+ BBIF1_RSCK_MARK,
+};
+static const unsigned int msiof3_tsck_pins[] = {
+ /* TSCK */
+ 112,
+};
+static const unsigned int msiof3_tsck_mux[] = {
+ BBIF1_TSCK_MARK,
+};
+static const unsigned int msiof3_rsync_pins[] = {
+ /* RSYNC */
+ 116,
+};
+static const unsigned int msiof3_rsync_mux[] = {
+ BBIF1_RSYNC_MARK,
+};
+static const unsigned int msiof3_tsync_pins[] = {
+ /* TSYNC */
+ 113,
+};
+static const unsigned int msiof3_tsync_mux[] = {
+ BBIF1_TSYNC_MARK,
+};
+static const unsigned int msiof3_ss1_pins[] = {
+ /* SS1 */
+ 117,
+};
+static const unsigned int msiof3_ss1_mux[] = {
+ BBIF1_SS1_MARK,
+};
+static const unsigned int msiof3_ss2_pins[] = {
+ /* SS2 */
+ 109,
+};
+static const unsigned int msiof3_ss2_mux[] = {
+ BBIF1_SS2_MARK,
+};
+static const unsigned int msiof3_rxd_pins[] = {
+ /* RXD */
+ 111,
+};
+static const unsigned int msiof3_rxd_mux[] = {
+ BBIF1_RXD_MARK,
+};
+static const unsigned int msiof3_txd_pins[] = {
+ /* TXD */
+ 114,
+};
+static const unsigned int msiof3_txd_mux[] = {
+ BBIF1_TXD_MARK,
+};
+static const unsigned int msiof3_flow_pins[] = {
+ /* FLOW */
+ 117,
+};
+static const unsigned int msiof3_flow_mux[] = {
+ BBIF1_FLOW_MARK,
+};
+
/* - SCIFA0 ----------------------------------------------------------------- */
static const unsigned int scifa0_data_pins[] = {
/* RXD, TXD */
@@ -2782,6 +3196,64 @@ static const struct sh_pfc_pin_group pinmux_groups[] = {
SH_PFC_PIN_GROUP(mmc0_data4_1),
SH_PFC_PIN_GROUP(mmc0_data8_1),
SH_PFC_PIN_GROUP(mmc0_ctrl_1),
+ SH_PFC_PIN_GROUP(msiof0_rsck),
+ SH_PFC_PIN_GROUP(msiof0_tsck),
+ SH_PFC_PIN_GROUP(msiof0_rsync),
+ SH_PFC_PIN_GROUP(msiof0_tsync),
+ SH_PFC_PIN_GROUP(msiof0_ss1),
+ SH_PFC_PIN_GROUP(msiof0_ss2),
+ SH_PFC_PIN_GROUP(msiof0_rxd),
+ SH_PFC_PIN_GROUP(msiof0_txd),
+ SH_PFC_PIN_GROUP(msiof0_mck0),
+ SH_PFC_PIN_GROUP(msiof0_mck1),
+ SH_PFC_PIN_GROUP(msiof0l_rsck),
+ SH_PFC_PIN_GROUP(msiof0l_tsck),
+ SH_PFC_PIN_GROUP(msiof0l_rsync),
+ SH_PFC_PIN_GROUP(msiof0l_tsync),
+ SH_PFC_PIN_GROUP(msiof0l_ss1_a),
+ SH_PFC_PIN_GROUP(msiof0l_ss1_b),
+ SH_PFC_PIN_GROUP(msiof0l_ss2_a),
+ SH_PFC_PIN_GROUP(msiof0l_ss2_b),
+ SH_PFC_PIN_GROUP(msiof0l_rxd),
+ SH_PFC_PIN_GROUP(msiof0l_txd),
+ SH_PFC_PIN_GROUP(msiof0l_mck0),
+ SH_PFC_PIN_GROUP(msiof0l_mck1),
+ SH_PFC_PIN_GROUP(msiof1_rsck),
+ SH_PFC_PIN_GROUP(msiof1_tsck),
+ SH_PFC_PIN_GROUP(msiof1_rsync),
+ SH_PFC_PIN_GROUP(msiof1_tsync),
+ SH_PFC_PIN_GROUP(msiof1_ss1),
+ SH_PFC_PIN_GROUP(msiof1_ss2),
+ SH_PFC_PIN_GROUP(msiof1_rxd),
+ SH_PFC_PIN_GROUP(msiof1_txd),
+ SH_PFC_PIN_GROUP(msiof1_mck0),
+ SH_PFC_PIN_GROUP(msiof1_mck1),
+ SH_PFC_PIN_GROUP(msiof2_rsck),
+ SH_PFC_PIN_GROUP(msiof2_tsck),
+ SH_PFC_PIN_GROUP(msiof2_rsync),
+ SH_PFC_PIN_GROUP(msiof2_tsync),
+ SH_PFC_PIN_GROUP(msiof2_ss1_a),
+ SH_PFC_PIN_GROUP(msiof2_ss1_b),
+ SH_PFC_PIN_GROUP(msiof2_ss2_a),
+ SH_PFC_PIN_GROUP(msiof2_ss2_b),
+ SH_PFC_PIN_GROUP(msiof2_rxd_a),
+ SH_PFC_PIN_GROUP(msiof2_rxd_b),
+ SH_PFC_PIN_GROUP(msiof2_txd),
+ SH_PFC_PIN_GROUP(msiof2_mck0),
+ SH_PFC_PIN_GROUP(msiof2_mck1),
+ SH_PFC_PIN_GROUP(msiof2r_tsck),
+ SH_PFC_PIN_GROUP(msiof2r_tsync),
+ SH_PFC_PIN_GROUP(msiof2r_rxd),
+ SH_PFC_PIN_GROUP(msiof2r_txd),
+ SH_PFC_PIN_GROUP(msiof3_rsck),
+ SH_PFC_PIN_GROUP(msiof3_tsck),
+ SH_PFC_PIN_GROUP(msiof3_rsync),
+ SH_PFC_PIN_GROUP(msiof3_tsync),
+ SH_PFC_PIN_GROUP(msiof3_ss1),
+ SH_PFC_PIN_GROUP(msiof3_ss2),
+ SH_PFC_PIN_GROUP(msiof3_rxd),
+ SH_PFC_PIN_GROUP(msiof3_txd),
+ SH_PFC_PIN_GROUP(msiof3_flow),
SH_PFC_PIN_GROUP(scifa0_data),
SH_PFC_PIN_GROUP(scifa0_clk),
SH_PFC_PIN_GROUP(scifa0_ctrl),
@@ -2982,6 +3454,76 @@ static const char * const mmc0_groups[] = {
"mmc0_ctrl_1",
};
+static const char * const msiof0_groups[] = {
+ "msiof0_rsck",
+ "msiof0_tsck",
+ "msiof0_rsync",
+ "msiof0_tsync",
+ "msiof0_ss1",
+ "msiof0_ss2",
+ "msiof0_rxd",
+ "msiof0_txd",
+ "msiof0_mck0",
+ "msiof0_mck1",
+ "msiof0l_rsck",
+ "msiof0l_tsck",
+ "msiof0l_rsync",
+ "msiof0l_tsync",
+ "msiof0l_ss1_a",
+ "msiof0l_ss1_b",
+ "msiof0l_ss2_a",
+ "msiof0l_ss2_b",
+ "msiof0l_rxd",
+ "msiof0l_txd",
+ "msiof0l_mck0",
+ "msiof0l_mck1",
+};
+
+static const char * const msiof1_groups[] = {
+ "msiof1_rsck",
+ "msiof1_tsck",
+ "msiof1_rsync",
+ "msiof1_tsync",
+ "msiof1_ss1",
+ "msiof1_ss2",
+ "msiof1_rxd",
+ "msiof1_txd",
+ "msiof1_mck0",
+ "msiof1_mck1",
+};
+
+static const char * const msiof2_groups[] = {
+ "msiof2_rsck",
+ "msiof2_tsck",
+ "msiof2_rsync",
+ "msiof2_tsync",
+ "msiof2_ss1_a",
+ "msiof2_ss1_b",
+ "msiof2_ss2_a",
+ "msiof2_ss2_b",
+ "msiof2_rxd_a",
+ "msiof2_rxd_b",
+ "msiof2_txd",
+ "msiof2_mck0",
+ "msiof2_mck1",
+ "msiof2r_tsck",
+ "msiof2r_tsync",
+ "msiof2r_rxd",
+ "msiof2r_txd",
+};
+
+static const char * const msiof3_groups[] = {
+ "msiof3_rsck",
+ "msiof3_tsck",
+ "msiof3_rsync",
+ "msiof3_tsync",
+ "msiof3_ss1",
+ "msiof3_ss2",
+ "msiof3_rxd",
+ "msiof3_txd",
+ "msiof3_flow",
+};
+
static const char * const scifa0_groups[] = {
"scifa0_data",
"scifa0_clk",
@@ -3116,6 +3658,10 @@ static const struct sh_pfc_function pinmux_functions[] = {
SH_PFC_FUNCTION(lcd),
SH_PFC_FUNCTION(lcd2),
SH_PFC_FUNCTION(mmc0),
+ SH_PFC_FUNCTION(msiof0),
+ SH_PFC_FUNCTION(msiof1),
+ SH_PFC_FUNCTION(msiof2),
+ SH_PFC_FUNCTION(msiof3),
SH_PFC_FUNCTION(scifa0),
SH_PFC_FUNCTION(scifa1),
SH_PFC_FUNCTION(scifa2),
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
index 9842bb106796..b0b328b3130b 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
@@ -14,14 +14,6 @@
#include "sh_pfc.h"
-#define PORT_GP_12(bank, fn, sfx) \
- PORT_GP_1(bank, 0, fn, sfx), PORT_GP_1(bank, 1, fn, sfx), \
- PORT_GP_1(bank, 2, fn, sfx), PORT_GP_1(bank, 3, fn, sfx), \
- PORT_GP_1(bank, 4, fn, sfx), PORT_GP_1(bank, 5, fn, sfx), \
- PORT_GP_1(bank, 6, fn, sfx), PORT_GP_1(bank, 7, fn, sfx), \
- PORT_GP_1(bank, 8, fn, sfx), PORT_GP_1(bank, 9, fn, sfx), \
- PORT_GP_1(bank, 10, fn, sfx), PORT_GP_1(bank, 11, fn, sfx)
-
#define CPU_ALL_PORT(fn, sfx) \
PORT_GP_32(0, fn, sfx), \
PORT_GP_32(1, fn, sfx), \
@@ -585,15 +577,18 @@ enum {
static const u16 pinmux_data[] = {
PINMUX_DATA_GP_ALL(), /* PINMUX_DATA(GP_M_N_DATA, GP_M_N_FN...), */
- PINMUX_DATA(CLKOUT_MARK, FN_CLKOUT),
- PINMUX_DATA(BS_MARK, FN_BS), PINMUX_DATA(CS0_MARK, FN_CS0),
- PINMUX_DATA(EX_CS0_MARK, FN_EX_CS0),
- PINMUX_DATA(RD_MARK, FN_RD), PINMUX_DATA(WE0_MARK, FN_WE0),
- PINMUX_DATA(WE1_MARK, FN_WE1),
- PINMUX_DATA(SCL0_MARK, FN_SCL0), PINMUX_DATA(PENC0_MARK, FN_PENC0),
- PINMUX_DATA(USB_OVC0_MARK, FN_USB_OVC0),
- PINMUX_DATA(IRQ2_B_MARK, FN_IRQ2_B),
- PINMUX_DATA(IRQ3_B_MARK, FN_IRQ3_B),
+ PINMUX_SINGLE(CLKOUT),
+ PINMUX_SINGLE(BS),
+ PINMUX_SINGLE(CS0),
+ PINMUX_SINGLE(EX_CS0),
+ PINMUX_SINGLE(RD),
+ PINMUX_SINGLE(WE0),
+ PINMUX_SINGLE(WE1),
+ PINMUX_SINGLE(SCL0),
+ PINMUX_SINGLE(PENC0),
+ PINMUX_SINGLE(USB_OVC0),
+ PINMUX_SINGLE(IRQ2_B),
+ PINMUX_SINGLE(IRQ3_B),
/* IPSR0 */
PINMUX_IPSR_DATA(IP0_1_0, A0),
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index 863c3e30ce05..87b0a599afaf 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -273,8 +273,10 @@ static int sh_pfc_dt_node_to_map(struct pinctrl_dev *pctldev,
for_each_child_of_node(np, child) {
ret = sh_pfc_dt_subnode_to_map(pctldev, child, map, num_maps,
&index);
- if (ret < 0)
+ if (ret < 0) {
+ of_node_put(child);
goto done;
+ }
}
/* If no mapping has been found in child nodes try the config node. */
diff --git a/drivers/pinctrl/sh-pfc/sh_pfc.h b/drivers/pinctrl/sh-pfc/sh_pfc.h
index 7b373d43d981..2123ab49d6a5 100644
--- a/drivers/pinctrl/sh-pfc/sh_pfc.h
+++ b/drivers/pinctrl/sh-pfc/sh_pfc.h
@@ -199,28 +199,82 @@ struct sh_pfc_soc_info {
PINMUX_DATA(fn##_MARK, FN_##ms, FN_##ipsr, FN_##fn)
/*
+ * Describe a pinmux configuration for a single-function pin with GPIO
+ * capability.
+ * - fn: Function name
+ */
+#define PINMUX_SINGLE(fn) \
+ PINMUX_DATA(fn##_MARK, FN_##fn)
+
+/*
* GP port style (32 ports banks)
*/
#define PORT_GP_CFG_1(bank, pin, fn, sfx, cfg) fn(bank, pin, GP_##bank##_##pin, sfx, cfg)
#define PORT_GP_1(bank, pin, fn, sfx) PORT_GP_CFG_1(bank, pin, fn, sfx, 0)
-#define PORT_GP_CFG_32(bank, fn, sfx, cfg) \
+#define PORT_GP_CFG_4(bank, fn, sfx, cfg) \
PORT_GP_CFG_1(bank, 0, fn, sfx, cfg), PORT_GP_CFG_1(bank, 1, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 2, fn, sfx, cfg), PORT_GP_CFG_1(bank, 3, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 2, fn, sfx, cfg), PORT_GP_CFG_1(bank, 3, fn, sfx, cfg)
+#define PORT_GP_4(bank, fn, sfx) PORT_GP_CFG_4(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_8(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_4(bank, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 4, fn, sfx, cfg), PORT_GP_CFG_1(bank, 5, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 6, fn, sfx, cfg), PORT_GP_CFG_1(bank, 7, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 6, fn, sfx, cfg), PORT_GP_CFG_1(bank, 7, fn, sfx, cfg)
+#define PORT_GP_8(bank, fn, sfx) PORT_GP_CFG_8(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_9(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_8(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 8, fn, sfx, cfg)
+#define PORT_GP_9(bank, fn, sfx) PORT_GP_CFG_9(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_12(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_8(bank, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 8, fn, sfx, cfg), PORT_GP_CFG_1(bank, 9, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 10, fn, sfx, cfg), PORT_GP_CFG_1(bank, 11, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 12, fn, sfx, cfg), PORT_GP_CFG_1(bank, 13, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 14, fn, sfx, cfg), PORT_GP_CFG_1(bank, 15, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 16, fn, sfx, cfg), PORT_GP_CFG_1(bank, 17, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 10, fn, sfx, cfg), PORT_GP_CFG_1(bank, 11, fn, sfx, cfg)
+#define PORT_GP_12(bank, fn, sfx) PORT_GP_CFG_12(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_14(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_12(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 12, fn, sfx, cfg), PORT_GP_CFG_1(bank, 13, fn, sfx, cfg)
+#define PORT_GP_14(bank, fn, sfx) PORT_GP_CFG_14(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_15(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_14(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 14, fn, sfx, cfg)
+#define PORT_GP_15(bank, fn, sfx) PORT_GP_CFG_15(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_16(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_14(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 14, fn, sfx, cfg), PORT_GP_CFG_1(bank, 15, fn, sfx, cfg)
+#define PORT_GP_16(bank, fn, sfx) PORT_GP_CFG_16(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_18(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_16(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 16, fn, sfx, cfg), PORT_GP_CFG_1(bank, 17, fn, sfx, cfg)
+#define PORT_GP_18(bank, fn, sfx) PORT_GP_CFG_18(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_26(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_18(bank, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 18, fn, sfx, cfg), PORT_GP_CFG_1(bank, 19, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 20, fn, sfx, cfg), PORT_GP_CFG_1(bank, 21, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 22, fn, sfx, cfg), PORT_GP_CFG_1(bank, 23, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 24, fn, sfx, cfg), PORT_GP_CFG_1(bank, 25, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 26, fn, sfx, cfg), PORT_GP_CFG_1(bank, 27, fn, sfx, cfg), \
- PORT_GP_CFG_1(bank, 28, fn, sfx, cfg), PORT_GP_CFG_1(bank, 29, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 24, fn, sfx, cfg), PORT_GP_CFG_1(bank, 25, fn, sfx, cfg)
+#define PORT_GP_26(bank, fn, sfx) PORT_GP_CFG_26(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_28(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_26(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 26, fn, sfx, cfg), PORT_GP_CFG_1(bank, 27, fn, sfx, cfg)
+#define PORT_GP_28(bank, fn, sfx) PORT_GP_CFG_28(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_30(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_28(bank, fn, sfx, cfg), \
+ PORT_GP_CFG_1(bank, 28, fn, sfx, cfg), PORT_GP_CFG_1(bank, 29, fn, sfx, cfg)
+#define PORT_GP_30(bank, fn, sfx) PORT_GP_CFG_30(bank, fn, sfx, 0)
+
+#define PORT_GP_CFG_32(bank, fn, sfx, cfg) \
+ PORT_GP_CFG_30(bank, fn, sfx, cfg), \
PORT_GP_CFG_1(bank, 30, fn, sfx, cfg), PORT_GP_CFG_1(bank, 31, fn, sfx, cfg)
#define PORT_GP_32(bank, fn, sfx) PORT_GP_CFG_32(bank, fn, sfx, 0)
diff --git a/drivers/pinctrl/sirf/pinctrl-atlas7.c b/drivers/pinctrl/sirf/pinctrl-atlas7.c
index 829018c812bd..053d98e33944 100644
--- a/drivers/pinctrl/sirf/pinctrl-atlas7.c
+++ b/drivers/pinctrl/sirf/pinctrl-atlas7.c
@@ -161,6 +161,9 @@ enum altas7_pad_type {
#define IN_DISABLE_VAL_1_REG_SET 0x0A88
#define IN_DISABLE_VAL_1_REG_CLR 0x0A8C
+/* Offset of the SDIO9SEL*/
+#define SYS2PCI_SDIO9SEL 0x14
+
struct dt_params {
const char *property;
int value;
@@ -370,6 +373,7 @@ struct atlas7_pmx {
struct pinctrl_desc pctl_desc;
struct atlas7_pinctrl_data *pctl_data;
void __iomem *regs[ATLAS7_PINCTRL_REG_BANKS];
+ void __iomem *sys2pci_base;
u32 status_ds[NUM_OF_IN_DISABLE_REG];
u32 status_dsv[NUM_OF_IN_DISABLE_REG];
struct atlas7_pad_status sleep_data[ATLAS7_PINCTRL_TOTAL_PINS];
@@ -885,11 +889,12 @@ static const unsigned int lr_lcdrom_pins[] = { 73, 54, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 56, 53, 55, };
static const unsigned int lvds_analog_pins[] = { 149, 150, 151, 152, 153, 154,
155, 156, 157, 158, };
-static const unsigned int nd_df_pins[] = { 44, 43, 42, 41, 40, 39, 38, 37,
- 47, 46, 52, 51, 45, 49, 50, 48, 124, };
-static const unsigned int nd_df_nowp_pins[] = { 44, 43, 42, 41, 40, 39, 38,
- 37, 47, 46, 52, 51, 45, 49, 50, 48, };
+static const unsigned int nd_df_basic_pins[] = { 44, 43, 42, 41, 40, 39, 38,
+ 37, 47, 46, 52, 45, 49, 50, 48, };
+static const unsigned int nd_df_wp_pins[] = { 124, };
+static const unsigned int nd_df_cs_pins[] = { 51, };
static const unsigned int ps_pins[] = { 120, 119, 121, };
+static const unsigned int ps_no_dir_pins[] = { 119, };
static const unsigned int pwc_core_on_pins[] = { 8, };
static const unsigned int pwc_ext_on_pins[] = { 6, };
static const unsigned int pwc_gpio3_clk_pins[] = { 3, };
@@ -944,7 +949,7 @@ static const unsigned int sd2_cdb_pins0[] = { 124, };
static const unsigned int sd2_cdb_pins1[] = { 161, };
static const unsigned int sd2_wpb_pins0[] = { 123, };
static const unsigned int sd2_wpb_pins1[] = { 163, };
-static const unsigned int sd3_pins[] = { 85, 86, 87, 88, 89, 90, };
+static const unsigned int sd3_9_pins[] = { 85, 86, 87, 88, 89, 90, };
static const unsigned int sd5_pins[] = { 91, 92, 93, 94, 95, 96, };
static const unsigned int sd6_pins0[] = { 79, 78, 74, 75, 76, 77, };
static const unsigned int sd6_pins1[] = { 101, 99, 100, 110, 109, 111, };
@@ -998,9 +1003,9 @@ static const unsigned int vi_vip1_ext_pins[] = { 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 108, 103, 104, 105, 106, 107, 102, 97, 98,
99, 100, };
static const unsigned int vi_vip1_low8bit_pins[] = { 74, 75, 76, 77, 78, 79,
- 80, 81, };
-static const unsigned int vi_vip1_high8bit_pins[] = { 82, 83, 84, 108, 103,
- 104, 105, 106, };
+ 80, 81, 82, 83, 84, };
+static const unsigned int vi_vip1_high8bit_pins[] = { 82, 83, 84, 103, 104,
+ 105, 106, 107, 102, 97, 98, };
/* definition of pin group table */
struct atlas7_pin_group altas7_pin_groups[] = {
@@ -1142,9 +1147,11 @@ struct atlas7_pin_group altas7_pin_groups[] = {
GROUP("ld_ldd_lck_grp", ld_ldd_lck_pins),
GROUP("lr_lcdrom_grp", lr_lcdrom_pins),
GROUP("lvds_analog_grp", lvds_analog_pins),
- GROUP("nd_df_grp", nd_df_pins),
- GROUP("nd_df_nowp_grp", nd_df_nowp_pins),
+ GROUP("nd_df_basic_grp", nd_df_basic_pins),
+ GROUP("nd_df_wp_grp", nd_df_wp_pins),
+ GROUP("nd_df_cs_grp", nd_df_cs_pins),
GROUP("ps_grp", ps_pins),
+ GROUP("ps_no_dir_grp", ps_no_dir_pins),
GROUP("pwc_core_on_grp", pwc_core_on_pins),
GROUP("pwc_ext_on_grp", pwc_ext_on_pins),
GROUP("pwc_gpio3_clk_grp", pwc_gpio3_clk_pins),
@@ -1196,7 +1203,7 @@ struct atlas7_pin_group altas7_pin_groups[] = {
GROUP("sd2_cdb_grp1", sd2_cdb_pins1),
GROUP("sd2_wpb_grp0", sd2_wpb_pins0),
GROUP("sd2_wpb_grp1", sd2_wpb_pins1),
- GROUP("sd3_grp", sd3_pins),
+ GROUP("sd3_9_grp", sd3_9_pins),
GROUP("sd5_grp", sd5_pins),
GROUP("sd6_grp0", sd6_pins0),
GROUP("sd6_grp1", sd6_pins1),
@@ -1421,9 +1428,11 @@ static const char * const ld_ldd_fck_grp[] = { "ld_ldd_fck_grp", };
static const char * const ld_ldd_lck_grp[] = { "ld_ldd_lck_grp", };
static const char * const lr_lcdrom_grp[] = { "lr_lcdrom_grp", };
static const char * const lvds_analog_grp[] = { "lvds_analog_grp", };
-static const char * const nd_df_grp[] = { "nd_df_grp", };
-static const char * const nd_df_nowp_grp[] = { "nd_df_nowp_grp", };
+static const char * const nd_df_basic_grp[] = { "nd_df_basic_grp", };
+static const char * const nd_df_wp_grp[] = { "nd_df_wp_grp", };
+static const char * const nd_df_cs_grp[] = { "nd_df_cs_grp", };
static const char * const ps_grp[] = { "ps_grp", };
+static const char * const ps_no_dir_grp[] = { "ps_no_dir_grp", };
static const char * const pwc_core_on_grp[] = { "pwc_core_on_grp", };
static const char * const pwc_ext_on_grp[] = { "pwc_ext_on_grp", };
static const char * const pwc_gpio3_clk_grp[] = { "pwc_gpio3_clk_grp", };
@@ -1478,7 +1487,7 @@ static const char * const sd2_cdb_grp0[] = { "sd2_cdb_grp0", };
static const char * const sd2_cdb_grp1[] = { "sd2_cdb_grp1", };
static const char * const sd2_wpb_grp0[] = { "sd2_wpb_grp0", };
static const char * const sd2_wpb_grp1[] = { "sd2_wpb_grp1", };
-static const char * const sd3_grp[] = { "sd3_grp", };
+static const char * const sd3_9_grp[] = { "sd3_9_grp", };
static const char * const sd5_grp[] = { "sd5_grp", };
static const char * const sd6_grp0[] = { "sd6_grp0", };
static const char * const sd6_grp1[] = { "sd6_grp1", };
@@ -3174,7 +3183,7 @@ static struct atlas7_grp_mux lvds_analog_grp_mux = {
.pad_mux_list = lvds_analog_grp_pad_mux,
};
-static struct atlas7_pad_mux nd_df_grp_pad_mux[] = {
+static struct atlas7_pad_mux nd_df_basic_grp_pad_mux[] = {
MUX(1, 44, 1, N, N, N, N),
MUX(1, 43, 1, N, N, N, N),
MUX(1, 42, 1, N, N, N, N),
@@ -3186,41 +3195,33 @@ static struct atlas7_pad_mux nd_df_grp_pad_mux[] = {
MUX(1, 47, 1, N, N, N, N),
MUX(1, 46, 1, N, N, N, N),
MUX(1, 52, 1, N, N, N, N),
- MUX(1, 51, 1, N, N, N, N),
MUX(1, 45, 1, N, N, N, N),
MUX(1, 49, 1, N, N, N, N),
MUX(1, 50, 1, N, N, N, N),
MUX(1, 48, 1, N, N, N, N),
+};
+
+static struct atlas7_grp_mux nd_df_basic_grp_mux = {
+ .pad_mux_count = ARRAY_SIZE(nd_df_basic_grp_pad_mux),
+ .pad_mux_list = nd_df_basic_grp_pad_mux,
+};
+
+static struct atlas7_pad_mux nd_df_wp_grp_pad_mux[] = {
MUX(1, 124, 4, N, N, N, N),
};
-static struct atlas7_grp_mux nd_df_grp_mux = {
- .pad_mux_count = ARRAY_SIZE(nd_df_grp_pad_mux),
- .pad_mux_list = nd_df_grp_pad_mux,
+static struct atlas7_grp_mux nd_df_wp_grp_mux = {
+ .pad_mux_count = ARRAY_SIZE(nd_df_wp_grp_pad_mux),
+ .pad_mux_list = nd_df_wp_grp_pad_mux,
};
-static struct atlas7_pad_mux nd_df_nowp_grp_pad_mux[] = {
- MUX(1, 44, 1, N, N, N, N),
- MUX(1, 43, 1, N, N, N, N),
- MUX(1, 42, 1, N, N, N, N),
- MUX(1, 41, 1, N, N, N, N),
- MUX(1, 40, 1, N, N, N, N),
- MUX(1, 39, 1, N, N, N, N),
- MUX(1, 38, 1, N, N, N, N),
- MUX(1, 37, 1, N, N, N, N),
- MUX(1, 47, 1, N, N, N, N),
- MUX(1, 46, 1, N, N, N, N),
- MUX(1, 52, 1, N, N, N, N),
+static struct atlas7_pad_mux nd_df_cs_grp_pad_mux[] = {
MUX(1, 51, 1, N, N, N, N),
- MUX(1, 45, 1, N, N, N, N),
- MUX(1, 49, 1, N, N, N, N),
- MUX(1, 50, 1, N, N, N, N),
- MUX(1, 48, 1, N, N, N, N),
};
-static struct atlas7_grp_mux nd_df_nowp_grp_mux = {
- .pad_mux_count = ARRAY_SIZE(nd_df_nowp_grp_pad_mux),
- .pad_mux_list = nd_df_nowp_grp_pad_mux,
+static struct atlas7_grp_mux nd_df_cs_grp_mux = {
+ .pad_mux_count = ARRAY_SIZE(nd_df_cs_grp_pad_mux),
+ .pad_mux_list = nd_df_cs_grp_pad_mux,
};
static struct atlas7_pad_mux ps_grp_pad_mux[] = {
@@ -3234,6 +3235,15 @@ static struct atlas7_grp_mux ps_grp_mux = {
.pad_mux_list = ps_grp_pad_mux,
};
+static struct atlas7_pad_mux ps_no_dir_grp_pad_mux[] = {
+ MUX(1, 119, 2, N, N, N, N),
+};
+
+static struct atlas7_grp_mux ps_no_dir_grp_mux = {
+ .pad_mux_count = ARRAY_SIZE(ps_no_dir_grp_pad_mux),
+ .pad_mux_list = ps_no_dir_grp_pad_mux,
+};
+
static struct atlas7_pad_mux pwc_core_on_grp_pad_mux[] = {
MUX(0, 8, 1, N, N, N, N),
};
@@ -3743,7 +3753,7 @@ static struct atlas7_grp_mux sd2_wpb_grp1_mux = {
.pad_mux_list = sd2_wpb_grp1_pad_mux,
};
-static struct atlas7_pad_mux sd3_grp_pad_mux[] = {
+static struct atlas7_pad_mux sd3_9_grp_pad_mux[] = {
MUX(1, 85, 1, N, N, N, N),
MUX(1, 86, 1, N, N, N, N),
MUX(1, 87, 1, N, N, N, N),
@@ -3752,9 +3762,9 @@ static struct atlas7_pad_mux sd3_grp_pad_mux[] = {
MUX(1, 90, 1, N, N, N, N),
};
-static struct atlas7_grp_mux sd3_grp_mux = {
- .pad_mux_count = ARRAY_SIZE(sd3_grp_pad_mux),
- .pad_mux_list = sd3_grp_pad_mux,
+static struct atlas7_grp_mux sd3_9_grp_mux = {
+ .pad_mux_count = ARRAY_SIZE(sd3_9_grp_pad_mux),
+ .pad_mux_list = sd3_9_grp_pad_mux,
};
static struct atlas7_pad_mux sd5_grp_pad_mux[] = {
@@ -4296,6 +4306,9 @@ static struct atlas7_pad_mux vi_vip1_low8bit_grp_pad_mux[] = {
MUX(1, 79, 1, N, N, N, N),
MUX(1, 80, 1, N, N, N, N),
MUX(1, 81, 1, N, N, N, N),
+ MUX(1, 82, 1, N, N, N, N),
+ MUX(1, 83, 1, N, N, N, N),
+ MUX(1, 84, 1, N, N, N, N),
};
static struct atlas7_grp_mux vi_vip1_low8bit_grp_mux = {
@@ -4307,11 +4320,14 @@ static struct atlas7_pad_mux vi_vip1_high8bit_grp_pad_mux[] = {
MUX(1, 82, 1, N, N, N, N),
MUX(1, 83, 1, N, N, N, N),
MUX(1, 84, 1, N, N, N, N),
- MUX(1, 108, 2, N, N, N, N),
MUX(1, 103, 2, N, N, N, N),
MUX(1, 104, 2, N, N, N, N),
MUX(1, 105, 2, N, N, N, N),
MUX(1, 106, 2, N, N, N, N),
+ MUX(1, 107, 2, N, N, N, N),
+ MUX(1, 102, 2, N, N, N, N),
+ MUX(1, 97, 2, N, N, N, N),
+ MUX(1, 98, 2, N, N, N, N),
};
static struct atlas7_grp_mux vi_vip1_high8bit_grp_mux = {
@@ -4598,9 +4614,11 @@ static struct atlas7_pmx_func atlas7_pmx_functions[] = {
FUNCTION("ld_ldd_lck", ld_ldd_lck_grp, &ld_ldd_lck_grp_mux),
FUNCTION("lr_lcdrom", lr_lcdrom_grp, &lr_lcdrom_grp_mux),
FUNCTION("lvds_analog", lvds_analog_grp, &lvds_analog_grp_mux),
- FUNCTION("nd_df", nd_df_grp, &nd_df_grp_mux),
- FUNCTION("nd_df_nowp", nd_df_nowp_grp, &nd_df_nowp_grp_mux),
+ FUNCTION("nd_df_basic", nd_df_basic_grp, &nd_df_basic_grp_mux),
+ FUNCTION("nd_df_wp", nd_df_wp_grp, &nd_df_wp_grp_mux),
+ FUNCTION("nd_df_cs", nd_df_cs_grp, &nd_df_cs_grp_mux),
FUNCTION("ps", ps_grp, &ps_grp_mux),
+ FUNCTION("ps_no_dir", ps_no_dir_grp, &ps_no_dir_grp_mux),
FUNCTION("pwc_core_on", pwc_core_on_grp, &pwc_core_on_grp_mux),
FUNCTION("pwc_ext_on", pwc_ext_on_grp, &pwc_ext_on_grp_mux),
FUNCTION("pwc_gpio3_clk", pwc_gpio3_clk_grp, &pwc_gpio3_clk_grp_mux),
@@ -4686,10 +4704,11 @@ static struct atlas7_pmx_func atlas7_pmx_functions[] = {
FUNCTION("sd2_cdb_m1", sd2_cdb_grp1, &sd2_cdb_grp1_mux),
FUNCTION("sd2_wpb_m0", sd2_wpb_grp0, &sd2_wpb_grp0_mux),
FUNCTION("sd2_wpb_m1", sd2_wpb_grp1, &sd2_wpb_grp1_mux),
- FUNCTION("sd3", sd3_grp, &sd3_grp_mux),
+ FUNCTION("sd3", sd3_9_grp, &sd3_9_grp_mux),
FUNCTION("sd5", sd5_grp, &sd5_grp_mux),
FUNCTION("sd6_m0", sd6_grp0, &sd6_grp0_mux),
FUNCTION("sd6_m1", sd6_grp1, &sd6_grp1_mux),
+ FUNCTION("sd9", sd3_9_grp, &sd3_9_grp_mux),
FUNCTION("sp0_ext_ldo_on",
sp0_ext_ldo_on_grp,
&sp0_ext_ldo_on_grp_mux),
@@ -5097,6 +5116,14 @@ static int atlas7_pmx_set_mux(struct pinctrl_dev *pctldev,
pr_debug("PMX DUMP ### Function:[%s] Group:[%s] #### START >>>\n",
pmx_func->name, pin_grp->name);
+ /* the sd3 and sd9 pin select by SYS2PCI_SDIO9SEL register */
+ if (pin_grp->pins == (unsigned int *)&sd3_9_pins) {
+ if (!strcmp(pmx_func->name, "sd9"))
+ writel(1, pmx->sys2pci_base + SYS2PCI_SDIO9SEL);
+ else
+ writel(0, pmx->sys2pci_base + SYS2PCI_SDIO9SEL);
+ }
+
grp_mux = pmx_func->grpmux;
for (idx = 0; idx < grp_mux->pad_mux_count; idx++) {
@@ -5385,12 +5412,27 @@ static int atlas7_pinmux_probe(struct platform_device *pdev)
struct atlas7_pmx *pmx;
struct device_node *np = pdev->dev.of_node;
u32 banks = ATLAS7_PINCTRL_REG_BANKS;
+ struct device_node *sys2pci_np;
+ struct resource res;
/* Create state holders etc for this driver */
pmx = devm_kzalloc(&pdev->dev, sizeof(*pmx), GFP_KERNEL);
if (!pmx)
return -ENOMEM;
+ /* The sd3 and sd9 shared all pins, and the function select by
+ * SYS2PCI_SDIO9SEL register
+ */
+ sys2pci_np = of_find_node_by_name(NULL, "sys2pci");
+ if (!sys2pci_np)
+ return -EINVAL;
+ ret = of_address_to_resource(sys2pci_np, 0, &res);
+ if (ret)
+ return ret;
+ pmx->sys2pci_base = devm_ioremap_resource(&pdev->dev, &res);
+ if (IS_ERR(pmx->sys2pci_base))
+ return -ENOMEM;
+
pmx->dev = &pdev->dev;
pmx->pctl_data = &atlas7_ioc_data;
diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c
index 2a8d69725de8..edf40df05ec0 100644
--- a/drivers/pinctrl/sirf/pinctrl-sirf.c
+++ b/drivers/pinctrl/sirf/pinctrl-sirf.c
@@ -85,12 +85,16 @@ static int sirfsoc_dt_node_to_map(struct pinctrl_dev *pctldev,
/* calculate number of maps required */
for_each_child_of_node(np_config, np) {
ret = of_property_read_string(np, "sirf,function", &function);
- if (ret < 0)
+ if (ret < 0) {
+ of_node_put(np);
return ret;
+ }
ret = of_property_count_strings(np, "sirf,pins");
- if (ret < 0)
+ if (ret < 0) {
+ of_node_put(np);
return ret;
+ }
count += ret;
}
diff --git a/drivers/pinctrl/spear/Makefile b/drivers/pinctrl/spear/Makefile
index 0e400ebeb8ff..37b8412ac8a3 100644
--- a/drivers/pinctrl/spear/Makefile
+++ b/drivers/pinctrl/spear/Makefile
@@ -1,7 +1,7 @@
# SPEAr pinmux support
obj-$(CONFIG_PINCTRL_SPEAR_PLGPIO) += pinctrl-plgpio.o
-obj-$(CONFIG_PINCTRL_SPEAR) += pinctrl-spear.o
+obj-y += pinctrl-spear.o
obj-$(CONFIG_PINCTRL_SPEAR3XX) += pinctrl-spear3xx.o
obj-$(CONFIG_PINCTRL_SPEAR300) += pinctrl-spear300.o
obj-$(CONFIG_PINCTRL_SPEAR310) += pinctrl-spear310.o
diff --git a/drivers/pinctrl/sunxi/Kconfig b/drivers/pinctrl/sunxi/Kconfig
index e68fd951129a..f8dbc8bec0e1 100644
--- a/drivers/pinctrl/sunxi/Kconfig
+++ b/drivers/pinctrl/sunxi/Kconfig
@@ -51,8 +51,17 @@ config PINCTRL_SUN8I_A23_R
depends on RESET_CONTROLLER
select PINCTRL_SUNXI_COMMON
+config PINCTRL_SUN8I_H3
+ def_bool MACH_SUN8I
+ select PINCTRL_SUNXI_COMMON
+
config PINCTRL_SUN9I_A80
def_bool MACH_SUN9I
select PINCTRL_SUNXI_COMMON
+config PINCTRL_SUN9I_A80_R
+ def_bool MACH_SUN9I
+ depends on RESET_CONTROLLER
+ select PINCTRL_SUNXI_COMMON
+
endif
diff --git a/drivers/pinctrl/sunxi/Makefile b/drivers/pinctrl/sunxi/Makefile
index e08029034510..ef82f22bb9ef 100644
--- a/drivers/pinctrl/sunxi/Makefile
+++ b/drivers/pinctrl/sunxi/Makefile
@@ -13,4 +13,6 @@ obj-$(CONFIG_PINCTRL_SUN8I_A23) += pinctrl-sun8i-a23.o
obj-$(CONFIG_PINCTRL_SUN8I_A23_R) += pinctrl-sun8i-a23-r.o
obj-$(CONFIG_PINCTRL_SUN8I_A33) += pinctrl-sun8i-a33.o
obj-$(CONFIG_PINCTRL_SUN8I_A83T) += pinctrl-sun8i-a83t.o
+obj-$(CONFIG_PINCTRL_SUN8I_H3) += pinctrl-sun8i-h3.o
obj-$(CONFIG_PINCTRL_SUN9I_A80) += pinctrl-sun9i-a80.o
+obj-$(CONFIG_PINCTRL_SUN9I_A80_R) += pinctrl-sun9i-a80-r.o
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
new file mode 100644
index 000000000000..77d4cf047cee
--- /dev/null
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-h3.c
@@ -0,0 +1,515 @@
+/*
+ * Allwinner H3 SoCs pinctrl driver.
+ *
+ * Copyright (C) 2015 Jens Kuske <jenskuske@gmail.com>
+ *
+ * Based on pinctrl-sun8i-a23.c, which is:
+ * Copyright (C) 2014 Chen-Yu Tsai <wens@csie.org>
+ * Copyright (C) 2014 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+
+#include "pinctrl-sunxi.h"
+
+static const struct sunxi_desc_pin sun8i_h3_pins[] = {
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart2"), /* TX */
+ SUNXI_FUNCTION(0x3, "jtag"), /* MS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 0)), /* PA_EINT0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart2"), /* RX */
+ SUNXI_FUNCTION(0x3, "jtag"), /* CK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 1)), /* PA_EINT1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart2"), /* RTS */
+ SUNXI_FUNCTION(0x3, "jtag"), /* DO */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 2)), /* PA_EINT2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart2"), /* CTS */
+ SUNXI_FUNCTION(0x3, "jtag"), /* DI */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 3)), /* PA_EINT3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart0"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 4)), /* PA_EINT4 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart0"), /* RX */
+ SUNXI_FUNCTION(0x3, "pwm0"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 5)), /* PA_EINT5 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "sim"), /* PWREN */
+ SUNXI_FUNCTION(0x3, "pwm1"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 6)), /* PA_EINT6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "sim"), /* CLK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 7)), /* PA_EINT7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "sim"), /* DATA */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 8)), /* PA_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "sim"), /* RST */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 9)), /* PA_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "sim"), /* DET */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 10)), /* PA_EINT10 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2c0"), /* SCK */
+ SUNXI_FUNCTION(0x3, "di"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 11)), /* PA_EINT11 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2c0"), /* SDA */
+ SUNXI_FUNCTION(0x3, "di"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 12)), /* PA_EINT12 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "spi1"), /* CS */
+ SUNXI_FUNCTION(0x3, "uart3"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 13)), /* PA_EINT13 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "spi1"), /* CLK */
+ SUNXI_FUNCTION(0x3, "uart3"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 14)), /* PA_EINT14 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "spi1"), /* MOSI */
+ SUNXI_FUNCTION(0x3, "uart3"), /* RTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 15)), /* PA_EINT15 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 16),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "spi1"), /* MISO */
+ SUNXI_FUNCTION(0x3, "uart3"), /* CTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 16)), /* PA_EINT16 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 17),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "spdif"), /* OUT */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 17)), /* PA_EINT17 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 18),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s0"), /* SYNC */
+ SUNXI_FUNCTION(0x3, "i2c1"), /* SCK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 18)), /* PA_EINT18 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 19),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s0"), /* CLK */
+ SUNXI_FUNCTION(0x3, "i2c1"), /* SDA */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 19)), /* PA_EINT19 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 20),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s0"), /* DOUT */
+ SUNXI_FUNCTION(0x3, "sim"), /* VPPEN */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 20)), /* PA_EINT20 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(A, 21),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s0"), /* DIN */
+ SUNXI_FUNCTION(0x3, "sim"), /* VPPPP */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 21)), /* PA_EINT21 */
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* WE */
+ SUNXI_FUNCTION(0x3, "spi0")), /* MOSI */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* ALE */
+ SUNXI_FUNCTION(0x3, "spi0")), /* MISO */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* CLE */
+ SUNXI_FUNCTION(0x3, "spi0")), /* CLK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* CE1 */
+ SUNXI_FUNCTION(0x3, "spi0")), /* CS */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0")), /* CE0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* RE */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* CLK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* RB0 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* CMD */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0")), /* RB1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ0 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ1 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ2 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ3 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ4 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D4 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ5 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D5 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "nand"), /* DQS */
+ SUNXI_FUNCTION(0x3, "mmc2")), /* RST */
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXD3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXD2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXD1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXD0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXCK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXCTL/RXDV */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* RXERR */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXD3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXD2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXD1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXD0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* CRS */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXCK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXCTL/TXEN */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* TXERR */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* CLKIN/COL */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 16),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* MDC */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 17),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "emac")), /* MDIO */
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* PCLK */
+ SUNXI_FUNCTION(0x3, "ts")), /* CLK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* MCLK */
+ SUNXI_FUNCTION(0x3, "ts")), /* ERR */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* HSYNC */
+ SUNXI_FUNCTION(0x3, "ts")), /* SYNC */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* VSYNC */
+ SUNXI_FUNCTION(0x3, "ts")), /* DVLD */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D0 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D1 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D2 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D3 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D4 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D4 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D5 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D5 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D6 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* D7 */
+ SUNXI_FUNCTION(0x3, "ts")), /* D7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* SCK */
+ SUNXI_FUNCTION(0x3, "i2c2")), /* SCK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "csi"), /* SDA */
+ SUNXI_FUNCTION(0x3, "i2c2")), /* SDA */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out")),
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out")),
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* D1 */
+ SUNXI_FUNCTION(0x3, "jtag")), /* MS */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* D0 */
+ SUNXI_FUNCTION(0x3, "jtag")), /* DI */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* CLK */
+ SUNXI_FUNCTION(0x3, "uart0")), /* TX */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* CMD */
+ SUNXI_FUNCTION(0x3, "jtag")), /* DO */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* D3 */
+ SUNXI_FUNCTION(0x3, "uart0")), /* RX */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc0"), /* D2 */
+ SUNXI_FUNCTION(0x3, "jtag")), /* CK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(F, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out")),
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* CLK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 0)), /* PG_EINT0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* CMD */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 1)), /* PG_EINT1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* D0 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 2)), /* PG_EINT2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* D1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 3)), /* PG_EINT3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* D2 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 4)), /* PG_EINT4 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "mmc1"), /* D3 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 5)), /* PG_EINT5 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart1"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 6)), /* PG_EINT6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart1"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 7)), /* PG_EINT7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart1"), /* RTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 8)), /* PG_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "uart1"), /* CTS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 9)), /* PG_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s1"), /* SYNC */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 10)), /* PG_EINT10 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s1"), /* CLK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 11)), /* PG_EINT11 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s1"), /* DOUT */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 12)), /* PG_EINT12 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "i2s1"), /* DIN */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 13)), /* PG_EINT13 */
+};
+
+static const struct sunxi_pinctrl_desc sun8i_h3_pinctrl_data = {
+ .pins = sun8i_h3_pins,
+ .npins = ARRAY_SIZE(sun8i_h3_pins),
+ .irq_banks = 2,
+};
+
+static int sun8i_h3_pinctrl_probe(struct platform_device *pdev)
+{
+ return sunxi_pinctrl_init(pdev,
+ &sun8i_h3_pinctrl_data);
+}
+
+static const struct of_device_id sun8i_h3_pinctrl_match[] = {
+ { .compatible = "allwinner,sun8i-h3-pinctrl", },
+ {}
+};
+
+static struct platform_driver sun8i_h3_pinctrl_driver = {
+ .probe = sun8i_h3_pinctrl_probe,
+ .driver = {
+ .name = "sun8i-h3-pinctrl",
+ .of_match_table = sun8i_h3_pinctrl_match,
+ },
+};
+builtin_platform_driver(sun8i_h3_pinctrl_driver);
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun9i-a80-r.c b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80-r.c
new file mode 100644
index 000000000000..42547ffa20a8
--- /dev/null
+++ b/drivers/pinctrl/sunxi/pinctrl-sun9i-a80-r.c
@@ -0,0 +1,181 @@
+/*
+ * Allwinner A80 SoCs special pins pinctrl driver.
+ *
+ * Copyright (C) 2014 Maxime Ripard
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/reset.h>
+
+#include "pinctrl-sunxi.h"
+
+static const struct sunxi_desc_pin sun9i_a80_r_pins[] = {
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_uart"), /* TX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 0)), /* PL_EINT0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_uart"), /* RX */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 1)), /* PL_EINT1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_jtag"), /* TMS */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 2)), /* PL_EINT2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_jtag"), /* TCK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 3)), /* PL_EINT3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_jtag"), /* TDO */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 4)), /* PL_EINT4 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 5),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_jtag"), /* TDI */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 5)), /* PL_EINT5 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 6),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_cir_rx"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 6)), /* PL_EINT6 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 7),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "1wire"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 7)), /* PL_EINT7 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_ps2"), /* SCK1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 8)), /* PL_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(L, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_ps2"), /* SDA1 */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 0, 9)), /* PL_EINT9 */
+
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 0)), /* PM_EINT0 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 1)), /* PM_EINT1 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 2),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 2)), /* PM_EINT2 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 3),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 3)), /* PM_EINT3 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 4),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_i2s1"), /* LRCKR */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 4)), /* PM_EINT4 */
+
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 8),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_i2c1"), /* SCK */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 8)), /* PM_EINT8 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 9),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x3, "s_i2c1"), /* SDA */
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 9)), /* PM_EINT9 */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 10),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2s0"), /* MCLK */
+ SUNXI_FUNCTION(0x3, "s_i2s1")), /* MCLK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 11),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2s0"), /* BCLK */
+ SUNXI_FUNCTION(0x3, "s_i2s1")), /* BCLK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 12),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2s0"), /* LRCK */
+ SUNXI_FUNCTION(0x3, "s_i2s1")), /* LRCK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 13),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2s0"), /* DIN */
+ SUNXI_FUNCTION(0x3, "s_i2s1")), /* DIN */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 14),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2s0"), /* DOUT */
+ SUNXI_FUNCTION(0x3, "s_i2s1")), /* DOUT */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(M, 15),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION_IRQ_BANK(0x6, 1, 15)), /* PM_EINT15 */
+
+ /* Hole */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(N, 0),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2c0"), /* SCK */
+ SUNXI_FUNCTION(0x3, "s_rsb")), /* SCK */
+ SUNXI_PIN(SUNXI_PINCTRL_PIN(N, 1),
+ SUNXI_FUNCTION(0x0, "gpio_in"),
+ SUNXI_FUNCTION(0x1, "gpio_out"),
+ SUNXI_FUNCTION(0x2, "s_i2c0"), /* SDA */
+ SUNXI_FUNCTION(0x3, "s_rsb")), /* SDA */
+};
+
+static const struct sunxi_pinctrl_desc sun9i_a80_r_pinctrl_data = {
+ .pins = sun9i_a80_r_pins,
+ .npins = ARRAY_SIZE(sun9i_a80_r_pins),
+ .pin_base = PL_BASE,
+ .irq_banks = 2,
+};
+
+static int sun9i_a80_r_pinctrl_probe(struct platform_device *pdev)
+{
+ return sunxi_pinctrl_init(pdev,
+ &sun9i_a80_r_pinctrl_data);
+}
+
+static const struct of_device_id sun9i_a80_r_pinctrl_match[] = {
+ { .compatible = "allwinner,sun9i-a80-r-pinctrl", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, sun9i_a80_r_pinctrl_match);
+
+static struct platform_driver sun9i_a80_r_pinctrl_driver = {
+ .probe = sun9i_a80_r_pinctrl_probe,
+ .driver = {
+ .name = "sun9i-a80-r-pinctrl",
+ .owner = THIS_MODULE,
+ .of_match_table = sun9i_a80_r_pinctrl_match,
+ },
+};
+module_platform_driver(sun9i_a80_r_pinctrl_driver);
+
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com");
+MODULE_DESCRIPTION("Allwinner A80 R_PIO pinctrl driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pinctrl/uniphier/Kconfig b/drivers/pinctrl/uniphier/Kconfig
index ad907072e09f..7abd614dc383 100644
--- a/drivers/pinctrl/uniphier/Kconfig
+++ b/drivers/pinctrl/uniphier/Kconfig
@@ -1,32 +1,35 @@
-if ARCH_UNIPHIER
-
-config PINCTRL_UNIPHIER
- bool
+menuconfig PINCTRL_UNIPHIER
+ bool "UniPhier SoC pinctrl drivers"
+ depends on ARCH_UNIPHIER
+ depends on OF && MFD_SYSCON
+ default y
select PINMUX
select GENERIC_PINCONF
+if PINCTRL_UNIPHIER
+
config PINCTRL_UNIPHIER_PH1_LD4
tristate "UniPhier PH1-LD4 SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
config PINCTRL_UNIPHIER_PH1_PRO4
tristate "UniPhier PH1-Pro4 SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
config PINCTRL_UNIPHIER_PH1_SLD8
tristate "UniPhier PH1-sLD8 SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
config PINCTRL_UNIPHIER_PH1_PRO5
tristate "UniPhier PH1-Pro5 SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
config PINCTRL_UNIPHIER_PROXSTREAM2
tristate "UniPhier ProXstream2 SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
config PINCTRL_UNIPHIER_PH1_LD6B
tristate "UniPhier PH1-LD6b SoC pinctrl driver"
- select PINCTRL_UNIPHIER
+ default y
endif
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3d22fc3e3c1a..4e08d1cd704d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2885,10 +2885,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
/*
* Use the device's preferred I/O size for reads and writes
- * unless the reported value is unreasonably large (or garbage).
+ * unless the reported value is unreasonably small, large, or
+ * garbage.
*/
- if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max &&
- sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS)
+ if (sdkp->opt_xfer_blocks &&
+ sdkp->opt_xfer_blocks <= dev_max &&
+ sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
+ sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE)
rw_max = q->limits.io_opt =
logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
else
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 69b203651905..e489a3271f06 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -118,12 +118,20 @@ failed:
return rc;
}
-static const char *ll_follow_link(struct dentry *dentry, void **cookie)
+static void ll_put_link(void *p)
+{
+ ptlrpc_req_finished(p);
+}
+
+static const char *ll_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct ptlrpc_request *request = NULL;
int rc;
char *symname = NULL;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
CDEBUG(D_VFSTRACE, "VFS Op\n");
ll_inode_size_lock(inode);
@@ -135,22 +143,16 @@ static const char *ll_follow_link(struct dentry *dentry, void **cookie)
}
/* symname may contain a pointer to the request message buffer,
- * we delay request releasing until ll_put_link then.
+ * we delay request releasing then.
*/
- *cookie = request;
+ set_delayed_call(done, ll_put_link, request);
return symname;
}
-static void ll_put_link(struct inode *unused, void *cookie)
-{
- ptlrpc_req_finished(cookie);
-}
-
struct inode_operations ll_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.setattr = ll_setattr,
- .follow_link = ll_follow_link,
- .put_link = ll_put_link,
+ .get_link = ll_get_link,
.getattr = ll_getattr,
.permission = ll_inode_permission,
.setxattr = ll_setxattr,
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index 4b7eb33f7d01..660b8ac37ae0 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -60,10 +60,10 @@
static
int get_xattr_type(const char *name)
{
- if (!strcmp(name, POSIX_ACL_XATTR_ACCESS))
+ if (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS))
return XATTR_ACL_ACCESS_T;
- if (!strcmp(name, POSIX_ACL_XATTR_DEFAULT))
+ if (!strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT))
return XATTR_ACL_DEFAULT_T;
if (!strncmp(name, XATTR_USER_PREFIX,
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 5381a728d23e..e5139402e7f8 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -133,6 +133,12 @@ static void sysrq_handle_crash(int key)
{
char *killer = NULL;
+ /* we need to release the RCU read lock here,
+ * otherwise we get an annoying
+ * 'BUG: sleeping function called from invalid context'
+ * complaint from the kernel before the panic.
+ */
+ rcu_read_unlock();
panic_on_oops = 1; /* force panic */
wmb();
*killer = 1;
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index a7e28890f5ef..9da967f38387 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -67,8 +67,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
return 0;
}
/* get the default/access acl values and cache them */
- dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
- pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+ dacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_DEFAULT);
+ pacl = __v9fs_get_acl(fid, XATTR_NAME_POSIX_ACL_ACCESS);
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
@@ -133,10 +133,10 @@ static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
goto err_free_out;
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -220,15 +220,12 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* We allow set/get/list of acl when access=client is not specified
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_get(dentry, handler->prefix, buffer, size);
+ return v9fs_xattr_get(dentry, handler->name, buffer, size);
acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags);
if (IS_ERR(acl))
@@ -250,16 +247,13 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
struct v9fs_session_info *v9ses;
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
v9ses = v9fs_dentry2v9ses(dentry);
/*
* set the attribute on the remote. Without even looking at the
* xattr value. We leave it to the server to validate
*/
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
- return v9fs_xattr_set(dentry, handler->prefix, value, size,
+ return v9fs_xattr_set(dentry, handler->name, value, size,
flags);
if (S_ISLNK(inode->i_mode))
@@ -319,7 +313,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
default:
BUG();
}
- retval = v9fs_xattr_set(dentry, handler->prefix, value, size, flags);
+ retval = v9fs_xattr_set(dentry, handler->name, value, size, flags);
if (!retval)
set_cached_acl(inode, handler->flags, acl);
err_out:
@@ -328,14 +322,14 @@ err_out:
}
const struct xattr_handler v9fs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
};
const struct xattr_handler v9fs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.get = v9fs_xattr_get_acl,
.set = v9fs_xattr_set_acl,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 511078586fa1..c7cc7c30f0c8 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1223,18 +1223,26 @@ ino_t v9fs_qid2ino(struct p9_qid *qid)
}
/**
- * v9fs_vfs_follow_link - follow a symlink path
+ * v9fs_vfs_get_link - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: delayed call for when we are done with the return value
*/
-static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *v9fs_vfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry);
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid;
struct p9_wstat *st;
char *res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+ fid = v9fs_fid_lookup(dentry);
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
if (IS_ERR(fid))
@@ -1259,7 +1267,8 @@ static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie)
p9stat_free(st);
kfree(st);
- return *cookie = res;
+ set_delayed_call(done, kfree_link, res);
+ return res;
}
/**
@@ -1452,8 +1461,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cb899af1babc..a34702c998f5 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -899,26 +899,34 @@ error:
}
/**
- * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * v9fs_vfs_get_link_dotl - follow a symlink path
* @dentry: dentry for symlink
- * @cookie: place to pass the data to put_link()
+ * @inode: inode for symlink
+ * @done: destructor for return value
*/
static const char *
-v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie)
+v9fs_vfs_get_link_dotl(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct p9_fid *fid = v9fs_fid_lookup(dentry);
+ struct p9_fid *fid;
char *target;
int retval;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
+ fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return ERR_CAST(fid);
retval = p9_client_readlink(fid, &target);
if (retval)
return ERR_PTR(retval);
- return *cookie = target;
+ set_delayed_call(done, kfree_link, target);
+ return target;
}
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
@@ -984,8 +992,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
const struct inode_operations v9fs_symlink_inode_operations_dotl = {
.readlink = generic_readlink,
- .follow_link = v9fs_vfs_follow_link_dotl,
- .put_link = kfree_put_link,
+ .get_link = v9fs_vfs_get_link_dotl,
.getattr = v9fs_vfs_getattr_dotl,
.setattr = v9fs_vfs_setattr_dotl,
.setxattr = generic_setxattr,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index e3d026ac382e..9dd9b47a6c1a 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -143,8 +143,6 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_get(dentry, full_name, buffer, size);
}
@@ -154,8 +152,6 @@ static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, name);
- if (strcmp(name, "") == 0)
- return -EINVAL;
return v9fs_xattr_set(dentry, full_name, value, size, flags);
}
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 17349500592d..0fdb0f5b2239 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -140,6 +140,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
break;
case ST_SOFTLINK:
inode->i_mode |= S_IFLNK;
+ inode_nohighmem(inode);
inode->i_op = &affs_symlink_inode_operations;
inode->i_data.a_ops = &affs_symlink_aops;
break;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 181e05b46e72..00d3002a6780 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -344,6 +344,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
return -ENOSPC;
inode->i_op = &affs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
mode_to_prot(inode);
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index ea5b69a18ba9..69b03dbb792f 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -14,13 +14,13 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
{
struct buffer_head *bh;
struct inode *inode = page->mapping->host;
- char *link = kmap(page);
+ char *link = page_address(page);
struct slink_front *lf;
int i, j;
char c;
char lc;
- pr_debug("follow_link(ino=%lu)\n", inode->i_ino);
+ pr_debug("get_link(ino=%lu)\n", inode->i_ino);
bh = affs_bread(inode->i_sb, inode->i_ino);
if (!bh)
@@ -57,12 +57,10 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
link[i] = '\0';
affs_brelse(bh);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
@@ -73,7 +71,6 @@ const struct address_space_operations affs_symlink_aops = {
const struct inode_operations affs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = affs_notify_change,
};
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index e06f5a23352a..86cc7264c21c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -56,6 +56,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
case AFS_FTYPE_SYMLINK:
inode->i_mode = S_IFLNK | vnode->status.mode;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
default:
printk("kAFS: AFS vnode with undefined type\n");
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index da0c33481bc0..84e037d1d129 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -12,10 +12,16 @@
#include "autofs_i.h"
-static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
+static const char *autofs4_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_sb_info *sbi;
+ struct autofs_info *ino;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ sbi = autofs4_sbi(dentry->d_sb);
+ ino = autofs4_dentry_ino(dentry);
if (ino && !autofs4_oz_mode(sbi))
ino->last_used = jiffies;
return d_inode(dentry)->i_private;
@@ -23,5 +29,5 @@ static const char *autofs4_follow_link(struct dentry *dentry, void **cookie)
const struct inode_operations autofs4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = autofs4_follow_link
+ .get_link = autofs4_get_link
};
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 46aedacfa6a8..25250fa87086 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -42,7 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long);
static struct inode *befs_alloc_inode(struct super_block *sb);
static void befs_destroy_inode(struct inode *inode);
static void befs_destroy_inodecache(void);
-static const char *befs_follow_link(struct dentry *, void **);
+static int befs_symlink_readpage(struct file *, struct page *);
static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
@@ -79,10 +79,8 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static const struct inode_operations befs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = befs_follow_link,
- .put_link = kfree_put_link,
+static const struct address_space_operations befs_symlink_aops = {
+ .readpage = befs_symlink_readpage,
};
/*
@@ -398,7 +396,9 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &befs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
- inode->i_op = &befs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
+ inode->i_mapping->a_ops = &befs_symlink_aops;
} else {
inode->i_link = befs_ino->i_data.symlink;
inode->i_op = &simple_symlink_inode_operations;
@@ -463,31 +463,33 @@ befs_destroy_inodecache(void)
* The data stream become link name. Unless the LONG_SYMLINK
* flag is set.
*/
-static const char *
-befs_follow_link(struct dentry *dentry, void **cookie)
+static int befs_symlink_readpage(struct file *unused, struct page *page)
{
- struct super_block *sb = dentry->d_sb;
- struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
+ struct inode *inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct befs_inode_info *befs_ino = BEFS_I(inode);
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
- char *link;
+ char *link = page_address(page);
- if (len == 0) {
+ if (len == 0 || len > PAGE_SIZE) {
befs_error(sb, "Long symlink with illegal length");
- return ERR_PTR(-EIO);
+ goto fail;
}
befs_debug(sb, "Follow long symlink");
- link = kmalloc(len, GFP_NOFS);
- if (!link)
- return ERR_PTR(-ENOMEM);
if (befs_read_lsymlink(sb, data, link, len) != len) {
- kfree(link);
befs_error(sb, "Failed to read entire long symlink");
- return ERR_PTR(-EIO);
+ goto fail;
}
link[len - 1] = '\0';
- return *cookie = link;
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+fail:
+ SetPageError(page);
+ unlock_page(page);
+ return -EIO;
}
/*
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9a0124a95851..f89db0c21b51 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,10 +37,10 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -81,7 +81,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &inode->i_mode);
if (ret < 0)
@@ -94,7 +94,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
return acl ? -EINVAL : 0;
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 974be09e7556..42a378a4eefb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -923,7 +923,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
- if (cpu_has_xmm4_2)
+ if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a70c5790f8f5..3b8856e182ae 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3550,10 +3550,10 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
int scanned = 0;
if (!xattr_access) {
- xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
- xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT));
+ xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
+ xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
}
slot++;
@@ -3774,6 +3774,7 @@ cache_acl:
break;
case S_IFLNK:
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
break;
default:
@@ -9705,6 +9706,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
btrfs_free_path(path);
inode->i_op = &btrfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len);
@@ -9994,7 +9996,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10071,7 +10073,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
@@ -10085,7 +10087,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.get_acl = btrfs_get_acl,
@@ -10094,13 +10096,12 @@ static const struct inode_operations btrfs_special_inode_operations = {
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
+ .getxattr = generic_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.update_time = btrfs_update_time,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 1fcd7b6e7564..7cbef1a14fe1 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -351,137 +351,89 @@ err:
return ret;
}
-/*
- * List of handlers for synthetic system.* attributes. All real ondisk
- * attributes are handled directly.
- */
-const struct xattr_handler *btrfs_xattr_handlers[] = {
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
- NULL,
-};
-
-/*
- * Check if the attribute is in a supported namespace.
- *
- * This is applied after the check for the synthetic attributes in the system
- * namespace.
- */
-static int btrfs_is_valid_xattr(const char *name)
+static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
{
- int len = strlen(name);
- int prefixlen = 0;
-
- if (!strncmp(name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN))
- prefixlen = XATTR_SECURITY_PREFIX_LEN;
- else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- prefixlen = XATTR_SYSTEM_PREFIX_LEN;
- else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
- prefixlen = XATTR_TRUSTED_PREFIX_LEN;
- else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- prefixlen = XATTR_USER_PREFIX_LEN;
- else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- prefixlen = XATTR_BTRFS_PREFIX_LEN;
- else
- return -EOPNOTSUPP;
-
- /*
- * The name cannot consist of just prefix
- */
- if (len <= prefixlen)
- return -EINVAL;
+ struct inode *inode = d_inode(dentry);
- return 0;
+ name = xattr_full_name(handler, name);
+ return __btrfs_getxattr(inode, name, buffer, size);
}
-ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ const void *buffer, size_t size,
+ int flags)
{
- int ret;
+ struct inode *inode = d_inode(dentry);
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
+ name = xattr_full_name(handler, name);
+ return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+}
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
- return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
+static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ name = xattr_full_name(handler, name);
+ return btrfs_set_prop(d_inode(dentry), name, value, size, flags);
}
+static const struct xattr_handler btrfs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set,
+};
+
+static const struct xattr_handler btrfs_btrfs_xattr_handler = {
+ .prefix = XATTR_BTRFS_PREFIX,
+ .get = btrfs_xattr_handler_get,
+ .set = btrfs_xattr_handler_set_prop,
+};
+
+const struct xattr_handler *btrfs_xattr_handlers[] = {
+ &btrfs_security_xattr_handler,
+#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &btrfs_trusted_xattr_handler,
+ &btrfs_user_xattr_handler,
+ &btrfs_btrfs_xattr_handler,
+ NULL,
+};
+
int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- value, size, flags);
-
- if (size == 0)
- value = ""; /* empty EA, do not remove */
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
- flags);
+ return generic_setxattr(dentry, name, value, size, flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
{
struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- int ret;
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
if (btrfs_root_readonly(root))
return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
-
- ret = btrfs_is_valid_xattr(name);
- if (ret)
- return ret;
-
- if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- return btrfs_set_prop(d_inode(dentry), name,
- NULL, 0, XATTR_REPLACE);
-
- return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
- XATTR_REPLACE);
+ return generic_removexattr(dentry, name);
}
static int btrfs_initxattrs(struct inode *inode,
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 5049608d1388..96807b3d22f5 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -28,8 +28,6 @@ extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
struct inode *inode, const char *name,
const void *value, size_t size, int flags);
-extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 8f84646f10e9..f19708487e2f 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -49,10 +49,10 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -92,7 +92,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_equiv_mode(acl, &new_mode);
if (ret < 0)
@@ -106,7 +106,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
ret = acl ? -EINVAL : 0;
goto out;
}
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
ret = -EINVAL;
@@ -202,11 +202,11 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_encode_32(pagelist, acl && default_acl ? 2 : 1);
if (acl) {
- size_t len = strlen(POSIX_ACL_XATTR_ACCESS);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_ACCESS);
err = ceph_pagelist_reserve(pagelist, len + val_size1 + 8);
if (err)
goto out_err;
- ceph_pagelist_encode_string(pagelist, POSIX_ACL_XATTR_ACCESS,
+ ceph_pagelist_encode_string(pagelist, XATTR_NAME_POSIX_ACL_ACCESS,
len);
err = posix_acl_to_xattr(&init_user_ns, acl,
tmp_buf, val_size1);
@@ -216,12 +216,12 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
ceph_pagelist_append(pagelist, tmp_buf, val_size1);
}
if (default_acl) {
- size_t len = strlen(POSIX_ACL_XATTR_DEFAULT);
+ size_t len = strlen(XATTR_NAME_POSIX_ACL_DEFAULT);
err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
if (err)
goto out_err;
err = ceph_pagelist_encode_string(pagelist,
- POSIX_ACL_XATTR_DEFAULT, len);
+ XATTR_NAME_POSIX_ACL_DEFAULT, len);
err = posix_acl_to_xattr(&init_user_ns, default_acl,
tmp_buf, val_size2);
if (err < 0)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 498dcfa2dcdb..da55eb8bcffa 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1756,7 +1756,7 @@ retry:
*/
static const struct inode_operations ceph_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ceph_setattr,
.getattr = ceph_getattr,
.setxattr = ceph_setxattr,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cbc0f4bca0c0..90e4e2b398b6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -900,8 +900,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = cifs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = cifs_get_link,
.permission = cifs_permission,
/* BB add the following two eventually */
/* revalidate: cifs_revalidate,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index c3cc1609025f..26a1187d4323 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -120,9 +120,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#endif
/* Functions related to symlinks */
-extern const char *cifs_follow_link(struct dentry *direntry, void **cookie);
-extern int cifs_readlink(struct dentry *direntry, char __user *buffer,
- int buflen);
+extern const char *cifs_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
const char *symname);
extern int cifs_removexattr(struct dentry *, const char *);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e3548f73bdea..062c2375549a 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -627,9 +627,9 @@ cifs_hl_exit:
}
const char *
-cifs_follow_link(struct dentry *direntry, void **cookie)
+cifs_get_link(struct dentry *direntry, struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(direntry);
int rc = -ENOMEM;
unsigned int xid;
char *full_path = NULL;
@@ -639,6 +639,9 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
+ if (!direntry)
+ return ERR_PTR(-ECHILD);
+
xid = get_xid();
tlink = cifs_sb_tlink(cifs_sb);
@@ -678,7 +681,8 @@ cifs_follow_link(struct dentry *direntry, void **cookie)
kfree(target_path);
return ERR_PTR(rc);
}
- return *cookie = target_path;
+ set_delayed_call(done, kfree_link, target_path);
+ return target_path;
}
int
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index ff9e1f8b16a4..f5dc2f0df4ad 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -190,8 +190,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#endif /* CONFIG_CIFS_ACL */
} else {
int temp;
- temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS));
+ temp = strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS));
if (temp == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
@@ -203,8 +203,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "set POSIX ACL not supported\n");
#endif
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBSetPosixACL(xid, pTcon, full_path,
@@ -292,8 +292,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
full_path, ea_name, ea_value, buf_size,
cifs_sb->local_nls, cifs_remap(cifs_sb));
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_ACCESS,
+ strlen(XATTR_NAME_POSIX_ACL_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
@@ -303,8 +303,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
#else
cifs_dbg(FYI, "Query POSIX ACL not supported yet\n");
#endif /* CONFIG_CIFS_POSIX */
- } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
+ } else if (strncmp(ea_name, XATTR_NAME_POSIX_ACL_DEFAULT,
+ strlen(XATTR_NAME_POSIX_ACL_DEFAULT)) == 0) {
#ifdef CONFIG_CIFS_POSIX
if (sb->s_flags & MS_POSIXACL)
rc = CIFSSMBGetPosixACL(xid, pTcon, full_path,
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 7740b1c871c1..1bfb7ba4e85e 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -8,6 +8,7 @@
#include <linux/coda.h>
#include <linux/coda_psdev.h>
+#include <linux/pagemap.h>
#include "coda_linux.h"
static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
@@ -17,8 +18,7 @@ static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
static const struct inode_operations coda_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = coda_setattr,
};
@@ -35,6 +35,7 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr)
inode->i_fop = &coda_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &coda_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &coda_symlink_aops;
inode->i_mapping = &inode->i_data;
} else
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c
index ab94ef63caef..03736e20d720 100644
--- a/fs/coda/symlink.c
+++ b/fs/coda/symlink.c
@@ -26,7 +26,7 @@ static int coda_symlink_filler(struct file *file, struct page *page)
int error;
struct coda_inode_info *cii;
unsigned int len = PAGE_SIZE;
- char *p = kmap(page);
+ char *p = page_address(page);
cii = ITOC(inode);
@@ -34,13 +34,11 @@ static int coda_symlink_filler(struct file *file, struct page *page)
if (error)
goto fail;
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return error;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dcf26537c935..9144b779d10e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,6 +58,8 @@
#include <linux/atalk.h>
#include <linux/gfp.h>
+#include "internal.h"
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_sock.h>
#include <net/bluetooth/rfcomm.h>
@@ -115,19 +117,38 @@
#include <asm/fbio.h>
#endif
-static int w_long(unsigned int fd, unsigned int cmd,
- compat_ulong_t __user *argp)
+#define convert_in_user(srcptr, dstptr) \
+({ \
+ typeof(*srcptr) val; \
+ \
+ get_user(val, srcptr) || put_user(val, dstptr); \
+})
+
+static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int err;
+
+ err = security_file_ioctl(file, cmd, arg);
+ if (err)
+ return err;
+
+ return vfs_ioctl(file, cmd, arg);
+}
+
+static int w_long(struct file *file,
+ unsigned int cmd, compat_ulong_t __user *argp)
{
- mm_segment_t old_fs = get_fs();
int err;
- unsigned long val;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
- set_fs (KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long)&val);
- set_fs (old_fs);
- if (!err && put_user(val, argp))
+ if (valp == NULL)
return -EFAULT;
- return err;
+ err = do_ioctl(file, cmd, (unsigned long)valp);
+ if (err)
+ return err;
+ if (convert_in_user(valp, argp))
+ return -EFAULT;
+ return 0;
}
struct compat_video_event {
@@ -139,23 +160,23 @@ struct compat_video_event {
} u;
};
-static int do_video_get_event(unsigned int fd, unsigned int cmd,
- struct compat_video_event __user *up)
+static int do_video_get_event(struct file *file,
+ unsigned int cmd, struct compat_video_event __user *up)
{
- struct video_event kevent;
- mm_segment_t old_fs = get_fs();
+ struct video_event __user *kevent =
+ compat_alloc_user_space(sizeof(*kevent));
int err;
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd, cmd, (unsigned long) &kevent);
- set_fs(old_fs);
+ if (kevent == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, cmd, (unsigned long)kevent);
if (!err) {
- err = put_user(kevent.type, &up->type);
- err |= put_user(kevent.timestamp, &up->timestamp);
- err |= put_user(kevent.u.size.w, &up->u.size.w);
- err |= put_user(kevent.u.size.h, &up->u.size.h);
- err |= put_user(kevent.u.size.aspect_ratio,
+ err = convert_in_user(&kevent->type, &up->type);
+ err |= convert_in_user(&kevent->timestamp, &up->timestamp);
+ err |= convert_in_user(&kevent->u.size.w, &up->u.size.w);
+ err |= convert_in_user(&kevent->u.size.h, &up->u.size.h);
+ err |= convert_in_user(&kevent->u.size.aspect_ratio,
&up->u.size.aspect_ratio);
if (err)
err = -EFAULT;
@@ -169,8 +190,8 @@ struct compat_video_still_picture {
int32_t size;
};
-static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
- struct compat_video_still_picture __user *up)
+static int do_video_stillpicture(struct file *file,
+ unsigned int cmd, struct compat_video_still_picture __user *up)
{
struct video_still_picture __user *up_native;
compat_uptr_t fp;
@@ -190,7 +211,7 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -200,8 +221,8 @@ struct compat_video_spu_palette {
compat_uptr_t palette;
};
-static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
- struct compat_video_spu_palette __user *up)
+static int do_video_set_spu_palette(struct file *file,
+ unsigned int cmd, struct compat_video_spu_palette __user *up)
{
struct video_spu_palette __user *up_native;
compat_uptr_t palp;
@@ -218,7 +239,7 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
if (err)
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) up_native);
+ err = do_ioctl(file, cmd, (unsigned long) up_native);
return err;
}
@@ -276,7 +297,7 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
return 0;
}
-static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
+static int sg_ioctl_trans(struct file *file, unsigned int cmd,
sg_io_hdr32_t __user *sgio32)
{
sg_io_hdr_t __user *sgio;
@@ -289,7 +310,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (get_user(interface_id, &sgio32->interface_id))
return -EFAULT;
if (interface_id != 'S')
- return sys_ioctl(fd, cmd, (unsigned long)sgio32);
+ return do_ioctl(file, cmd, (unsigned long)sgio32);
if (get_user(iovec_count, &sgio32->iovec_count))
return -EFAULT;
@@ -349,7 +370,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
if (put_user(compat_ptr(data), &sgio->usr_ptr))
return -EFAULT;
- err = sys_ioctl(fd, cmd, (unsigned long) sgio);
+ err = do_ioctl(file, cmd, (unsigned long) sgio);
if (err >= 0) {
void __user *datap;
@@ -380,13 +401,13 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
int unused;
};
-static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
- compat_sg_req_info __user *o)
+static int sg_grt_trans(struct file *file,
+ unsigned int cmd, struct compat_sg_req_info __user *o)
{
int err, i;
sg_req_info_t __user *r;
r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
- err = sys_ioctl(fd,cmd,(unsigned long)r);
+ err = do_ioctl(file, cmd, (unsigned long)r);
if (err < 0)
return err;
for (i = 0; i < SG_MAX_QUEUE; i++) {
@@ -412,8 +433,8 @@ struct sock_fprog32 {
#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
-static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
- struct sock_fprog32 __user *u_fprog32)
+static int ppp_sock_fprog_ioctl_trans(struct file *file,
+ unsigned int cmd, struct sock_fprog32 __user *u_fprog32)
{
struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
void __user *fptr64;
@@ -435,7 +456,7 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
else
cmd = PPPIOCSACTIVE;
- return sys_ioctl(fd, cmd, (unsigned long) u_fprog64);
+ return do_ioctl(file, cmd, (unsigned long) u_fprog64);
}
struct ppp_option_data32 {
@@ -451,7 +472,7 @@ struct ppp_idle32 {
};
#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
-static int ppp_gidle(unsigned int fd, unsigned int cmd,
+static int ppp_gidle(struct file *file, unsigned int cmd,
struct ppp_idle32 __user *idle32)
{
struct ppp_idle __user *idle;
@@ -460,7 +481,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
idle = compat_alloc_user_space(sizeof(*idle));
- err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
+ err = do_ioctl(file, PPPIOCGIDLE, (unsigned long) idle);
if (!err) {
if (get_user(xmit, &idle->xmit_idle) ||
@@ -472,7 +493,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd,
return err;
}
-static int ppp_scompress(unsigned int fd, unsigned int cmd,
+static int ppp_scompress(struct file *file, unsigned int cmd,
struct ppp_option_data32 __user *odata32)
{
struct ppp_option_data __user *odata;
@@ -492,7 +513,7 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd,
sizeof(__u32) + sizeof(int)))
return -EFAULT;
- return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
+ return do_ioctl(file, PPPIOCSCOMPRESS, (unsigned long) odata);
}
#ifdef CONFIG_BLOCK
@@ -512,12 +533,13 @@ struct mtpos32 {
};
#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
-static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
+static int mt_ioctl_trans(struct file *file,
+ unsigned int cmd, void __user *argp)
{
- mm_segment_t old_fs = get_fs();
- struct mtget get;
+ /* NULL initialization to make gcc shut up */
+ struct mtget __user *get = NULL;
struct mtget32 __user *umget32;
- struct mtpos pos;
+ struct mtpos __user *pos = NULL;
struct mtpos32 __user *upos32;
unsigned long kcmd;
void *karg;
@@ -526,32 +548,34 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
switch(cmd) {
case MTIOCPOS32:
kcmd = MTIOCPOS;
- karg = &pos;
+ pos = compat_alloc_user_space(sizeof(*pos));
+ karg = pos;
break;
default: /* MTIOCGET32 */
kcmd = MTIOCGET;
- karg = &get;
+ get = compat_alloc_user_space(sizeof(*get));
+ karg = get;
break;
}
- set_fs (KERNEL_DS);
- err = sys_ioctl (fd, kcmd, (unsigned long)karg);
- set_fs (old_fs);
+ if (karg == NULL)
+ return -EFAULT;
+ err = do_ioctl(file, kcmd, (unsigned long)karg);
if (err)
return err;
switch (cmd) {
case MTIOCPOS32:
upos32 = argp;
- err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
+ err = convert_in_user(&pos->mt_blkno, &upos32->mt_blkno);
break;
case MTIOCGET32:
umget32 = argp;
- err = __put_user(get.mt_type, &umget32->mt_type);
- err |= __put_user(get.mt_resid, &umget32->mt_resid);
- err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
- err |= __put_user(get.mt_gstat, &umget32->mt_gstat);
- err |= __put_user(get.mt_erreg, &umget32->mt_erreg);
- err |= __put_user(get.mt_fileno, &umget32->mt_fileno);
- err |= __put_user(get.mt_blkno, &umget32->mt_blkno);
+ err = convert_in_user(&get->mt_type, &umget32->mt_type);
+ err |= convert_in_user(&get->mt_resid, &umget32->mt_resid);
+ err |= convert_in_user(&get->mt_dsreg, &umget32->mt_dsreg);
+ err |= convert_in_user(&get->mt_gstat, &umget32->mt_gstat);
+ err |= convert_in_user(&get->mt_erreg, &umget32->mt_erreg);
+ err |= convert_in_user(&get->mt_fileno, &umget32->mt_fileno);
+ err |= convert_in_user(&get->mt_blkno, &umget32->mt_blkno);
break;
}
return err ? -EFAULT: 0;
@@ -605,42 +629,41 @@ struct serial_struct32 {
compat_int_t reserved[1];
};
-static int serial_struct_ioctl(unsigned fd, unsigned cmd,
- struct serial_struct32 __user *ss32)
+static int serial_struct_ioctl(struct file *file,
+ unsigned cmd, struct serial_struct32 __user *ss32)
{
typedef struct serial_struct32 SS32;
int err;
- struct serial_struct ss;
- mm_segment_t oldseg = get_fs();
+ struct serial_struct __user *ss = compat_alloc_user_space(sizeof(*ss));
__u32 udata;
unsigned int base;
+ unsigned char *iomem_base;
+ if (ss == NULL)
+ return -EFAULT;
if (cmd == TIOCSSERIAL) {
- if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
+ if (copy_in_user(ss, ss32, offsetof(SS32, iomem_base)) ||
+ get_user(udata, &ss32->iomem_base))
return -EFAULT;
- if (__get_user(udata, &ss32->iomem_base))
+ iomem_base = compat_ptr(udata);
+ if (put_user(iomem_base, &ss->iomem_base) ||
+ convert_in_user(&ss32->iomem_reg_shift,
+ &ss->iomem_reg_shift) ||
+ convert_in_user(&ss32->port_high, &ss->port_high) ||
+ put_user(0UL, &ss->iomap_base))
return -EFAULT;
- ss.iomem_base = compat_ptr(udata);
- if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __get_user(ss.port_high, &ss32->port_high))
- return -EFAULT;
- ss.iomap_base = 0UL;
}
- set_fs(KERNEL_DS);
- err = sys_ioctl(fd,cmd,(unsigned long)(&ss));
- set_fs(oldseg);
+ err = do_ioctl(file, cmd, (unsigned long)ss);
if (cmd == TIOCGSERIAL && err >= 0) {
- if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32)))
- return -EFAULT;
- if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
+ if (copy_in_user(ss32, ss, offsetof(SS32, iomem_base)) ||
+ get_user(iomem_base, &ss->iomem_base))
return -EFAULT;
- base = (unsigned long)ss.iomem_base >> 32 ?
- 0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
- if (__put_user(base, &ss32->iomem_base) ||
- __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
- __put_user(ss.port_high, &ss32->port_high))
+ base = (unsigned long)iomem_base >> 32 ?
+ 0xffffffff : (unsigned)(unsigned long)iomem_base;
+ if (put_user(base, &ss32->iomem_base) ||
+ convert_in_user(&ss->iomem_reg_shift,
+ &ss32->iomem_reg_shift) ||
+ convert_in_user(&ss->port_high, &ss32->port_high))
return -EFAULT;
}
return err;
@@ -674,8 +697,8 @@ struct i2c_rdwr_aligned {
struct i2c_msg msgs[0];
};
-static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_rdwr_ioctl_data32 __user *udata)
+static int do_i2c_rdwr_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata)
{
struct i2c_rdwr_aligned __user *tdata;
struct i2c_msg __user *tmsgs;
@@ -708,11 +731,11 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
put_user(compat_ptr(datap), &tmsgs[i].buf))
return -EFAULT;
}
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
-static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
- struct i2c_smbus_ioctl_data32 __user *udata)
+static int do_i2c_smbus_ioctl(struct file *file,
+ unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata)
{
struct i2c_smbus_ioctl_data __user *tdata;
compat_caddr_t datap;
@@ -734,7 +757,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
__put_user(compat_ptr(datap), &tdata->data))
return -EFAULT;
- return sys_ioctl(fd, cmd, (unsigned long)tdata);
+ return do_ioctl(file, cmd, (unsigned long)tdata);
}
#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
@@ -742,29 +765,27 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
-static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
+static int rtc_ioctl(struct file *file,
+ unsigned cmd, void __user *argp)
{
- mm_segment_t oldfs = get_fs();
- compat_ulong_t val32;
- unsigned long kval;
+ unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp));
int ret;
+ if (valp == NULL)
+ return -EFAULT;
switch (cmd) {
case RTC_IRQP_READ32:
case RTC_EPOCH_READ32:
- set_fs(KERNEL_DS);
- ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ?
+ ret = do_ioctl(file, (cmd == RTC_IRQP_READ32) ?
RTC_IRQP_READ : RTC_EPOCH_READ,
- (unsigned long)&kval);
- set_fs(oldfs);
+ (unsigned long)valp);
if (ret)
return ret;
- val32 = kval;
- return put_user(val32, (unsigned int __user *)argp);
+ return convert_in_user(valp, (unsigned int __user *)argp);
case RTC_IRQP_SET32:
- return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_IRQP_SET, (unsigned long)argp);
case RTC_EPOCH_SET32:
- return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
+ return do_ioctl(file, RTC_EPOCH_SET, (unsigned long)argp);
}
return -ENOIOCTLCMD;
@@ -1436,53 +1457,53 @@ IGNORE_IOCTL(FBIOGCURSOR32)
* a compat_ioctl operation in the place that handleѕ the
* ioctl for the native case.
*/
-static long do_ioctl_trans(int fd, unsigned int cmd,
+static long do_ioctl_trans(unsigned int cmd,
unsigned long arg, struct file *file)
{
void __user *argp = compat_ptr(arg);
switch (cmd) {
case PPPIOCGIDLE32:
- return ppp_gidle(fd, cmd, argp);
+ return ppp_gidle(file, cmd, argp);
case PPPIOCSCOMPRESS32:
- return ppp_scompress(fd, cmd, argp);
+ return ppp_scompress(file, cmd, argp);
case PPPIOCSPASS32:
case PPPIOCSACTIVE32:
- return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
+ return ppp_sock_fprog_ioctl_trans(file, cmd, argp);
#ifdef CONFIG_BLOCK
case SG_IO:
- return sg_ioctl_trans(fd, cmd, argp);
+ return sg_ioctl_trans(file, cmd, argp);
case SG_GET_REQUEST_TABLE:
- return sg_grt_trans(fd, cmd, argp);
+ return sg_grt_trans(file, cmd, argp);
case MTIOCGET32:
case MTIOCPOS32:
- return mt_ioctl_trans(fd, cmd, argp);
+ return mt_ioctl_trans(file, cmd, argp);
#endif
/* Serial */
case TIOCGSERIAL:
case TIOCSSERIAL:
- return serial_struct_ioctl(fd, cmd, argp);
+ return serial_struct_ioctl(file, cmd, argp);
/* i2c */
case I2C_FUNCS:
- return w_long(fd, cmd, argp);
+ return w_long(file, cmd, argp);
case I2C_RDWR:
- return do_i2c_rdwr_ioctl(fd, cmd, argp);
+ return do_i2c_rdwr_ioctl(file, cmd, argp);
case I2C_SMBUS:
- return do_i2c_smbus_ioctl(fd, cmd, argp);
+ return do_i2c_smbus_ioctl(file, cmd, argp);
/* Not implemented in the native kernel */
case RTC_IRQP_READ32:
case RTC_IRQP_SET32:
case RTC_EPOCH_READ32:
case RTC_EPOCH_SET32:
- return rtc_ioctl(fd, cmd, argp);
+ return rtc_ioctl(file, cmd, argp);
/* dvb */
case VIDEO_GET_EVENT:
- return do_video_get_event(fd, cmd, argp);
+ return do_video_get_event(file, cmd, argp);
case VIDEO_STILLPICTURE:
- return do_video_stillpicture(fd, cmd, argp);
+ return do_video_stillpicture(file, cmd, argp);
case VIDEO_SET_SPU_PALETTE:
- return do_video_set_spu_palette(fd, cmd, argp);
+ return do_video_set_spu_palette(file, cmd, argp);
}
/*
@@ -1513,7 +1534,7 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case NBD_SET_BLKSIZE:
case NBD_SET_SIZE:
case NBD_SET_SIZE_BLOCKS:
- return do_vfs_ioctl(file, fd, cmd, arg);
+ return vfs_ioctl(file, cmd, arg);
}
return -ENOIOCTLCMD;
@@ -1602,7 +1623,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (compat_ioctl_check_table(XFORM(cmd)))
goto found_handler;
- error = do_ioctl_trans(fd, cmd, arg, f.file);
+ error = do_ioctl_trans(cmd, arg, f.file);
if (error == -ENOIOCTLCMD)
error = -ENOTTY;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index ec5c8325b503..db6d69289608 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -279,27 +279,33 @@ static int configfs_getlink(struct dentry *dentry, char * path)
}
-static const char *configfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *configfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ char *body;
int error;
- if (!page)
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = configfs_getlink(dentry, (char *)page);
+ error = configfs_getlink(dentry, body);
if (!error) {
- return *cookie = (void *)page;
+ set_delayed_call(done, kfree_link, body);
+ return body;
}
- free_page(page);
+ kfree(body);
return ERR_PTR(error);
}
const struct inode_operations configfs_symlink_inode_operations = {
- .follow_link = configfs_follow_link,
+ .get_link = configfs_get_link,
.readlink = generic_readlink,
- .put_link = free_page_put_link,
.setattr = configfs_setattr,
};
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 355c522f3585..b862bc219cd7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -100,6 +100,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &cramfs_aops;
break;
default:
diff --git a/fs/dcache.c b/fs/dcache.c
index 5c33aeb0f68f..d27f0909d9f6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1734,7 +1734,7 @@ static unsigned d_flags_for_inode(struct inode *inode)
}
if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
- if (unlikely(inode->i_op->follow_link)) {
+ if (unlikely(inode->i_op->get_link)) {
add_flags = DCACHE_SYMLINK_TYPE;
goto type_determined;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e2e47ba5d313..a4dddc61594c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -674,16 +674,24 @@ out:
return rc ? ERR_PTR(rc) : buf;
}
-static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *ecryptfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
size_t len;
- char *buf = ecryptfs_readlink_lower(dentry, &len);
+ char *buf;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ buf = ecryptfs_readlink_lower(dentry, &len);
if (IS_ERR(buf))
return buf;
fsstack_copy_attr_atime(d_inode(dentry),
d_inode(ecryptfs_dentry_to_lower(dentry)));
buf[len] = '\0';
- return *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
+ return buf;
}
/**
@@ -1095,8 +1103,7 @@ out:
const struct inode_operations ecryptfs_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = ecryptfs_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ecryptfs_get_link,
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.getattr = ecryptfs_getattr_link,
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 079d20306ee1..cdf0872382af 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -151,6 +151,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &efs_symlink_aops;
break;
case S_IFCHR:
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index 75117d0dac2b..4870cc82deb0 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -13,7 +13,7 @@
static int efs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct buffer_head * bh;
struct inode * inode = page->mapping->host;
efs_block_t size = inode->i_size;
@@ -39,12 +39,10 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
}
link[size] = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 60f03b78914e..9eaf595aeaf8 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1224,6 +1224,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_link = (char *)oi->i_data;
} else {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
}
} else {
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 994e078da4bb..c20d77df2679 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -111,6 +111,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry,
if (l > sizeof(oi->i_data)) {
/* slow symlink */
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &exofs_aops;
memset(oi->i_data, 0, sizeof(oi->i_data));
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0aa9bf6e6e53..338eefda70c6 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1420,6 +1420,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
sizeof(ei->i_data) - 1);
} else {
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3267a80dbbe2..7a2be8f7f3c3 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -183,6 +183,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
if (l > sizeof (EXT2_I(inode)->i_data)) {
/* slow symlink */
inode->i_op = &ext2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (test_opt(inode->i_sb, NOBH))
inode->i_mapping->a_ops = &ext2_nobh_aops;
else
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index ae17179f3810..3495d8ae4b33 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -22,8 +22,7 @@
const struct inode_operations ext2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
@@ -35,7 +34,7 @@ const struct inode_operations ext2_symlink_inode_operations = {
const struct inode_operations ext2_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext2_setattr,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848afa8f..cd95d14f9cc2 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -292,16 +292,21 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
const struct xattr_handler *handler =
ext2_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest) {
error = -ERANGE;
goto cleanup;
}
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index dfb08750370d..ba97f243b050 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -7,29 +7,11 @@
#include <linux/security.h>
#include "xattr.h"
-static size_t
-ext2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
buffer, size);
}
@@ -39,8 +21,6 @@ ext2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
value, size, flags);
}
@@ -71,7 +51,6 @@ ext2_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler ext2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext2_xattr_security_list,
.get = ext2_xattr_security_get,
.set = ext2_xattr_security_set,
};
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 3150dd3a7859..2c94d1930626 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -8,23 +8,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_trusted_list(struct dentry *dentry)
{
- const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -32,8 +19,6 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
buffer, size);
}
@@ -43,8 +28,6 @@ ext2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
value, size, flags);
}
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 339a49bbb8ef..72a2a96d677f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -10,23 +10,10 @@
#include "ext2.h"
#include "xattr.h"
-static size_t
-ext2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext2_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -34,8 +21,6 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER,
@@ -47,8 +32,6 @@ ext2_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ea433a7f4bca..b3bd912df6bf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4283,6 +4283,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
+ inode_nohighmem(inode);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &ext4_special_inode_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a969ab39f302..f27e0c2598c5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3132,6 +3132,7 @@ static int ext4_symlink(struct inode *dir,
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
if (!encryption_required)
inode->i_op = &ext4_symlink_inode_operations;
+ inode_nohighmem(inode);
ext4_set_aops(inode);
/*
* We cannot call page_symlink() with transaction started
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e8e7af62ac95..6f7ee30a89ce 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -23,17 +23,21 @@
#include "xattr.h"
#ifdef CONFIG_EXT4_FS_ENCRYPTION
-static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *ext4_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct ext4_str cstr, pstr;
- struct inode *inode = d_inode(dentry);
struct ext4_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
int res;
u32 plen, max_size = inode->i_sb->s_blocksize;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = ext4_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -45,7 +49,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
}
@@ -75,24 +79,20 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
if (res <= plen)
paddr[res] = '\0';
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
- if (cpage) {
- kunmap(cpage);
+ if (cpage)
page_cache_release(cpage);
- }
kfree(paddr);
return ERR_PTR(res);
}
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext4_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -103,8 +103,7 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = {
const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -114,7 +113,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6b6b3e751f8c..e9b9afdd1d96 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -404,19 +404,24 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
const struct xattr_handler *handler =
ext4_xattr_handler(entry->e_name_index);
- if (handler) {
- size_t size = handler->list(handler, dentry, buffer,
- rest, entry->e_name,
- entry->e_name_len);
+ if (handler && (!handler->list || handler->list(dentry))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_len = strlen(prefix);
+ size_t size = prefix_len + entry->e_name_len + 1;
+
if (buffer) {
if (size > rest)
return -ERANGE;
- buffer += size;
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
rest -= size;
}
}
- return buffer_size - rest;
+ return buffer_size - rest; /* total size */
}
static int
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 36f4c1a84c21..3e81bdca071a 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -11,30 +11,11 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
-{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
- const size_t total_len = prefix_len + name_len + 1;
-
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int
ext4_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -44,8 +25,6 @@ ext4_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -79,7 +58,6 @@ ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
const struct xattr_handler ext4_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ext4_xattr_security_list,
.get = ext4_xattr_security_get,
.set = ext4_xattr_security_set,
};
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index 488089053342..2a3c6f9b8cb8 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -12,23 +12,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_trusted_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return capable(CAP_SYS_ADMIN);
}
static int
@@ -36,8 +23,6 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -47,8 +32,6 @@ ext4_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d2dec3364062..d152f431e432 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -11,23 +11,10 @@
#include "ext4.h"
#include "xattr.h"
-static size_t
-ext4_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+ext4_xattr_user_list(struct dentry *dentry)
{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!test_opt(dentry->d_sb, XATTR_USER))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list+prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
+ return test_opt(dentry->d_sb, XATTR_USER);
}
static int
@@ -35,8 +22,6 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER,
@@ -48,8 +33,6 @@ ext4_xattr_user_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER,
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 97e20decacb4..5528801a5baf 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -202,6 +202,7 @@ make_now:
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2c32110f9fc0..e7587fce1b80 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -315,12 +315,15 @@ fail:
return err;
}
-static const char *f2fs_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- const char *link = page_follow_link_light(dentry, cookie);
+ const char *link = page_get_link(dentry, inode, done);
if (!IS_ERR(link) && !*link) {
/* this is broken symlink case */
- page_put_link(NULL, *cookie);
+ do_delayed_call(done);
+ clear_delayed_call(done);
link = ERR_PTR(-ENOENT);
}
return link;
@@ -351,6 +354,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &f2fs_encrypted_symlink_inode_operations;
else
inode->i_op = &f2fs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_lock_op(sbi);
@@ -923,18 +927,22 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
}
#ifdef CONFIG_F2FS_FS_ENCRYPTION
-static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
+static const char *f2fs_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct f2fs_str cstr;
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
- struct inode *inode = d_inode(dentry);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
res = f2fs_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -942,7 +950,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
cpage = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(cpage))
return ERR_CAST(cpage);
- caddr = kmap(cpage);
+ caddr = page_address(cpage);
caddr[size] = 0;
/* Symlink is encrypted */
@@ -982,21 +990,19 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
/* Null-terminate the name */
paddr[res] = '\0';
- kunmap(cpage);
page_cache_release(cpage);
- return *cookie = paddr;
+ set_delayed_call(done, kfree_link, paddr);
+ return paddr;
errout:
kfree(cstr.name);
f2fs_fname_crypto_free_buffer(&pstr);
- kunmap(cpage);
page_cache_release(cpage);
return ERR_PTR(res);
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_encrypted_follow_link,
- .put_link = kfree_put_link,
+ .get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.setxattr = generic_setxattr,
@@ -1031,8 +1037,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
const struct inode_operations f2fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = f2fs_follow_link,
- .put_link = page_put_link,
+ .get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 862368a32e53..036952a945fa 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -25,38 +25,6 @@
#include "f2fs.h"
#include "xattr.h"
-static size_t f2fs_xattr_generic_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- int total_len, prefix_len;
-
- switch (handler->flags) {
- case F2FS_XATTR_INDEX_USER:
- if (!test_opt(sbi, XATTR_USER))
- return -EOPNOTSUPP;
- break;
- case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
- case F2FS_XATTR_INDEX_SECURITY:
- break;
- default:
- return -EINVAL;
- }
-
- prefix_len = strlen(handler->prefix);
- total_len = prefix_len + len + 1;
- if (list && total_len <= list_size) {
- memcpy(list, handler->prefix, prefix_len);
- memcpy(list + prefix_len, name, len);
- list[prefix_len + len] = '\0';
- }
- return total_len;
-}
-
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name, void *buffer,
size_t size)
@@ -77,8 +45,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
return f2fs_getxattr(d_inode(dentry), handler->flags, name,
buffer, size, NULL);
}
@@ -103,24 +69,20 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
default:
return -EINVAL;
}
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return f2fs_setxattr(d_inode(dentry), handler->flags, name,
value, size, NULL, flags);
}
-static size_t f2fs_xattr_advise_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t len)
+static bool f2fs_xattr_user_list(struct dentry *dentry)
{
- const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
- size_t size;
+ struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return test_opt(sbi, XATTR_USER);
+}
+
+static bool f2fs_xattr_trusted_list(struct dentry *dentry)
+{
+ return capable(CAP_SYS_ADMIN);
}
static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
@@ -129,9 +91,6 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
-
if (buffer)
*((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
@@ -143,8 +102,6 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
{
struct inode *inode = d_inode(dentry);
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
@@ -183,7 +140,7 @@ int f2fs_init_security(struct inode *inode, struct inode *dir,
const struct xattr_handler f2fs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = F2FS_XATTR_INDEX_USER,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_user_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -191,15 +148,14 @@ const struct xattr_handler f2fs_xattr_user_handler = {
const struct xattr_handler f2fs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
.flags = F2FS_XATTR_INDEX_TRUSTED,
- .list = f2fs_xattr_generic_list,
+ .list = f2fs_xattr_trusted_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
const struct xattr_handler f2fs_xattr_advise_handler = {
- .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
+ .name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
- .list = f2fs_xattr_advise_list,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
@@ -207,7 +163,6 @@ const struct xattr_handler f2fs_xattr_advise_handler = {
const struct xattr_handler f2fs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = F2FS_XATTR_INDEX_SECURITY,
- .list = f2fs_xattr_generic_list,
.get = f2fs_xattr_generic_get,
.set = f2fs_xattr_generic_set,
};
@@ -455,20 +410,27 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
+ const char *prefix;
+ size_t prefix_len;
size_t size;
- if (!handler)
+ if (!handler || (handler->list && !handler->list(dentry)))
continue;
- size = handler->list(handler, dentry, buffer, rest,
- entry->e_name, entry->e_name_len);
- if (buffer && size > rest) {
- error = -ERANGE;
- goto cleanup;
+ prefix = handler->prefix ?: handler->name;
+ prefix_len = strlen(prefix);
+ size = prefix_len + entry->e_name_len + 1;
+ if (buffer) {
+ if (size > rest) {
+ error = -ERANGE;
+ goto cleanup;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, entry->e_name, entry->e_name_len);
+ buffer += entry->e_name_len;
+ *buffer++ = 0;
}
-
- if (buffer)
- buffer += size;
rest -= size;
}
error = buffer_size - rest;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 71a7100d5492..79dccc8252dd 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -27,7 +27,7 @@
#define F2FS_XATTR_REFCOUNT_MAX 1024
/* Name indexes */
-#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
+#define F2FS_SYSTEM_ADVISE_NAME "system.advise"
#define F2FS_XATTR_INDEX_USER 1
#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ef73ed674a27..3e2ccade61ed 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -326,6 +326,7 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
} else if (S_ISLNK(ip->i_mode)) {
if (!VXFS_ISIMMED(vip)) {
ip->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &vxfs_aops;
} else {
ip->i_op = &simple_symlink_inode_operations;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5e2e08712d3b..712601f299b8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1365,15 +1365,19 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
return err;
}
-static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
+static const char *fuse_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
char *link;
ssize_t ret;
- link = (char *) __get_free_page(GFP_KERNEL);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ link = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!link)
return ERR_PTR(-ENOMEM);
@@ -1385,11 +1389,11 @@ static const char *fuse_follow_link(struct dentry *dentry, void **cookie)
args.out.args[0].value = link;
ret = fuse_simple_request(fc, &args);
if (ret < 0) {
- free_page((unsigned long) link);
+ kfree(link);
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
- *cookie = link;
+ set_delayed_call(done, kfree_link, link);
}
fuse_invalidate_atime(inode);
return link;
@@ -1909,8 +1913,7 @@ static const struct inode_operations fuse_common_inode_operations = {
static const struct inode_operations fuse_symlink_inode_operations = {
.setattr = fuse_setattr,
- .follow_link = fuse_follow_link,
- .put_link = free_page_put_link,
+ .get_link = fuse_get_link,
.readlink = generic_readlink,
.getattr = fuse_getattr,
.setxattr = fuse_setxattr,
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 1be3b061c05c..791932617d1a 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -31,9 +31,9 @@ static const char *gfs2_acl_name(int type)
{
switch (type) {
case ACL_TYPE_ACCESS:
- return GFS2_POSIX_ACL_ACCESS;
+ return XATTR_POSIX_ACL_ACCESS;
case ACL_TYPE_DEFAULT:
- return GFS2_POSIX_ACL_DEFAULT;
+ return XATTR_POSIX_ACL_DEFAULT;
}
return NULL;
}
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 2d65ec4cd4be..3af4f407a483 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -12,8 +12,6 @@
#include "incore.h"
-#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
-#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)
extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 063fdfcf8275..1bae189f3245 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1712,24 +1712,30 @@ static int gfs2_rename2(struct inode *odir, struct dentry *odentry,
}
/**
- * gfs2_follow_link - Follow a symbolic link
+ * gfs2_get_link - Follow a symbolic link
* @dentry: The dentry of the link
- * @nd: Data that we pass to vfs_follow_link()
+ * @inode: The inode of the link
+ * @done: destructor for return value
*
* This can handle symlinks of any size.
*
* Returns: 0 on success or error code
*/
-static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
+static const char *gfs2_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder i_gh;
struct buffer_head *dibh;
unsigned int size;
char *buf;
int error;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
error = gfs2_glock_nq(&i_gh);
if (error) {
@@ -1759,7 +1765,7 @@ static const char *gfs2_follow_link(struct dentry *dentry, void **cookie)
out:
gfs2_glock_dq_uninit(&i_gh);
if (!IS_ERR(buf))
- *cookie = buf;
+ set_delayed_call(done, kfree_link, buf);
return buf;
}
@@ -2132,8 +2138,7 @@ const struct inode_operations gfs2_dir_iops = {
const struct inode_operations gfs2_symlink_iops = {
.readlink = generic_readlink,
- .follow_link = gfs2_follow_link,
- .put_link = kfree_put_link,
+ .get_link = gfs2_get_link,
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 53ce76a374fe..84f2d81fe451 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1237,56 +1237,6 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
size, flags, handler->flags);
}
-
-static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
- struct gfs2_ea_header *ea, char *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int amount = GFS2_EA_DATA_LEN(ea);
- unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
- int ret;
-
- ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
- if (ret)
- return ret;
-
- ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
- gfs2_trans_end(sdp);
-
- return ret;
-}
-
-int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct gfs2_ea_location el;
- int error;
-
- error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
- if (error)
- return error;
-
- if (GFS2_EA_IS_STUFFED(el.el_ea)) {
- error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
- if (error == 0) {
- gfs2_trans_add_meta(ip->i_gl, el.el_bh);
- memcpy(GFS2_EA2DATA(el.el_ea), data,
- GFS2_EA_DATA_LEN(el.el_ea));
- }
- } else {
- error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
- }
-
- brelse(el.el_bh);
- if (error)
- return error;
-
- error = gfs2_setattr_simple(inode, attr);
- gfs2_trans_end(sdp);
- return error;
-}
-
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index d392f8358f2f..2d887c88eb49 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -62,6 +62,5 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
-extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6dd107d7421e..19b33f8151f1 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -403,6 +403,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
} else if (S_ISLNK(inode->i_mode)) {
sbi->file_count++;
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
hip->clump_blocks = 1;
} else
@@ -526,6 +527,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
inode->i_mapping->a_ops = &hfsplus_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &hfsplus_aops;
} else {
init_special_inode(inode, inode->i_mode,
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index df0c9af68d05..afb33eda6d7d 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -21,10 +21,10 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -66,7 +66,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
switch (type) {
case ACL_TYPE_ACCESS:
- xattr_name = POSIX_ACL_XATTR_ACCESS;
+ xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
err = posix_acl_equiv_mode(acl, &inode->i_mode);
if (err < 0)
@@ -76,7 +76,7 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
break;
case ACL_TYPE_DEFAULT:
- xattr_name = POSIX_ACL_XATTR_DEFAULT;
+ xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e41a010cd89c..ab01530b4930 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -431,9 +431,6 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
char *xattr_name;
int res;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -589,9 +586,6 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
int res;
char *xattr_name;
- if (!strcmp(name, ""))
- return -EINVAL;
-
xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
GFP_KERNEL);
if (!xattr_name)
@@ -853,9 +847,6 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow retrieving properly prefixed attributes
* by prepending them with "osx."
@@ -876,9 +867,6 @@ static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
/*
* Don't allow setting properly prefixed attributes
* by prepending them with "osx."
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2ac99db3750e..7db524cc85b6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -892,9 +892,14 @@ static const struct inode_operations hostfs_dir_iops = {
.setattr = hostfs_setattr,
};
-static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *hostfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- char *link = __getname();
+ char *link;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ link = kmalloc(PATH_MAX, GFP_KERNEL);
if (link) {
char *path = dentry_name(dentry);
int err = -ENOMEM;
@@ -905,25 +910,20 @@ static const char *hostfs_follow_link(struct dentry *dentry, void **cookie)
__putname(path);
}
if (err < 0) {
- __putname(link);
+ kfree(link);
return ERR_PTR(err);
}
} else {
return ERR_PTR(-ENOMEM);
}
- return *cookie = link;
-}
-
-static void hostfs_put_link(struct inode *unused, void *cookie)
-{
- __putname(cookie);
+ set_delayed_call(done, kfree_link, link);
+ return link;
}
static const struct inode_operations hostfs_link_iops = {
.readlink = generic_readlink,
- .follow_link = hostfs_follow_link,
- .put_link = hostfs_put_link,
+ .get_link = hostfs_get_link,
};
static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 933c73780813..1f3c6d76200b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -77,6 +77,7 @@ void hpfs_read_inode(struct inode *i)
kfree(ea);
i->i_mode = S_IFLNK | 0777;
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &hpfs_symlink_aops;
set_nlink(i, 1);
i->i_size = ea_size;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ae4d5a1fa4c9..506765afa1a3 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -332,6 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
result->i_blocks = 1;
set_nlink(result, 1);
result->i_size = strlen(symlink);
+ inode_nohighmem(result);
result->i_op = &page_symlink_inode_operations;
result->i_data.a_ops = &hpfs_symlink_aops;
@@ -500,7 +501,7 @@ out:
static int hpfs_symlink_readpage(struct file *file, struct page *page)
{
- char *link = kmap(page);
+ char *link = page_address(page);
struct inode *i = page->mapping->host;
struct fnode *fnode;
struct buffer_head *bh;
@@ -516,14 +517,12 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page)
goto fail;
hpfs_unlock(i->i_sb);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
fail:
hpfs_unlock(i->i_sb);
SetPageError(page);
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index de4bdfac0cec..d8f51ee8126b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -760,6 +760,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
lockdep_annotate_inode_mutex_key(inode);
diff --git a/fs/inode.c b/fs/inode.c
index 1be5f9003eb3..5bb85a064ce7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2028,3 +2028,9 @@ void inode_set_flags(struct inode *inode, unsigned int flags,
new_flags) != old_flags));
}
EXPORT_SYMBOL(inode_set_flags);
+
+void inode_nohighmem(struct inode *inode)
+{
+ mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
+}
+EXPORT_SYMBOL(inode_nohighmem);
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0b41..e38c08ca437d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -151,3 +151,10 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern struct dentry_operations ns_dentry_operations;
+
+/*
+ * fs/ioctl.c
+ */
+extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5d01d2638ca5..41c352e81193 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include "internal.h"
#include <asm/ioctls.h>
@@ -32,8 +33,7 @@
*
* Returns 0 on success, -errno on error.
*/
-static long vfs_ioctl(struct file *filp, unsigned int cmd,
- unsigned long arg)
+long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int error = -ENOTTY;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d67a16f2a45d..61abdc4920da 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1417,6 +1417,7 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_fop = &isofs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &isofs_symlink_aops;
} else
/* XXX - parse_rock_ridge_inode() had already set i_rdev. */
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 735d7522a3a9..5384ceb35b1c 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -687,7 +687,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct iso_inode_info *ei = ISOFS_I(inode);
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
- char *link = kmap(page);
+ char *link = page_address(page);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
struct buffer_head *bh;
char *rpnt = link;
@@ -774,7 +774,6 @@ repeat:
brelse(bh);
*rpnt = '\0';
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -791,7 +790,6 @@ fail:
brelse(bh);
error:
SetPageError(page);
- kunmap(page);
unlock_page(page);
return -EIO;
}
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index bf12fe5f83d7..7a28facd7175 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -52,9 +52,6 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size);
}
@@ -63,31 +60,12 @@ static int jffs2_security_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
-
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
name, buffer, size, flags);
}
-static size_t jffs2_security_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_SECURITY_PREFIX);
- strcpy(list + XATTR_SECURITY_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_security_xattr_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = jffs2_security_listxattr,
.set = jffs2_security_setxattr,
.get = jffs2_security_getxattr
};
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index 8ce2f240125b..2cabd649d4fb 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -14,7 +14,7 @@
const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4c2c03663533..da3e18503c65 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -967,7 +967,8 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
struct jffs2_xattr_ref *ref, **pref;
struct jffs2_xattr_datum *xd;
const struct xattr_handler *xhandle;
- ssize_t len, rc;
+ const char *prefix;
+ ssize_t prefix_len, len, rc;
int retry = 0;
rc = check_xattr_ref_inode(c, ic);
@@ -998,18 +999,23 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
}
xhandle = xprefix_to_handler(xd->xprefix);
- if (!xhandle)
+ if (!xhandle || (xhandle->list && !xhandle->list(dentry)))
continue;
+ prefix = xhandle->prefix ?: xhandle->name;
+ prefix_len = strlen(prefix);
+ rc = prefix_len + xd->name_len + 1;
+
if (buffer) {
- rc = xhandle->list(xhandle, dentry, buffer + len,
- size - len, xd->xname,
- xd->name_len);
- } else {
- rc = xhandle->list(xhandle, dentry, NULL, 0,
- xd->xname, xd->name_len);
+ if (rc > size - len) {
+ rc = -ERANGE;
+ goto out;
+ }
+ memcpy(buffer, prefix, prefix_len);
+ buffer += prefix_len;
+ memcpy(buffer, xd->xname, xd->name_len);
+ buffer += xd->name_len;
+ *buffer++ = 0;
}
- if (rc < 0)
- goto out;
len += rc;
}
rc = len;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index a562da0d6a26..b2555ef07a12 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -20,8 +20,6 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size);
}
@@ -30,28 +28,13 @@ static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
name, buffer, size, flags);
}
-static size_t jffs2_trusted_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool jffs2_trusted_listxattr(struct dentry *dentry)
{
- size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && retlen<=list_size) {
- strcpy(list, XATTR_TRUSTED_PREFIX);
- strcpy(list + XATTR_TRUSTED_PREFIX_LEN, name);
- }
-
- return retlen;
+ return capable(CAP_SYS_ADMIN);
}
const struct xattr_handler jffs2_trusted_xattr_handler = {
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index cbc0472e59a8..539bd630b5e4 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -20,8 +20,6 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size);
}
@@ -30,30 +28,12 @@ static int jffs2_user_setxattr(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags)
{
- if (!strcmp(name, ""))
- return -EINVAL;
return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
name, buffer, size, flags);
}
-static size_t jffs2_user_listxattr(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
-
- if (list && retlen <= list_size) {
- strcpy(list, XATTR_USER_PREFIX);
- strcpy(list + XATTR_USER_PREFIX_LEN, name);
- }
-
- return retlen;
-}
-
const struct xattr_handler jffs2_user_xattr_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = jffs2_user_listxattr,
.set = jffs2_user_setxattr,
.get = jffs2_user_getxattr
};
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 0c8ca830b113..49456853e9de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -40,10 +40,10 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
switch(type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
@@ -82,7 +82,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
- ea_name = POSIX_ACL_XATTR_ACCESS;
+ ea_name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
rc = posix_acl_equiv_mode(acl, &inode->i_mode);
if (rc < 0)
@@ -94,7 +94,7 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
}
break;
case ACL_TYPE_DEFAULT:
- ea_name = POSIX_ACL_XATTR_DEFAULT;
+ ea_name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
return -EINVAL;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 41aa3ca6a6a4..9d9bae63ae2a 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -60,6 +60,7 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
} else if (S_ISLNK(inode->i_mode)) {
if (inode->i_size >= IDATASIZE) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &jfs_aops;
} else {
inode->i_op = &jfs_fast_symlink_inode_operations;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 9d7551f5c32a..701f89370de7 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -983,6 +983,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
jfs_info("jfs_symlink: allocate extent ip:0x%p", ip);
ip->i_op = &jfs_symlink_inode_operations;
+ inode_nohighmem(ip);
ip->i_mapping->a_ops = &jfs_aops;
/*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index 5929e2363cb8..f8db4fde0b0b 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -23,7 +23,7 @@
const struct inode_operations jfs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
@@ -33,8 +33,7 @@ const struct inode_operations jfs_fast_symlink_inode_operations = {
const struct inode_operations jfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = jfs_setattr,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 756dd56aaf60..16405ae88d2d 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -205,7 +205,7 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name)
if (!attrs)
return -ENOMEM;
- return simple_xattr_remove(&attrs->xattrs, name);
+ return simple_xattr_set(&attrs->xattrs, name, NULL, 0, XATTR_REPLACE);
}
ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
@@ -230,7 +230,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(&attrs->xattrs, buf, size);
+ return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index db272528ab5b..117b8b3416f9 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -112,18 +112,25 @@ static int kernfs_getlink(struct dentry *dentry, char *path)
return error;
}
-static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie)
+static const char *kernfs_iop_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- int error = -ENOMEM;
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- if (!page)
+ char *body;
+ int error;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+ body = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!body)
return ERR_PTR(-ENOMEM);
- error = kernfs_getlink(dentry, (char *)page);
+ error = kernfs_getlink(dentry, body);
if (unlikely(error < 0)) {
- free_page((unsigned long)page);
+ kfree(body);
return ERR_PTR(error);
}
- return *cookie = (char *)page;
+ set_delayed_call(done, kfree_link, body);
+ return body;
}
const struct inode_operations kernfs_symlink_iops = {
@@ -132,8 +139,7 @@ const struct inode_operations kernfs_symlink_iops = {
.getxattr = kernfs_iop_getxattr,
.listxattr = kernfs_iop_listxattr,
.readlink = generic_readlink,
- .follow_link = kernfs_iop_follow_link,
- .put_link = free_page_put_link,
+ .get_link = kernfs_iop_get_link,
.setattr = kernfs_iop_setattr,
.getattr = kernfs_iop_getattr,
.permission = kernfs_iop_permission,
diff --git a/fs/libfs.c b/fs/libfs.c
index c7cbfb092e94..01491299f348 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1019,17 +1019,12 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-void kfree_put_link(struct inode *unused, void *cookie)
+/* Because kfree isn't assignment-compatible with void(void*) ;-/ */
+void kfree_link(void *p)
{
- kfree(cookie);
+ kfree(p);
}
-EXPORT_SYMBOL(kfree_put_link);
-
-void free_page_put_link(struct inode *unused, void *cookie)
-{
- free_page((unsigned long) cookie);
-}
-EXPORT_SYMBOL(free_page_put_link);
+EXPORT_SYMBOL(kfree_link);
/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
@@ -1092,14 +1087,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
}
EXPORT_SYMBOL(simple_nosetlease);
-const char *simple_follow_link(struct dentry *dentry, void **cookie)
+const char *simple_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
{
- return d_inode(dentry)->i_link;
+ return inode->i_link;
}
-EXPORT_SYMBOL(simple_follow_link);
+EXPORT_SYMBOL(simple_get_link);
const struct inode_operations simple_symlink_inode_operations = {
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.readlink = generic_readlink
};
EXPORT_SYMBOL(simple_symlink_inode_operations);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d4c4..542468e9bfb4 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -528,7 +528,8 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
return PTR_ERR(inode);
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
return __logfs_create(dir, dentry, inode, target, destlen);
@@ -776,12 +777,6 @@ fail:
return -EIO;
}
-const struct inode_operations logfs_symlink_iops = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
-
const struct inode_operations logfs_dir_iops = {
.create = logfs_create,
.link = logfs_link,
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index af49e2d6941a..0fce46d62b9c 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -64,7 +64,8 @@ static void logfs_inode_setops(struct inode *inode)
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFLNK:
- inode->i_op = &logfs_symlink_iops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &logfs_reg_aops;
break;
case S_IFSOCK: /* fall through */
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609465..209a26d84c38 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -495,7 +495,6 @@ static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
#endif
/* dir.c */
-extern const struct inode_operations logfs_symlink_iops;
extern const struct inode_operations logfs_dir_iops;
extern const struct file_operations logfs_dir_fops;
int logfs_replay_journal(struct super_block *sb);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 086cd0a61e80..cb1789ca1ee6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -435,8 +435,7 @@ static const struct address_space_operations minix_aops = {
static const struct inode_operations minix_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = minix_getattr,
};
@@ -452,6 +451,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
inode->i_mapping->a_ops = &minix_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &minix_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &minix_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
diff --git a/fs/namei.c b/fs/namei.c
index 0c3974cd3ecd..3c909aebef70 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -505,13 +505,13 @@ struct nameidata {
int total_link_count;
struct saved {
struct path link;
- void *cookie;
+ struct delayed_call done;
const char *name;
- struct inode *inode;
unsigned seq;
} *stack, internal[EMBEDDED_LEVELS];
struct filename *name;
struct nameidata *saved;
+ struct inode *link_inode;
unsigned root_seq;
int dfd;
};
@@ -592,11 +592,8 @@ static void drop_links(struct nameidata *nd)
int i = nd->depth;
while (i--) {
struct saved *last = nd->stack + i;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link) {
- inode->i_op->put_link(inode, last->cookie);
- last->cookie = NULL;
- }
+ do_delayed_call(&last->done);
+ clear_delayed_call(&last->done);
}
}
@@ -842,7 +839,7 @@ static inline void path_to_nameidata(const struct path *path,
}
/*
- * Helper to directly jump to a known parsed path from ->follow_link,
+ * Helper to directly jump to a known parsed path from ->get_link,
* caller must have taken a reference to path beforehand.
*/
void nd_jump_link(struct path *path)
@@ -858,9 +855,7 @@ void nd_jump_link(struct path *path)
static inline void put_link(struct nameidata *nd)
{
struct saved *last = nd->stack + --nd->depth;
- struct inode *inode = last->inode;
- if (last->cookie && inode->i_op->put_link)
- inode->i_op->put_link(inode, last->cookie);
+ do_delayed_call(&last->done);
if (!(nd->flags & LOOKUP_RCU))
path_put(&last->link);
}
@@ -892,7 +887,7 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if owner and follower match. */
- inode = nd->stack[0].inode;
+ inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
@@ -983,7 +978,7 @@ const char *get_link(struct nameidata *nd)
{
struct saved *last = nd->stack + nd->depth - 1;
struct dentry *dentry = last->link.dentry;
- struct inode *inode = last->inode;
+ struct inode *inode = nd->link_inode;
int error;
const char *res;
@@ -1004,15 +999,21 @@ const char *get_link(struct nameidata *nd)
nd->last_type = LAST_BIND;
res = inode->i_link;
if (!res) {
+ const char * (*get)(struct dentry *, struct inode *,
+ struct delayed_call *);
+ get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
- if (unlikely(unlazy_walk(nd, NULL, 0)))
- return ERR_PTR(-ECHILD);
+ res = get(NULL, inode, &last->done);
+ if (res == ERR_PTR(-ECHILD)) {
+ if (unlikely(unlazy_walk(nd, NULL, 0)))
+ return ERR_PTR(-ECHILD);
+ res = get(dentry, inode, &last->done);
+ }
+ } else {
+ res = get(dentry, inode, &last->done);
}
- res = inode->i_op->follow_link(dentry, &last->cookie);
- if (IS_ERR_OR_NULL(res)) {
- last->cookie = NULL;
+ if (IS_ERR_OR_NULL(res))
return res;
- }
}
if (*res == '/') {
if (nd->flags & LOOKUP_RCU) {
@@ -1691,8 +1692,8 @@ static int pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
- last->cookie = NULL;
- last->inode = inode;
+ clear_delayed_call(&last->done);
+ nd->link_inode = inode;
last->seq = seq;
return 1;
}
@@ -4495,72 +4496,73 @@ EXPORT_SYMBOL(readlink_copy);
/*
* A helper for ->readlink(). This should be used *ONLY* for symlinks that
- * have ->follow_link() touching nd only in nd_set_link(). Using (or not
- * using) it for any given inode is up to filesystem.
+ * have ->get_link() not calling nd_jump_link(). Using (or not using) it
+ * for any given inode is up to filesystem.
*/
int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- void *cookie;
+ DEFINE_DELAYED_CALL(done);
struct inode *inode = d_inode(dentry);
const char *link = inode->i_link;
int res;
if (!link) {
- link = inode->i_op->follow_link(dentry, &cookie);
+ link = inode->i_op->get_link(dentry, inode, &done);
if (IS_ERR(link))
return PTR_ERR(link);
}
res = readlink_copy(buffer, buflen, link);
- if (inode->i_op->put_link)
- inode->i_op->put_link(inode, cookie);
+ do_delayed_call(&done);
return res;
}
EXPORT_SYMBOL(generic_readlink);
/* get the link contents into pagecache */
-static char *page_getlink(struct dentry * dentry, struct page **ppage)
+const char *page_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
{
char *kaddr;
struct page *page;
- struct address_space *mapping = dentry->d_inode->i_mapping;
- page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
- return (char*)page;
- *ppage = page;
- kaddr = kmap(page);
- nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
+ struct address_space *mapping = inode->i_mapping;
+
+ if (!dentry) {
+ page = find_get_page(mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ page = read_mapping_page(mapping, 0, NULL);
+ if (IS_ERR(page))
+ return (char*)page;
+ }
+ set_delayed_call(callback, page_put_link, page);
+ BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
+ kaddr = page_address(page);
+ nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
return kaddr;
}
-int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-{
- struct page *page = NULL;
- int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
- if (page) {
- kunmap(page);
- page_cache_release(page);
- }
- return res;
-}
-EXPORT_SYMBOL(page_readlink);
+EXPORT_SYMBOL(page_get_link);
-const char *page_follow_link_light(struct dentry *dentry, void **cookie)
+void page_put_link(void *arg)
{
- struct page *page = NULL;
- char *res = page_getlink(dentry, &page);
- if (!IS_ERR(res))
- *cookie = page;
- return res;
+ put_page(arg);
}
-EXPORT_SYMBOL(page_follow_link_light);
+EXPORT_SYMBOL(page_put_link);
-void page_put_link(struct inode *unused, void *cookie)
+int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
- struct page *page = cookie;
- kunmap(page);
- page_cache_release(page);
+ DEFINE_DELAYED_CALL(done);
+ int res = readlink_copy(buffer, buflen,
+ page_get_link(dentry, d_inode(dentry),
+ &done));
+ do_delayed_call(&done);
+ return res;
}
-EXPORT_SYMBOL(page_put_link);
+EXPORT_SYMBOL(page_readlink);
/*
* The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4571,7 +4573,6 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
struct page *page;
void *fsdata;
int err;
- char *kaddr;
unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
if (nofs)
flags |= AOP_FLAG_NOFS;
@@ -4582,9 +4583,7 @@ retry:
if (err)
goto fail;
- kaddr = kmap_atomic(page);
- memcpy(kaddr, symname, len-1);
- kunmap_atomic(kaddr);
+ memcpy(page_address(page), symname, len-1);
err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
page, fsdata);
@@ -4609,7 +4608,6 @@ EXPORT_SYMBOL(page_symlink);
const struct inode_operations page_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
};
EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 9605a2f63549..ce1eb3f9dfe8 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -244,8 +244,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
static const struct inode_operations ncp_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = ncp_notify_change,
};
#endif
@@ -283,6 +282,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &ncp_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &ncp_symlink_aops;
#endif
} else {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c7e8b87da5b2..bdb4dc7b4ecd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -408,9 +408,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
inode->i_fop = NULL;
inode->i_flags |= S_AUTOMOUNT;
}
- } else if (S_ISLNK(inode->i_mode))
+ } else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nfs_symlink_inode_operations;
- else
+ inode_nohighmem(inode);
+ } else
init_special_inode(inode, inode->i_mode, fattr->rdev);
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
@@ -1086,6 +1087,27 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
|| NFS_STALE(inode);
}
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
+ int ret = 0;
+
+ if (IS_SWAPFILE(inode))
+ goto out;
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ ret = -ECHILD;
+ goto out;
+ }
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+ (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+ ret = -ECHILD;
+ spin_unlock(&inode->i_lock);
+out:
+ return ret;
+}
+
/**
* __nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1ebe2fc7cda2..17c0fa1eccfa 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -284,12 +284,12 @@ nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
int error;
error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS,
- POSIX_ACL_XATTR_ACCESS, data, size, &result);
+ XATTR_NAME_POSIX_ACL_ACCESS, data, size, &result);
if (error)
return error;
error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT,
- POSIX_ACL_XATTR_DEFAULT, data, size, &result);
+ XATTR_NAME_POSIX_ACL_DEFAULT, data, size, &result);
if (error)
return error;
return result;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 89818036f035..c57d1332c1c8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6253,9 +6253,6 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
const void *buf, size_t buflen,
int flags)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
}
@@ -6263,32 +6260,15 @@ static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
void *buf, size_t buflen)
{
- if (strcmp(key, "") != 0)
- return -EINVAL;
-
return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
}
-static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
{
- size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
-
- if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
- return 0;
-
- if (list && len <= list_len)
- memcpy(list, XATTR_NAME_NFSV4_ACL, len);
- return len;
+ return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry)));
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-static inline int nfs4_server_supports_labels(struct nfs_server *server)
-{
- return server->caps & NFS_CAP_SECURITY_LABEL;
-}
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
@@ -6310,29 +6290,34 @@ static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler,
return -EOPNOTSUPP;
}
-static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
{
- size_t len = 0;
+ int len = 0;
- if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
- len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
- if (list && len <= list_len)
- security_inode_listsecurity(d_inode(dentry), list, len);
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
+ len = security_inode_listsecurity(inode, list, list_len);
+ if (list_len && len > list_len)
+ return -ERANGE;
}
return len;
}
static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = nfs4_xattr_list_nfs4_label,
.get = nfs4_xattr_get_nfs4_label,
.set = nfs4_xattr_set_nfs4_label,
};
-#endif
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+
+#endif
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
@@ -8749,6 +8734,24 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ ssize_t error, error2;
+
+ error = generic_listxattr(dentry, list, size);
+ if (error < 0)
+ return error;
+ if (list) {
+ list += error;
+ size -= error;
+ }
+
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ if (error2 < 0)
+ return error2;
+ return error + error2;
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -8765,7 +8768,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8775,7 +8778,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8834,7 +8837,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
- .prefix = XATTR_NAME_NFSV4_ACL,
+ .name = XATTR_NAME_NFSV4_ACL,
.list = nfs4_xattr_list_nfs4_acl,
.get = nfs4_xattr_get_nfs4_acl,
.set = nfs4_xattr_set_nfs4_acl,
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index b6de433da5db..4fe3eead3868 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -42,21 +42,35 @@ error:
return -EIO;
}
-static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
+static const char *nfs_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct page *page;
void *err;
- err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
- if (err)
- return err;
- page = read_cache_page(&inode->i_data, 0,
- (filler_t *)nfs_symlink_filler, inode);
- if (IS_ERR(page))
- return ERR_CAST(page);
- *cookie = page;
- return kmap(page);
+ if (!dentry) {
+ err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
+ if (err)
+ return err;
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
+ if (err)
+ return err;
+ page = read_cache_page(&inode->i_data, 0,
+ (filler_t *)nfs_symlink_filler, inode);
+ if (IS_ERR(page))
+ return ERR_CAST(page);
+ }
+ set_delayed_call(done, page_put_link, page);
+ return page_address(page);
}
/*
@@ -64,8 +78,7 @@ static const char *nfs_follow_link(struct dentry *dentry, void **cookie)
*/
const struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = nfs_follow_link,
- .put_link = page_put_link,
+ .get_link = nfs_get_link,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index ac2f64943ff4..10b22527a617 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -510,6 +510,7 @@ static int __nilfs_read_inode(struct super_block *sb,
inode->i_mapping->a_ops = &nilfs_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
} else {
inode->i_op = &nilfs_special_inode_operations;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c9a1a491aa91..7ccdb961eea9 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -161,6 +161,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
@@ -568,8 +569,7 @@ const struct inode_operations nilfs_special_inode_operations = {
const struct inode_operations nilfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.permission = nilfs_permission,
};
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 8f87e05ee25d..97a563bab9a8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -361,6 +361,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
break;
case S_IFLNK:
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
break;
default:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3123408da935..afb81eae2c18 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1958,6 +1958,7 @@ static int ocfs2_symlink(struct inode *dir,
inode->i_rdev = 0;
newsize = l - 1;
inode->i_op = &ocfs2_symlink_inode_operations;
+ inode_nohighmem(inode);
if (l > ocfs2_fast_symlink_chars(sb)) {
u32 offset = 0;
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 66edce7ecfd7..6c2a3e3c521c 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -88,8 +88,7 @@ const struct address_space_operations ocfs2_fast_symlink_aops = {
const struct inode_operations ocfs2_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = ocfs2_getattr,
.setattr = ocfs2_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e9164f09841b..f0e241ffd94f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -544,8 +544,7 @@ static inline const char *ocfs2_xattr_prefix(int name_index)
if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
handler = ocfs2_xattr_handler_map[name_index];
-
- return handler ? handler->prefix : NULL;
+ return handler ? xattr_prefix(handler) : NULL;
}
static u32 ocfs2_xattr_name_hash(struct inode *inode,
@@ -884,14 +883,39 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
return ret;
}
-static int ocfs2_xattr_list_entry(char *buffer, size_t size,
- size_t *result, const char *prefix,
+static int ocfs2_xattr_list_entry(struct super_block *sb,
+ char *buffer, size_t size,
+ size_t *result, int type,
const char *name, int name_len)
{
char *p = buffer + *result;
- int prefix_len = strlen(prefix);
- int total_len = prefix_len + name_len + 1;
+ const char *prefix;
+ int prefix_len;
+ int total_len;
+ switch(type) {
+ case OCFS2_XATTR_INDEX_USER:
+ if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
+ case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
+ if (!(sb->s_flags & MS_POSIXACL))
+ return 0;
+ break;
+
+ case OCFS2_XATTR_INDEX_TRUSTED:
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+ break;
+ }
+
+ prefix = ocfs2_xattr_prefix(type);
+ if (!prefix)
+ return 0;
+ prefix_len = strlen(prefix);
+ total_len = prefix_len + name_len + 1;
*result += total_len;
/* we are just looking for how big our buffer needs to be */
@@ -914,23 +938,20 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
{
size_t result = 0;
int i, type, ret;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
-
- if (prefix) {
- name = (const char *)header +
- le16_to_cpu(entry->xe_name_offset);
+ name = (const char *)header +
+ le16_to_cpu(entry->xe_name_offset);
- ret = ocfs2_xattr_list_entry(buffer, buffer_size,
- &result, prefix, name,
- entry->xe_name_len);
- if (ret)
- return ret;
- }
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ buffer, buffer_size,
+ &result, type, name,
+ entry->xe_name_len);
+ if (ret)
+ return ret;
}
return result;
@@ -4033,32 +4054,30 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
int ret = 0, type;
struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
int i, block_off, new_offset;
- const char *prefix, *name;
+ const char *name;
for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
- prefix = ocfs2_xattr_prefix(type);
- if (prefix) {
- ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
- bucket_xh(bucket),
- i,
- &block_off,
- &new_offset);
- if (ret)
- break;
+ ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
+ bucket_xh(bucket),
+ i,
+ &block_off,
+ &new_offset);
+ if (ret)
+ break;
- name = (const char *)bucket_block(bucket, block_off) +
- new_offset;
- ret = ocfs2_xattr_list_entry(xl->buffer,
- xl->buffer_size,
- &xl->result,
- prefix, name,
- entry->xe_name_len);
- if (ret)
- break;
- }
+ name = (const char *)bucket_block(bucket, block_off) +
+ new_offset;
+ ret = ocfs2_xattr_list_entry(inode->i_sb,
+ xl->buffer,
+ xl->buffer_size,
+ &xl->result,
+ type, name,
+ entry->xe_name_len);
+ if (ret)
+ break;
}
return ret;
@@ -7226,31 +7245,14 @@ int ocfs2_init_security_and_acl(struct inode *dir,
leave:
return ret;
}
+
/*
* 'security' attributes support
*/
-static size_t ocfs2_xattr_security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, buffer, size);
}
@@ -7259,9 +7261,6 @@ static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
name, value, size, flags);
}
@@ -7314,7 +7313,6 @@ int ocfs2_init_security_set(handle_t *handle,
const struct xattr_handler ocfs2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = ocfs2_xattr_security_list,
.get = ocfs2_xattr_security_get,
.set = ocfs2_xattr_security_set,
};
@@ -7322,31 +7320,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = {
/*
* 'trusted' attributes support
*/
-static size_t ocfs2_xattr_trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN))
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, buffer, size);
}
@@ -7355,16 +7332,12 @@ static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
const void *value, size_t size, int flags)
{
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
name, value, size, flags);
}
const struct xattr_handler ocfs2_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
- .list = ocfs2_xattr_trusted_list,
.get = ocfs2_xattr_trusted_get,
.set = ocfs2_xattr_trusted_set,
};
@@ -7372,34 +7345,12 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = {
/*
* 'user' attributes support
*/
-static size_t ocfs2_xattr_user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- const size_t prefix_len = XATTR_USER_PREFIX_LEN;
- const size_t total_len = prefix_len + name_len + 1;
- struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
-
- if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
- return 0;
-
- if (list && total_len <= list_size) {
- memcpy(list, XATTR_USER_PREFIX, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
- }
- return total_len;
-}
-
static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
void *buffer, size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
@@ -7412,8 +7363,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
{
struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
- if (strcmp(name, "") == 0)
- return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
@@ -7423,7 +7372,6 @@ static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
const struct xattr_handler ocfs2_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
- .list = ocfs2_xattr_user_list,
.get = ocfs2_xattr_user_get,
.set = ocfs2_xattr_user_set,
};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 4060ffde8722..964a60fa7afc 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -131,57 +131,23 @@ out_dput:
return err;
}
-
-struct ovl_link_data {
- struct dentry *realdentry;
- void *cookie;
-};
-
-static const char *ovl_follow_link(struct dentry *dentry, void **cookie)
+static const char *ovl_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
struct dentry *realdentry;
struct inode *realinode;
- struct ovl_link_data *data = NULL;
- const char *ret;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
realdentry = ovl_dentry_real(dentry);
realinode = realdentry->d_inode;
- if (WARN_ON(!realinode->i_op->follow_link))
+ if (WARN_ON(!realinode->i_op->get_link))
return ERR_PTR(-EPERM);
- if (realinode->i_op->put_link) {
- data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
- if (!data)
- return ERR_PTR(-ENOMEM);
- data->realdentry = realdentry;
- }
-
- ret = realinode->i_op->follow_link(realdentry, cookie);
- if (IS_ERR_OR_NULL(ret)) {
- kfree(data);
- return ret;
- }
-
- if (data)
- data->cookie = *cookie;
-
- *cookie = data;
-
- return ret;
-}
-
-static void ovl_put_link(struct inode *unused, void *c)
-{
- struct inode *realinode;
- struct ovl_link_data *data = c;
-
- if (!data)
- return;
-
- realinode = data->realdentry->d_inode;
- realinode->i_op->put_link(realinode, data->cookie);
- kfree(data);
+ return realinode->i_op->get_link(realdentry, realinode, done);
}
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
@@ -378,8 +344,7 @@ static const struct inode_operations ovl_file_inode_operations = {
static const struct inode_operations ovl_symlink_inode_operations = {
.setattr = ovl_setattr,
- .follow_link = ovl_follow_link,
- .put_link = ovl_put_link,
+ .get_link = ovl_get_link,
.readlink = ovl_readlink,
.getattr = ovl_getattr,
.setxattr = ovl_setxattr,
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 4adde1e2cbec..711dd5170376 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -769,8 +769,6 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
struct posix_acl *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(d_backing_inode(dentry)))
return -EOPNOTSUPP;
if (d_is_symlink(dentry))
@@ -797,8 +795,6 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
struct posix_acl *acl = NULL;
int ret;
- if (strcmp(name, "") != 0)
- return -EINVAL;
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
if (!inode->i_op->set_acl)
@@ -827,25 +823,14 @@ out:
return ret;
}
-static size_t
-posix_acl_xattr_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool
+posix_acl_xattr_list(struct dentry *dentry)
{
- const char *xname = handler->prefix;
- size_t size;
-
- if (!IS_POSIXACL(d_backing_inode(dentry)))
- return 0;
-
- size = strlen(xname) + 1;
- if (list && size <= list_size)
- memcpy(list, xname, size);
- return size;
+ return IS_POSIXACL(d_backing_inode(dentry));
}
const struct xattr_handler posix_acl_access_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
+ .name = XATTR_NAME_POSIX_ACL_ACCESS,
.flags = ACL_TYPE_ACCESS,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
@@ -854,7 +839,7 @@ const struct xattr_handler posix_acl_access_xattr_handler = {
EXPORT_SYMBOL_GPL(posix_acl_access_xattr_handler);
const struct xattr_handler posix_acl_default_xattr_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .name = XATTR_NAME_POSIX_ACL_DEFAULT,
.flags = ACL_TYPE_DEFAULT,
.list = posix_acl_xattr_list,
.get = posix_acl_xattr_get,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4bd5d3118acd..55e01f88eac9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1564,12 +1564,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
return -ENOENT;
}
-static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_pid_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
struct path path;
int error = -EACCES;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
/* Are we allowed to snoop on the tasks file descriptors? */
if (!proc_fd_access_allowed(inode))
goto out;
@@ -1630,7 +1634,7 @@ out:
const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_pid_follow_link,
+ .get_link = proc_pid_get_link,
.setattr = proc_setattr,
};
@@ -1895,7 +1899,7 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
.d_delete = pid_delete_dentry,
};
-static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path)
{
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
@@ -1945,20 +1949,22 @@ struct map_files_info {
* path to the file in question.
*/
static const char *
-proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+proc_map_files_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- return proc_pid_follow_link(dentry, NULL);
+ return proc_pid_get_link(dentry, inode, done);
}
/*
- * Identical to proc_pid_link_inode_operations except for follow_link()
+ * Identical to proc_pid_link_inode_operations except for get_link()
*/
static const struct inode_operations proc_map_files_link_inode_operations = {
.readlink = proc_pid_readlink,
- .follow_link = proc_map_files_follow_link,
+ .get_link = proc_map_files_get_link,
.setattr = proc_setattr,
};
@@ -1975,7 +1981,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
return -ENOENT;
ei = PROC_I(inode);
- ei->op.proc_get_link = proc_map_files_get_link;
+ ei->op.proc_get_link = map_files_get_link;
inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fdebb0..d0e9b9b6223e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -393,24 +393,25 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
-static const char *proc_follow_link(struct dentry *dentry, void **cookie)
+static void proc_put_link(void *p)
{
- struct proc_dir_entry *pde = PDE(d_inode(dentry));
- if (unlikely(!use_pde(pde)))
- return ERR_PTR(-EINVAL);
- *cookie = pde;
- return pde->data;
+ unuse_pde(p);
}
-static void proc_put_link(struct inode *unused, void *p)
+static const char *proc_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- unuse_pde(p);
+ struct proc_dir_entry *pde = PDE(inode);
+ if (unlikely(!use_pde(pde)))
+ return ERR_PTR(-EINVAL);
+ set_delayed_call(done, proc_put_link, pde);
+ return pde->data;
}
const struct inode_operations proc_link_inode_operations = {
.readlink = generic_readlink,
- .follow_link = proc_follow_link,
- .put_link = proc_put_link,
+ .get_link = proc_get_link,
};
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index f6e8354b8cea..1dece8781f91 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -30,14 +30,18 @@ static const struct proc_ns_operations *ns_entries[] = {
&mntns_operations,
};
-static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_ns_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct inode *inode = d_inode(dentry);
const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
struct task_struct *task;
struct path ns_path;
void *error = ERR_PTR(-EACCES);
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
task = get_proc_task(inode);
if (!task)
return error;
@@ -74,7 +78,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
static const struct inode_operations proc_ns_link_inode_operations = {
.readlink = proc_ns_readlink,
- .follow_link = proc_ns_follow_link,
+ .get_link = proc_ns_get_link,
.setattr = proc_setattr,
};
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061fc0..67e8db442cf0 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -18,26 +18,28 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
- name = kmalloc(12, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d", tgid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
- .follow_link = proc_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_self_get_link,
};
static unsigned self_inum;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd0a1..9eacd59e0360 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -19,26 +19,29 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
return readlink_copy(buffer, buflen, tmp);
}
-static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie)
+static const char *proc_thread_self_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct pid_namespace *ns = dentry->d_sb->s_fs_info;
+ struct pid_namespace *ns = inode->i_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
if (!pid)
return ERR_PTR(-ENOENT);
- name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
- if (!name)
- return ERR_PTR(-ENOMEM);
+ name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF,
+ dentry ? GFP_KERNEL : GFP_ATOMIC);
+ if (unlikely(!name))
+ return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD);
sprintf(name, "%d/task/%d", tgid, pid);
- return *cookie = name;
+ set_delayed_call(done, kfree_link, name);
+ return name;
}
static const struct inode_operations proc_thread_self_inode_operations = {
.readlink = proc_thread_self_readlink,
- .follow_link = proc_thread_self_follow_link,
- .put_link = kfree_put_link,
+ .get_link = proc_thread_self_get_link,
};
static unsigned thread_self_inum;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index c4bcb778886e..f37b3deb01b4 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -316,6 +316,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
inode->i_fop = &qnx4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx4_aops;
qnx4_i(inode)->mmu_private = inode->i_size;
} else {
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 32d2e1a9774c..9728b5499e1d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -582,6 +582,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mapping->a_ops = &qnx6_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &qnx6_aops;
} else
init_special_inode(inode, inode->i_mode, 0);
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 889d558b4e05..38981b037524 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -79,6 +79,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
break;
case S_IFLNK:
inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
break;
}
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3d8e7e671d5b..ae9e5b308cf9 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1361,6 +1361,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
inode->i_fop = &reiserfs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
} else {
inode->i_blocks = 0;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 47f96988fdd4..2a12d46d7fb4 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1170,6 +1170,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
reiserfs_update_inode_transaction(parent_dir);
inode->i_op = &reiserfs_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &reiserfs_address_space_operations;
retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
@@ -1664,8 +1665,7 @@ const struct inode_operations reiserfs_dir_inode_operations = {
*/
const struct inode_operations reiserfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.setattr = reiserfs_setattr,
.setxattr = reiserfs_setxattr,
.getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 66b26fdfff8d..e5ddb4e5ea94 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -756,7 +756,8 @@ find_xattr_handler_prefix(const struct xattr_handler **handlers,
return NULL;
for_each_xattr_handler(handlers, xah) {
- if (strncmp(xah->prefix, name, strlen(xah->prefix)) == 0)
+ const char *prefix = xattr_prefix(xah);
+ if (strncmp(prefix, name, strlen(prefix)) == 0)
break;
}
@@ -839,19 +840,16 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
- if (!handler) /* Unsupported xattr name */
+ if (!handler /* Unsupported xattr name */ ||
+ (handler->list && !handler->list(b->dentry)))
return 0;
+ size = namelen + 1;
if (b->buf) {
- size = handler->list(handler, b->dentry,
- b->buf + b->pos, b->size, name,
- namelen);
if (size > b->size)
return -ERANGE;
- } else {
- size = handler->list(handler, b->dentry,
- NULL, 0, name, namelen);
+ memcpy(b->buf + b->pos, name, namelen);
+ b->buf[b->pos + namelen] = 0;
}
-
b->pos += size;
}
return 0;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 4b34b9dc03dd..558a16beaacb 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -186,10 +186,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
BUG();
@@ -244,7 +244,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
switch (type) {
case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
@@ -256,7 +256,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
}
break;
case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ac659af431ae..ab0217d32039 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -34,21 +34,9 @@ security_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t security_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t namelen)
+static bool security_list(struct dentry *dentry)
{
- const size_t len = namelen + 1;
-
- if (IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_len) {
- memcpy(list, name, namelen);
- list[namelen] = '\0';
- }
-
- return len;
+ return !IS_PRIVATE(d_inode(dentry));
}
/* Initializes the security context for a new inode and returns the number
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a338adf1b8b4..64b67aa643a9 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -33,20 +33,9 @@ trusted_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t trusted_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool trusted_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
- return 0;
-
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return capable(CAP_SYS_ADMIN) && !IS_PRIVATE(d_inode(dentry));
}
const struct xattr_handler reiserfs_xattr_trusted_handler = {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 39c9667191c5..12e6306f562a 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -30,19 +30,9 @@ user_set(const struct xattr_handler *handler, struct dentry *dentry,
return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
}
-static size_t user_list(const struct xattr_handler *handler,
- struct dentry *dentry, char *list, size_t list_size,
- const char *name, size_t name_len)
+static bool user_list(struct dentry *dentry)
{
- const size_t len = name_len + 1;
-
- if (!reiserfs_xattrs_user(dentry->d_sb))
- return 0;
- if (list && len <= list_size) {
- memcpy(list, name, name_len);
- list[name_len] = '\0';
- }
- return len;
+ return reiserfs_xattrs_user(dentry->d_sb);
}
const struct xattr_handler reiserfs_xattr_user_handler = {
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268733cda397..bb894e78a821 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -360,6 +360,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
break;
case ROMFH_SYM:
i->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(i);
i->i_data.a_ops = &romfs_aops;
mode |= S_IRWXUGO;
break;
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index a1ce5ce60632..0927b1e80ab6 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -41,6 +41,7 @@
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/xattr.h>
+#include <linux/pagemap.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -291,6 +292,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
inode->i_op = &squashfs_symlink_inode_ops;
+ inode_nohighmem(inode);
inode->i_data.a_ops = &squashfs_symlink_aops;
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 12806dffb345..dbcc2f54bad4 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -119,8 +119,7 @@ const struct address_space_operations squashfs_symlink_aops = {
const struct inode_operations squashfs_symlink_inode_ops = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getxattr = generic_getxattr,
.listxattr = squashfs_listxattr
};
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 6a4cc344085c..1e9de96288d8 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -58,7 +58,7 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
struct squashfs_xattr_entry entry;
struct squashfs_xattr_val val;
const struct xattr_handler *handler;
- int name_size, prefix_size = 0;
+ int name_size;
err = squashfs_read_metadata(sb, &entry, &start, &offset,
sizeof(entry));
@@ -67,15 +67,16 @@ ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
name_size = le16_to_cpu(entry.size);
handler = squashfs_xattr_handler(le16_to_cpu(entry.type));
- if (handler)
- prefix_size = handler->list(handler, d, buffer, rest,
- NULL, name_size);
- if (prefix_size) {
+ if (handler && (!handler->list || handler->list(d))) {
+ const char *prefix = handler->prefix ?: handler->name;
+ size_t prefix_size = strlen(prefix);
+
if (buffer) {
if (prefix_size + name_size + 1 > rest) {
err = -ERANGE;
goto failed;
}
+ memcpy(buffer, prefix, prefix_size);
buffer += prefix_size;
}
err = squashfs_read_metadata(sb, buffer, &start,
@@ -212,25 +213,10 @@ failed:
}
-static size_t squashfs_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
-{
- int len = strlen(handler->prefix);
-
- if (list && len <= list_size)
- memcpy(list, handler->prefix, len);
- return len;
-}
-
static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
struct dentry *d, const char *name,
void *buffer, size_t size)
{
- if (name[0] == '\0')
- return -EINVAL;
-
return squashfs_xattr_get(d_inode(d), handler->flags, name,
buffer, size);
}
@@ -241,22 +227,15 @@ static int squashfs_xattr_handler_get(const struct xattr_handler *handler,
static const struct xattr_handler squashfs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
.flags = SQUASHFS_XATTR_USER,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
/*
* Trusted namespace support
*/
-static size_t squashfs_trusted_xattr_handler_list(const struct xattr_handler *handler,
- struct dentry *d, char *list,
- size_t list_size, const char *name,
- size_t name_len)
+static bool squashfs_trusted_xattr_handler_list(struct dentry *d)
{
- if (!capable(CAP_SYS_ADMIN))
- return 0;
- return squashfs_xattr_handler_list(handler, d, list, list_size, name,
- name_len);
+ return capable(CAP_SYS_ADMIN);
}
static const struct xattr_handler squashfs_xattr_trusted_handler = {
@@ -272,7 +251,6 @@ static const struct xattr_handler squashfs_xattr_trusted_handler = {
static const struct xattr_handler squashfs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.flags = SQUASHFS_XATTR_SECURITY,
- .list = squashfs_xattr_handler_list,
.get = squashfs_xattr_handler_get
};
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 02fa1dcc5969..07ac18c355e7 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -146,8 +146,7 @@ static inline void write3byte(struct sysv_sb_info *sbi,
static const struct inode_operations sysv_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
+ .get_link = page_get_link,
.getattr = sysv_getattr,
};
@@ -163,6 +162,7 @@ void sysv_set_inode(struct inode *inode, dev_t rdev)
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &sysv_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &sysv_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0edc12856147..eff62801acbf 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1608,7 +1608,7 @@ const struct inode_operations ubifs_file_inode_operations = {
const struct inode_operations ubifs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
.setattr = ubifs_setattr,
.getattr = ubifs_getattr,
.setxattr = ubifs_setxattr,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8d0b3ade0ff0..055746350d16 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1540,7 +1540,8 @@ reread:
break;
case ICBTAG_FILE_TYPE_SYMLINK:
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
break;
case ICBTAG_FILE_TYPE_MAIN:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c97b5a8d1e24..42eafb91f7ff 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -921,7 +921,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
inode->i_data.a_ops = &udf_symlink_aops;
- inode->i_op = &udf_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
struct kernel_lb_addr eloc;
@@ -1344,8 +1345,3 @@ const struct inode_operations udf_dir_inode_operations = {
.rename = udf_rename,
.tmpfile = udf_tmpfile,
};
-const struct inode_operations udf_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
-};
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 862535b3ba58..8d619773056b 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -107,7 +107,7 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct buffer_head *bh = NULL;
unsigned char *symlink;
int err;
- unsigned char *p = kmap(page);
+ unsigned char *p = page_address(page);
struct udf_inode_info *iinfo;
uint32_t pos;
@@ -141,7 +141,6 @@ static int udf_symlink_filler(struct file *file, struct page *page)
up_read(&iinfo->i_data_sem);
SetPageUptodate(page);
- kunmap(page);
unlock_page(page);
return 0;
@@ -149,7 +148,6 @@ out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
out_unmap:
- kunmap(page);
unlock_page(page);
return err;
}
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 47bb3f5ca360..ce169b49429d 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -85,7 +85,6 @@ extern const struct inode_operations udf_dir_inode_operations;
extern const struct file_operations udf_dir_operations;
extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
-extern const struct inode_operations udf_symlink_inode_operations;
extern const struct address_space_operations udf_aops;
extern const struct address_space_operations udf_adinicb_aops;
extern const struct address_space_operations udf_symlink_aops;
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index 392db25c0b56..ec4a6b49fa13 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_UFS_FS) += ufs.o
ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
- namei.o super.o symlink.o util.o
+ namei.o super.o util.o
ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a064cf44b143..d897e169ab9c 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -528,11 +528,12 @@ static void ufs_set_inode_ops(struct inode *inode)
inode->i_mapping->a_ops = &ufs_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (!inode->i_blocks) {
- inode->i_op = &ufs_fast_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
+ inode->i_op = &simple_symlink_inode_operations;
} else {
- inode->i_op = &ufs_symlink_inode_operations;
inode->i_mapping->a_ops = &ufs_aops;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
}
} else
init_special_inode(inode, inode->i_mode,
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 47966554317c..acf4a3b61b81 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -123,14 +123,15 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
/* slow symlink */
- inode->i_op = &ufs_symlink_inode_operations;
+ inode->i_op = &page_symlink_inode_operations;
+ inode_nohighmem(inode);
inode->i_mapping->a_ops = &ufs_aops;
err = page_symlink(inode, symname, l);
if (err)
goto out_fail;
} else {
/* fast symlink */
- inode->i_op = &ufs_fast_symlink_inode_operations;
+ inode->i_op = &simple_symlink_inode_operations;
inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink;
memcpy(inode->i_link, symname, l);
inode->i_size = l-1;
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
deleted file mode 100644
index 874480bb43e9..000000000000
--- a/fs/ufs/symlink.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * linux/fs/ufs/symlink.c
- *
- * Only fast symlinks left here - the rest is done by generic code. AV, 1999
- *
- * Copyright (C) 1998
- * Daniel Pirkl <daniel.pirkl@emai.cz>
- * Charles University, Faculty of Mathematics and Physics
- *
- * from
- *
- * linux/fs/ext2/symlink.c
- *
- * Copyright (C) 1992, 1993, 1994, 1995
- * Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- *
- * from
- *
- * linux/fs/minix/symlink.c
- *
- * Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext2 symlink handling code
- */
-
-#include "ufs_fs.h"
-#include "ufs.h"
-
-const struct inode_operations ufs_fast_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = simple_follow_link,
- .setattr = ufs_setattr,
-};
-
-const struct inode_operations ufs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
- .setattr = ufs_setattr,
-};
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 7da4aca868c0..c87f4c3fa9dd 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -136,10 +136,6 @@ extern __printf(3, 4)
void ufs_panic(struct super_block *, const char *, const char *, ...);
void ufs_mark_sb_dirty(struct super_block *sb);
-/* symlink.c */
-extern const struct inode_operations ufs_fast_symlink_inode_operations;
-extern const struct inode_operations ufs_symlink_inode_operations;
-
static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
{
return sb->s_fs_info;
diff --git a/fs/xattr.c b/fs/xattr.c
index 9b932b95d74e..d7f5037a17b5 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -208,25 +208,6 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
return error;
}
-/* Compare an extended attribute value with the given value */
-int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
- const char *value, size_t size, gfp_t flags)
-{
- char *xattr_value = NULL;
- int rc;
-
- rc = vfs_getxattr_alloc(dentry, xattr_name, &xattr_value, 0, flags);
- if (rc < 0)
- return rc;
-
- if ((rc != size) || (memcmp(xattr_value, value, rc) != 0))
- rc = -EINVAL;
- else
- rc = 0;
- kfree(xattr_value);
- return rc;
-}
-
ssize_t
vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
{
@@ -700,13 +681,20 @@ xattr_resolve_name(const struct xattr_handler **handlers, const char **name)
return NULL;
for_each_xattr_handler(handlers, handler) {
- const char *n = strcmp_prefix(*name, handler->prefix);
+ const char *n;
+
+ n = strcmp_prefix(*name, xattr_prefix(handler));
if (n) {
+ if (!handler->prefix ^ !*n) {
+ if (*n)
+ continue;
+ return ERR_PTR(-EINVAL);
+ }
*name = n;
- break;
+ return handler;
}
}
- return handler;
+ return ERR_PTR(-EOPNOTSUPP);
}
/*
@@ -718,8 +706,8 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->get(handler, dentry, name, buffer, size);
}
@@ -735,19 +723,25 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!buffer) {
for_each_xattr_handler(handlers, handler) {
- size += handler->list(handler, dentry, NULL, 0,
- NULL, 0);
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ size += strlen(handler->name) + 1;
}
} else {
char *buf = buffer;
+ size_t len;
for_each_xattr_handler(handlers, handler) {
- size = handler->list(handler, dentry, buf, buffer_size,
- NULL, 0);
- if (size > buffer_size)
+ if (!handler->name ||
+ (handler->list && !handler->list(dentry)))
+ continue;
+ len = strlen(handler->name);
+ if (len + 1 > buffer_size)
return -ERANGE;
- buf += size;
- buffer_size -= size;
+ memcpy(buf, handler->name, len + 1);
+ buf += len + 1;
+ buffer_size -= len + 1;
}
size = buf - buffer;
}
@@ -765,8 +759,8 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
if (size == 0)
value = ""; /* empty EA, do not remove */
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, value, size, flags);
}
@@ -780,8 +774,8 @@ generic_removexattr(struct dentry *dentry, const char *name)
const struct xattr_handler *handler;
handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
- if (!handler)
- return -EOPNOTSUPP;
+ if (IS_ERR(handler))
+ return PTR_ERR(handler);
return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE);
}
@@ -808,7 +802,7 @@ EXPORT_SYMBOL(generic_removexattr);
const char *xattr_full_name(const struct xattr_handler *handler,
const char *name)
{
- size_t prefix_len = strlen(handler->prefix);
+ size_t prefix_len = strlen(xattr_prefix(handler));
return name - prefix_len;
}
@@ -863,8 +857,22 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
return ret;
}
-static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
+/**
+ * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
+ * @xattrs: target simple_xattr list
+ * @name: name of the extended attribute
+ * @value: value of the xattr. If %NULL, will remove the attribute.
+ * @size: size of the new xattr
+ * @flags: %XATTR_{CREATE|REPLACE}
+ *
+ * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
+ * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
+ * otherwise, fails with -ENODATA.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+ const void *value, size_t size, int flags)
{
struct simple_xattr *xattr;
struct simple_xattr *new_xattr = NULL;
@@ -914,73 +922,64 @@ out:
}
-/**
- * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
- * @xattrs: target simple_xattr list
- * @name: name of the new extended attribute
- * @value: value of the new xattr. If %NULL, will remove the attribute
- * @size: size of the new xattr
- * @flags: %XATTR_{CREATE|REPLACE}
- *
- * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
- * with -EEXIST. If %XATTR_REPLACE is set, the xattr should exist;
- * otherwise, fails with -ENODATA.
- *
- * Returns 0 on success, -errno on failure.
- */
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags)
-{
- if (size == 0)
- value = ""; /* empty EA, do not remove */
- return __simple_xattr_set(xattrs, name, value, size, flags);
-}
-
-/*
- * xattr REMOVE operation for in-memory/pseudo filesystems
- */
-int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name)
+static bool xattr_is_trusted(const char *name)
{
- return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE);
+ return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
-static bool xattr_is_trusted(const char *name)
+static int xattr_list_one(char **buffer, ssize_t *remaining_size,
+ const char *name)
{
- return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+ size_t len = strlen(name) + 1;
+ if (*buffer) {
+ if (*remaining_size < len)
+ return -ERANGE;
+ memcpy(*buffer, name, len);
+ *buffer += len;
+ }
+ *remaining_size -= len;
+ return 0;
}
/*
* xattr LIST operation for in-memory/pseudo filesystems
*/
-ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
- size_t size)
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ char *buffer, size_t size)
{
bool trusted = capable(CAP_SYS_ADMIN);
struct simple_xattr *xattr;
- size_t used = 0;
+ ssize_t remaining_size = size;
+ int err;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ if (inode->i_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_ACCESS);
+ if (err)
+ return err;
+ }
+ if (inode->i_default_acl) {
+ err = xattr_list_one(&buffer, &remaining_size,
+ XATTR_NAME_POSIX_ACL_DEFAULT);
+ if (err)
+ return err;
+ }
+#endif
spin_lock(&xattrs->lock);
list_for_each_entry(xattr, &xattrs->head, list) {
- size_t len;
-
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
continue;
- len = strlen(xattr->name) + 1;
- used += len;
- if (buffer) {
- if (size < used) {
- used = -ERANGE;
- break;
- }
- memcpy(buffer, xattr->name, len);
- buffer += len;
- }
+ err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+ if (err)
+ return err;
}
spin_unlock(&xattrs->lock);
- return used;
+ return size - remaining_size;
}
/*
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6bb470fbb8e8..2d5df1f23bbc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -252,29 +252,6 @@ xfs_set_mode(struct inode *inode, umode_t mode)
return error;
}
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
- int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
-
- return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
- ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
- return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode))
- return 0;
- return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
int
xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 52f8255d6bdf..286fa89217f5 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -24,16 +24,12 @@ struct posix_acl;
#ifdef CONFIG_XFS_POSIX_ACL
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int posix_acl_access_exists(struct inode *inode);
-extern int posix_acl_default_exists(struct inode *inode);
#else
static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
{
return NULL;
}
# define xfs_set_acl NULL
-# define posix_acl_access_exists(inode) 0
-# define posix_acl_default_exists(inode) 0
#endif /* CONFIG_XFS_POSIX_ACL */
extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0cdf0..06eafafe636e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -414,13 +414,17 @@ xfs_vn_rename(
* uio is kmalloced for this reason...
*/
STATIC const char *
-xfs_vn_follow_link(
+xfs_vn_get_link(
struct dentry *dentry,
- void **cookie)
+ struct inode *inode,
+ struct delayed_call *done)
{
char *link;
int error = -ENOMEM;
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
if (!link)
goto out_err;
@@ -429,7 +433,8 @@ xfs_vn_follow_link(
if (unlikely(error))
goto out_kfree;
- return *cookie = link;
+ set_delayed_call(done, kfree_link, link);
+ return link;
out_kfree:
kfree(link);
@@ -1172,8 +1177,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = xfs_vn_follow_link,
- .put_link = kfree_put_link,
+ .get_link = xfs_vn_get_link,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca21c6..110f1d7d86b0 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -39,9 +39,6 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error, asize = size;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
xflags |= ATTR_KERNOVAL;
@@ -84,9 +81,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry,
struct xfs_inode *ip = XFS_I(d_inode(dentry));
int error;
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
xflags |= ATTR_CREATE;
@@ -135,47 +129,19 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
NULL
};
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return sizeof("security");
- else if (flags & XFS_ATTR_ROOT)
- return sizeof("trusted");
- else
- return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return xfs_xattr_security_handler.prefix;
- else if (flags & XFS_ATTR_ROOT)
- return xfs_xattr_trusted_handler.prefix;
- else
- return xfs_xattr_user_handler.prefix;
-}
-
static int
-xfs_xattr_put_listent(
+__xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
+ char *prefix,
+ int prefix_len,
+ unsigned char *name,
+ int namelen)
{
- unsigned int prefix_len = xfs_xattr_prefix_len(flags);
char *offset;
int arraytop;
- ASSERT(context->count >= 0);
-
- /*
- * Only show root namespace entries if we are actually allowed to
- * see them.
- */
- if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
- return 0;
+ if (!context->alist)
+ goto compute_size;
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
@@ -183,17 +149,19 @@ xfs_xattr_put_listent(
return 1;
}
offset = (char *)context->alist + context->count;
- strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+ strncpy(offset, prefix, prefix_len);
offset += prefix_len;
strncpy(offset, (char *)name, namelen); /* real name */
offset += namelen;
*offset = '\0';
+
+compute_size:
context->count += prefix_len + namelen + 1;
return 0;
}
static int
-xfs_xattr_put_listent_sizes(
+xfs_xattr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
@@ -201,24 +169,55 @@ xfs_xattr_put_listent_sizes(
int valuelen,
unsigned char *value)
{
- context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
- return 0;
-}
+ char *prefix;
+ int prefix_len;
-static int
-list_one_attr(const char *name, const size_t len, void *data,
- size_t size, ssize_t *result)
-{
- char *p = data + *result;
+ ASSERT(context->count >= 0);
- *result += len;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
+ if (flags & XFS_ATTR_ROOT) {
+#ifdef CONFIG_XFS_POSIX_ACL
+ if (namelen == SGI_ACL_FILE_SIZE &&
+ strncmp(name, SGI_ACL_FILE,
+ SGI_ACL_FILE_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_ACCESS,
+ strlen(XATTR_POSIX_ACL_ACCESS));
+ if (ret)
+ return ret;
+ } else if (namelen == SGI_ACL_DEFAULT_SIZE &&
+ strncmp(name, SGI_ACL_DEFAULT,
+ SGI_ACL_DEFAULT_SIZE) == 0) {
+ int ret = __xfs_xattr_put_listent(
+ context, XATTR_SYSTEM_PREFIX,
+ XATTR_SYSTEM_PREFIX_LEN,
+ XATTR_POSIX_ACL_DEFAULT,
+ strlen(XATTR_POSIX_ACL_DEFAULT));
+ if (ret)
+ return ret;
+ }
+#endif
- strcpy(p, name);
- return 0;
+ /*
+ * Only show root namespace entries if we are actually allowed to
+ * see them.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return 0;
+
+ prefix = XATTR_TRUSTED_PREFIX;
+ prefix_len = XATTR_TRUSTED_PREFIX_LEN;
+ } else if (flags & XFS_ATTR_SECURE) {
+ prefix = XATTR_SECURITY_PREFIX;
+ prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ } else {
+ prefix = XATTR_USER_PREFIX;
+ prefix_len = XATTR_USER_PREFIX_LEN;
+ }
+
+ return __xfs_xattr_put_listent(context, prefix, prefix_len, name,
+ namelen);
}
ssize_t
@@ -227,7 +226,6 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
struct inode *inode = d_inode(dentry);
- int error;
/*
* First read the regular on-disk attributes.
@@ -236,37 +234,14 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
context.dp = XFS_I(inode);
context.cursor = &cursor;
context.resynch = 1;
- context.alist = data;
+ context.alist = size ? data : NULL;
context.bufsize = size;
context.firstu = context.bufsize;
-
- if (size)
- context.put_listent = xfs_xattr_put_listent;
- else
- context.put_listent = xfs_xattr_put_listent_sizes;
+ context.put_listent = xfs_xattr_put_listent;
xfs_attr_list_int(&context);
if (context.count < 0)
return -ERANGE;
- /*
- * Then add the two synthetic ACL attributes.
- */
- if (posix_acl_access_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- if (posix_acl_default_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
return context.count;
}
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index b42afada1280..0f45f93ef692 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -93,7 +93,7 @@
#endif /* CONFIG_SMP */
#ifndef smp_store_mb
-#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
+#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
#endif
#ifndef smp_mb__before_atomic
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 14b0ff32fb9f..3a6803cb0ec9 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -569,7 +569,7 @@ static inline int track_pfn_copy(struct vm_area_struct *vma)
}
/*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero).
*/
@@ -577,6 +577,13 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
unsigned long pfn, unsigned long size)
{
}
+
+/*
+ * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
+ */
+static inline void untrack_pfn_moved(struct vm_area_struct *vma)
+{
+}
#else
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr,
@@ -586,6 +593,7 @@ extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
+extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
#ifdef __HAVE_COLOR_ZERO_PAGE
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index e2aadbc7151f..39e1cb201b8e 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -12,8 +12,9 @@
* GNU General Public License for more details.
*
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hp.com>
+ * Authors: Waiman Long <waiman.long@hpe.com>
*/
#ifndef __ASM_GENERIC_QSPINLOCK_H
#define __ASM_GENERIC_QSPINLOCK_H
@@ -62,7 +63,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
if (!atomic_read(&lock->val) &&
- (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
+ (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0))
return 1;
return 0;
}
@@ -77,7 +78,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val;
- val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+ val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL);
if (likely(val == 0))
return;
queued_spin_lock_slowpath(lock, val);
@@ -93,7 +94,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
/*
* smp_mb__before_atomic() in order to guarantee release semantics
*/
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
atomic_sub(_Q_LOCKED_VAL, &lock->val);
}
#endif
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index f589222bfa87..35b22f94d2d2 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -19,6 +19,10 @@ extern unsigned long min_low_pfn;
* highest page
*/
extern unsigned long max_pfn;
+/*
+ * highest possible page
+ */
+extern unsigned long long max_possible_pfn;
#ifndef CONFIG_NO_BOOTMEM
/*
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7784b597e959..6013021a3b39 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -62,12 +62,18 @@ struct module;
* @suspend: suspend function for the clocksource, if necessary
* @resume: resume function for the clocksource, if necessary
* @owner: module reference, must be set by clocksource in modules
+ *
+ * Note: This struct is not used in hotpathes of the timekeeping code
+ * because the timekeeper caches the hot path fields in its own data
+ * structure, so no line cache alignment is required,
+ *
+ * The pointer to the clocksource itself is handed to the read
+ * callback. If you need extra information there you can wrap struct
+ * clocksource into your own struct. Depending on the amount of
+ * information you need you should consider to cache line align that
+ * structure.
*/
struct clocksource {
- /*
- * Hotpath data, fits in a single cache line when the
- * clocksource itself is cacheline aligned.
- */
cycle_t (*read)(struct clocksource *cs);
cycle_t mask;
u32 mult;
@@ -95,7 +101,7 @@ struct clocksource {
cycle_t wd_last;
#endif
struct module *owner;
-} ____cacheline_aligned;
+};
/*
* Clock source flags bits::
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4dac1036594f..00b042c49ccd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__u.__val; \
})
+/**
+ * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ *
+ * The control dependency provides a LOAD->STORE order, the additional RMB
+ * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
+ * aka. ACQUIRE.
+ */
+#define smp_cond_acquire(cond) do { \
+ while (!(cond)) \
+ cpu_relax(); \
+ smp_rmb(); /* ctrl + rmb := acquire */ \
+} while (0)
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 68b575afe5f5..d259274238db 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_guest_enter(current);
else
current->flags |= PF_VCPU;
@@ -100,7 +100,7 @@ static inline void guest_exit(void)
if (context_tracking_is_enabled())
__context_tracking_exit(CONTEXT_GUEST);
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_guest_exit(current);
else
current->flags &= ~PF_VCPU;
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index ee956c528fab..1d34fe68f48a 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -22,12 +22,12 @@ struct context_tracking {
};
#ifdef CONFIG_CONTEXT_TRACKING
-extern struct static_key context_tracking_enabled;
+extern struct static_key_false context_tracking_enabled;
DECLARE_PER_CPU(struct context_tracking, context_tracking);
static inline bool context_tracking_is_enabled(void)
{
- return static_key_false(&context_tracking_enabled);
+ return static_branch_unlikely(&context_tracking_enabled);
}
static inline bool context_tracking_cpu_is_enabled(void)
diff --git a/include/linux/delayed_call.h b/include/linux/delayed_call.h
new file mode 100644
index 000000000000..f7fa76ae1a9b
--- /dev/null
+++ b/include/linux/delayed_call.h
@@ -0,0 +1,34 @@
+#ifndef _DELAYED_CALL_H
+#define _DELAYED_CALL_H
+
+/*
+ * Poor man's closures; I wish we could've done them sanely polymorphic,
+ * but...
+ */
+
+struct delayed_call {
+ void (*fn)(void *);
+ void *arg;
+};
+
+#define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL}
+
+/* I really wish we had closures with sane typechecking... */
+static inline void set_delayed_call(struct delayed_call *call,
+ void (*fn)(void *), void *arg)
+{
+ call->fn = fn;
+ call->arg = arg;
+}
+
+static inline void do_delayed_call(struct delayed_call *call)
+{
+ if (call->fn)
+ call->fn(call->arg);
+}
+
+static inline void clear_delayed_call(struct delayed_call *call)
+{
+ call->fn = NULL;
+}
+#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4165e9ac9e36..5972ffe5719a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -493,6 +493,25 @@ static inline void bpf_jit_free(struct bpf_prog *fp)
#define BPF_ANC BIT(15)
+static inline bool bpf_needs_clear_a(const struct sock_filter *first)
+{
+ switch (first->code) {
+ case BPF_RET | BPF_K:
+ case BPF_LD | BPF_W | BPF_LEN:
+ return false;
+
+ case BPF_LD | BPF_W | BPF_ABS:
+ case BPF_LD | BPF_H | BPF_ABS:
+ case BPF_LD | BPF_B | BPF_ABS:
+ if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
+ return true;
+ return false;
+
+ default:
+ return true;
+ }
+}
+
static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
{
BUG_ON(ftest->code & BPF_ANC);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..ef3cd36689f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -31,6 +31,7 @@
#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>
+#include <linux/delayed_call.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1633,12 +1634,11 @@ struct file_operations {
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
- const char * (*follow_link) (struct dentry *, void **);
+ const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
int (*permission) (struct inode *, int);
struct posix_acl * (*get_acl)(struct inode *, int);
int (*readlink) (struct dentry *, char __user *,int);
- void (*put_link) (struct inode *, void *);
int (*create) (struct inode *,struct dentry *, umode_t, bool);
int (*link) (struct dentry *,struct inode *,struct dentry *);
@@ -2736,14 +2736,14 @@ extern const struct file_operations generic_ro_fops;
extern int readlink_copy(char __user *, int, const char *);
extern int page_readlink(struct dentry *, char __user *, int);
-extern const char *page_follow_link_light(struct dentry *, void **);
-extern void page_put_link(struct inode *, void *);
+extern const char *page_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
+extern void page_put_link(void *);
extern int __page_symlink(struct inode *inode, const char *symname, int len,
int nofs);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct inode *, void *);
-extern void free_page_put_link(struct inode *, void *);
+extern void kfree_link(void *);
extern int generic_readlink(struct dentry *, char __user *, int);
extern void generic_fillattr(struct inode *, struct kstat *);
int vfs_getattr_nosec(struct path *path, struct kstat *stat);
@@ -2754,7 +2754,8 @@ void __inode_sub_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
-const char *simple_follow_link(struct dentry *, void **);
+const char *simple_get_link(struct dentry *, struct inode *,
+ struct delayed_call *);
extern const struct inode_operations simple_symlink_inode_operations;
extern int iterate_dir(struct file *, struct dir_context *);
@@ -2764,8 +2765,6 @@ extern int vfs_lstat(const char __user *, struct kstat *);
extern int vfs_fstat(unsigned int, struct kstat *);
extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
-extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
- unsigned long arg);
extern int __generic_block_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
loff_t start, loff_t len,
@@ -3025,5 +3024,6 @@ static inline bool dir_relax(struct inode *inode)
}
extern bool path_noexec(const struct path *path);
+extern void inode_nohighmem(struct inode *inode);
#endif /* _LINUX_FS_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index eae6548efbf0..60048c50404e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -586,6 +586,7 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
+extern void ftrace_release_mod(struct module *mod);
extern void ftrace_disable_daemon(void);
extern void ftrace_enable_daemon(void);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1c1ff7e4faa4..f2cb8d45513d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -150,7 +150,7 @@ extern struct task_group root_task_group;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \
- .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \
+ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
.vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS,
#else
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..cb30edbfe9fc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -195,6 +195,7 @@ extern void disable_irq(unsigned int irq);
extern void disable_percpu_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
extern void enable_percpu_irq(unsigned int irq, unsigned int type);
+extern bool irq_percpu_is_enabled(unsigned int irq);
extern void irq_wake_thread(unsigned int irq, void *dev_id);
/* The following three functions are for the core kernel use only. */
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index bae69e5d693c..9c940263ca23 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -103,10 +103,21 @@ struct device_node;
void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
int gic_cpu_if_down(unsigned int gic_nr);
+/*
+ * Subdrivers that need some preparatory work can initialize their
+ * chips and call this to register their GICs.
+ */
+int gic_of_init(struct device_node *node, struct device_node *parent);
+
+/*
+ * Legacy platforms not converted to DT yet must use this to init
+ * their GIC
+ */
void gic_init(unsigned int nr, int start,
void __iomem *dist , void __iomem *cpu);
-int gicv2m_of_init(struct device_node *node, struct irq_domain *parent);
+int gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent);
void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
int gic_get_cpu_id(unsigned int cpu);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index a587a33363c7..dcca77c4b9d2 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -1,6 +1,8 @@
#ifndef _LINUX_IRQDESC_H
#define _LINUX_IRQDESC_H
+#include <linux/rcupdate.h>
+
/*
* Core internal functions to deal with irq descriptors
*/
@@ -40,6 +42,7 @@ struct pt_regs;
* IRQF_NO_SUSPEND set
* @force_resume_depth: number of irqactions on a irq descriptor with
* IRQF_FORCE_RESUME set
+ * @rcu: rcu head for delayed free
* @dir: /proc/irq/ procfs entry
* @name: flow handler name for /proc/interrupts output
*/
@@ -82,6 +85,9 @@ struct irq_desc {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
+#ifdef CONFIG_SPARSE_IRQ
+ struct rcu_head rcu;
+#endif
int parent_irq;
struct module *owner;
const char *name;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index d5e5c5bef28c..f64622ad02c1 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -211,6 +211,11 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
return node ? &node->fwnode : NULL;
}
+static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
+{
+ return fwnode && fwnode->type == FWNODE_IRQCHIP;
+}
+
static inline struct irq_domain *irq_find_matching_host(struct device_node *node,
enum irq_domain_bus_token bus_token)
{
@@ -367,6 +372,9 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false);
}
+extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs, void *arg);
extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,
unsigned int virq,
irq_hw_number_t hwirq,
@@ -410,6 +418,11 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
static inline void irq_dispose_mapping(unsigned int virq) { }
static inline void irq_domain_activate_irq(struct irq_data *data) { }
static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
+static inline struct irq_domain *irq_find_matching_fwnode(
+ struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token)
+{
+ return NULL;
+}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 350dfb08aee3..7311c3294e25 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -255,6 +255,7 @@ extern long (*panic_blink)(int state);
__printf(1, 2)
void panic(const char *fmt, ...)
__noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
extern void oops_enter(void);
extern void oops_exit(void);
void print_oops_end_marker(void);
@@ -446,6 +447,33 @@ extern int sysctl_panic_on_stackoverflow;
extern bool crash_kexec_post_notifiers;
/*
+ * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
+ * holds a CPU number which is executing panic() currently. A value of
+ * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
+ */
+extern atomic_t panic_cpu;
+#define PANIC_CPU_INVALID -1
+
+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
+ */
+#define nmi_panic(regs, fmt, ...) \
+do { \
+ int old_cpu, cpu; \
+ \
+ cpu = raw_smp_processor_id(); \
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \
+ \
+ if (old_cpu == PANIC_CPU_INVALID) \
+ panic(fmt, ##__VA_ARGS__); \
+ else if (old_cpu != cpu) \
+ nmi_panic_self_stop(regs); \
+} while (0)
+
+/*
* Only to be used by arch init code. If the user over-wrote the default
* CONFIG_PANIC_TIMEOUT, honor it.
*/
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d140b1e9faa7..7b68d2788a56 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -237,6 +237,7 @@ extern int kexec_purgatory_get_set_symbol(struct kimage *image,
unsigned int size, bool get_value);
extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
const char *name);
+extern void __crash_kexec(struct pt_regs *);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -332,6 +333,7 @@ int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
+static inline void __crash_kexec(struct pt_regs *regs) { }
static inline void crash_kexec(struct pt_regs *regs) { }
static inline int kexec_should_crash(struct task_struct *p) { return 0; }
#define kexec_in_progress false
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 600c1e0626a5..851821bfd553 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -205,6 +205,7 @@ enum {
ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */
ATA_LFLAG_RST_ONCE = (1 << 9), /* limit recovery to one reset */
ATA_LFLAG_CHANGED = (1 << 10), /* LPM state changed on this link */
+ ATA_LFLAG_NO_DB_DELAY = (1 << 11), /* no debounce delay on link resume */
/* struct ata_port flags */
ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
diff --git a/include/linux/list.h b/include/linux/list.h
index 993395a2e55c..5356f4d661a7 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -24,7 +24,7 @@
static inline void INIT_LIST_HEAD(struct list_head *list)
{
- list->next = list;
+ WRITE_ONCE(list->next, list);
list->prev = list;
}
@@ -42,7 +42,7 @@ static inline void __list_add(struct list_head *new,
next->prev = new;
new->next = next;
new->prev = prev;
- prev->next = new;
+ WRITE_ONCE(prev->next, new);
}
#else
extern void __list_add(struct list_head *new,
@@ -186,7 +186,7 @@ static inline int list_is_last(const struct list_head *list,
*/
static inline int list_empty(const struct list_head *head)
{
- return head->next == head;
+ return READ_ONCE(head->next) == head;
}
/**
@@ -608,7 +608,7 @@ static inline int hlist_unhashed(const struct hlist_node *h)
static inline int hlist_empty(const struct hlist_head *h)
{
- return !h->first;
+ return !READ_ONCE(h->first);
}
static inline void __hlist_del(struct hlist_node *n)
@@ -642,7 +642,7 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
n->next = first;
if (first)
first->pprev = &n->next;
- h->first = n;
+ WRITE_ONCE(h->first, n);
n->pprev = &h->first;
}
@@ -653,14 +653,14 @@ static inline void hlist_add_before(struct hlist_node *n,
n->pprev = next->pprev;
n->next = next;
next->pprev = &n->next;
- *(n->pprev) = n;
+ WRITE_ONCE(*(n->pprev), n);
}
static inline void hlist_add_behind(struct hlist_node *n,
struct hlist_node *prev)
{
n->next = prev->next;
- prev->next = n;
+ WRITE_ONCE(prev->next, n);
n->pprev = &prev->next;
if (n->next)
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 8132214e8efd..ee7229a6c06a 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -70,7 +70,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h,
static inline int hlist_bl_empty(const struct hlist_bl_head *h)
{
- return !((unsigned long)h->first & ~LIST_BL_LOCKMASK);
+ return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK);
}
static inline void hlist_bl_add_head(struct hlist_bl_node *n,
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index 444d2b1313bd..b01fe1009084 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -57,7 +57,7 @@ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
{
- return is_a_nulls(h->first);
+ return is_a_nulls(READ_ONCE(h->first));
}
static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index f67b2ec18e6d..89df7abedd67 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -172,7 +172,7 @@ struct dw_mci {
/* For edmac */
struct dw_mci_dma_slave *dms;
/* Registers's physical base address */
- void *phy_regs;
+ resource_size_t phy_regs;
u32 cmd_status;
u32 data_status;
@@ -235,16 +235,10 @@ struct dw_mci_dma_ops {
};
/* IP Quirks/flags. */
-/* DTO fix for command transmission with IDMAC configured */
-#define DW_MCI_QUIRK_IDMAC_DTO BIT(0)
-/* delay needed between retries on some 2.11a implementations */
-#define DW_MCI_QUIRK_RETRY_DELAY BIT(1)
-/* High Speed Capable - Supports HS cards (up to 50MHz) */
-#define DW_MCI_QUIRK_HIGHSPEED BIT(2)
/* Unreliable card detection */
-#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3)
+#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(0)
/* Timer for broken data transfer over scheme */
-#define DW_MCI_QUIRK_BROKEN_DTO BIT(4)
+#define DW_MCI_QUIRK_BROKEN_DTO BIT(1)
struct dma_pdata;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8673ffe3d86e..8dd4d290ab0d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -212,7 +212,9 @@ struct mmc_host {
u32 ocr_avail_sdio; /* SDIO-specific OCR */
u32 ocr_avail_sd; /* SD-specific OCR */
u32 ocr_avail_mmc; /* MMC-specific OCR */
+#ifdef CONFIG_PM_SLEEP
struct notifier_block pm_notify;
+#endif
u32 max_current_330;
u32 max_current_300;
u32 max_current_180;
@@ -259,7 +261,6 @@ struct mmc_host {
#define MMC_CAP_UHS_SDR50 (1 << 17) /* Host supports UHS SDR50 mode */
#define MMC_CAP_UHS_SDR104 (1 << 18) /* Host supports UHS SDR104 mode */
#define MMC_CAP_UHS_DDR50 (1 << 19) /* Host supports UHS DDR50 mode */
-#define MMC_CAP_RUNTIME_RESUME (1 << 20) /* Resume at runtime_resume. */
#define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */
#define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */
#define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */
@@ -289,6 +290,7 @@ struct mmc_host {
#define MMC_CAP2_HSX00_1_2V (MMC_CAP2_HS200_1_2V_SDR | MMC_CAP2_HS400_1_2V)
#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
#define MMC_CAP2_NO_WRITE_PROTECT (1 << 18) /* No physical write protect pin, assume that card is always read-write */
+#define MMC_CAP2_NO_SDIO (1 << 19) /* Do not send SDIO commands during initialization */
mmc_pm_flag_t pm_caps; /* supported pm features */
@@ -434,8 +436,6 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc,
int mmc_regulator_get_supply(struct mmc_host *mmc);
-int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *);
-
static inline int mmc_card_is_removable(struct mmc_host *host)
{
return !(host->caps & MMC_CAP_NONREMOVABLE);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index f71a25e5fd25..1c6342ab8c0e 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -174,6 +174,7 @@ struct msi_controller {
#include <asm/msi.h>
struct irq_domain;
+struct irq_domain_ops;
struct irq_chip;
struct device_node;
struct fwnode_handle;
@@ -279,6 +280,23 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
irq_write_msi_msg_t write_msi_msg);
void platform_msi_domain_free_irqs(struct device *dev);
+
+/* When an MSI domain is used as an intermediate domain */
+int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *args);
+int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
+ int virq, int nvec, msi_alloc_info_t *args);
+struct irq_domain *
+platform_msi_create_device_domain(struct device *dev,
+ unsigned int nvec,
+ irq_write_msi_msg_t write_msi_msg,
+ const struct irq_domain_ops *ops,
+ void *host_data);
+int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs);
+void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nvec);
+void *platform_msi_get_host_data(struct irq_domain *domain);
#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c8723b62c4cd..bc742dac7d3a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -25,7 +25,7 @@
#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX
#define SNOR_MFR_SPANSION CFI_MFR_AMD
#define SNOR_MFR_SST CFI_MFR_SST
-#define SNOR_MFR_WINBOND 0xef
+#define SNOR_MFR_WINBOND 0xef /* Also used by some Spansion */
/*
* Note on opcode nomenclature: some opcodes have a format like
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e961474a52..37a3d2981352 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -359,6 +359,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
+extern int nfs_revalidate_mapping_rcu(struct inode *inode);
extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6ae25aae88fd..d86378c226fb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1946,6 +1946,16 @@ static inline struct irq_domain *
pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
#endif /* CONFIG_OF */
+#ifdef CONFIG_ACPI
+struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus);
+
+void
+pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *));
+#else
+static inline struct irq_domain *
+pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; }
+#endif
+
#ifdef CONFIG_EEH
static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
{
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 12c9b485beb7..84f542df7ff5 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -116,7 +116,7 @@ void percpu_ref_reinit(struct percpu_ref *ref);
*/
static inline void percpu_ref_kill(struct percpu_ref *ref)
{
- return percpu_ref_kill_and_confirm(ref, NULL);
+ percpu_ref_kill_and_confirm(ref, NULL);
}
/*
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index caebf2a758dc..4bc6dafb703e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -18,12 +18,6 @@
#define PERCPU_MODULE_RESERVE 0
#endif
-#ifndef PERCPU_ENOUGH_ROOM
-#define PERCPU_ENOUGH_ROOM \
- (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \
- PERCPU_MODULE_RESERVE)
-#endif
-
/* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
diff --git a/include/linux/platform_data/irq-renesas-intc-irqpin.h b/include/linux/platform_data/irq-renesas-intc-irqpin.h
deleted file mode 100644
index e4cb911066a6..000000000000
--- a/include/linux/platform_data/irq-renesas-intc-irqpin.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Renesas INTC External IRQ Pin Driver
- *
- * Copyright (C) 2013 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __IRQ_RENESAS_INTC_IRQPIN_H__
-#define __IRQ_RENESAS_INTC_IRQPIN_H__
-
-struct renesas_intc_irqpin_config {
- unsigned int sense_bitfield_width;
- unsigned int irq_base;
- bool control_parent;
-};
-
-#endif /* __IRQ_RENESAS_INTC_IRQPIN_H__ */
diff --git a/include/linux/platform_data/mmc-mvsdio.h b/include/linux/platform_data/mmc-mvsdio.h
deleted file mode 100644
index d02704cd3695..000000000000
--- a/include/linux/platform_data/mmc-mvsdio.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __MMC_MVSDIO_H
-#define __MMC_MVSDIO_H
-
-#include <linux/mbus.h>
-
-struct mvsdio_platform_data {
- unsigned int clock;
- int gpio_card_detect;
- int gpio_write_protect;
-};
-
-#endif
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index dc777be5f2e1..6abd019c76f8 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -51,6 +51,7 @@ extern void arch_setup_pdev_archdata(struct platform_device *);
extern struct resource *platform_get_resource(struct platform_device *,
unsigned int, unsigned int);
extern int platform_get_irq(struct platform_device *, unsigned int);
+extern int platform_irq_count(struct platform_device *);
extern struct resource *platform_get_resource_byname(struct platform_device *,
unsigned int,
const char *);
diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 6f14ee295822..e5e8ec40278d 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -9,16 +9,12 @@
#ifndef _POSIX_ACL_XATTR_H
#define _POSIX_ACL_XATTR_H
+#include <uapi/linux/xattr.h>
#include <linux/posix_acl.h>
-/* Extended attribute names */
-#define POSIX_ACL_XATTR_ACCESS "system.posix_acl_access"
-#define POSIX_ACL_XATTR_DEFAULT "system.posix_acl_default"
-
/* Supported ACL a_version fields */
#define POSIX_ACL_XATTR_VERSION 0x0002
-
/* An undefined entry e_id value */
#define ACL_UNDEFINED_ID (-1)
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 5ed540986019..14ec1652daf4 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -179,32 +179,31 @@ static inline void list_replace_rcu(struct list_head *old,
}
/**
- * list_splice_init_rcu - splice an RCU-protected list into an existing list.
+ * __list_splice_init_rcu - join an RCU-protected list into an existing list.
* @list: the RCU-protected list to splice
- * @head: the place in the list to splice the first list into
+ * @prev: points to the last element of the existing list
+ * @next: points to the first element of the existing list
* @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
*
- * @head can be RCU-read traversed concurrently with this function.
+ * The list pointed to by @prev and @next can be RCU-read traversed
+ * concurrently with this function.
*
* Note that this function blocks.
*
- * Important note: the caller must take whatever action is necessary to
- * prevent any other updates to @head. In principle, it is possible
- * to modify the list as soon as sync() begins execution.
- * If this sort of thing becomes necessary, an alternative version
- * based on call_rcu() could be created. But only if -really-
- * needed -- there is no shortage of RCU API members.
+ * Important note: the caller must take whatever action is necessary to prevent
+ * any other updates to the existing list. In principle, it is possible to
+ * modify the list as soon as sync() begins execution. If this sort of thing
+ * becomes necessary, an alternative version based on call_rcu() could be
+ * created. But only if -really- needed -- there is no shortage of RCU API
+ * members.
*/
-static inline void list_splice_init_rcu(struct list_head *list,
- struct list_head *head,
- void (*sync)(void))
+static inline void __list_splice_init_rcu(struct list_head *list,
+ struct list_head *prev,
+ struct list_head *next,
+ void (*sync)(void))
{
struct list_head *first = list->next;
struct list_head *last = list->prev;
- struct list_head *at = head->next;
-
- if (list_empty(list))
- return;
/*
* "first" and "last" tracking list, so initialize it. RCU readers
@@ -231,10 +230,40 @@ static inline void list_splice_init_rcu(struct list_head *list,
* this function.
*/
- last->next = at;
- rcu_assign_pointer(list_next_rcu(head), first);
- first->prev = head;
- at->prev = last;
+ last->next = next;
+ rcu_assign_pointer(list_next_rcu(prev), first);
+ first->prev = prev;
+ next->prev = last;
+}
+
+/**
+ * list_splice_init_rcu - splice an RCU-protected list into an existing list,
+ * designed for stacks.
+ * @list: the RCU-protected list to splice
+ * @head: the place in the existing list to splice the first list into
+ * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
+ */
+static inline void list_splice_init_rcu(struct list_head *list,
+ struct list_head *head,
+ void (*sync)(void))
+{
+ if (!list_empty(list))
+ __list_splice_init_rcu(list, head, head->next, sync);
+}
+
+/**
+ * list_splice_tail_init_rcu - splice an RCU-protected list into an existing
+ * list, designed for queues.
+ * @list: the RCU-protected list to splice
+ * @head: the place in the existing list to splice the first list into
+ * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
+ */
+static inline void list_splice_tail_init_rcu(struct list_head *list,
+ struct list_head *head,
+ void (*sync)(void))
+{
+ if (!list_empty(list))
+ __list_splice_init_rcu(list, head->prev, head, sync);
}
/**
@@ -305,6 +334,42 @@ static inline void list_splice_init_rcu(struct list_head *list,
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
+ * list_entry_lockless - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu(), but requires some implicit RCU
+ * read-side guarding. One example is running within a special
+ * exception-time environment where preemption is disabled and where
+ * lockdep cannot be invoked (in which case updaters must use RCU-sched,
+ * as in synchronize_sched(), call_rcu_sched(), and friends). Another
+ * example is when items are added to the list, but never deleted.
+ */
+#define list_entry_lockless(ptr, type, member) \
+ container_of((typeof(ptr))lockless_dereference(ptr), type, member)
+
+/**
+ * list_for_each_entry_lockless - iterate over rcu list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu(), but requires some implicit RCU
+ * read-side guarding. One example is running within a special
+ * exception-time environment where preemption is disabled and where
+ * lockdep cannot be invoked (in which case updaters must use RCU-sched,
+ * as in synchronize_sched(), call_rcu_sched(), and friends). Another
+ * example is when items are added to the list, but never deleted.
+ */
+#define list_for_each_entry_lockless(pos, head, member) \
+ for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry_lockless(pos->member.next, typeof(*pos), member))
+
+/**
* list_for_each_entry_continue_rcu - continue iteration over list of given type
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a0189ba67fde..14e6f47ee16f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,10 +48,17 @@
#include <asm/barrier.h>
+#ifndef CONFIG_TINY_RCU
extern int rcu_expedited; /* for sysctl */
+extern int rcu_normal; /* also for sysctl */
+#endif /* #ifndef CONFIG_TINY_RCU */
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */
+{
+ return true;
+}
static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */
{
return false;
@@ -65,6 +72,7 @@ static inline void rcu_unexpedite_gp(void)
{
}
#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
@@ -321,7 +329,6 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
void rcu_init(void);
-void rcu_end_inkernel_boot(void);
void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
@@ -329,6 +336,12 @@ struct notifier_block;
int rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu);
+#ifndef CONFIG_TINY_RCU
+void rcu_end_inkernel_boot(void);
+#else /* #ifndef CONFIG_TINY_RCU */
+static inline void rcu_end_inkernel_boot(void) { }
+#endif /* #ifndef CONFIG_TINY_RCU */
+
#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
@@ -379,9 +392,9 @@ static inline void rcu_init_nohz(void)
*/
#define RCU_NONIDLE(a) \
do { \
- rcu_irq_enter(); \
+ rcu_irq_enter_irqson(); \
do { a; } while (0); \
- rcu_irq_exit(); \
+ rcu_irq_exit_irqson(); \
} while (0)
/*
@@ -741,7 +754,7 @@ static inline void rcu_preempt_sleep_check(void)
* The tracing infrastructure traces RCU (we want that), but unfortunately
* some of the RCU checks causes tracing to lock up the system.
*
- * The tracing version of rcu_dereference_raw() must not call
+ * The no-tracing version of rcu_dereference_raw() must not call
* rcu_read_lock_held().
*/
#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 4c1aaf9cce7b..64809aea661c 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -181,6 +181,14 @@ static inline void rcu_irq_enter(void)
{
}
+static inline void rcu_irq_exit_irqson(void)
+{
+}
+
+static inline void rcu_irq_enter_irqson(void)
+{
+}
+
static inline void rcu_irq_exit(void)
{
}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 60d15a080d7c..ad1eda9fa4da 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void);
/*
* Note a virtualization-based context switch. This is simply a
* wrapper around rcu_note_context_switch(), which allows TINY_RCU
- * to save a few bytes.
+ * to save a few bytes. The caller must have disabled interrupts.
*/
static inline void rcu_virt_note_context_switch(int cpu)
{
@@ -97,6 +97,8 @@ void rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
+void rcu_irq_enter_irqson(void);
+void rcu_irq_exit_irqson(void);
void exit_rcu(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a43edea..4bae8ab3b893 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(void);
+extern void update_cpu_load_nohz(int active);
#else
-static inline void update_cpu_load_nohz(void) { }
+static inline void update_cpu_load_nohz(int active) { }
#endif
extern unsigned long get_parent_ip(unsigned long addr);
@@ -377,6 +377,7 @@ extern void scheduler_tick(void);
extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_LOCKUP_DETECTOR
+extern void touch_softlockup_watchdog_sched(void);
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
@@ -387,6 +388,9 @@ extern unsigned int softlockup_panic;
extern unsigned int hardlockup_panic;
void lockup_detector_init(void);
#else
+static inline void touch_softlockup_watchdog_sched(void)
+{
+}
static inline void touch_softlockup_watchdog(void)
{
}
@@ -1268,8 +1272,13 @@ struct sched_entity {
#endif
#ifdef CONFIG_SMP
- /* Per entity load average tracking */
- struct sched_avg avg;
+ /*
+ * Per entity load average tracking.
+ *
+ * Put into separate cache line so it does not
+ * collide with read-mostly values above.
+ */
+ struct sched_avg avg ____cacheline_aligned_in_smp;
#endif
};
@@ -1455,14 +1464,15 @@ struct task_struct {
/* Used for emulating ABI behavior of previous Linux versions */
unsigned int personality;
- unsigned in_execve:1; /* Tell the LSMs that the process is doing an
- * execve */
- unsigned in_iowait:1;
-
- /* Revert to default priority/policy when forking */
+ /* scheduler bits, serialized by scheduler locks */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
+ unsigned :0; /* force alignment to the next boundary */
+
+ /* unserialized, strictly 'current' */
+ unsigned in_execve:1; /* bit to tell LSMs we're in execve */
+ unsigned in_iowait:1;
#ifdef CONFIG_MEMCG
unsigned memcg_may_oom:1;
#endif
@@ -1519,11 +1529,14 @@ struct task_struct {
cputime_t gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- seqlock_t vtime_seqlock;
+ seqcount_t vtime_seqcount;
unsigned long long vtime_snap;
enum {
- VTIME_SLEEPING = 0,
+ /* Task is sleeping or running in a CPU with VTIME inactive */
+ VTIME_INACTIVE = 0,
+ /* Task runs in userspace in a CPU with VTIME active */
VTIME_USER,
+ /* Task runs in kernelspace in a CPU with VTIME active */
VTIME_SYS,
} vtime_snap_whence;
#endif
@@ -2002,7 +2015,8 @@ static inline int pid_alive(const struct task_struct *p)
}
/**
- * is_global_init - check if a task structure is init
+ * is_global_init - check if a task structure is init. Since init
+ * is free to have sub-threads we need to check tgid.
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
@@ -2011,7 +2025,7 @@ static inline int pid_alive(const struct task_struct *p)
*/
static inline int is_global_init(struct task_struct *tsk)
{
- return tsk->pid == 1;
+ return task_tgid_nr(tsk) == 1;
}
extern struct pid *cad_pid;
diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index efa931c5cef1..411b52e424e1 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -10,11 +10,17 @@
#ifdef CONFIG_GENERIC_SCHED_CLOCK
extern void sched_clock_postinit(void);
-#else
-static inline void sched_clock_postinit(void) { }
-#endif
extern void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate);
+#else
+static inline void sched_clock_postinit(void) { }
+
+static inline void sched_clock_register(u64 (*read)(void), int bits,
+ unsigned long rate)
+{
+ ;
+}
+#endif
#endif
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0e1b1540597a..3cc9632dcc2a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -29,7 +29,7 @@ struct cpu_stop_work {
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
-void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
@@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work)
preempt_enable();
}
-static inline void stop_one_cpu_nowait(unsigned int cpu,
+static inline bool stop_one_cpu_nowait(unsigned int cpu,
cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
@@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu,
work_buf->fn = fn;
work_buf->arg = arg;
schedule_work(&work_buf->work);
+ return true;
}
+
+ return false;
}
static inline int stop_cpus(const struct cpumask *cpumask,
diff --git a/include/linux/time.h b/include/linux/time.h
index beebe3a02d43..297f09f23896 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,32 @@ static inline bool timeval_valid(const struct timeval *tv)
extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
+/*
+ * Validates if a timespec/timeval used to inject a time offset is valid.
+ * Offsets can be postive or negative. The value of the timeval/timespec
+ * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must
+ * always be non-negative.
+ */
+static inline bool timeval_inject_offset_valid(const struct timeval *tv)
+{
+ /* We don't check the tv_sec as it can be positive or negative */
+
+ /* Can't have more microseconds then a second */
+ if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
+ return false;
+ return true;
+}
+
+static inline bool timespec_inject_offset_valid(const struct timespec *ts)
+{
+ /* We don't check the tv_sec as it can be positive or negative */
+
+ /* Can't have more nanoseconds then a second */
+ if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
+ return false;
+ return true;
+}
+
#define CURRENT_TIME (current_kernel_time())
#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
new file mode 100644
index 000000000000..e1ee97c713bf
--- /dev/null
+++ b/include/linux/tracepoint-defs.h
@@ -0,0 +1,27 @@
+#ifndef TRACEPOINT_DEFS_H
+#define TRACEPOINT_DEFS_H 1
+
+/*
+ * File can be included directly by headers who only want to access
+ * tracepoint->key to guard out of line trace calls. Otherwise
+ * linux/tracepoint.h should be used.
+ */
+
+#include <linux/atomic.h>
+#include <linux/static_key.h>
+
+struct tracepoint_func {
+ void *func;
+ void *data;
+ int prio;
+};
+
+struct tracepoint {
+ const char *name; /* Tracepoint name */
+ struct static_key key;
+ void (*regfunc)(void);
+ void (*unregfunc)(void);
+ struct tracepoint_func __rcu *funcs;
+};
+
+#endif
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 696a339c592c..78e8397a1800 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,26 +17,12 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
-#include <linux/static_key.h>
+#include <linux/tracepoint-defs.h>
struct module;
struct tracepoint;
struct notifier_block;
-struct tracepoint_func {
- void *func;
- void *data;
- int prio;
-};
-
-struct tracepoint {
- const char *name; /* Tracepoint name */
- struct static_key key;
- void (*regfunc)(void);
- void (*unregfunc)(void);
- struct tracepoint_func __rcu *funcs;
-};
-
struct trace_enum_map {
const char *system;
const char *enum_string;
@@ -171,8 +157,8 @@ extern void syscall_unregfunc(void);
TP_PROTO(data_proto), \
TP_ARGS(data_args), \
TP_CONDITION(cond), \
- rcu_irq_enter(), \
- rcu_irq_exit()); \
+ rcu_irq_enter_irqson(), \
+ rcu_irq_exit_irqson()); \
}
#else
#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index c5165fd256f9..fa2196990f84 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,16 +10,27 @@
struct task_struct;
/*
- * vtime_accounting_enabled() definitions/declarations
+ * vtime_accounting_cpu_enabled() definitions/declarations
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
+static inline bool vtime_accounting_cpu_enabled(void) { return true; }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+/*
+ * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
+ * in that case and compute the tickless cputime.
+ * For now vtime state is tied to context tracking. We might want to decouple
+ * those later if necessary.
+ */
static inline bool vtime_accounting_enabled(void)
{
- if (context_tracking_is_enabled()) {
+ return context_tracking_is_enabled();
+}
+
+static inline bool vtime_accounting_cpu_enabled(void)
+{
+ if (vtime_accounting_enabled()) {
if (context_tracking_cpu_is_enabled())
return true;
}
@@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-static inline bool vtime_accounting_enabled(void) { return false; }
+static inline bool vtime_accounting_cpu_enabled(void) { return false; }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_common_task_switch(struct task_struct *prev);
static inline void vtime_task_switch(struct task_struct *prev)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_common_task_switch(prev);
}
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
@@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
extern void vtime_common_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_enter(struct task_struct *tsk)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_common_account_irq_enter(tsk);
}
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
vtime_gen_account_irq_exit(tsk);
}
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 513b36f04dfd..d2f4ec7dba7c 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
q->func = func;
}
+/**
+ * waitqueue_active -- locklessly test for waiters on the queue
+ * @q: the waitqueue to test for waiters
+ *
+ * returns true if the wait list is not empty
+ *
+ * NOTE: this function is lockless and requires care, incorrect usage _will_
+ * lead to sporadic and non-obvious failure.
+ *
+ * Use either while holding wait_queue_head_t::lock or when used for wakeups
+ * with an extra smp_mb() like:
+ *
+ * CPU0 - waker CPU1 - waiter
+ *
+ * for (;;) {
+ * @cond = true; prepare_to_wait(&wq, &wait, state);
+ * smp_mb(); // smp_mb() from set_current_state()
+ * if (waitqueue_active(wq)) if (@cond)
+ * wake_up(wq); break;
+ * schedule();
+ * }
+ * finish_wait(&wq, &wait);
+ *
+ * Because without the explicit smp_mb() it's possible for the
+ * waitqueue_active() load to get hoisted over the @cond store such that we'll
+ * observe an empty wait list while the waiter might not observe @cond.
+ *
+ * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
+ * which (when the lock is uncontended) are of roughly equal cost.
+ */
static inline int waitqueue_active(wait_queue_head_t *q)
{
return !list_empty(&q->task_list);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0197358f1e81..0e32bc71245e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -618,4 +618,10 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
{ return 0; }
#endif /* CONFIG_SYSFS */
+#ifdef CONFIG_WQ_WATCHDOG
+void wq_watchdog_touch(int cpu);
+#else /* CONFIG_WQ_WATCHDOG */
+static inline void wq_watchdog_touch(int cpu) { }
+#endif /* CONFIG_WQ_WATCHDOG */
+
#endif
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 89474b9d260c..4457541de3c9 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -19,12 +19,16 @@
struct inode;
struct dentry;
+/*
+ * struct xattr_handler: When @name is set, match attributes with exactly that
+ * name. When @prefix is set instead, match attributes with that prefix and
+ * with a non-empty suffix.
+ */
struct xattr_handler {
+ const char *name;
const char *prefix;
int flags; /* fs private flags */
- size_t (*list)(const struct xattr_handler *, struct dentry *dentry,
- char *list, size_t list_size, const char *name,
- size_t name_len);
+ bool (*list)(struct dentry *dentry);
int (*get)(const struct xattr_handler *, struct dentry *dentry,
const char *name, void *buffer, size_t size);
int (*set)(const struct xattr_handler *, struct dentry *dentry,
@@ -53,8 +57,11 @@ int generic_setxattr(struct dentry *dentry, const char *name, const void *value,
int generic_removexattr(struct dentry *dentry, const char *name);
ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name,
char **xattr_value, size_t size, gfp_t flags);
-int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
- const char *value, size_t size, gfp_t flags);
+
+static inline const char *xattr_prefix(const struct xattr_handler *handler)
+{
+ return handler->prefix ?: handler->name;
+}
struct simple_xattrs {
struct list_head head;
@@ -95,8 +102,7 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
const void *value, size_t size, int flags);
-int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name);
-ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
+ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer,
size_t size);
void simple_xattr_list_add(struct simple_xattrs *xattrs,
struct simple_xattr *new_xattr);
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 774d85b2d5d9..5689a0c749f7 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -29,7 +29,7 @@ struct l3mdev_ops {
/* IPv4 ops */
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
const struct flowi4 *fl4);
- void (*l3mdev_get_saddr)(struct net_device *dev,
+ int (*l3mdev_get_saddr)(struct net_device *dev,
struct flowi4 *fl4);
/* IPv6 ops */
@@ -112,10 +112,11 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return rc;
}
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
- struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+ struct flowi4 *fl4)
{
struct net_device *dev;
+ int rc = 0;
if (ifindex) {
@@ -124,11 +125,13 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex,
dev = dev_get_by_index_rcu(net, ifindex);
if (dev && netif_is_l3_master(dev) &&
dev->l3mdev_ops->l3mdev_get_saddr) {
- dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
+ rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
}
rcu_read_unlock();
}
+
+ return rc;
}
static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev,
@@ -200,9 +203,10 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return false;
}
-static inline void l3mdev_get_saddr(struct net *net, int ifindex,
- struct flowi4 *fl4)
+static inline int l3mdev_get_saddr(struct net *net, int ifindex,
+ struct flowi4 *fl4)
{
+ return 0;
}
static inline
diff --git a/include/net/route.h b/include/net/route.h
index ee81307863d5..a3b9ef74a389 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -283,7 +283,12 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
sport, dport, sk);
if (!src && oif) {
- l3mdev_get_saddr(net, oif, fl4);
+ int rc;
+
+ rc = l3mdev_get_saddr(net, oif, fl4);
+ if (rc < 0)
+ return ERR_PTR(rc);
+
src = fl4->saddr;
}
if (!dst || !src) {
diff --git a/include/sound/soc.h b/include/sound/soc.h
index a8b4b9c8b1d2..fb955e69a78e 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1655,7 +1655,7 @@ extern const struct dev_pm_ops snd_soc_pm_ops;
/* Helper functions */
static inline void snd_soc_dapm_mutex_lock(struct snd_soc_dapm_context *dapm)
{
- mutex_lock(&dapm->card->dapm_mutex);
+ mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
}
static inline void snd_soc_dapm_mutex_unlock(struct snd_soc_dapm_context *dapm)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d801bb0d9f6d..1afe9623c1a7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -171,6 +171,9 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
+ PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
+ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -192,6 +195,9 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
diff --git a/init/main.c b/init/main.c
index 9e64d7097f1a..c6ebefafa496 100644
--- a/init/main.c
+++ b/init/main.c
@@ -943,6 +943,8 @@ static int __ref kernel_init(void *unused)
flush_delayed_fput();
+ rcu_end_inkernel_boot();
+
if (ramdisk_execute_command) {
ret = run_init_process(ramdisk_execute_command);
if (!ret)
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index d8560ee3bab7..9ad37b9e44a7 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -24,7 +24,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
-struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(context_tracking_enabled);
EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_PER_CPU(struct context_tracking, context_tracking);
@@ -191,7 +191,7 @@ void __init context_tracking_cpu_set(int cpu)
if (!per_cpu(context_tracking.active, cpu)) {
per_cpu(context_tracking.active, cpu) = true;
- static_key_slow_inc(&context_tracking_enabled);
+ static_branch_inc(&context_tracking_enabled);
}
if (initialized)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ef2d6ea10736..bf8244190d0f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -126,6 +126,37 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
return data.ret;
}
+static void event_function_call(struct perf_event *event,
+ int (*active)(void *),
+ void (*inactive)(void *),
+ void *data)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct task_struct *task = ctx->task;
+
+ if (!task) {
+ cpu_function_call(event->cpu, active, data);
+ return;
+ }
+
+again:
+ if (!task_function_call(task, active, data))
+ return;
+
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->is_active) {
+ /*
+ * Reload the task pointer, it might have been changed by
+ * a concurrent perf_event_context_sched_out().
+ */
+ task = ctx->task;
+ raw_spin_unlock_irq(&ctx->lock);
+ goto again;
+ }
+ inactive(data);
+ raw_spin_unlock_irq(&ctx->lock);
+}
+
#define EVENT_OWNER_KERNEL ((void *) -1)
static bool is_kernel_event(struct perf_event *event)
@@ -1629,6 +1660,17 @@ struct remove_event {
bool detach_group;
};
+static void ___perf_remove_from_context(void *info)
+{
+ struct remove_event *re = info;
+ struct perf_event *event = re->event;
+ struct perf_event_context *ctx = event->ctx;
+
+ if (re->detach_group)
+ perf_group_detach(event);
+ list_del_event(event, ctx);
+}
+
/*
* Cross CPU call to remove a performance event
*
@@ -1656,7 +1698,6 @@ static int __perf_remove_from_context(void *info)
return 0;
}
-
/*
* Remove the event from a task's (or a CPU's) list of events.
*
@@ -1673,7 +1714,6 @@ static int __perf_remove_from_context(void *info)
static void perf_remove_from_context(struct perf_event *event, bool detach_group)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
struct remove_event re = {
.event = event,
.detach_group = detach_group,
@@ -1681,44 +1721,8 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group
lockdep_assert_held(&ctx->mutex);
- if (!task) {
- /*
- * Per cpu events are removed via an smp call. The removal can
- * fail if the CPU is currently offline, but in that case we
- * already called __perf_remove_from_context from
- * perf_event_exit_cpu.
- */
- cpu_function_call(event->cpu, __perf_remove_from_context, &re);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_remove_from_context, &re))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
- /*
- * If we failed to find a running task, but find the context active now
- * that we've acquired the ctx->lock, retry.
- */
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since the task isn't running, its safe to remove the event, us
- * holding the ctx->lock ensures the task won't get scheduled in.
- */
- if (detach_group)
- perf_group_detach(event);
- list_del_event(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_remove_from_context,
+ ___perf_remove_from_context, &re);
}
/*
@@ -1762,6 +1766,20 @@ int __perf_event_disable(void *info)
return 0;
}
+void ___perf_event_disable(void *info)
+{
+ struct perf_event *event = info;
+
+ /*
+ * Since we have the lock this context can't be scheduled
+ * in, so we can change the state safely.
+ */
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ update_group_times(event);
+ event->state = PERF_EVENT_STATE_OFF;
+ }
+}
+
/*
* Disable a event.
*
@@ -1778,43 +1796,16 @@ int __perf_event_disable(void *info)
static void _perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
-
- if (!task) {
- /*
- * Disable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_disable, event);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_event_disable, event))
- return;
raw_spin_lock_irq(&ctx->lock);
- /*
- * If the event is still active, we need to retry the cross-call.
- */
- if (event->state == PERF_EVENT_STATE_ACTIVE) {
+ if (event->state <= PERF_EVENT_STATE_OFF) {
raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
- update_group_times(event);
- event->state = PERF_EVENT_STATE_OFF;
+ return;
}
raw_spin_unlock_irq(&ctx->lock);
+
+ event_function_call(event, __perf_event_disable,
+ ___perf_event_disable, event);
}
/*
@@ -2067,6 +2058,18 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
}
+static void ___perf_install_in_context(void *info)
+{
+ struct perf_event *event = info;
+ struct perf_event_context *ctx = event->ctx;
+
+ /*
+ * Since the task isn't running, its safe to add the event, us holding
+ * the ctx->lock ensures the task won't get scheduled in.
+ */
+ add_event_to_ctx(event, ctx);
+}
+
/*
* Cross CPU call to install and enable a performance event
*
@@ -2143,48 +2146,14 @@ perf_install_in_context(struct perf_event_context *ctx,
struct perf_event *event,
int cpu)
{
- struct task_struct *task = ctx->task;
-
lockdep_assert_held(&ctx->mutex);
event->ctx = ctx;
if (event->cpu != -1)
event->cpu = cpu;
- if (!task) {
- /*
- * Per cpu events are installed via an smp call and
- * the install is always successful.
- */
- cpu_function_call(cpu, __perf_install_in_context, event);
- return;
- }
-
-retry:
- if (!task_function_call(task, __perf_install_in_context, event))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
- /*
- * If we failed to find a running task, but find the context active now
- * that we've acquired the ctx->lock, retry.
- */
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- /*
- * Reload the task pointer, it might have been changed by
- * a concurrent perf_event_context_sched_out().
- */
- task = ctx->task;
- goto retry;
- }
-
- /*
- * Since the task isn't running, its safe to add the event, us holding
- * the ctx->lock ensures the task won't get scheduled in.
- */
- add_event_to_ctx(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_install_in_context,
+ ___perf_install_in_context, event);
}
/*
@@ -2287,6 +2256,11 @@ unlock:
return 0;
}
+void ___perf_event_enable(void *info)
+{
+ __perf_event_mark_enabled((struct perf_event *)info);
+}
+
/*
* Enable a event.
*
@@ -2299,58 +2273,26 @@ unlock:
static void _perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
- struct task_struct *task = ctx->task;
- if (!task) {
- /*
- * Enable the event on the cpu that it's on
- */
- cpu_function_call(event->cpu, __perf_event_enable, event);
+ raw_spin_lock_irq(&ctx->lock);
+ if (event->state >= PERF_EVENT_STATE_INACTIVE) {
+ raw_spin_unlock_irq(&ctx->lock);
return;
}
- raw_spin_lock_irq(&ctx->lock);
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- goto out;
-
/*
* If the event is in error state, clear that first.
- * That way, if we see the event in error state below, we
- * know that it has gone back into error state, as distinct
- * from the task having been scheduled away before the
- * cross-call arrived.
+ *
+ * That way, if we see the event in error state below, we know that it
+ * has gone back into error state, as distinct from the task having
+ * been scheduled away before the cross-call arrived.
*/
if (event->state == PERF_EVENT_STATE_ERROR)
event->state = PERF_EVENT_STATE_OFF;
-
-retry:
- if (!ctx->is_active) {
- __perf_event_mark_enabled(event);
- goto out;
- }
-
raw_spin_unlock_irq(&ctx->lock);
- if (!task_function_call(task, __perf_event_enable, event))
- return;
-
- raw_spin_lock_irq(&ctx->lock);
-
- /*
- * If the context is active and the event is still off,
- * we need to retry the cross-call.
- */
- if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
- /*
- * task could have been flipped by a concurrent
- * perf_event_context_sched_out()
- */
- task = ctx->task;
- goto retry;
- }
-
-out:
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_event_enable,
+ ___perf_event_enable, event);
}
/*
@@ -3154,15 +3096,16 @@ static int event_enable_on_exec(struct perf_event *event,
* Enable all of a task's events that have been marked enable-on-exec.
* This expects task == current.
*/
-static void perf_event_enable_on_exec(struct perf_event_context *ctx)
+static void perf_event_enable_on_exec(int ctxn)
{
- struct perf_event_context *clone_ctx = NULL;
+ struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_event *event;
unsigned long flags;
int enabled = 0;
int ret;
local_irq_save(flags);
+ ctx = current->perf_event_ctxp[ctxn];
if (!ctx || !ctx->nr_events)
goto out;
@@ -3205,17 +3148,11 @@ out:
void perf_event_exec(void)
{
- struct perf_event_context *ctx;
int ctxn;
rcu_read_lock();
- for_each_task_context_nr(ctxn) {
- ctx = current->perf_event_ctxp[ctxn];
- if (!ctx)
- continue;
-
- perf_event_enable_on_exec(ctx);
- }
+ for_each_task_context_nr(ctxn)
+ perf_event_enable_on_exec(ctxn);
rcu_read_unlock();
}
@@ -4154,6 +4091,22 @@ struct period_event {
u64 value;
};
+static void ___perf_event_period(void *info)
+{
+ struct period_event *pe = info;
+ struct perf_event *event = pe->event;
+ u64 value = pe->value;
+
+ if (event->attr.freq) {
+ event->attr.sample_freq = value;
+ } else {
+ event->attr.sample_period = value;
+ event->hw.sample_period = value;
+ }
+
+ local64_set(&event->hw.period_left, 0);
+}
+
static int __perf_event_period(void *info)
{
struct period_event *pe = info;
@@ -4190,8 +4143,6 @@ static int __perf_event_period(void *info)
static int perf_event_period(struct perf_event *event, u64 __user *arg)
{
struct period_event pe = { .event = event, };
- struct perf_event_context *ctx = event->ctx;
- struct task_struct *task;
u64 value;
if (!is_sampling_event(event))
@@ -4206,34 +4157,10 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
return -EINVAL;
- task = ctx->task;
pe.value = value;
- if (!task) {
- cpu_function_call(event->cpu, __perf_event_period, &pe);
- return 0;
- }
-
-retry:
- if (!task_function_call(task, __perf_event_period, &pe))
- return 0;
-
- raw_spin_lock_irq(&ctx->lock);
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- task = ctx->task;
- goto retry;
- }
-
- if (event->attr.freq) {
- event->attr.sample_freq = value;
- } else {
- event->attr.sample_period = value;
- event->hw.sample_period = value;
- }
-
- local64_set(&event->hw.period_left, 0);
- raw_spin_unlock_irq(&ctx->lock);
+ event_function_call(event, __perf_event_period,
+ ___perf_event_period, &pe);
return 0;
}
@@ -6493,9 +6420,6 @@ struct swevent_htable {
/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
-
- /* Keeps track of cpu being initialized/exited */
- bool online;
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -6753,14 +6677,8 @@ static int perf_swevent_add(struct perf_event *event, int flags)
hwc->state = !(flags & PERF_EF_START);
head = find_swevent_head(swhash, event);
- if (!head) {
- /*
- * We can race with cpu hotplug code. Do not
- * WARN if the cpu just got unplugged.
- */
- WARN_ON_ONCE(swhash->online);
+ if (WARN_ON_ONCE(!head))
return -EINVAL;
- }
hlist_add_head_rcu(&event->hlist_entry, head);
perf_event_update_userpage(event);
@@ -6828,7 +6746,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
-
if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
struct swevent_hlist *hlist;
@@ -9291,7 +9208,6 @@ static void perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- swhash->online = true;
if (swhash->hlist_refcount > 0) {
struct swevent_hlist *hlist;
@@ -9333,14 +9249,7 @@ static void perf_event_exit_cpu_context(int cpu)
static void perf_event_exit_cpu(int cpu)
{
- struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
-
perf_event_exit_cpu_context(cpu);
-
- mutex_lock(&swhash->hlist_mutex);
- swhash->online = false;
- swevent_hlist_release(swhash);
- mutex_unlock(&swhash->hlist_mutex);
}
#else
static inline void perf_event_exit_cpu(int cpu) { }
diff --git a/kernel/fork.c b/kernel/fork.c
index fce002ee3ddf..291b08cc817b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -380,6 +380,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
#endif
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
+ tsk->wake_q.next = NULL;
account_kernel_stack(ti, 1);
@@ -1348,9 +1349,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- seqlock_init(&p->vtime_seqlock);
+ seqcount_init(&p->vtime_seqcount);
p->vtime_snap = 0;
- p->vtime_snap_whence = VTIME_SLEEPING;
+ p->vtime_snap_whence = VTIME_INACTIVE;
#endif
#if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/futex.c b/kernel/futex.c
index 684d7549825a..8a310e240cda 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -725,9 +725,12 @@ static struct futex_pi_state * alloc_pi_state(void)
}
/*
+ * Drops a reference to the pi_state object and frees or caches it
+ * when the last reference is gone.
+ *
* Must be called with the hb lock held.
*/
-static void free_pi_state(struct futex_pi_state *pi_state)
+static void put_pi_state(struct futex_pi_state *pi_state)
{
if (!pi_state)
return;
@@ -1706,31 +1709,35 @@ retry_private:
* exist yet, look it up one more time to ensure we have a
* reference to it. If the lock was taken, ret contains the
* vpid of the top waiter task.
+ * If the lock was not taken, we have pi_state and an initial
+ * refcount on it. In case of an error we have nothing.
*/
if (ret > 0) {
WARN_ON(pi_state);
drop_count++;
task_count++;
/*
- * If we acquired the lock, then the user
- * space value of uaddr2 should be vpid. It
- * cannot be changed by the top waiter as it
- * is blocked on hb2 lock if it tries to do
- * so. If something fiddled with it behind our
- * back the pi state lookup might unearth
- * it. So we rather use the known value than
- * rereading and handing potential crap to
- * lookup_pi_state.
+ * If we acquired the lock, then the user space value
+ * of uaddr2 should be vpid. It cannot be changed by
+ * the top waiter as it is blocked on hb2 lock if it
+ * tries to do so. If something fiddled with it behind
+ * our back the pi state lookup might unearth it. So
+ * we rather use the known value than rereading and
+ * handing potential crap to lookup_pi_state.
+ *
+ * If that call succeeds then we have pi_state and an
+ * initial refcount on it.
*/
ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
}
switch (ret) {
case 0:
+ /* We hold a reference on the pi state. */
break;
+
+ /* If the above failed, then pi_state is NULL */
case -EFAULT:
- free_pi_state(pi_state);
- pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1746,8 +1753,6 @@ retry_private:
* exit to complete.
* - The user space value changed.
*/
- free_pi_state(pi_state);
- pi_state = NULL;
double_unlock_hb(hb1, hb2);
hb_waiters_dec(hb2);
put_futex_key(&key2);
@@ -1801,30 +1806,58 @@ retry_private:
* of requeue_pi if we couldn't acquire the lock atomically.
*/
if (requeue_pi) {
- /* Prepare the waiter to take the rt_mutex. */
+ /*
+ * Prepare the waiter to take the rt_mutex. Take a
+ * refcount on the pi_state and store the pointer in
+ * the futex_q object of the waiter.
+ */
atomic_inc(&pi_state->refcount);
this->pi_state = pi_state;
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
this->rt_waiter,
this->task);
if (ret == 1) {
- /* We got the lock. */
+ /*
+ * We got the lock. We do neither drop the
+ * refcount on pi_state nor clear
+ * this->pi_state because the waiter needs the
+ * pi_state for cleaning up the user space
+ * value. It will drop the refcount after
+ * doing so.
+ */
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
} else if (ret) {
- /* -EDEADLK */
+ /*
+ * rt_mutex_start_proxy_lock() detected a
+ * potential deadlock when we tried to queue
+ * that waiter. Drop the pi_state reference
+ * which we took above and remove the pointer
+ * to the state from the waiters futex_q
+ * object.
+ */
this->pi_state = NULL;
- free_pi_state(pi_state);
- goto out_unlock;
+ put_pi_state(pi_state);
+ /*
+ * We stop queueing more waiters and let user
+ * space deal with the mess.
+ */
+ break;
}
}
requeue_futex(this, hb1, hb2, &key2);
drop_count++;
}
+ /*
+ * We took an extra initial reference to the pi_state either
+ * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
+ * need to drop it here again.
+ */
+ put_pi_state(pi_state);
+
out_unlock:
- free_pi_state(pi_state);
double_unlock_hb(hb1, hb2);
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
@@ -1973,7 +2006,7 @@ static void unqueue_me_pi(struct futex_q *q)
__unqueue_futex(q);
BUG_ON(!q->pi_state);
- free_pi_state(q->pi_state);
+ put_pi_state(q->pi_state);
q->pi_state = NULL;
spin_unlock(q->lock_ptr);
@@ -2755,6 +2788,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
+ /*
+ * Drop the reference to the pi state which
+ * the requeue_pi() code acquired for us.
+ */
+ put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
}
} else {
@@ -3046,7 +3084,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
if (op & FUTEX_CLOCK_REALTIME) {
flags |= FLAGS_CLOCKRT;
- if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+ if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
+ cmd != FUTEX_WAIT_REQUEUE_PI)
return -ENOSYS;
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 15206453b12a..5797909f4e5b 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -338,7 +338,6 @@ void handle_nested_irq(unsigned int irq)
raw_spin_lock_irq(&desc->lock);
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
@@ -346,6 +345,7 @@ void handle_nested_irq(unsigned int irq)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock_irq(&desc->lock);
@@ -412,13 +412,13 @@ void handle_simple_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
out_unlock:
@@ -462,7 +462,6 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -473,6 +472,7 @@ void handle_level_irq(struct irq_desc *desc)
goto out_unlock;
}
+ kstat_incr_irqs_this_cpu(desc);
handle_irq_event(desc);
cond_unmask_irq(desc);
@@ -532,7 +532,6 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
- kstat_incr_irqs_this_cpu(desc);
/*
* If its disabled or no action available
@@ -544,6 +543,7 @@ void handle_fasteoi_irq(struct irq_desc *desc)
goto out;
}
+ kstat_incr_irqs_this_cpu(desc);
if (desc->istate & IRQS_ONESHOT)
mask_irq(desc);
@@ -950,6 +950,7 @@ void irq_chip_ack_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_ack(data);
}
+EXPORT_SYMBOL_GPL(irq_chip_ack_parent);
/**
* irq_chip_mask_parent - Mask the parent interrupt
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 239e2ae2c947..0409da0bcc33 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -159,6 +159,7 @@ static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ init_rcu_head(&desc->rcu);
desc_set_defaults(irq, desc, node, owner);
@@ -171,6 +172,15 @@ err_desc:
return NULL;
}
+static void delayed_free_desc(struct rcu_head *rhp)
+{
+ struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+
+ free_masks(desc);
+ free_percpu(desc->kstat_irqs);
+ kfree(desc);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -187,9 +197,12 @@ static void free_desc(unsigned int irq)
delete_irq_desc(irq);
mutex_unlock(&sparse_irq_lock);
- free_masks(desc);
- free_percpu(desc->kstat_irqs);
- kfree(desc);
+ /*
+ * We free the descriptor, masks and stat fields via RCU. That
+ * allows demultiplex interrupts to do rcu based management of
+ * the child interrupts.
+ */
+ call_rcu(&desc->rcu, delayed_free_desc);
}
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 22aa9612ef7c..8cf95de1ab3f 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -60,6 +60,7 @@ struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
fwid->fwnode.type = FWNODE_IRQCHIP;
return &fwid->fwnode;
}
+EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode);
/**
* irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
@@ -70,13 +71,14 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
{
struct irqchip_fwid *fwid;
- if (WARN_ON(fwnode->type != FWNODE_IRQCHIP))
+ if (WARN_ON(!is_fwnode_irqchip(fwnode)))
return;
fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
kfree(fwid->name);
kfree(fwid);
}
+EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
/**
* __irq_domain_add() - Allocate a new irq_domain data structure
@@ -1013,6 +1015,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
return NULL;
}
+EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
/**
* irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain
@@ -1125,9 +1128,9 @@ static void irq_domain_free_irqs_recursive(struct irq_domain *domain,
}
}
-static int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
- unsigned int irq_base,
- unsigned int nr_irqs, void *arg)
+int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+ unsigned int irq_base,
+ unsigned int nr_irqs, void *arg)
{
int ret = 0;
struct irq_domain *parent = domain->parent;
@@ -1343,6 +1346,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
return (irq_data && irq_data->domain == domain) ? irq_data : NULL;
}
+EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
/**
* irq_domain_set_info - Set the complete data for a @virq in @domain
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0eebaeef317b..841187239adc 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1434,6 +1434,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!desc)
return NULL;
+ chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
/*
@@ -1447,7 +1448,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
if (!action) {
WARN(1, "Trying to free already-free IRQ %d\n", irq);
raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ chip_bus_sync_unlock(desc);
return NULL;
}
@@ -1475,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ chip_bus_sync_unlock(desc);
unregister_handler_proc(irq, action);
@@ -1553,9 +1555,7 @@ void free_irq(unsigned int irq, void *dev_id)
desc->affinity_notify = NULL;
#endif
- chip_bus_lock(desc);
kfree(__free_irq(irq, dev_id));
- chip_bus_sync_unlock(desc);
}
EXPORT_SYMBOL(free_irq);
@@ -1743,6 +1743,31 @@ out:
}
EXPORT_SYMBOL_GPL(enable_percpu_irq);
+/**
+ * irq_percpu_is_enabled - Check whether the per cpu irq is enabled
+ * @irq: Linux irq number to check for
+ *
+ * Must be called from a non migratable context. Returns the enable
+ * state of a per cpu interrupt on the current cpu.
+ */
+bool irq_percpu_is_enabled(unsigned int irq)
+{
+ unsigned int cpu = smp_processor_id();
+ struct irq_desc *desc;
+ unsigned long flags;
+ bool is_enabled;
+
+ desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+ if (!desc)
+ return false;
+
+ is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
+ irq_put_desc_unlock(desc, flags);
+
+ return is_enabled;
+}
+EXPORT_SYMBOL_GPL(irq_percpu_is_enabled);
+
void disable_percpu_irq(unsigned int irq)
{
unsigned int cpu = smp_processor_id();
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 6b0c0b74a2a1..15b249e7c673 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -252,6 +252,60 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
&msi_domain_ops, info);
}
+int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+ struct msi_domain_ops *ops = info->ops;
+ int ret;
+
+ ret = ops->msi_check(domain, info, dev);
+ if (ret == 0)
+ ret = ops->msi_prepare(domain, dev, nvec, arg);
+
+ return ret;
+}
+
+int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
+ int virq, int nvec, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+ struct msi_domain_ops *ops = info->ops;
+ struct msi_desc *desc;
+ int ret = 0;
+
+ for_each_msi_entry(desc, dev) {
+ /* Don't even try the multi-MSI brain damage. */
+ if (WARN_ON(!desc->irq || desc->nvec_used != 1)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
+
+ ops->set_desc(arg, desc);
+ /* Assumes the domain mutex is held! */
+ ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg);
+ if (ret)
+ break;
+
+ irq_set_msi_desc_off(virq, 0, desc);
+ }
+
+ if (ret) {
+ /* Mop up the damage */
+ for_each_msi_entry(desc, dev) {
+ if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
+ continue;
+
+ irq_domain_free_irqs_common(domain, desc->irq, 1);
+ }
+ }
+
+ return ret;
+}
+
/**
* msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
* @domain: The domain to allocate from
@@ -270,9 +324,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
struct msi_desc *desc;
int i, ret, virq = -1;
- ret = ops->msi_check(domain, info, dev);
- if (ret == 0)
- ret = ops->msi_prepare(domain, dev, nvec, &arg);
+ ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
if (ret)
return ret;
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 11b64a63c0f8..c823f3001e12 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -853,7 +853,12 @@ struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;
-void crash_kexec(struct pt_regs *regs)
+/*
+ * No panic_cpu check version of crash_kexec(). This function is called
+ * only when panic_cpu holds the current CPU number; this is the only CPU
+ * which processes crash_kexec routines.
+ */
+void __crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
@@ -876,6 +881,29 @@ void crash_kexec(struct pt_regs *regs)
}
}
+void crash_kexec(struct pt_regs *regs)
+{
+ int old_cpu, this_cpu;
+
+ /*
+ * Only one CPU is allowed to execute the crash_kexec() code as with
+ * panic(). Otherwise parallel calls of panic() and crash_kexec()
+ * may stop each other. To exclude them, we use panic_cpu here too.
+ */
+ this_cpu = raw_smp_processor_id();
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
+ if (old_cpu == PANIC_CPU_INVALID) {
+ /* This is the 1st CPU which comes here, so go ahead. */
+ __crash_kexec(regs);
+
+ /*
+ * Reset panic_cpu to allow another panic()/crash_kexec()
+ * call.
+ */
+ atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+ }
+}
+
size_t crash_get_memory_size(void)
{
size_t size = 0;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e83b26464061..152da4a48867 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -20,7 +20,7 @@
#include <linux/capability.h>
#include <linux/compiler.h>
-#include <linux/rcupdate.h> /* rcu_expedited */
+#include <linux/rcupdate.h> /* rcu_expedited and rcu_normal */
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
@@ -144,11 +144,12 @@ static ssize_t fscaps_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(fscaps);
+#ifndef CONFIG_TINY_RCU
int rcu_expedited;
static ssize_t rcu_expedited_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", rcu_expedited);
+ return sprintf(buf, "%d\n", READ_ONCE(rcu_expedited));
}
static ssize_t rcu_expedited_store(struct kobject *kobj,
struct kobj_attribute *attr,
@@ -161,6 +162,24 @@ static ssize_t rcu_expedited_store(struct kobject *kobj,
}
KERNEL_ATTR_RW(rcu_expedited);
+int rcu_normal;
+static ssize_t rcu_normal_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", READ_ONCE(rcu_normal));
+}
+static ssize_t rcu_normal_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (kstrtoint(buf, 0, &rcu_normal))
+ return -EINVAL;
+
+ return count;
+}
+KERNEL_ATTR_RW(rcu_normal);
+#endif /* #ifndef CONFIG_TINY_RCU */
+
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
@@ -202,7 +221,10 @@ static struct attribute * kernel_attrs[] = {
&kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
+#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,
+ &rcu_normal_attr.attr,
+#endif
NULL
};
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 87e9ce6a63c5..393d1874b9e0 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -14,8 +14,9 @@
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
* (C) Copyright 2013-2014 Red Hat, Inc.
* (C) Copyright 2015 Intel Corp.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
*
- * Authors: Waiman Long <waiman.long@hp.com>
+ * Authors: Waiman Long <waiman.long@hpe.com>
* Peter Zijlstra <peterz@infradead.org>
*/
@@ -176,7 +177,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
{
struct __qspinlock *l = (void *)lock;
- return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+ /*
+ * Use release semantics to make sure that the MCS node is properly
+ * initialized before changing the tail code.
+ */
+ return (u32)xchg_release(&l->tail,
+ tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
}
#else /* _Q_PENDING_BITS == 8 */
@@ -208,7 +214,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
for (;;) {
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
- old = atomic_cmpxchg(&lock->val, val, new);
+ /*
+ * Use release semantics to make sure that the MCS node is
+ * properly initialized before changing the tail code.
+ */
+ old = atomic_cmpxchg_release(&lock->val, val, new);
if (old == val)
break;
@@ -238,18 +248,20 @@ static __always_inline void set_locked(struct qspinlock *lock)
*/
static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+ struct mcs_spinlock *prev) { }
static __always_inline void __pv_kick_node(struct qspinlock *lock,
struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_head(struct qspinlock *lock,
- struct mcs_spinlock *node) { }
+static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
+ struct mcs_spinlock *node)
+ { return 0; }
#define pv_enabled() false
#define pv_init_node __pv_init_node
#define pv_wait_node __pv_wait_node
#define pv_kick_node __pv_kick_node
-#define pv_wait_head __pv_wait_head
+#define pv_wait_head_or_lock __pv_wait_head_or_lock
#ifdef CONFIG_PARAVIRT_SPINLOCKS
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
@@ -319,7 +331,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
if (val == new)
new |= _Q_PENDING_VAL;
- old = atomic_cmpxchg(&lock->val, val, new);
+ /*
+ * Acquire semantic is required here as the function may
+ * return immediately if the lock was free.
+ */
+ old = atomic_cmpxchg_acquire(&lock->val, val, new);
if (old == val)
break;
@@ -382,6 +398,7 @@ queue:
* p,*,* -> n,*,*
*/
old = xchg_tail(lock, tail);
+ next = NULL;
/*
* if there was a previous node; link it and wait until reaching the
@@ -391,8 +408,18 @@ queue:
prev = decode_tail(old);
WRITE_ONCE(prev->next, node);
- pv_wait_node(node);
+ pv_wait_node(node, prev);
arch_mcs_spin_lock_contended(&node->locked);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We optimistically load
+ * the next pointer & prefetch the cacheline for writing
+ * to reduce latency in the upcoming MCS unlock operation.
+ */
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
}
/*
@@ -406,11 +433,22 @@ queue:
* sequentiality; this is because the set_locked() function below
* does not imply a full barrier.
*
+ * The PV pv_wait_head_or_lock function, if active, will acquire
+ * the lock and return a non-zero value. So we have to skip the
+ * smp_load_acquire() call. As the next PV queue head hasn't been
+ * designated yet, there is no way for the locked value to become
+ * _Q_SLOW_VAL. So both the set_locked() and the
+ * atomic_cmpxchg_relaxed() calls will be safe.
+ *
+ * If PV isn't active, 0 will be returned instead.
+ *
*/
- pv_wait_head(lock, node);
- while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
- cpu_relax();
+ if ((val = pv_wait_head_or_lock(lock, node)))
+ goto locked;
+ smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+
+locked:
/*
* claim the lock:
*
@@ -422,11 +460,17 @@ queue:
* to grab the lock.
*/
for (;;) {
- if (val != tail) {
+ /* In the PV case we might already have _Q_LOCKED_VAL set */
+ if ((val & _Q_TAIL_MASK) != tail) {
set_locked(lock);
break;
}
- old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+ /*
+ * The smp_load_acquire() call above has provided the necessary
+ * acquire semantics required for locking. At most two
+ * iterations of this loop may be ran.
+ */
+ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
if (old == val)
goto release; /* No contention */
@@ -434,10 +478,12 @@ queue:
}
/*
- * contended path; wait for next, release.
+ * contended path; wait for next if not observed yet, release.
*/
- while (!(next = READ_ONCE(node->next)))
- cpu_relax();
+ if (!next) {
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ }
arch_mcs_spin_unlock_contended(&next->locked);
pv_kick_node(lock, next);
@@ -462,7 +508,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
#undef pv_init_node
#undef pv_wait_node
#undef pv_kick_node
-#undef pv_wait_head
+#undef pv_wait_head_or_lock
#undef queued_spin_lock_slowpath
#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index f0450ff4829b..87bb235c3448 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -23,6 +23,20 @@
#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
/*
+ * Queue Node Adaptive Spinning
+ *
+ * A queue node vCPU will stop spinning if the vCPU in the previous node is
+ * not running. The one lock stealing attempt allowed at slowpath entry
+ * mitigates the slight slowdown for non-overcommitted guest with this
+ * aggressive wait-early mechanism.
+ *
+ * The status of the previous node will be checked at fixed interval
+ * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't
+ * pound on the cacheline of the previous node too heavily.
+ */
+#define PV_PREV_CHECK_MASK 0xff
+
+/*
* Queue node uses: vcpu_running & vcpu_halted.
* Queue head uses: vcpu_running & vcpu_hashed.
*/
@@ -41,6 +55,94 @@ struct pv_node {
};
/*
+ * By replacing the regular queued_spin_trylock() with the function below,
+ * it will be called once when a lock waiter enter the PV slowpath before
+ * being queued. By allowing one lock stealing attempt here when the pending
+ * bit is off, it helps to reduce the performance impact of lock waiter
+ * preemption without the drawback of lock starvation.
+ */
+#define queued_spin_trylock(l) pv_queued_spin_steal_lock(l)
+static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+ (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+}
+
+/*
+ * The pending bit is used by the queue head vCPU to indicate that it
+ * is actively spinning on the lock and no lock stealing is allowed.
+ */
+#if _Q_PENDING_BITS == 8
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ WRITE_ONCE(l->pending, 1);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ WRITE_ONCE(l->pending, 0);
+}
+
+/*
+ * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
+ * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock
+ * just to be sure that it will get it.
+ */
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+
+ return !READ_ONCE(l->locked) &&
+ (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL)
+ == _Q_PENDING_VAL);
+}
+#else /* _Q_PENDING_BITS == 8 */
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+ atomic_set_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ atomic_clear_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ for (;;) {
+ int old, new;
+
+ if (val & _Q_LOCKED_MASK)
+ break;
+
+ /*
+ * Try to clear pending bit & set locked bit
+ */
+ old = val;
+ new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+ val = atomic_cmpxchg(&lock->val, old, new);
+
+ if (val == old)
+ return 1;
+ }
+ return 0;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
* Lock and MCS node addresses hash table for fast lookup
*
* Hashing is done on a per-cacheline basis to minimize the need to access
@@ -100,10 +202,13 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
{
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
struct pv_hash_entry *he;
+ int hopcnt = 0;
for_each_hash_entry(he, offset, hash) {
+ hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
+ qstat_hop(hopcnt);
return &he->lock;
}
}
@@ -144,6 +249,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
}
/*
+ * Return true if when it is time to check the previous node which is not
+ * in a running state.
+ */
+static inline bool
+pv_wait_early(struct pv_node *prev, int loop)
+{
+
+ if ((loop & PV_PREV_CHECK_MASK) != 0)
+ return false;
+
+ return READ_ONCE(prev->state) != vcpu_running;
+}
+
+/*
* Initialize the PV part of the mcs_spinlock node.
*/
static void pv_init_node(struct mcs_spinlock *node)
@@ -161,15 +280,23 @@ static void pv_init_node(struct mcs_spinlock *node)
* pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
* behalf.
*/
-static void pv_wait_node(struct mcs_spinlock *node)
+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
{
struct pv_node *pn = (struct pv_node *)node;
+ struct pv_node *pp = (struct pv_node *)prev;
+ int waitcnt = 0;
int loop;
+ bool wait_early;
- for (;;) {
- for (loop = SPIN_THRESHOLD; loop; loop--) {
+ /* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
+ for (;; waitcnt++) {
+ for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
if (READ_ONCE(node->locked))
return;
+ if (pv_wait_early(pp, loop)) {
+ wait_early = true;
+ break;
+ }
cpu_relax();
}
@@ -184,12 +311,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
*/
smp_store_mb(pn->state, vcpu_halted);
- if (!READ_ONCE(node->locked))
+ if (!READ_ONCE(node->locked)) {
+ qstat_inc(qstat_pv_wait_node, true);
+ qstat_inc(qstat_pv_wait_again, waitcnt);
+ qstat_inc(qstat_pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
+ }
/*
- * If pv_kick_node() changed us to vcpu_hashed, retain that value
- * so that pv_wait_head() knows to not also try to hash this lock.
+ * If pv_kick_node() changed us to vcpu_hashed, retain that
+ * value so that pv_wait_head_or_lock() knows to not also try
+ * to hash this lock.
*/
cmpxchg(&pn->state, vcpu_halted, vcpu_running);
@@ -200,6 +332,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
+ qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
}
/*
@@ -212,8 +345,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
/*
* Called after setting next->locked = 1 when we're the lock owner.
*
- * Instead of waking the waiters stuck in pv_wait_node() advance their state such
- * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle.
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
*/
static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
{
@@ -242,14 +376,19 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
}
/*
- * Wait for l->locked to become clear; halt the vcpu after a short spin.
+ * Wait for l->locked to become clear and acquire the lock;
+ * halt the vcpu after a short spin.
* __pv_queued_spin_unlock() will wake us.
+ *
+ * The current value of the lock will be returned for additional processing.
*/
-static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
+static u32
+pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
{
struct pv_node *pn = (struct pv_node *)node;
struct __qspinlock *l = (void *)lock;
struct qspinlock **lp = NULL;
+ int waitcnt = 0;
int loop;
/*
@@ -259,12 +398,25 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
if (READ_ONCE(pn->state) == vcpu_hashed)
lp = (struct qspinlock **)1;
- for (;;) {
+ for (;; waitcnt++) {
+ /*
+ * Set correct vCPU state to be used by queue node wait-early
+ * mechanism.
+ */
+ WRITE_ONCE(pn->state, vcpu_running);
+
+ /*
+ * Set the pending bit in the active lock spinning loop to
+ * disable lock stealing before attempting to acquire the lock.
+ */
+ set_pending(lock);
for (loop = SPIN_THRESHOLD; loop; loop--) {
- if (!READ_ONCE(l->locked))
- return;
+ if (trylock_clear_pending(lock))
+ goto gotlock;
cpu_relax();
}
+ clear_pending(lock);
+
if (!lp) { /* ONCE */
lp = pv_hash(lock, pn);
@@ -280,51 +432,50 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
*
* Matches the smp_rmb() in __pv_queued_spin_unlock().
*/
- if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
+ if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
/*
- * The lock is free and _Q_SLOW_VAL has never
- * been set. Therefore we need to unhash before
- * getting the lock.
+ * The lock was free and now we own the lock.
+ * Change the lock value back to _Q_LOCKED_VAL
+ * and unhash the table.
*/
+ WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
WRITE_ONCE(*lp, NULL);
- return;
+ goto gotlock;
}
}
+ WRITE_ONCE(pn->state, vcpu_halted);
+ qstat_inc(qstat_pv_wait_head, true);
+ qstat_inc(qstat_pv_wait_again, waitcnt);
pv_wait(&l->locked, _Q_SLOW_VAL);
/*
* The unlocker should have freed the lock before kicking the
* CPU. So if the lock is still not free, it is a spurious
- * wakeup and so the vCPU should wait again after spinning for
- * a while.
+ * wakeup or another vCPU has stolen the lock. The current
+ * vCPU should spin again.
*/
+ qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
}
/*
- * Lock is unlocked now; the caller will acquire it without waiting.
- * As with pv_wait_node() we rely on the caller to do a load-acquire
- * for us.
+ * The cmpxchg() or xchg() call before coming here provides the
+ * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL
+ * here is to indicate to the compiler that the value will always
+ * be nozero to enable better code optimization.
*/
+gotlock:
+ return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
}
/*
- * PV version of the unlock function to be used in stead of
- * queued_spin_unlock().
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
*/
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
{
struct __qspinlock *l = (void *)lock;
struct pv_node *node;
- u8 locked;
-
- /*
- * We must not unlock if SLOW, because in that case we must first
- * unhash. Otherwise it would be possible to have multiple @lock
- * entries, which would be BAD.
- */
- locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
- if (likely(locked == _Q_LOCKED_VAL))
- return;
if (unlikely(locked != _Q_SLOW_VAL)) {
WARN(!debug_locks_silent,
@@ -338,7 +489,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* so we need a barrier to order the read of the node data in
* pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
*
- * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL.
+ * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
*/
smp_rmb();
@@ -361,14 +512,35 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
+ qstat_inc(qstat_pv_kick_unlock, true);
pv_kick(node->cpu);
}
+
/*
* Include the architecture specific callee-save thunk of the
* __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() near the top of the file to make sure
- * that the callee-save thunk and the real unlock function are close
- * to each other sharing consecutive instruction cachelines.
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
*/
#include <asm/qspinlock_paravirt.h>
+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+ struct __qspinlock *l = (void *)lock;
+ u8 locked;
+
+ /*
+ * We must not unlock if SLOW, because in that case we must first
+ * unhash. Otherwise it would be possible to have multiple @lock
+ * entries, which would be BAD.
+ */
+ locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ if (likely(locked == _Q_LOCKED_VAL))
+ return;
+
+ __pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
new file mode 100644
index 000000000000..640dcecdd1df
--- /dev/null
+++ b/kernel/locking/qspinlock_stat.h
@@ -0,0 +1,300 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+
+/*
+ * When queued spinlock statistical counters are enabled, the following
+ * debugfs files will be created for reporting the counter values:
+ *
+ * <debugfs>/qlockstat/
+ * pv_hash_hops - average # of hops per hashing operation
+ * pv_kick_unlock - # of vCPU kicks issued at unlock time
+ * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
+ * pv_latency_kick - average latency (ns) of vCPU kick operation
+ * pv_latency_wake - average latency (ns) from vCPU kick to wakeup
+ * pv_lock_stealing - # of lock stealing operations
+ * pv_spurious_wakeup - # of spurious wakeups
+ * pv_wait_again - # of vCPU wait's that happened after a vCPU kick
+ * pv_wait_early - # of early vCPU wait's
+ * pv_wait_head - # of vCPU wait's at the queue head
+ * pv_wait_node - # of vCPU wait's at a non-head queue node
+ *
+ * Writing to the "reset_counters" file will reset all the above counter
+ * values.
+ *
+ * These statistical counters are implemented as per-cpu variables which are
+ * summed and computed whenever the corresponding debugfs files are read. This
+ * minimizes added overhead making the counters usable even in a production
+ * environment.
+ *
+ * There may be slight difference between pv_kick_wake and pv_kick_unlock.
+ */
+enum qlock_stats {
+ qstat_pv_hash_hops,
+ qstat_pv_kick_unlock,
+ qstat_pv_kick_wake,
+ qstat_pv_latency_kick,
+ qstat_pv_latency_wake,
+ qstat_pv_lock_stealing,
+ qstat_pv_spurious_wakeup,
+ qstat_pv_wait_again,
+ qstat_pv_wait_early,
+ qstat_pv_wait_head,
+ qstat_pv_wait_node,
+ qstat_num, /* Total number of statistical counters */
+ qstat_reset_cnts = qstat_num,
+};
+
+#ifdef CONFIG_QUEUED_LOCK_STAT
+/*
+ * Collect pvqspinlock statistics
+ */
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+
+static const char * const qstat_names[qstat_num + 1] = {
+ [qstat_pv_hash_hops] = "pv_hash_hops",
+ [qstat_pv_kick_unlock] = "pv_kick_unlock",
+ [qstat_pv_kick_wake] = "pv_kick_wake",
+ [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
+ [qstat_pv_latency_kick] = "pv_latency_kick",
+ [qstat_pv_latency_wake] = "pv_latency_wake",
+ [qstat_pv_lock_stealing] = "pv_lock_stealing",
+ [qstat_pv_wait_again] = "pv_wait_again",
+ [qstat_pv_wait_early] = "pv_wait_early",
+ [qstat_pv_wait_head] = "pv_wait_head",
+ [qstat_pv_wait_node] = "pv_wait_node",
+ [qstat_reset_cnts] = "reset_counters",
+};
+
+/*
+ * Per-cpu counters
+ */
+static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
+static DEFINE_PER_CPU(u64, pv_kick_time);
+
+/*
+ * Function to read and return the qlock statistical counter values
+ *
+ * The following counters are handled specially:
+ * 1. qstat_pv_latency_kick
+ * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
+ * 2. qstat_pv_latency_wake
+ * Average wake latency (ns) = pv_latency_wake/pv_kick_wake
+ * 3. qstat_pv_hash_hops
+ * Average hops/hash = pv_hash_hops/pv_kick_unlock
+ */
+static ssize_t qstat_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[64];
+ int cpu, counter, len;
+ u64 stat = 0, kicks = 0;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ if (!file->f_inode) {
+ WARN_ON_ONCE(1);
+ return -EBADF;
+ }
+ counter = (long)(file->f_inode->i_private);
+
+ if (counter >= qstat_num)
+ return -EBADF;
+
+ for_each_possible_cpu(cpu) {
+ stat += per_cpu(qstats[counter], cpu);
+ /*
+ * Need to sum additional counter for some of them
+ */
+ switch (counter) {
+
+ case qstat_pv_latency_kick:
+ case qstat_pv_hash_hops:
+ kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
+ break;
+
+ case qstat_pv_latency_wake:
+ kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
+ break;
+ }
+ }
+
+ if (counter == qstat_pv_hash_hops) {
+ u64 frac;
+
+ frac = 100ULL * do_div(stat, kicks);
+ frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
+
+ /*
+ * Return a X.XX decimal number
+ */
+ len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
+ } else {
+ /*
+ * Round to the nearest ns
+ */
+ if ((counter == qstat_pv_latency_kick) ||
+ (counter == qstat_pv_latency_wake)) {
+ stat = 0;
+ if (kicks)
+ stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
+ }
+ len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
+ }
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+/*
+ * Function to handle write request
+ *
+ * When counter = reset_cnts, reset all the counter values.
+ * Since the counter updates aren't atomic, the resetting is done twice
+ * to make sure that the counters are very likely to be all cleared.
+ */
+static ssize_t qstat_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int cpu;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ if (!file->f_inode) {
+ WARN_ON_ONCE(1);
+ return -EBADF;
+ }
+ if ((long)(file->f_inode->i_private) != qstat_reset_cnts)
+ return count;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ unsigned long *ptr = per_cpu_ptr(qstats, cpu);
+
+ for (i = 0 ; i < qstat_num; i++)
+ WRITE_ONCE(ptr[i], 0);
+ for (i = 0 ; i < qstat_num; i++)
+ WRITE_ONCE(ptr[i], 0);
+ }
+ return count;
+}
+
+/*
+ * Debugfs data structures
+ */
+static const struct file_operations fops_qstat = {
+ .read = qstat_read,
+ .write = qstat_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * Initialize debugfs for the qspinlock statistical counters
+ */
+static int __init init_qspinlock_stat(void)
+{
+ struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
+ int i;
+
+ if (!d_qstat) {
+ pr_warn("Could not create 'qlockstat' debugfs directory\n");
+ return 0;
+ }
+
+ /*
+ * Create the debugfs files
+ *
+ * As reading from and writing to the stat files can be slow, only
+ * root is allowed to do the read/write to limit impact to system
+ * performance.
+ */
+ for (i = 0; i < qstat_num; i++)
+ debugfs_create_file(qstat_names[i], 0400, d_qstat,
+ (void *)(long)i, &fops_qstat);
+
+ debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
+ (void *)(long)qstat_reset_cnts, &fops_qstat);
+ return 0;
+}
+fs_initcall(init_qspinlock_stat);
+
+/*
+ * Increment the PV qspinlock statistical counters
+ */
+static inline void qstat_inc(enum qlock_stats stat, bool cond)
+{
+ if (cond)
+ this_cpu_inc(qstats[stat]);
+}
+
+/*
+ * PV hash hop count
+ */
+static inline void qstat_hop(int hopcnt)
+{
+ this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
+}
+
+/*
+ * Replacement function for pv_kick()
+ */
+static inline void __pv_kick(int cpu)
+{
+ u64 start = sched_clock();
+
+ per_cpu(pv_kick_time, cpu) = start;
+ pv_kick(cpu);
+ this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
+}
+
+/*
+ * Replacement function for pv_wait()
+ */
+static inline void __pv_wait(u8 *ptr, u8 val)
+{
+ u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
+
+ *pkick_time = 0;
+ pv_wait(ptr, val);
+ if (*pkick_time) {
+ this_cpu_add(qstats[qstat_pv_latency_wake],
+ sched_clock() - *pkick_time);
+ qstat_inc(qstat_pv_kick_wake, true);
+ }
+}
+
+#define pv_kick(c) __pv_kick(c)
+#define pv_wait(p, v) __pv_wait(p, v)
+
+/*
+ * PV unfair trylock count tracking function
+ */
+static inline int qstat_spin_steal_lock(struct qspinlock *lock)
+{
+ int ret = pv_queued_spin_steal_lock(lock);
+
+ qstat_inc(qstat_pv_lock_stealing, ret);
+ return ret;
+}
+#undef queued_spin_trylock
+#define queued_spin_trylock(l) qstat_spin_steal_lock(l)
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+
+static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
+static inline void qstat_hop(int hopcnt) { }
+
+#endif /* CONFIG_QUEUED_LOCK_STAT */
diff --git a/kernel/module.c b/kernel/module.c
index 8f051a106676..38c7bd5583ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3571,6 +3571,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
+ /*
+ * Ftrace needs to clean up what it initialized.
+ * This does nothing if ftrace_module_init() wasn't called,
+ * but it must be called outside of module_mutex.
+ */
+ ftrace_release_mod(mod);
/* Free lock-classes; relies on the preceding sync_rcu() */
lockdep_free_key_range(mod->module_core, mod->core_size);
diff --git a/kernel/panic.c b/kernel/panic.c
index 4b150bc0c6c1..b333380c6bb2 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -61,6 +61,17 @@ void __weak panic_smp_self_stop(void)
cpu_relax();
}
+/*
+ * Stop ourselves in NMI context if another CPU has already panicked. Arch code
+ * may override this to prepare for crash dumping, e.g. save regs info.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+ panic_smp_self_stop();
+}
+
+atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+
/**
* panic - halt the system
* @fmt: The text string to print
@@ -71,17 +82,17 @@ void __weak panic_smp_self_stop(void)
*/
void panic(const char *fmt, ...)
{
- static DEFINE_SPINLOCK(panic_lock);
static char buf[1024];
va_list args;
long i, i_next = 0;
int state = 0;
+ int old_cpu, this_cpu;
/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
* there is nothing to prevent an interrupt handler (that runs
- * after the panic_lock is acquired) from invoking panic again.
+ * after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
@@ -94,8 +105,16 @@ void panic(const char *fmt, ...)
* multiple parallel invocations of panic, all other CPUs either
* stop themself or will wait until they are stopped by the 1st CPU
* with smp_send_stop().
+ *
+ * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
+ * comes here, so go ahead.
+ * `old_cpu == this_cpu' means we came from nmi_panic() which sets
+ * panic_cpu to this CPU. In this case, this is also the 1st CPU.
*/
- if (!spin_trylock(&panic_lock))
+ this_cpu = raw_smp_processor_id();
+ old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
+
+ if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
panic_smp_self_stop();
console_verbose();
@@ -117,9 +136,11 @@ void panic(const char *fmt, ...)
* everything else.
* If we want to run this after calling panic_notifiers, pass
* the "crash_kexec_post_notifiers" option to the kernel.
+ *
+ * Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (!crash_kexec_post_notifiers)
- crash_kexec(NULL);
+ __crash_kexec(NULL);
/*
* Note smp_send_stop is the usual smp shutdown function, which
@@ -142,9 +163,11 @@ void panic(const char *fmt, ...)
* panic_notifiers and dumping kmsg before kdump.
* Note: since some panic_notifiers can make crashed kernel
* more unstable, it can increase risks of the kdump failure too.
+ *
+ * Bypass the panic_cpu check and call __crash_kexec directly.
*/
if (crash_kexec_post_notifiers)
- crash_kexec(NULL);
+ __crash_kexec(NULL);
bust_spinlocks(0);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d89328e260df..d2988d047d66 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -162,6 +162,27 @@ static int rcu_torture_writer_state;
#define RTWS_SYNC 7
#define RTWS_STUTTER 8
#define RTWS_STOPPING 9
+static const char * const rcu_torture_writer_state_names[] = {
+ "RTWS_FIXED_DELAY",
+ "RTWS_DELAY",
+ "RTWS_REPLACE",
+ "RTWS_DEF_FREE",
+ "RTWS_EXP_SYNC",
+ "RTWS_COND_GET",
+ "RTWS_COND_SYNC",
+ "RTWS_SYNC",
+ "RTWS_STUTTER",
+ "RTWS_STOPPING",
+};
+
+static const char *rcu_torture_writer_state_getname(void)
+{
+ unsigned int i = READ_ONCE(rcu_torture_writer_state);
+
+ if (i >= ARRAY_SIZE(rcu_torture_writer_state_names))
+ return "???";
+ return rcu_torture_writer_state_names[i];
+}
#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
#define RCUTORTURE_RUNNABLE_INIT 1
@@ -1307,7 +1328,8 @@ rcu_torture_stats_print(void)
rcutorture_get_gp_data(cur_ops->ttype,
&flags, &gpnum, &completed);
- pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
+ pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+ rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
gpnum, completed, flags);
show_rcu_gp_kthreads();
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index a63a1ea5a41b..9b9cdd549caa 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -489,7 +489,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
*/
void synchronize_srcu(struct srcu_struct *sp)
{
- __synchronize_srcu(sp, rcu_gp_is_expedited()
+ __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
: SYNCHRONIZE_SRCU_TRYCOUNT);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f07343b54fe5..e41dd4131f7a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -68,10 +68,6 @@ MODULE_ALIAS("rcutree");
/* Data structures. */
-static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
-
/*
* In order to export the rcu_state name to the tracing tools, it
* needs to be added in the __tracepoint_string section.
@@ -246,24 +242,17 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
*/
void rcu_sched_qs(void)
{
- unsigned long flags;
-
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) {
- trace_rcu_grace_period(TPS("rcu_sched"),
- __this_cpu_read(rcu_sched_data.gpnum),
- TPS("cpuqs"));
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
- if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
- return;
- local_irq_save(flags);
- if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) {
- __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
- rcu_report_exp_rdp(&rcu_sched_state,
- this_cpu_ptr(&rcu_sched_data),
- true);
- }
- local_irq_restore(flags);
- }
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
+ return;
+ trace_rcu_grace_period(TPS("rcu_sched"),
+ __this_cpu_read(rcu_sched_data.gpnum),
+ TPS("cpuqs"));
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
+ if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
+ return;
+ __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(&rcu_sched_data), true);
}
void rcu_bh_qs(void)
@@ -300,17 +289,16 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
* We inform the RCU core by emulating a zero-duration dyntick-idle
* period, which we in turn do by incrementing the ->dynticks counter
* by two.
+ *
+ * The caller must have disabled interrupts.
*/
static void rcu_momentary_dyntick_idle(void)
{
- unsigned long flags;
struct rcu_data *rdp;
struct rcu_dynticks *rdtp;
int resched_mask;
struct rcu_state *rsp;
- local_irq_save(flags);
-
/*
* Yes, we can lose flag-setting operations. This is OK, because
* the flag will be set again after some delay.
@@ -340,13 +328,12 @@ static void rcu_momentary_dyntick_idle(void)
smp_mb__after_atomic(); /* Later stuff after QS. */
break;
}
- local_irq_restore(flags);
}
/*
* Note a context switch. This is a quiescent state for RCU-sched,
* and requires special handling for preemptible RCU.
- * The caller must have disabled preemption.
+ * The caller must have disabled interrupts.
*/
void rcu_note_context_switch(void)
{
@@ -376,9 +363,14 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
*/
void rcu_all_qs(void)
{
+ unsigned long flags;
+
barrier(); /* Avoid RCU read-side critical sections leaking down. */
- if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+ if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) {
+ local_irq_save(flags);
rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ }
this_cpu_inc(rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
@@ -605,25 +597,25 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
* The caller must have disabled interrupts to prevent races with
* normal callback registry.
*/
-static int
+static bool
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
int i;
if (rcu_gp_in_progress(rsp))
- return 0; /* No, a grace period is already in progress. */
+ return false; /* No, a grace period is already in progress. */
if (rcu_future_needs_gp(rsp))
- return 1; /* Yes, a no-CBs CPU needs one. */
+ return true; /* Yes, a no-CBs CPU needs one. */
if (!rdp->nxttail[RCU_NEXT_TAIL])
- return 0; /* No, this is a no-CBs (or offline) CPU. */
+ return false; /* No, this is a no-CBs (or offline) CPU. */
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
- return 1; /* Yes, this CPU has newly registered callbacks. */
+ return true; /* Yes, CPU has newly registered callbacks. */
for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
ULONG_CMP_LT(READ_ONCE(rsp->completed),
rdp->nxtcompleted[i]))
- return 1; /* Yes, CBs for future grace period. */
- return 0; /* No grace period needed. */
+ return true; /* Yes, CBs for future grace period. */
+ return false; /* No grace period needed. */
}
/*
@@ -740,7 +732,7 @@ void rcu_user_enter(void)
*
* Exit from an interrupt handler, which might possibly result in entering
* idle mode, in other words, leaving the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* This code assumes that the idle loop never does anything that might
* result in unbalanced calls to irq_enter() and irq_exit(). If your
@@ -753,11 +745,10 @@ void rcu_user_enter(void)
*/
void rcu_irq_exit(void)
{
- unsigned long flags;
long long oldval;
struct rcu_dynticks *rdtp;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting--;
@@ -768,6 +759,17 @@ void rcu_irq_exit(void)
else
rcu_eqs_enter_common(oldval, true);
rcu_sysidle_enter(1);
+}
+
+/*
+ * Wrapper for rcu_irq_exit() where interrupts are enabled.
+ */
+void rcu_irq_exit_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_exit();
local_irq_restore(flags);
}
@@ -865,7 +867,7 @@ void rcu_user_exit(void)
*
* Enter an interrupt handler, which might possibly result in exiting
* idle mode, in other words, entering the mode in which read-side critical
- * sections can occur.
+ * sections can occur. The caller must have disabled interrupts.
*
* Note that the Linux kernel is fully capable of entering an interrupt
* handler that it never exits, for example when doing upcalls to
@@ -881,11 +883,10 @@ void rcu_user_exit(void)
*/
void rcu_irq_enter(void)
{
- unsigned long flags;
struct rcu_dynticks *rdtp;
long long oldval;
- local_irq_save(flags);
+ RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!");
rdtp = this_cpu_ptr(&rcu_dynticks);
oldval = rdtp->dynticks_nesting;
rdtp->dynticks_nesting++;
@@ -896,6 +897,17 @@ void rcu_irq_enter(void)
else
rcu_eqs_exit_common(oldval, true);
rcu_sysidle_exit(1);
+}
+
+/*
+ * Wrapper for rcu_irq_enter() where interrupts are enabled.
+ */
+void rcu_irq_enter_irqson(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_irq_enter();
local_irq_restore(flags);
}
@@ -1187,6 +1199,16 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
}
/*
+ * Convert a ->gp_state value to a character string.
+ */
+static const char *gp_state_getname(short gs)
+{
+ if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
+ return "???";
+ return gp_state_names[gs];
+}
+
+/*
* Complain about starvation of grace-period kthread.
*/
static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
@@ -1196,12 +1218,16 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
- if (j - gpa > 2 * HZ)
- pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
+ if (j - gpa > 2 * HZ) {
+ pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
rsp->name, j - gpa,
rsp->gpnum, rsp->completed,
- rsp->gp_flags, rsp->gp_state,
- rsp->gp_kthread ? rsp->gp_kthread->state : 0);
+ rsp->gp_flags,
+ gp_state_getname(rsp->gp_state), rsp->gp_state,
+ rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+ if (rsp->gp_kthread)
+ sched_show_task(rsp->gp_kthread);
+ }
}
/*
@@ -1214,7 +1240,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu))
@@ -1237,7 +1263,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
/* Only let one CPU complain about others per time interval. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1256,7 +1282,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
rsp->name);
print_cpu_stall_info_begin();
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -1327,7 +1353,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
rcu_dump_cpu_stacks(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -1534,10 +1560,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
* hold it, acquire the root rcu_node structure's lock in order to
* start one (if needed).
*/
- if (rnp != rnp_root) {
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
- }
+ if (rnp != rnp_root)
+ raw_spin_lock_rcu_node(rnp_root);
/*
* Get a new grace-period number. If there really is no grace
@@ -1786,11 +1810,10 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
if ((rdp->gpnum == READ_ONCE(rnp->gpnum) &&
rdp->completed == READ_ONCE(rnp->completed) &&
!unlikely(READ_ONCE(rdp->gpwrap))) || /* w/out lock. */
- !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
+ !raw_spin_trylock_rcu_node(rnp)) { /* irqs already off, so later. */
local_irq_restore(flags);
return;
}
- smp_mb__after_unlock_lock();
needwake = __note_gp_changes(rsp, rnp, rdp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
@@ -1805,21 +1828,20 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
}
/*
- * Initialize a new grace period. Return 0 if no grace period required.
+ * Initialize a new grace period. Return false if no grace period required.
*/
-static int rcu_gp_init(struct rcu_state *rsp)
+static bool rcu_gp_init(struct rcu_state *rsp)
{
unsigned long oldmask;
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (!READ_ONCE(rsp->gp_flags)) {
/* Spurious wakeup, tell caller to go back to sleep. */
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
@@ -1829,7 +1851,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
* Not supposed to be able to happen.
*/
raw_spin_unlock_irq(&rnp->lock);
- return 0;
+ return false;
}
/* Advance to a new grace period and initialize state. */
@@ -1847,8 +1869,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_leaf_node(rsp, rnp) {
rcu_gp_slow(rsp, gp_preinit_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
!rnp->wait_blkd_tasks) {
/* Nothing to do on this leaf rcu_node structure. */
@@ -1904,8 +1925,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
rcu_gp_slow(rsp, gp_init_delay);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
rdp = this_cpu_ptr(rsp->rda);
rcu_preempt_check_blocked_tasks(rnp);
rnp->qsmask = rnp->qsmaskinit;
@@ -1923,7 +1943,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
WRITE_ONCE(rsp->gp_activity, jiffies);
}
- return 1;
+ return true;
}
/*
@@ -1973,8 +1993,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
}
/* Clear flag to prevent immediate re-entry. */
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WRITE_ONCE(rsp->gp_flags,
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
raw_spin_unlock_irq(&rnp->lock);
@@ -1993,8 +2012,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
WRITE_ONCE(rsp->gp_activity, jiffies);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
gp_duration = jiffies - rsp->gp_start;
if (gp_duration > rsp->gp_max)
rsp->gp_max = gp_duration;
@@ -2019,8 +2037,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
* grace period is recorded in any of the rcu_node structures.
*/
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irq_rcu_node(rnp);
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->completed, rsp->gpnum);
@@ -2035,8 +2052,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
rcu_gp_slow(rsp, gp_cleanup_delay);
}
rnp = rcu_get_root(rsp);
- raw_spin_lock_irq(&rnp->lock);
- smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */
+ raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
rcu_nocb_gp_set(rnp, nocb);
/* Declare grace period done. */
@@ -2284,8 +2300,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
raw_spin_unlock_irqrestore(&rnp->lock, flags);
rnp_c = rnp;
rnp = rnp->parent;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
oldmask = rnp_c->qsmask;
}
@@ -2332,8 +2347,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
gps = rnp->gpnum;
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
}
@@ -2355,8 +2369,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
struct rcu_node *rnp;
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rdp->cpu_no_qs.b.norm &&
rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) ||
rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum ||
@@ -2582,8 +2595,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (!rnp)
break;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock(); /* GP memory ordering. */
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinit &= ~mask;
rnp->qsmask &= ~mask;
if (rnp->qsmaskinit) {
@@ -2611,8 +2623,7 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock(); /* Enforce GP memory-order guarantee. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rnp->qsmaskinitnext &= ~mask;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
@@ -2809,8 +2820,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
rcu_for_each_leaf_node(rsp, rnp) {
cond_resched_rcu_qs();
mask = 0;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
if (rcu_state_p == &rcu_sched_state ||
rsp != rcu_state_p ||
@@ -2881,8 +2891,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
/* Reached the root of the rcu_node tree, acquire lock. */
- raw_spin_lock_irqsave(&rnp_old->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
raw_spin_unlock(&rnp_old->fqslock);
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
rsp->n_force_qs_lh++;
@@ -2914,7 +2923,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/* Does this CPU require a not-yet-started grace period? */
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+ raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
needwake = rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
if (needwake)
@@ -3005,8 +3014,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
if (!rcu_gp_in_progress(rsp)) {
struct rcu_node *rnp_root = rcu_get_root(rsp);
- raw_spin_lock(&rnp_root->lock);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp_root);
needwake = rcu_start_gp(rsp);
raw_spin_unlock(&rnp_root->lock);
if (needwake)
@@ -3365,7 +3373,6 @@ static unsigned long rcu_seq_snap(unsigned long *sp)
{
unsigned long s;
- smp_mb(); /* Caller's modifications seen first by other CPUs. */
s = (READ_ONCE(*sp) + 3) & ~0x1;
smp_mb(); /* Above access must not bleed into critical section. */
return s;
@@ -3392,6 +3399,7 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
+ smp_mb(); /* Caller's modifications seen first by other CPUs. */
return rcu_seq_snap(&rsp->expedited_sequence);
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
@@ -3426,8 +3434,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
* CPUs for the current rcu_node structure up the rcu_node tree.
*/
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmaskinit == rnp->expmaskinitnext) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
continue; /* No new CPUs, nothing to do. */
@@ -3447,8 +3454,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
rnp_up = rnp->parent;
done = false;
while (rnp_up) {
- raw_spin_lock_irqsave(&rnp_up->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
if (rnp_up->expmaskinit)
done = true;
rnp_up->expmaskinit |= mask;
@@ -3472,8 +3478,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
sync_exp_reset_tree_hotplug(rsp);
rcu_for_each_node_breadth_first(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
rnp->expmask = rnp->expmaskinit;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -3531,8 +3536,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
mask = rnp->grpmask;
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
rnp = rnp->parent;
- raw_spin_lock(&rnp->lock); /* irqs already disabled */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
WARN_ON_ONCE(!(rnp->expmask & mask));
rnp->expmask &= ~mask;
}
@@ -3549,8 +3553,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
__rcu_report_exp_rnp(rsp, rnp, wake, flags);
}
@@ -3564,8 +3567,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
{
unsigned long flags;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!(rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -3609,7 +3611,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
*/
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
- struct rcu_data *rdp;
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
struct rcu_node *rnp0;
struct rcu_node *rnp1 = NULL;
@@ -3623,7 +3625,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
if (sync_exp_work_done(rsp, rnp0, NULL,
- &rsp->expedited_workdone0, s))
+ &rdp->expedited_workdone0, s))
return NULL;
return rnp0;
}
@@ -3637,14 +3639,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* can be inexact, as it is just promoting locality and is not
* strictly needed for correctness.
*/
- rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
- if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
return NULL;
mutex_lock(&rdp->exp_funnel_mutex);
rnp0 = rdp->mynode;
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone2, s))
+ &rdp->expedited_workdone2, s))
return NULL;
mutex_lock(&rnp0->exp_funnel_mutex);
if (rnp1)
@@ -3654,7 +3655,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp1 = rnp0;
}
if (sync_exp_work_done(rsp, rnp1, rdp,
- &rsp->expedited_workdone3, s))
+ &rdp->expedited_workdone3, s))
return NULL;
return rnp1;
}
@@ -3708,8 +3709,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
sync_exp_reset_tree(rsp);
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
@@ -3741,24 +3741,22 @@ retry_ipi:
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
- } else {
- /* Failed, raced with offline. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask)) {
- raw_spin_unlock_irqrestore(&rnp->lock,
- flags);
- schedule_timeout_uninterruptible(1);
- if (cpu_online(cpu) &&
- (rnp->expmask & mask))
- goto retry_ipi;
- raw_spin_lock_irqsave(&rnp->lock,
- flags);
- }
- if (!(rnp->expmask & mask))
- mask_ofl_ipi &= ~mask;
+ continue;
+ }
+ /* Failed, raced with offline. */
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ schedule_timeout_uninterruptible(1);
+ if (cpu_online(cpu) &&
+ (rnp->expmask & mask))
+ goto retry_ipi;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
+ if (!(rnp->expmask & mask))
+ mask_ofl_ipi &= ~mask;
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
@@ -3773,6 +3771,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
unsigned long jiffies_stall;
unsigned long jiffies_start;
unsigned long mask;
+ int ndetected;
struct rcu_node *rnp;
struct rcu_node *rnp_root = rcu_get_root(rsp);
int ret;
@@ -3785,7 +3784,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root),
jiffies_stall);
- if (ret > 0)
+ if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
return;
if (ret < 0) {
/* Hit a signal, disable CPU stall warnings. */
@@ -3795,14 +3794,16 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
rsp->name);
+ ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
- (void)rcu_print_task_exp_stall(rnp);
+ ndetected = rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
struct rcu_data *rdp;
if (!(rnp->expmask & mask))
continue;
+ ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
"O."[cpu_online(cpu)],
@@ -3811,8 +3812,23 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
}
mask <<= 1;
}
- pr_cont(" } %lu jiffies s: %lu\n",
- jiffies - jiffies_start, rsp->expedited_sequence);
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+ jiffies - jiffies_start, rsp->expedited_sequence,
+ rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
+ if (!ndetected) {
+ pr_err("blocking rcu_node structures:");
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ if (rnp == rnp_root)
+ continue; /* printed unconditionally */
+ if (sync_rcu_preempt_exp_done(rnp))
+ continue;
+ pr_cont(" l=%u:%d-%d:%#lx/%c",
+ rnp->level, rnp->grplo, rnp->grphi,
+ rnp->expmask,
+ ".T"[!!rnp->exp_tasks]);
+ }
+ pr_cont("\n");
+ }
rcu_for_each_leaf_node(rsp, rnp) {
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
@@ -3847,6 +3863,16 @@ void synchronize_sched_expedited(void)
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
+ /* If only one CPU, this is automatically a grace period. */
+ if (rcu_blocking_is_gp())
+ return;
+
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(call_rcu_sched);
+ return;
+ }
+
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
@@ -4135,7 +4161,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (rnp == NULL)
return;
- raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */
+ raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
rnp->qsmaskinit |= mask;
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
}
@@ -4152,7 +4178,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
@@ -4179,7 +4205,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -4198,8 +4224,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
*/
rnp = rdp->mynode;
mask = rdp->grpmask;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
rnp->qsmaskinitnext |= mask;
rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
@@ -4327,14 +4352,14 @@ static int __init rcu_spawn_gp_kthread(void)
t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
BUG_ON(IS_ERR(t));
rnp = rcu_get_root(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rsp->gp_kthread = t;
if (kthread_prio) {
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
}
- wake_up_process(t);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ wake_up_process(t);
}
rcu_spawn_nocb_kthreads();
rcu_spawn_boost_kthreads();
@@ -4385,12 +4410,14 @@ static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt)
/*
* Helper function for rcu_init() that initializes one rcu_state structure.
*/
-static void __init rcu_init_one(struct rcu_state *rsp,
- struct rcu_data __percpu *rda)
+static void __init rcu_init_one(struct rcu_state *rsp)
{
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
static const char * const exp[] = RCU_EXP_NAME_INIT;
+ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
+ static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
@@ -4576,8 +4603,8 @@ void __init rcu_init(void)
rcu_bootup_announce();
rcu_init_geometry();
- rcu_init_one(&rcu_bh_state, &rcu_bh_data);
- rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+ rcu_init_one(&rcu_bh_state);
+ rcu_init_one(&rcu_sched_state);
if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_sched_state);
__rcu_init_preempt();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9fb4e238d4dc..83360b4f4352 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -178,6 +178,8 @@ struct rcu_node {
/* beginning of each expedited GP. */
unsigned long expmaskinitnext;
/* Online CPUs for next expedited GP. */
+ /* Any CPU that has ever been online will */
+ /* have its bit set. */
unsigned long grpmask; /* Mask to apply to parent qsmask. */
/* Only one bit will be set in this mask. */
int grplo; /* lowest-numbered CPU or group here. */
@@ -384,6 +386,10 @@ struct rcu_data {
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
struct mutex exp_funnel_mutex;
+ atomic_long_t expedited_workdone0; /* # done by others #0. */
+ atomic_long_t expedited_workdone1; /* # done by others #1. */
+ atomic_long_t expedited_workdone2; /* # done by others #2. */
+ atomic_long_t expedited_workdone3; /* # done by others #3. */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
@@ -498,10 +504,6 @@ struct rcu_state {
/* End of fields guarded by barrier_mutex. */
unsigned long expedited_sequence; /* Take a ticket. */
- atomic_long_t expedited_workdone0; /* # done by others #0. */
- atomic_long_t expedited_workdone1; /* # done by others #1. */
- atomic_long_t expedited_workdone2; /* # done by others #2. */
- atomic_long_t expedited_workdone3; /* # done by others #3. */
atomic_long_t expedited_normal; /* # fallbacks to normal. */
atomic_t expedited_need_qs; /* # CPUs left to check in. */
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
@@ -545,6 +547,18 @@ struct rcu_state {
#define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
#define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
+#ifndef RCU_TREE_NONCORE
+static const char * const gp_state_names[] = {
+ "RCU_GP_IDLE",
+ "RCU_GP_WAIT_GPS",
+ "RCU_GP_DONE_GPS",
+ "RCU_GP_WAIT_FQS",
+ "RCU_GP_DOING_FQS",
+ "RCU_GP_CLEANUP",
+ "RCU_GP_CLEANED",
+};
+#endif /* #ifndef RCU_TREE_NONCORE */
+
extern struct list_head rcu_struct_flavors;
/* Sequence through rcu_state structures for each RCU flavor. */
@@ -664,3 +678,42 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
#else /* #ifdef CONFIG_PPC */
#define smp_mb__after_unlock_lock() do { } while (0)
#endif /* #else #ifdef CONFIG_PPC */
+
+/*
+ * Wrappers for the rcu_node::lock acquire.
+ *
+ * Because the rcu_nodes form a tree, the tree traversal locking will observe
+ * different lock values, this in turn means that an UNLOCK of one level
+ * followed by a LOCK of another level does not imply a full memory barrier;
+ * and most importantly transitivity is lost.
+ *
+ * In order to restore full ordering between tree levels, augment the regular
+ * lock acquire functions with smp_mb__after_unlock_lock().
+ */
+static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
+{
+ raw_spin_lock_irq(&rnp->lock);
+ smp_mb__after_unlock_lock();
+}
+
+#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
+do { \
+ typecheck(unsigned long, flags); \
+ raw_spin_lock_irqsave(&(rnp)->lock, flags); \
+ smp_mb__after_unlock_lock(); \
+} while (0)
+
+static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
+{
+ bool locked = raw_spin_trylock(&rnp->lock);
+
+ if (locked)
+ smp_mb__after_unlock_lock();
+ return locked;
+}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 630c19772630..9467a8b7e756 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -63,8 +63,7 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
/*
* Check the RCU kernel configuration parameters and print informative
- * messages about anything out of the ordinary. If you like #ifdef, you
- * will love this function.
+ * messages about anything out of the ordinary.
*/
static void __init rcu_bootup_announce_oddness(void)
{
@@ -147,8 +146,8 @@ static void __init rcu_bootup_announce(void)
* the corresponding expedited grace period will also be the end of the
* normal grace period.
*/
-static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
- unsigned long flags) __releases(rnp->lock)
+static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
+ __releases(rnp->lock) /* But leaves rrupts disabled. */
{
int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
(rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
@@ -236,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
rnp->gp_tasks = &t->rcu_node_entry;
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
rnp->exp_tasks = &t->rcu_node_entry;
- raw_spin_unlock(&rnp->lock);
+ raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */
/*
* Report the quiescent state for the expedited GP. This expedited
@@ -251,7 +250,6 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp,
} else {
WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
}
- local_irq_restore(flags);
}
/*
@@ -286,12 +284,11 @@ static void rcu_preempt_qs(void)
* predating the current grace period drain, in other words, until
* rnp->gp_tasks becomes NULL.
*
- * Caller must disable preemption.
+ * Caller must disable interrupts.
*/
static void rcu_preempt_note_context_switch(void)
{
struct task_struct *t = current;
- unsigned long flags;
struct rcu_data *rdp;
struct rcu_node *rnp;
@@ -301,8 +298,7 @@ static void rcu_preempt_note_context_switch(void)
/* Possibly blocking in an RCU read-side critical section. */
rdp = this_cpu_ptr(rcu_state_p->rda);
rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp);
t->rcu_read_unlock_special.b.blocked = true;
t->rcu_blocked_node = rnp;
@@ -318,7 +314,7 @@ static void rcu_preempt_note_context_switch(void)
(rnp->qsmask & rdp->grpmask)
? rnp->gpnum
: rnp->gpnum + 1);
- rcu_preempt_ctxt_queue(rnp, rdp, flags);
+ rcu_preempt_ctxt_queue(rnp, rdp);
} else if (t->rcu_read_lock_nesting < 0 &&
t->rcu_read_unlock_special.s) {
@@ -450,20 +446,13 @@ void rcu_read_unlock_special(struct task_struct *t)
/*
* Remove this task from the list it blocked on. The task
- * now remains queued on the rcu_node corresponding to
- * the CPU it first blocked on, so the first attempt to
- * acquire the task's rcu_node's ->lock will succeed.
- * Keep the loop and add a WARN_ON() out of sheer paranoia.
+ * now remains queued on the rcu_node corresponding to the
+ * CPU it first blocked on, so there is no longer any need
+ * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia.
*/
- for (;;) {
- rnp = t->rcu_blocked_node;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
- if (rnp == t->rcu_blocked_node)
- break;
- WARN_ON_ONCE(1);
- raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- }
+ rnp = t->rcu_blocked_node;
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+ WARN_ON_ONCE(rnp != t->rcu_blocked_node);
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = sync_rcu_preempt_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -527,7 +516,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
unsigned long flags;
struct task_struct *t;
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!rcu_preempt_blocked_readers_cgp(rnp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -748,6 +737,12 @@ void synchronize_rcu_expedited(void)
struct rcu_state *rsp = rcu_state_p;
unsigned long s;
+ /* If expedited grace periods are prohibited, fall back to normal. */
+ if (rcu_gp_is_normal()) {
+ wait_rcu_gp(call_rcu);
+ return;
+ }
+
s = rcu_exp_gp_seq_snap(rsp);
rnp_unlock = exp_funnel_lock(rsp, s);
@@ -788,7 +783,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
*/
static void __init __rcu_init_preempt(void)
{
- rcu_init_one(rcu_state_p, rcu_data_p);
+ rcu_init_one(rcu_state_p);
}
/*
@@ -989,8 +984,7 @@ static int rcu_boost(struct rcu_node *rnp)
READ_ONCE(rnp->boost_tasks) == NULL)
return 0; /* Nothing left to boost. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
/*
* Recheck under the lock: all tasks in need of boosting
@@ -1176,8 +1170,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
"rcub/%d", rnp_index);
if (IS_ERR(t))
return PTR_ERR(t);
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = kthread_prio;
@@ -1524,7 +1517,8 @@ static void rcu_prepare_for_idle(void)
struct rcu_state *rsp;
int tne;
- if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL))
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
+ rcu_is_nocb_cpu(smp_processor_id()))
return;
/* Handle nohz enablement switches conservatively. */
@@ -1538,10 +1532,6 @@ static void rcu_prepare_for_idle(void)
if (!tne)
return;
- /* If this is a no-CBs CPU, no callbacks, just return. */
- if (rcu_is_nocb_cpu(smp_processor_id()))
- return;
-
/*
* If a non-lazy callback arrived at a CPU having only lazy
* callbacks, invoke RCU core for the side-effect of recalculating
@@ -1567,8 +1557,7 @@ static void rcu_prepare_for_idle(void)
if (!*rdp->nxttail[RCU_DONE_TAIL])
continue;
rnp = rdp->mynode;
- raw_spin_lock(&rnp->lock); /* irqs already disabled. */
- smp_mb__after_unlock_lock();
+ raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
if (needwake)
@@ -2068,8 +2057,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
bool needwake;
struct rcu_node *rnp = rdp->mynode;
- raw_spin_lock_irqsave(&rnp->lock, flags);
- smp_mb__after_unlock_lock();
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
needwake = rcu_start_future_gp(rnp, rdp, &c);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
if (needwake)
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index ef7093cc9b5c..1088e64f01ad 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -1,5 +1,5 @@
/*
- * Read-Copy Update tracing for classic implementation
+ * Read-Copy Update tracing for hierarchical implementation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +16,7 @@
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
+ * Author: Paul E. McKenney
*
* Papers: http://www.rdrop.com/users/paulmck/RCU
*
@@ -33,9 +34,7 @@
#include <linux/sched.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
-#include <linux/module.h>
#include <linux/completion.h>
-#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
@@ -183,14 +182,20 @@ static const struct file_operations rcudata_fops = {
static int show_rcuexp(struct seq_file *m, void *v)
{
+ int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
-
+ struct rcu_data *rdp;
+ unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+
+ for_each_possible_cpu(cpu) {
+ rdp = per_cpu_ptr(rsp->rda, cpu);
+ s0 += atomic_long_read(&rdp->expedited_workdone0);
+ s1 += atomic_long_read(&rdp->expedited_workdone1);
+ s2 += atomic_long_read(&rdp->expedited_workdone2);
+ s3 += atomic_long_read(&rdp->expedited_workdone3);
+ }
seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence,
- atomic_long_read(&rsp->expedited_workdone0),
- atomic_long_read(&rsp->expedited_workdone1),
- atomic_long_read(&rsp->expedited_workdone2),
- atomic_long_read(&rsp->expedited_workdone3),
+ rsp->expedited_sequence, s0, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);
@@ -319,7 +324,7 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
unsigned long gpmax;
struct rcu_node *rnp = &rsp->node[0];
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
completed = READ_ONCE(rsp->completed);
gpnum = READ_ONCE(rsp->gpnum);
if (completed == gpnum)
@@ -487,16 +492,4 @@ free_out:
debugfs_remove_recursive(rcudir);
return 1;
}
-
-static void __exit rcutree_trace_cleanup(void)
-{
- debugfs_remove_recursive(rcudir);
-}
-
-
-module_init(rcutree_trace_init);
-module_exit(rcutree_trace_cleanup);
-
-MODULE_AUTHOR("Paul E. McKenney");
-MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
-MODULE_LICENSE("GPL");
+device_initcall(rcutree_trace_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5f748c5a40f0..76b94e19430b 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -60,7 +60,12 @@ MODULE_ALIAS("rcupdate");
#endif
#define MODULE_PARAM_PREFIX "rcupdate."
+#ifndef CONFIG_TINY_RCU
module_param(rcu_expedited, int, 0);
+module_param(rcu_normal, int, 0);
+static int rcu_normal_after_boot;
+module_param(rcu_normal_after_boot, int, 0);
+#endif /* #ifndef CONFIG_TINY_RCU */
#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
/**
@@ -113,6 +118,17 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
#ifndef CONFIG_TINY_RCU
+/*
+ * Should expedited grace-period primitives always fall back to their
+ * non-expedited counterparts? Intended for use within RCU. Note
+ * that if the user specifies both rcu_expedited and rcu_normal, then
+ * rcu_normal wins.
+ */
+bool rcu_gp_is_normal(void)
+{
+ return READ_ONCE(rcu_normal);
+}
+
static atomic_t rcu_expedited_nesting =
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
@@ -157,8 +173,6 @@ void rcu_unexpedite_gp(void)
}
EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
-#endif /* #ifndef CONFIG_TINY_RCU */
-
/*
* Inform RCU of the end of the in-kernel boot sequence.
*/
@@ -166,8 +180,12 @@ void rcu_end_inkernel_boot(void)
{
if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
rcu_unexpedite_gp();
+ if (rcu_normal_after_boot)
+ WRITE_ONCE(rcu_normal, 1);
}
+#endif /* #ifndef CONFIG_TINY_RCU */
+
#ifdef CONFIG_PREEMPT_RCU
/*
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 750ed601ddf7..a5d966cb8891 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -212,7 +212,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
ag = autogroup_task_get(p);
down_write(&ag->lock);
- err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
+ err = sched_group_set_shares(ag->tg, sched_prio_to_weight[nice + 20]);
if (!err)
ag->nice = nice;
up_write(&ag->lock);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index caf4041f5b0a..bc54e84675da 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -354,7 +354,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
return;
sched_clock_tick();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 732e993b564b..77d97a6fc715 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -731,7 +731,7 @@ bool sched_can_stop_tick(void)
if (current->policy == SCHED_RR) {
struct sched_rt_entity *rt_se = &current->rt;
- return rt_se->run_list.prev == rt_se->run_list.next;
+ return list_is_singular(&rt_se->run_list);
}
/*
@@ -823,8 +823,8 @@ static void set_load_weight(struct task_struct *p)
return;
}
- load->weight = scale_load(prio_to_weight[prio]);
- load->inv_weight = prio_to_wmult[prio];
+ load->weight = scale_load(sched_prio_to_weight[prio]);
+ load->inv_weight = sched_prio_to_wmult[prio];
}
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
{
lockdep_assert_held(&rq->lock);
- dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ dequeue_task(rq, p, 0);
set_task_cpu(p, new_cpu);
raw_spin_unlock(&rq->lock);
@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
raw_spin_lock(&rq->lock);
BUG_ON(task_cpu(p) != new_cpu);
- p->on_rq = TASK_ON_RQ_QUEUED;
enqueue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0);
return rq;
@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
!p->on_rq);
+ /*
+ * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
+ * because schedstat_wait_{start,end} rebase migrating task's wait_start
+ * time relying on p->on_rq.
+ */
+ WARN_ON_ONCE(p->state == TASK_RUNNING &&
+ p->sched_class == &fair_sched_class &&
+ (p->on_rq && !task_on_rq_migrating(p)));
+
#ifdef CONFIG_LOCKDEP
/*
* The caller should hold either p->pi_lock or rq->lock, when changing
@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
src_rq = task_rq(p);
dst_rq = cpu_rq(cpu);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0);
set_task_cpu(p, cpu);
activate_task(dst_rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(dst_rq, p, 0);
} else {
/*
@@ -1905,6 +1916,97 @@ static void ttwu_queue(struct task_struct *p, int cpu)
raw_spin_unlock(&rq->lock);
}
+/*
+ * Notes on Program-Order guarantees on SMP systems.
+ *
+ * MIGRATION
+ *
+ * The basic program-order guarantee on SMP systems is that when a task [t]
+ * migrates, all its activity on its old cpu [c0] happens-before any subsequent
+ * execution on its new cpu [c1].
+ *
+ * For migration (of runnable tasks) this is provided by the following means:
+ *
+ * A) UNLOCK of the rq(c0)->lock scheduling out task t
+ * B) migration for t is required to synchronize *both* rq(c0)->lock and
+ * rq(c1)->lock (if not at the same time, then in that order).
+ * C) LOCK of the rq(c1)->lock scheduling in task
+ *
+ * Transitivity guarantees that B happens after A and C after B.
+ * Note: we only require RCpc transitivity.
+ * Note: the cpu doing B need not be c0 or c1
+ *
+ * Example:
+ *
+ * CPU0 CPU1 CPU2
+ *
+ * LOCK rq(0)->lock
+ * sched-out X
+ * sched-in Y
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(0)->lock // orders against CPU0
+ * dequeue X
+ * UNLOCK rq(0)->lock
+ *
+ * LOCK rq(1)->lock
+ * enqueue X
+ * UNLOCK rq(1)->lock
+ *
+ * LOCK rq(1)->lock // orders against CPU2
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(1)->lock
+ *
+ *
+ * BLOCKING -- aka. SLEEP + WAKEUP
+ *
+ * For blocking we (obviously) need to provide the same guarantee as for
+ * migration. However the means are completely different as there is no lock
+ * chain to provide order. Instead we do:
+ *
+ * 1) smp_store_release(X->on_cpu, 0)
+ * 2) smp_cond_acquire(!X->on_cpu)
+ *
+ * Example:
+ *
+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule)
+ *
+ * LOCK rq(0)->lock LOCK X->pi_lock
+ * dequeue X
+ * sched-out X
+ * smp_store_release(X->on_cpu, 0);
+ *
+ * smp_cond_acquire(!X->on_cpu);
+ * X->state = WAKING
+ * set_task_cpu(X,2)
+ *
+ * LOCK rq(2)->lock
+ * enqueue X
+ * X->state = RUNNING
+ * UNLOCK rq(2)->lock
+ *
+ * LOCK rq(2)->lock // orders against CPU1
+ * sched-out Z
+ * sched-in X
+ * UNLOCK rq(2)->lock
+ *
+ * UNLOCK X->pi_lock
+ * UNLOCK rq(0)->lock
+ *
+ *
+ * However; for wakeups there is a second guarantee we must provide, namely we
+ * must observe the state that lead to our wakeup. That is, not only must our
+ * task observe its own prior state, it must also observe the stores prior to
+ * its wakeup.
+ *
+ * This means that any means of doing remote wakeups must order the CPU doing
+ * the wakeup against the CPU the task is going to end up running on. This,
+ * however, is already required for the regular Program-Order guarantee above,
+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
+ *
+ */
+
/**
* try_to_wake_up - wake up a thread
* @p: the thread to be awakened
@@ -1968,19 +2070,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
/*
* If the owning (remote) cpu is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
- */
- while (p->on_cpu)
- cpu_relax();
- /*
- * Combined with the control dependency above, we have an effective
- * smp_load_acquire() without the need for full barriers.
*
* Pairs with the smp_store_release() in finish_lock_switch().
*
* This ensures that tasks getting woken will be fully ordered against
* their previous state and preserve Program Order.
*/
- smp_rmb();
+ smp_cond_acquire(!p->on_cpu);
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
@@ -2109,6 +2205,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ p->se.cfs_rq = NULL;
+#endif
+
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
@@ -3109,7 +3209,6 @@ static void __sched notrace __schedule(bool preempt)
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- rcu_note_context_switch();
prev = rq->curr;
/*
@@ -3128,13 +3227,16 @@ static void __sched notrace __schedule(bool preempt)
if (sched_feat(HRTICK))
hrtick_clear(rq);
+ local_irq_disable();
+ rcu_note_context_switch();
+
/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
*/
smp_mb__before_spinlock();
- raw_spin_lock_irq(&rq->lock);
+ raw_spin_lock(&rq->lock);
lockdep_pin_lock(&rq->lock);
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@ -7355,6 +7457,9 @@ int in_sched_functions(unsigned long addr)
*/
struct task_group root_task_group;
LIST_HEAD(task_groups);
+
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
#endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
@@ -7412,11 +7517,12 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED
+ task_group_cache = KMEM_CACHE(task_group, 0);
+
list_add(&root_task_group.list, &task_groups);
INIT_LIST_HEAD(&root_task_group.children);
INIT_LIST_HEAD(&root_task_group.siblings);
autogroup_init(&init_task);
-
#endif /* CONFIG_CGROUP_SCHED */
for_each_possible_cpu(i) {
@@ -7697,7 +7803,7 @@ static void free_sched_group(struct task_group *tg)
free_fair_sched_group(tg);
free_rt_sched_group(tg);
autogroup_free(tg);
- kfree(tg);
+ kmem_cache_free(task_group_cache, tg);
}
/* allocate runqueue etc for a new task group */
@@ -7705,7 +7811,7 @@ struct task_group *sched_create_group(struct task_group *parent)
{
struct task_group *tg;
- tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
if (!tg)
return ERR_PTR(-ENOMEM);
@@ -8610,3 +8716,44 @@ void dump_cpu_task(int cpu)
pr_info("Task dump for CPU %d:\n", cpu);
sched_show_task(cpu_curr(cpu));
}
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+const int sched_prio_to_weight[40] = {
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
+};
+
+/*
+ * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetics by turning divisions
+ * into multiplications:
+ */
+const u32 sched_prio_to_wmult[40] = {
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 05de80b48586..d5ff5c6bf829 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -466,7 +466,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
return;
if (sched_clock_irqtime) {
@@ -680,7 +680,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
{
unsigned long long delta = vtime_delta(tsk);
- WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+ WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap += delta;
/* CHECKME: always safe to convert nsecs to cputime? */
@@ -696,37 +696,37 @@ static void __vtime_account_system(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_cpu;
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_user_enter(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
void vtime_guest_enter(struct task_struct *tsk)
@@ -738,19 +738,19 @@ void vtime_guest_enter(struct task_struct *tsk)
* synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta.
*/
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
current->flags |= PF_VCPU;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk)
{
- write_seqlock(&tsk->vtime_seqlock);
+ write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk);
current->flags &= ~PF_VCPU;
- write_sequnlock(&tsk->vtime_seqlock);
+ write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
@@ -763,24 +763,26 @@ void vtime_account_idle(struct task_struct *tsk)
void arch_vtime_task_switch(struct task_struct *prev)
{
- write_seqlock(&prev->vtime_seqlock);
- prev->vtime_snap_whence = VTIME_SLEEPING;
- write_sequnlock(&prev->vtime_seqlock);
+ write_seqcount_begin(&prev->vtime_seqcount);
+ prev->vtime_snap_whence = VTIME_INACTIVE;
+ write_seqcount_end(&prev->vtime_seqcount);
- write_seqlock(&current->vtime_seqlock);
+ write_seqcount_begin(&current->vtime_seqcount);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id());
- write_sequnlock(&current->vtime_seqlock);
+ write_seqcount_end(&current->vtime_seqcount);
}
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
- write_seqlock_irqsave(&t->vtime_seqlock, flags);
+ local_irq_save(flags);
+ write_seqcount_begin(&t->vtime_seqcount);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu);
- write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+ write_seqcount_end(&t->vtime_seqcount);
+ local_irq_restore(flags);
}
cputime_t task_gtime(struct task_struct *t)
@@ -788,17 +790,17 @@ cputime_t task_gtime(struct task_struct *t)
unsigned int seq;
cputime_t gtime;
- if (!context_tracking_is_enabled())
+ if (!vtime_accounting_enabled())
return t->gtime;
do {
- seq = read_seqbegin(&t->vtime_seqlock);
+ seq = read_seqcount_begin(&t->vtime_seqcount);
gtime = t->gtime;
- if (t->flags & PF_VCPU)
+ if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
gtime += vtime_delta(t);
- } while (read_seqretry(&t->vtime_seqlock, seq));
+ } while (read_seqcount_retry(&t->vtime_seqcount, seq));
return gtime;
}
@@ -821,7 +823,7 @@ fetch_task_cputime(struct task_struct *t,
*udelta = 0;
*sdelta = 0;
- seq = read_seqbegin(&t->vtime_seqlock);
+ seq = read_seqcount_begin(&t->vtime_seqcount);
if (u_dst)
*u_dst = *u_src;
@@ -829,7 +831,7 @@ fetch_task_cputime(struct task_struct *t,
*s_dst = *s_src;
/* Task is sleeping, nothing to add */
- if (t->vtime_snap_whence == VTIME_SLEEPING ||
+ if (t->vtime_snap_whence == VTIME_INACTIVE ||
is_idle_task(t))
continue;
@@ -845,7 +847,7 @@ fetch_task_cputime(struct task_struct *t,
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
- } while (read_seqretry(&t->vtime_seqlock, seq));
+ } while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
@@ -853,6 +855,14 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t udelta, sdelta;
+ if (!vtime_accounting_enabled()) {
+ if (utime)
+ *utime = t->utime;
+ if (stime)
+ *stime = t->stime;
+ return;
+ }
+
fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta);
if (utime)
@@ -866,6 +876,14 @@ void task_cputime_scaled(struct task_struct *t,
{
cputime_t udelta, sdelta;
+ if (!vtime_accounting_enabled()) {
+ if (utimescaled)
+ *utimescaled = t->utimescaled;
+ if (stimescaled)
+ *stimescaled = t->stimescaled;
+ return;
+ }
+
fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 8b0a15e285f9..cd64c979d0e1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -176,8 +176,10 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
}
}
- if (leftmost)
+ if (leftmost) {
dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
+ dl_rq->earliest_dl.next = p->dl.deadline;
+ }
rb_link_node(&p->pushable_dl_tasks, parent, link);
rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
@@ -195,6 +197,10 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
next_node = rb_next(&p->pushable_dl_tasks);
dl_rq->pushable_dl_tasks_leftmost = next_node;
+ if (next_node) {
+ dl_rq->earliest_dl.next = rb_entry(next_node,
+ struct task_struct, pushable_dl_tasks)->dl.deadline;
+ }
}
rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
@@ -782,42 +788,14 @@ static void update_curr_dl(struct rq *rq)
#ifdef CONFIG_SMP
-static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
-
-static inline u64 next_deadline(struct rq *rq)
-{
- struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
-
- if (next && dl_prio(next->prio))
- return next->dl.deadline;
- else
- return 0;
-}
-
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
struct rq *rq = rq_of_dl_rq(dl_rq);
if (dl_rq->earliest_dl.curr == 0 ||
dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
- /*
- * If the dl_rq had no -deadline tasks, or if the new task
- * has shorter deadline than the current one on dl_rq, we
- * know that the previous earliest becomes our next earliest,
- * as the new task becomes the earliest itself.
- */
- dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
dl_rq->earliest_dl.curr = deadline;
cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
- } else if (dl_rq->earliest_dl.next == 0 ||
- dl_time_before(deadline, dl_rq->earliest_dl.next)) {
- /*
- * On the other hand, if the new -deadline task has a
- * a later deadline than the earliest one on dl_rq, but
- * it is earlier than the next (if any), we must
- * recompute the next-earliest.
- */
- dl_rq->earliest_dl.next = next_deadline(rq);
}
}
@@ -839,7 +817,6 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline;
- dl_rq->earliest_dl.next = next_deadline(rq);
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
}
}
@@ -1274,28 +1251,6 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
return 0;
}
-/* Returns the second earliest -deadline task, NULL otherwise */
-static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
-{
- struct rb_node *next_node = rq->dl.rb_leftmost;
- struct sched_dl_entity *dl_se;
- struct task_struct *p = NULL;
-
-next_node:
- next_node = rb_next(next_node);
- if (next_node) {
- dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
- p = dl_task_of(dl_se);
-
- if (pick_dl_task(rq, p, cpu))
- return p;
-
- goto next_node;
- }
-
- return NULL;
-}
-
/*
* Return the earliest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 90e26b11deaa..1926606ece80 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -738,12 +738,56 @@ static void update_curr_fair(struct rq *rq)
update_curr(cfs_rq_of(&rq->curr->se));
}
+#ifdef CONFIG_SCHEDSTATS
+static inline void
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ u64 wait_start = rq_clock(rq_of(cfs_rq));
+
+ if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
+ likely(wait_start > se->statistics.wait_start))
+ wait_start -= se->statistics.wait_start;
+
+ se->statistics.wait_start = wait_start;
+}
+
+static void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ struct task_struct *p;
+ u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+
+ if (entity_is_task(se)) {
+ p = task_of(se);
+ if (task_on_rq_migrating(p)) {
+ /*
+ * Preserve migrating task's wait time so wait_start
+ * time stamp can be adjusted to accumulate wait time
+ * prior to migration.
+ */
+ se->statistics.wait_start = delta;
+ return;
+ }
+ trace_sched_stat_wait(p, delta);
+ }
+
+ se->statistics.wait_max = max(se->statistics.wait_max, delta);
+ se->statistics.wait_count++;
+ se->statistics.wait_sum += delta;
+ se->statistics.wait_start = 0;
+}
+#else
static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
}
+static inline void
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+}
+#endif
+
/*
* Task is being enqueued - update stats:
*/
@@ -757,23 +801,6 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
update_stats_wait_start(cfs_rq, se);
}
-static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
- schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
- schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
-#ifdef CONFIG_SCHEDSTATS
- if (entity_is_task(se)) {
- trace_sched_stat_wait(task_of(se),
- rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
- }
-#endif
- schedstat_set(se->statistics.wait_start, 0);
-}
-
static inline void
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
@@ -2155,6 +2182,7 @@ void task_numa_work(struct callback_head *work)
unsigned long migrate, next_scan, now = jiffies;
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
+ u64 runtime = p->se.sum_exec_runtime;
struct vm_area_struct *vma;
unsigned long start, end;
unsigned long nr_pte_updates = 0;
@@ -2277,6 +2305,17 @@ out:
else
reset_ptenuma_scan(p);
up_read(&mm->mmap_sem);
+
+ /*
+ * Make sure tasks use at least 32x as much time to run other code
+ * than they used here, to limit NUMA PTE scanning overhead to 3% max.
+ * Usually update_task_scan_period slows down scanning enough; on an
+ * overloaded system we need to limit overhead on a per task basis.
+ */
+ if (unlikely(p->se.sum_exec_runtime != runtime)) {
+ u64 diff = p->se.sum_exec_runtime - runtime;
+ p->node_stamp += 32 * diff;
+ }
}
/*
@@ -2670,12 +2709,64 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
{
long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
+ /*
+ * No need to update load_avg for root_task_group as it is not used.
+ */
+ if (cfs_rq->tg == &root_task_group)
+ return;
+
if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
atomic_long_add(delta, &cfs_rq->tg->load_avg);
cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
}
}
+/*
+ * Called within set_task_rq() right before setting a task's cpu. The
+ * caller only guarantees p->pi_lock is held; no other assumptions,
+ * including the state of rq->lock, should be made.
+ */
+void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next)
+{
+ if (!sched_feat(ATTACH_AGE_LOAD))
+ return;
+
+ /*
+ * We are supposed to update the task to "current" time, then its up to
+ * date and ready to go to new CPU/cfs_rq. But we have difficulty in
+ * getting what current time is, so simply throw away the out-of-date
+ * time. This will result in the wakee task is less decayed, but giving
+ * the wakee more load sounds not bad.
+ */
+ if (se->avg.last_update_time && prev) {
+ u64 p_last_update_time;
+ u64 n_last_update_time;
+
+#ifndef CONFIG_64BIT
+ u64 p_last_update_time_copy;
+ u64 n_last_update_time_copy;
+
+ do {
+ p_last_update_time_copy = prev->load_last_update_time_copy;
+ n_last_update_time_copy = next->load_last_update_time_copy;
+
+ smp_rmb();
+
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+
+ } while (p_last_update_time != p_last_update_time_copy ||
+ n_last_update_time != n_last_update_time_copy);
+#else
+ p_last_update_time = prev->avg.last_update_time;
+ n_last_update_time = next->avg.last_update_time;
+#endif
+ __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
+ &se->avg, 0, 0, NULL);
+ se->avg.last_update_time = n_last_update_time;
+ }
+}
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -2689,7 +2780,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
int decayed, removed = 0;
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
- long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+ s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
sa->load_avg = max_t(long, sa->load_avg - r, 0);
sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
removed = 1;
@@ -2809,48 +2900,48 @@ dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
}
-/*
- * Task first catches up with cfs_rq, and then subtract
- * itself from the cfs_rq (task must be off the queue now).
- */
-void remove_entity_load_avg(struct sched_entity *se)
-{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 last_update_time;
-
#ifndef CONFIG_64BIT
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
u64 last_update_time_copy;
+ u64 last_update_time;
do {
last_update_time_copy = cfs_rq->load_last_update_time_copy;
smp_rmb();
last_update_time = cfs_rq->avg.last_update_time;
} while (last_update_time != last_update_time_copy);
-#else
- last_update_time = cfs_rq->avg.last_update_time;
-#endif
- __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
- atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
- atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
+ return last_update_time;
}
-
-/*
- * Update the rq's load with the elapsed running time before entering
- * idle. if the last scheduled task is not a CFS task, idle_enter will
- * be the only way to update the runnable statistic.
- */
-void idle_enter_fair(struct rq *this_rq)
+#else
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
{
+ return cfs_rq->avg.last_update_time;
}
+#endif
/*
- * Update the rq's load with the elapsed idle time before a task is
- * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
- * be the only way to update the runnable statistic.
+ * Task first catches up with cfs_rq, and then subtract
+ * itself from the cfs_rq (task must be off the queue now).
*/
-void idle_exit_fair(struct rq *this_rq)
+void remove_entity_load_avg(struct sched_entity *se)
{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 last_update_time;
+
+ /*
+ * Newly created task or never used group entity should not be removed
+ * from its (source) cfs_rq
+ */
+ if (se->avg.last_update_time == 0)
+ return;
+
+ last_update_time = cfs_rq_last_update_time(cfs_rq);
+
+ __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+ atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
+ atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
}
static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
@@ -4240,42 +4331,37 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
*/
/*
- * The exact cpuload at various idx values, calculated at every tick would be
- * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ * The exact cpuload calculated at every tick would be:
+ *
+ * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
*
- * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
- * on nth tick when cpu may be busy, then we have:
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ * If a cpu misses updates for n ticks (as it was idle) and update gets
+ * called on the n+1-th tick when cpu may be busy, then we have:
+ *
+ * load_n = (1 - 1/2^i)^n * load_0
+ * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
*
* decay_load_missed() below does efficient calculation of
- * load = ((2^idx - 1) / 2^idx)^(n-1) * load
- * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * load' = (1 - 1/2^i)^n * load
+ *
+ * Because x^(n+m) := x^n * x^m we can decompose any x^n in power-of-2 factors.
+ * This allows us to precompute the above in said factors, thereby allowing the
+ * reduction of an arbitrary n in O(log_2 n) steps. (See also
+ * fixed_power_int())
*
* The calculation is approximated on a 128 point scale.
- * degrade_zero_ticks is the number of ticks after which load at any
- * particular idx is approximated to be zero.
- * degrade_factor is a precomputed table, a row for each load idx.
- * Each column corresponds to degradation factor for a power of two ticks,
- * based on 128 point scale.
- * Example:
- * row 2, col 3 (=12) says that the degradation at load idx 2 after
- * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
- *
- * With this power of 2 load factors, we can degrade the load n times
- * by looking at 1 bits in n and doing as many mult/shift instead of
- * n mult/shifts needed by the exact degradation.
*/
#define DEGRADE_SHIFT 7
-static const unsigned char
- degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
-static const unsigned char
- degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
- {0, 0, 0, 0, 0, 0, 0, 0},
- {64, 32, 8, 0, 0, 0, 0, 0},
- {96, 72, 40, 12, 1, 0, 0},
- {112, 98, 75, 43, 15, 1, 0},
- {120, 112, 98, 76, 45, 16, 2} };
+
+static const u8 degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const u8 degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 64, 32, 8, 0, 0, 0, 0, 0 },
+ { 96, 72, 40, 12, 1, 0, 0, 0 },
+ { 112, 98, 75, 43, 15, 1, 0, 0 },
+ { 120, 112, 98, 76, 45, 16, 2, 0 }
+};
/*
* Update cpu_load for any missed ticks, due to tickless idle. The backlog
@@ -4306,14 +4392,46 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
return load;
}
-/*
+/**
+ * __update_cpu_load - update the rq->cpu_load[] statistics
+ * @this_rq: The rq to update statistics for
+ * @this_load: The current load
+ * @pending_updates: The number of missed updates
+ * @active: !0 for NOHZ_FULL
+ *
* Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC). With tickless idle this will not be called
- * every tick. We fix it up based on jiffies.
+ * scheduler tick (TICK_NSEC).
+ *
+ * This function computes a decaying average:
+ *
+ * load[i]' = (1 - 1/2^i) * load[i] + (1/2^i) * load
+ *
+ * Because of NOHZ it might not get called on every tick which gives need for
+ * the @pending_updates argument.
+ *
+ * load[i]_n = (1 - 1/2^i) * load[i]_n-1 + (1/2^i) * load_n-1
+ * = A * load[i]_n-1 + B ; A := (1 - 1/2^i), B := (1/2^i) * load
+ * = A * (A * load[i]_n-2 + B) + B
+ * = A * (A * (A * load[i]_n-3 + B) + B) + B
+ * = A^3 * load[i]_n-3 + (A^2 + A + 1) * B
+ * = A^n * load[i]_0 + (A^(n-1) + A^(n-2) + ... + 1) * B
+ * = A^n * load[i]_0 + ((1 - A^n) / (1 - A)) * B
+ * = (1 - 1/2^i)^n * (load[i]_0 - load) + load
+ *
+ * In the above we've assumed load_n := load, which is true for NOHZ_FULL as
+ * any change in load would have resulted in the tick being turned back on.
+ *
+ * For regular NOHZ, this reduces to:
+ *
+ * load[i]_n = (1 - 1/2^i)^n * load[i]_0
+ *
+ * see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
+ * term. See the @active paramter.
*/
static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates)
+ unsigned long pending_updates, int active)
{
+ unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
int i, scale;
this_rq->nr_load_updates++;
@@ -4325,8 +4443,9 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
/* scale is effectively 1 << i now, and >> i divides by scale */
- old_load = this_rq->cpu_load[i];
+ old_load = this_rq->cpu_load[i] - tickless_load;
old_load = decay_load_missed(old_load, pending_updates - 1, i);
+ old_load += tickless_load;
new_load = this_load;
/*
* Round up the averaging division if load is increasing. This
@@ -4381,16 +4500,17 @@ static void update_idle_cpu_load(struct rq *this_rq)
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
this_rq->last_load_update_tick = curr_jiffies;
- __update_cpu_load(this_rq, load, pending_updates);
+ __update_cpu_load(this_rq, load, pending_updates, 0);
}
/*
* Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
*/
-void update_cpu_load_nohz(void)
+void update_cpu_load_nohz(int active)
{
struct rq *this_rq = this_rq();
unsigned long curr_jiffies = READ_ONCE(jiffies);
+ unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
unsigned long pending_updates;
if (curr_jiffies == this_rq->last_load_update_tick)
@@ -4401,10 +4521,11 @@ void update_cpu_load_nohz(void)
if (pending_updates) {
this_rq->last_load_update_tick = curr_jiffies;
/*
- * We were idle, this means load 0, the current load might be
- * !0 due to remote wakeups and the sort.
+ * In the regular NOHZ case, we were idle, this means load 0.
+ * In the NOHZ_FULL case, we were non-idle, we should consider
+ * its weighted load.
*/
- __update_cpu_load(this_rq, 0, pending_updates);
+ __update_cpu_load(this_rq, load, pending_updates, active);
}
raw_spin_unlock(&this_rq->lock);
}
@@ -4420,7 +4541,7 @@ void update_cpu_load_active(struct rq *this_rq)
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
*/
this_rq->last_load_update_tick = jiffies;
- __update_cpu_load(this_rq, load, 1);
+ __update_cpu_load(this_rq, load, 1, 1);
}
/*
@@ -5007,8 +5128,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
/*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
- * previous cpu. However, the caller only guarantees p->pi_lock is held; no
- * other assumptions, including the state of rq->lock, should be made.
+ * previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
*/
static void migrate_task_rq_fair(struct task_struct *p)
{
@@ -5721,8 +5841,8 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
{
lockdep_assert_held(&env->src_rq->lock);
- deactivate_task(env->src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ deactivate_task(env->src_rq, p, 0);
set_task_cpu(p, env->dst_cpu);
}
@@ -5855,8 +5975,8 @@ static void attach_task(struct rq *rq, struct task_struct *p)
lockdep_assert_held(&rq->lock);
BUG_ON(task_rq(p) != rq);
- p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0);
}
@@ -6302,7 +6422,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
bool *overload)
{
unsigned long load;
- int i;
+ int i, nr_running;
memset(sgs, 0, sizeof(*sgs));
@@ -6319,7 +6439,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_util += cpu_util(i);
sgs->sum_nr_running += rq->cfs.h_nr_running;
- if (rq->nr_running > 1)
+ nr_running = rq->nr_running;
+ if (nr_running > 1)
*overload = true;
#ifdef CONFIG_NUMA_BALANCING
@@ -6327,7 +6448,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
sgs->sum_weighted_load += weighted_cpuload(i);
- if (idle_cpu(i))
+ /*
+ * No need to call idle_cpu() if nr_running is not 0
+ */
+ if (!nr_running && idle_cpu(i))
sgs->idle_cpus++;
}
@@ -7248,8 +7372,6 @@ static int idle_balance(struct rq *this_rq)
int pulled_task = 0;
u64 curr_cost = 0;
- idle_enter_fair(this_rq);
-
/*
* We must set idle_stamp _before_ calling idle_balance(), such that we
* measure the duration of idle_balance() as idle time.
@@ -7330,10 +7452,8 @@ out:
if (this_rq->nr_running != this_rq->cfs.h_nr_running)
pulled_task = -1;
- if (pulled_task) {
- idle_exit_fair(this_rq);
+ if (pulled_task)
this_rq->idle_stamp = 0;
- }
return pulled_task;
}
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index c4ae0f1fdf9b..47ce94931f1b 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -47,7 +47,6 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
- idle_exit_fair(rq);
rq_last_tick_reset(rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b242775bf670..10f16374df7f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -248,7 +248,12 @@ struct task_group {
unsigned long shares;
#ifdef CONFIG_SMP
- atomic_long_t load_avg;
+ /*
+ * load_avg can be heavily contended at clock tick time, so put
+ * it in its own cacheline separated from the fields above which
+ * will also be accessed at each tick.
+ */
+ atomic_long_t load_avg ____cacheline_aligned;
#endif
#endif
@@ -335,7 +340,15 @@ extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
-#endif
+
+#ifdef CONFIG_SMP
+extern void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next);
+#else /* !CONFIG_SMP */
+static inline void set_task_rq_fair(struct sched_entity *se,
+ struct cfs_rq *prev, struct cfs_rq *next) { }
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */
@@ -933,6 +946,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
+ set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu];
#endif
@@ -1076,7 +1090,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
* In particular, the load of prev->state in finish_task_switch() must
* happen before this.
*
- * Pairs with the control dependency and rmb in try_to_wake_up().
+ * Pairs with the smp_cond_acquire() in try_to_wake_up().
*/
smp_store_release(&prev->on_cpu, 0);
#endif
@@ -1113,46 +1127,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WEIGHT_IDLEPRIO 3
#define WMULT_IDLEPRIO 1431655765
-/*
- * Nice levels are multiplicative, with a gentle 10% change for every
- * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
- * nice 1, it will get ~10% less CPU time than another CPU-bound task
- * that remained on nice 0.
- *
- * The "10% effect" is relative and cumulative: from _any_ nice level,
- * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
- * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
- * If a task goes up by ~10% and another task goes down by ~10% then
- * the relative distance between them is ~25%.)
- */
-static const int prio_to_weight[40] = {
- /* -20 */ 88761, 71755, 56483, 46273, 36291,
- /* -15 */ 29154, 23254, 18705, 14949, 11916,
- /* -10 */ 9548, 7620, 6100, 4904, 3906,
- /* -5 */ 3121, 2501, 1991, 1586, 1277,
- /* 0 */ 1024, 820, 655, 526, 423,
- /* 5 */ 335, 272, 215, 172, 137,
- /* 10 */ 110, 87, 70, 56, 45,
- /* 15 */ 36, 29, 23, 18, 15,
-};
-
-/*
- * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
- *
- * In cases where the weight does not change often, we can use the
- * precalculated inverse to speed up arithmetics by turning divisions
- * into multiplications:
- */
-static const u32 prio_to_wmult[40] = {
- /* -20 */ 48388, 59856, 76040, 92818, 118348,
- /* -15 */ 147320, 184698, 229616, 287308, 360437,
- /* -10 */ 449829, 563644, 704093, 875809, 1099582,
- /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
- /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
- /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
- /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
- /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
-};
+extern const int sched_prio_to_weight[40];
+extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_HEAD 0x02
@@ -1252,16 +1228,8 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq);
-extern void idle_enter_fair(struct rq *this_rq);
-extern void idle_exit_fair(struct rq *this_rq);
-
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
-#else
-
-static inline void idle_enter_fair(struct rq *rq) { }
-static inline void idle_exit_fair(struct rq *rq) { }
-
#endif
#ifdef CONFIG_CPU_IDLE
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index a3bbaee77c58..edb6de4f5908 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -28,7 +28,6 @@
*/
struct cpu_stop_done {
atomic_t nr_todo; /* nr left to execute */
- bool executed; /* actually executed? */
int ret; /* collected return value */
struct completion completion; /* fired if nr_todo reaches 0 */
};
@@ -63,14 +62,10 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
}
/* signal completion unless @done is NULL */
-static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
+static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
- if (done) {
- if (executed)
- done->executed = true;
- if (atomic_dec_and_test(&done->nr_todo))
- complete(&done->completion);
- }
+ if (atomic_dec_and_test(&done->nr_todo))
+ complete(&done->completion);
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
@@ -81,17 +76,21 @@ static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
}
/* queue @work to @stopper. if offline, @work is completed immediately */
-static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
+static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
unsigned long flags;
+ bool enabled;
spin_lock_irqsave(&stopper->lock, flags);
- if (stopper->enabled)
+ enabled = stopper->enabled;
+ if (enabled)
__cpu_stop_queue_work(stopper, work);
- else
- cpu_stop_signal_done(work->done, false);
+ else if (work->done)
+ cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags);
+
+ return enabled;
}
/**
@@ -124,9 +123,10 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
cpu_stop_init_done(&done, 1);
- cpu_stop_queue_work(cpu, &work);
+ if (!cpu_stop_queue_work(cpu, &work))
+ return -ENOENT;
wait_for_completion(&done.completion);
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
/* This controls the threads on each CPU. */
@@ -258,7 +258,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
struct cpu_stop_work work1, work2;
struct multi_stop_data msdata;
- preempt_disable();
msdata = (struct multi_stop_data){
.fn = fn,
.data = arg,
@@ -277,16 +276,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
if (cpu1 > cpu2)
swap(cpu1, cpu2);
- if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) {
- preempt_enable();
+ if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
return -ENOENT;
- }
-
- preempt_enable();
wait_for_completion(&done.completion);
-
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
/**
@@ -302,23 +296,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
*
* CONTEXT:
* Don't care.
+ *
+ * RETURNS:
+ * true if cpu_stop_work was queued successfully and @fn will be called,
+ * false otherwise.
*/
-void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
- cpu_stop_queue_work(cpu, work_buf);
+ return cpu_stop_queue_work(cpu, work_buf);
}
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
-static void queue_stop_cpus_work(const struct cpumask *cpumask,
+static bool queue_stop_cpus_work(const struct cpumask *cpumask,
cpu_stop_fn_t fn, void *arg,
struct cpu_stop_done *done)
{
struct cpu_stop_work *work;
unsigned int cpu;
+ bool queued = false;
/*
* Disable preemption while queueing to avoid getting
@@ -331,9 +330,12 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
work->fn = fn;
work->arg = arg;
work->done = done;
- cpu_stop_queue_work(cpu, work);
+ if (cpu_stop_queue_work(cpu, work))
+ queued = true;
}
lg_global_unlock(&stop_cpus_lock);
+
+ return queued;
}
static int __stop_cpus(const struct cpumask *cpumask,
@@ -342,9 +344,10 @@ static int __stop_cpus(const struct cpumask *cpumask,
struct cpu_stop_done done;
cpu_stop_init_done(&done, cpumask_weight(cpumask));
- queue_stop_cpus_work(cpumask, fn, arg, &done);
+ if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
+ return -ENOENT;
wait_for_completion(&done.completion);
- return done.executed ? done.ret : -ENOENT;
+ return done.ret;
}
/**
@@ -432,7 +435,6 @@ static void cpu_stopper_thread(unsigned int cpu)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
struct cpu_stop_work *work;
- int ret;
repeat:
work = NULL;
@@ -448,23 +450,19 @@ repeat:
cpu_stop_fn_t fn = work->fn;
void *arg = work->arg;
struct cpu_stop_done *done = work->done;
- char ksym_buf[KSYM_NAME_LEN] __maybe_unused;
-
- /* cpu stop callbacks are not allowed to sleep */
- preempt_disable();
+ int ret;
+ /* cpu stop callbacks must not sleep, make in_atomic() == T */
+ preempt_count_inc();
ret = fn(arg);
- if (ret)
- done->ret = ret;
-
- /* restore preemption and check it's still balanced */
- preempt_enable();
+ if (done) {
+ if (ret)
+ done->ret = ret;
+ cpu_stop_signal_done(done);
+ }
+ preempt_count_dec();
WARN_ONCE(preempt_count(),
- "cpu_stop: %s(%p) leaked preempt count\n",
- kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
- ksym_buf), arg);
-
- cpu_stop_signal_done(done, true);
+ "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
goto repeat;
}
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7fbba635a549..e840ed867a5d 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -271,11 +271,27 @@ static int alarmtimer_suspend(struct device *dev)
__pm_wakeup_event(ws, MSEC_PER_SEC);
return ret;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = alarmtimer_get_rtcdev();
+ if (rtc)
+ rtc_timer_cancel(rtc, &rtctimer);
+ return 0;
+}
+
#else
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ return 0;
+}
#endif
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
@@ -800,6 +816,7 @@ out:
/* Suspend hook structures */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
+ .resume = alarmtimer_resume,
};
static struct platform_driver alarmtimer_driver = {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1347882d131e..664de539299b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -218,8 +218,8 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
- pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
- cs->name);
+ pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
+ smp_processor_id(), cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 149cc8086aea..36f2ca09aa5e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,8 +16,11 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rtc.h>
+#include <linux/math64.h>
#include "ntp_internal.h"
+#include "timekeeping_internal.h"
+
/*
* NTP timekeeping variables:
@@ -70,7 +73,7 @@ static long time_esterror = NTP_PHASE_LIMIT;
static s64 time_freq;
/* time at last adjustment (secs): */
-static long time_reftime;
+static time64_t time_reftime;
static long time_adjust;
@@ -297,25 +300,27 @@ static void ntp_update_offset(long offset)
if (!(time_status & STA_PLL))
return;
- if (!(time_status & STA_NANO))
+ if (!(time_status & STA_NANO)) {
+ /* Make sure the multiplication below won't overflow */
+ offset = clamp(offset, -USEC_PER_SEC, USEC_PER_SEC);
offset *= NSEC_PER_USEC;
+ }
/*
* Scale the phase adjustment and
* clamp to the operating range.
*/
- offset = min(offset, MAXPHASE);
- offset = max(offset, -MAXPHASE);
+ offset = clamp(offset, -MAXPHASE, MAXPHASE);
/*
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
- secs = get_seconds() - time_reftime;
+ secs = (long)(__ktime_get_real_seconds() - time_reftime);
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
- time_reftime = get_seconds();
+ time_reftime = __ktime_get_real_seconds();
offset64 = offset;
freq_adj = ntp_update_offset_fll(offset64, secs);
@@ -390,10 +395,11 @@ ktime_t ntp_get_next_leap(void)
*
* Also handles leap second processing, and returns leap offset
*/
-int second_overflow(unsigned long secs)
+int second_overflow(time64_t secs)
{
s64 delta;
int leap = 0;
+ s32 rem;
/*
* Leap second processing. If in leap-insert state at the end of the
@@ -404,19 +410,19 @@ int second_overflow(unsigned long secs)
case TIME_OK:
if (time_status & STA_INS) {
time_state = TIME_INS;
- ntp_next_leap_sec = secs + SECS_PER_DAY -
- (secs % SECS_PER_DAY);
+ div_s64_rem(secs, SECS_PER_DAY, &rem);
+ ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
} else if (time_status & STA_DEL) {
time_state = TIME_DEL;
- ntp_next_leap_sec = secs + SECS_PER_DAY -
- ((secs+1) % SECS_PER_DAY);
+ div_s64_rem(secs + 1, SECS_PER_DAY, &rem);
+ ntp_next_leap_sec = secs + SECS_PER_DAY - rem;
}
break;
case TIME_INS:
if (!(time_status & STA_INS)) {
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- } else if (secs % SECS_PER_DAY == 0) {
+ } else if (secs == ntp_next_leap_sec) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
@@ -427,7 +433,7 @@ int second_overflow(unsigned long secs)
if (!(time_status & STA_DEL)) {
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_OK;
- } else if ((secs + 1) % SECS_PER_DAY == 0) {
+ } else if (secs == ntp_next_leap_sec) {
leap = 1;
ntp_next_leap_sec = TIME64_MAX;
time_state = TIME_WAIT;
@@ -590,7 +596,7 @@ static inline void process_adj_status(struct timex *txc, struct timespec64 *ts)
* reference time to current time.
*/
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
- time_reftime = get_seconds();
+ time_reftime = __ktime_get_real_seconds();
/* only set allowed bits */
time_status &= STA_RONLY;
@@ -674,8 +680,14 @@ int ntp_validate_timex(struct timex *txc)
return -EINVAL;
}
- if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
- return -EPERM;
+ if (txc->modes & ADJ_SETOFFSET) {
+ /* In order to inject time, you gotta be super-user! */
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
+
+ if (!timeval_inject_offset_valid(&txc->time))
+ return -EINVAL;
+ }
/*
* Check for potential multiplication overflows that can
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index af924470eac0..d8a7c11fa71a 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -6,7 +6,7 @@ extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);
extern ktime_t ntp_get_next_leap(void);
-extern int second_overflow(unsigned long secs);
+extern int second_overflow(time64_t secs);
extern int ntp_validate_timex(struct timex *);
extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7aa2e8..9cff0ab82b63 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf,
static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
{
struct posix_clock *clk = get_posix_clock(fp);
- int result = 0;
+ unsigned int result = 0;
if (!clk)
- return -ENODEV;
+ return POLLERR;
if (clk->ops.poll)
result = clk->ops.poll(clk, fp, wait);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7c7ec4515983..9cc20af58c76 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -143,7 +143,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
}
@@ -430,7 +430,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
tick_do_update_jiffies64(now);
local_irq_restore(flags);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
/*
@@ -603,15 +603,31 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
/*
* If the tick is due in the next period, keep it ticking or
- * restart it proper.
+ * force prod the timer.
*/
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
tick.tv64 = 0;
+ /*
+ * We've not stopped the tick yet, and there's a timer in the
+ * next period, so no point in stopping it either, bail.
+ */
if (!ts->tick_stopped)
goto out;
+
+ /*
+ * If, OTOH, we did stop it, but there's a pending (expired)
+ * timer reprogram the timer hardware to fire now.
+ *
+ * We will not restart the tick proper, just prod the timer
+ * hardware into firing an interrupt to process the pending
+ * timers. Just like tick_irq_exit() will not restart the tick
+ * for 'normal' interrupts.
+ *
+ * Only once we exit the idle loop will we re-enable the tick,
+ * see tick_nohz_idle_exit().
+ */
if (delta == 0) {
- /* Tick is stopped, but required now. Enforce it */
tick_nohz_restart(ts, now);
goto out;
}
@@ -694,14 +710,14 @@ out:
return tick;
}
-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- update_cpu_load_nohz();
+ update_cpu_load_nohz(active);
calc_load_exit_idle();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
@@ -725,7 +741,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (can_stop_full_tick())
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get());
+ tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
#endif
}
@@ -875,7 +891,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
- if (vtime_accounting_enabled())
+ if (vtime_accounting_cpu_enabled())
return;
/*
* We stopped the tick in idle. Update process times would miss the
@@ -916,7 +932,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, now);
+ tick_nohz_restart_sched_tick(ts, now, 0);
tick_nohz_account_idle_ticks(ts);
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d563c1960302..34b4cedfa80d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -305,8 +305,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
delta = timekeeping_get_delta(tkr);
- nsec = delta * tkr->mult + tkr->xtime_nsec;
- nsec >>= tkr->shift;
+ nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
/* If arch requires, add in get_arch_timeoffset() */
return nsec + arch_gettimeoffset();
@@ -846,6 +845,19 @@ time64_t ktime_get_real_seconds(void)
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
+/**
+ * __ktime_get_real_seconds - The same as ktime_get_real_seconds
+ * but without the sequence counter protect. This internal function
+ * is called just when timekeeping lock is already held.
+ */
+time64_t __ktime_get_real_seconds(void)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ return tk->xtime_sec;
+}
+
+
#ifdef CONFIG_NTP_PPS
/**
@@ -959,7 +971,7 @@ int timekeeping_inject_offset(struct timespec *ts)
struct timespec64 ts64, tmp;
int ret = 0;
- if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ if (!timespec_inject_offset_valid(ts))
return -EINVAL;
ts64 = timespec_to_timespec64(*ts);
@@ -1592,9 +1604,12 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
{
s64 interval = tk->cycle_interval;
s64 xinterval = tk->xtime_interval;
+ u32 base = tk->tkr_mono.clock->mult;
+ u32 max = tk->tkr_mono.clock->maxadj;
+ u32 cur_adj = tk->tkr_mono.mult;
s64 tick_error;
bool negative;
- u32 adj;
+ u32 adj_scale;
/* Remove any current error adj from freq calculation */
if (tk->ntp_err_mult)
@@ -1613,13 +1628,33 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
/* preserve the direction of correction */
negative = (tick_error < 0);
- /* Sort out the magnitude of the correction */
+ /* If any adjustment would pass the max, just return */
+ if (negative && (cur_adj - 1) <= (base - max))
+ return;
+ if (!negative && (cur_adj + 1) >= (base + max))
+ return;
+ /*
+ * Sort out the magnitude of the correction, but
+ * avoid making so large a correction that we go
+ * over the max adjustment.
+ */
+ adj_scale = 0;
tick_error = abs(tick_error);
- for (adj = 0; tick_error > interval; adj++)
+ while (tick_error > interval) {
+ u32 adj = 1 << (adj_scale + 1);
+
+ /* Check if adjustment gets us within 1 unit from the max */
+ if (negative && (cur_adj - adj) <= (base - max))
+ break;
+ if (!negative && (cur_adj + adj) >= (base + max))
+ break;
+
+ adj_scale++;
tick_error >>= 1;
+ }
/* scale the corrections */
- timekeeping_apply_adjustment(tk, offset, negative, adj);
+ timekeeping_apply_adjustment(tk, offset, negative, adj_scale);
}
/*
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ea005a7f9da..5be76270ec4a 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -17,7 +17,11 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
cycle_t ret = (now - last) & mask;
- return (s64) ret > 0 ? ret : 0;
+ /*
+ * Prevent time going backwards by checking the MSB of mask in
+ * the result. If set, return 0.
+ */
+ return ret & ~(mask >> 1) ? 0 : ret;
}
#else
static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
@@ -26,4 +30,6 @@ static inline cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
}
#endif
+extern time64_t __ktime_get_real_seconds(void);
+
#endif /* _TIMEKEEPING_INTERNAL_H */
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 1c2b28536feb..060df67dbdd1 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -273,6 +273,7 @@ static const char **find_next(void *v, loff_t *pos)
if (*pos < last_index + start_index)
return __start___tracepoint_str + (*pos - last_index);
+ start_index += last_index;
return find_next_mod_format(start_index, v, fmt, pos);
}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 18f34cf75f74..b3ace6ebbba3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -20,6 +20,7 @@
#include <linux/smpboot.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
+#include <linux/workqueue.h>
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
@@ -225,7 +226,15 @@ static void __touch_watchdog(void)
__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
-void touch_softlockup_watchdog(void)
+/**
+ * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
+ *
+ * Call when the scheduler may have stalled for legitimate reasons
+ * preventing the watchdog task from executing - e.g. the scheduler
+ * entering idle state. This should only be used for scheduler events.
+ * Use touch_softlockup_watchdog() for everything else.
+ */
+void touch_softlockup_watchdog_sched(void)
{
/*
* Preemption can be enabled. It doesn't matter which CPU's timestamp
@@ -233,6 +242,12 @@ void touch_softlockup_watchdog(void)
*/
raw_cpu_write(watchdog_touch_ts, 0);
}
+
+void touch_softlockup_watchdog(void)
+{
+ touch_softlockup_watchdog_sched();
+ wq_watchdog_touch(raw_smp_processor_id());
+}
EXPORT_SYMBOL(touch_softlockup_watchdog);
void touch_all_softlockup_watchdogs(void)
@@ -246,6 +261,7 @@ void touch_all_softlockup_watchdogs(void)
*/
for_each_watchdog_cpu(cpu)
per_cpu(watchdog_touch_ts, cpu) = 0;
+ wq_watchdog_touch(-1);
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR
@@ -351,7 +367,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
- panic("Hard LOCKUP");
+ nmi_panic(regs, "Hard LOCKUP");
__this_cpu_write(hard_watchdog_warn, true);
return;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c579dbab2e36..61a0264e28f9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -148,6 +148,8 @@ struct worker_pool {
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
+ unsigned long watchdog_ts; /* L: watchdog timestamp */
+
struct list_head worklist; /* L: list of pending works */
int nr_workers; /* L: total number of workers */
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
struct pool_workqueue *pwq = get_work_pwq(work);
trace_workqueue_activate_work(work);
+ if (list_empty(&pwq->pool->worklist))
+ pwq->pool->watchdog_ts = jiffies;
move_linked_works(work, &pwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
pwq->nr_active++;
@@ -1385,6 +1389,8 @@ retry:
trace_workqueue_activate_work(work);
pwq->nr_active++;
worklist = &pwq->pool->worklist;
+ if (list_empty(worklist))
+ pwq->pool->watchdog_ts = jiffies;
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &pwq->delayed_works;
@@ -2157,6 +2163,8 @@ recheck:
list_first_entry(&pool->worklist,
struct work_struct, entry);
+ pool->watchdog_ts = jiffies;
+
if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
/* optimization path, not strictly necessary */
process_one_work(worker, work);
@@ -2240,6 +2248,7 @@ repeat:
struct pool_workqueue, mayday_node);
struct worker_pool *pool = pwq->pool;
struct work_struct *work, *n;
+ bool first = true;
__set_current_state(TASK_RUNNING);
list_del_init(&pwq->mayday_node);
@@ -2256,9 +2265,14 @@ repeat:
* process'em.
*/
WARN_ON_ONCE(!list_empty(scheduled));
- list_for_each_entry_safe(work, n, &pool->worklist, entry)
- if (get_work_pwq(work) == pwq)
+ list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+ if (get_work_pwq(work) == pwq) {
+ if (first)
+ pool->watchdog_ts = jiffies;
move_linked_works(work, scheduled, &n);
+ }
+ first = false;
+ }
if (!list_empty(scheduled)) {
process_scheduled_works(rescuer);
@@ -2316,6 +2330,37 @@ repeat:
goto repeat;
}
+/**
+ * check_flush_dependency - check for flush dependency sanity
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL for workqueue flushes)
+ *
+ * %current is trying to flush the whole @target_wq or @target_work on it.
+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
+ * reclaiming memory or running on a workqueue which doesn't have
+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
+ * a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+ struct work_struct *target_work)
+{
+ work_func_t target_func = target_work ? target_work->func : NULL;
+ struct worker *worker;
+
+ if (target_wq->flags & WQ_MEM_RECLAIM)
+ return;
+
+ worker = current_wq_worker();
+
+ WARN_ONCE(current->flags & PF_MEMALLOC,
+ "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+ current->pid, current->comm, target_wq->name, target_func);
+ WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+ "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+ worker->current_pwq->wq->name, worker->current_func,
+ target_wq->name, target_func);
+}
+
struct wq_barrier {
struct work_struct work;
struct completion done;
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
}
+ check_flush_dependency(wq, NULL);
+
mutex_unlock(&wq->mutex);
wait_for_completion(&this_flusher.done);
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
pwq = worker->current_pwq;
}
+ check_flush_dependency(pwq->wq, work);
+
insert_wq_barrier(pwq, barr, work, worker);
spin_unlock_irq(&pool->lock);
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
pool->cpu = -1;
pool->node = NUMA_NO_NODE;
pool->flags |= POOL_DISASSOCIATED;
+ pool->watchdog_ts = jiffies;
INIT_LIST_HEAD(&pool->worklist);
INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs)
{
struct apply_wqattrs_ctx *ctx;
- int ret = -ENOMEM;
/* only unbound workqueues can change attributes */
if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
return -EINVAL;
ctx = apply_wqattrs_prepare(wq, attrs);
+ if (!ctx)
+ return -ENOMEM;
/* the ctx has been prepared successfully, let's commit it */
- if (ctx) {
- apply_wqattrs_commit(ctx);
- ret = 0;
- }
-
+ apply_wqattrs_commit(ctx);
apply_wqattrs_cleanup(ctx);
- return ret;
+ return 0;
}
/**
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
pr_info("pool %d:", pool->id);
pr_cont_pool_info(pool);
- pr_cont(" workers=%d", pool->nr_workers);
+ pr_cont(" hung=%us workers=%d",
+ jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+ pool->nr_workers);
if (pool->manager)
pr_cont(" manager: %d",
task_pid_nr(pool->manager->task));
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
#endif /* CONFIG_SYSFS */
+/*
+ * Workqueue watchdog.
+ *
+ * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
+ * flush dependency, a concurrency managed work item which stays RUNNING
+ * indefinitely. Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+ TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+ int cpu;
+
+ wq_watchdog_touched = jiffies;
+ for_each_possible_cpu(cpu)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+ unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+ bool lockup_detected = false;
+ struct worker_pool *pool;
+ int pi;
+
+ if (!thresh)
+ return;
+
+ rcu_read_lock();
+
+ for_each_pool(pool, pi) {
+ unsigned long pool_ts, touched, ts;
+
+ if (list_empty(&pool->worklist))
+ continue;
+
+ /* get the latest of pool and touched timestamps */
+ pool_ts = READ_ONCE(pool->watchdog_ts);
+ touched = READ_ONCE(wq_watchdog_touched);
+
+ if (time_after(pool_ts, touched))
+ ts = pool_ts;
+ else
+ ts = touched;
+
+ if (pool->cpu >= 0) {
+ unsigned long cpu_touched =
+ READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+ pool->cpu));
+ if (time_after(cpu_touched, ts))
+ ts = cpu_touched;
+ }
+
+ /* did we stall? */
+ if (time_after(jiffies, ts + thresh)) {
+ lockup_detected = true;
+ pr_emerg("BUG: workqueue lockup - pool");
+ pr_cont_pool_info(pool);
+ pr_cont(" stuck for %us!\n",
+ jiffies_to_msecs(jiffies - pool_ts) / 1000);
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (lockup_detected)
+ show_workqueue_state();
+
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+ if (cpu >= 0)
+ per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+ else
+ wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+ wq_watchdog_thresh = 0;
+ del_timer_sync(&wq_watchdog_timer);
+
+ if (thresh) {
+ wq_watchdog_thresh = thresh;
+ wq_watchdog_reset_touched();
+ mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+ }
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+ const struct kernel_param *kp)
+{
+ unsigned long thresh;
+ int ret;
+
+ ret = kstrtoul(val, 0, &thresh);
+ if (ret)
+ return ret;
+
+ if (system_wq)
+ wq_watchdog_set_thresh(thresh);
+ else
+ wq_watchdog_thresh = thresh;
+
+ return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+ .set = wq_watchdog_param_set_thresh,
+ .get = param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+ 0644);
+
+static void wq_watchdog_init(void)
+{
+ wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else /* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif /* CONFIG_WQ_WATCHDOG */
+
static void __init wq_numa_init(void)
{
cpumask_var_t *tbl;
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
!system_unbound_wq || !system_freezable_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq);
+
+ wq_watchdog_init();
+
return 0;
}
early_initcall(init_workqueues);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8c15b29d5adc..c98e93c0a084 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -812,6 +812,17 @@ config BOOTPARAM_HUNG_TASK_PANIC_VALUE
default 0 if !BOOTPARAM_HUNG_TASK_PANIC
default 1 if BOOTPARAM_HUNG_TASK_PANIC
+config WQ_WATCHDOG
+ bool "Detect Workqueue Stalls"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here to enable stall detection on workqueues. If a
+ worker pool doesn't make forward progress on a pending work
+ item for over a given amount of time, 30s by default, a
+ warning message is printed along with dump of workqueue
+ state. This can be configured through kernel parameter
+ "workqueue.watchdog_thresh" and its sysfs counterpart.
+
endmenu # "Debug lockups and hangs"
config PANIC_ON_OOPS
@@ -1523,8 +1534,7 @@ config FAIL_IO_TIMEOUT
config FAIL_MMC_REQUEST
bool "Fault-injection capability for MMC IO"
- select DEBUG_FS
- depends on FAULT_INJECTION && MMC
+ depends on FAULT_INJECTION_DEBUG_FS && MMC
help
Provide fault-injection capability for MMC IO.
This will make the mmc core return data errors. This is
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 83c33a5bcffb..d62de8bf022d 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -16,6 +16,10 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
+#ifdef CONFIG_X86
+#include <asm/processor.h> /* for boot_cpu_has below */
+#endif
+
#define TEST(bit, op, c_op, val) \
do { \
atomic##bit##_set(&v, v0); \
@@ -27,6 +31,65 @@ do { \
(unsigned long long)r); \
} while (0)
+/*
+ * Test for a atomic operation family,
+ * @test should be a macro accepting parameters (bit, op, ...)
+ */
+
+#define FAMILY_TEST(test, bit, op, args...) \
+do { \
+ test(bit, op, ##args); \
+ test(bit, op##_acquire, ##args); \
+ test(bit, op##_release, ##args); \
+ test(bit, op##_relaxed, ##args); \
+} while (0)
+
+#define TEST_RETURN(bit, op, c_op, val) \
+do { \
+ atomic##bit##_set(&v, v0); \
+ r = v0; \
+ r c_op val; \
+ BUG_ON(atomic##bit##_##op(val, &v) != r); \
+ BUG_ON(atomic##bit##_read(&v) != r); \
+} while (0)
+
+#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
+do { \
+ FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
+} while (0)
+
+#define TEST_ARGS(bit, op, init, ret, expect, args...) \
+do { \
+ atomic##bit##_set(&v, init); \
+ BUG_ON(atomic##bit##_##op(&v, ##args) != ret); \
+ BUG_ON(atomic##bit##_read(&v) != expect); \
+} while (0)
+
+#define XCHG_FAMILY_TEST(bit, init, new) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, xchg, init, init, new, new); \
+} while (0)
+
+#define CMPXCHG_FAMILY_TEST(bit, init, new, wrong) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
+ init, init, new, init, new); \
+ FAMILY_TEST(TEST_ARGS, bit, cmpxchg, \
+ init, init, init, wrong, new); \
+} while (0)
+
+#define INC_RETURN_FAMILY_TEST(bit, i) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, inc_return, \
+ i, (i) + one, (i) + one); \
+} while (0)
+
+#define DEC_RETURN_FAMILY_TEST(bit, i) \
+do { \
+ FAMILY_TEST(TEST_ARGS, bit, dec_return, \
+ i, (i) - one, (i) - one); \
+} while (0)
+
static __init void test_atomic(void)
{
int v0 = 0xaaa31337;
@@ -45,6 +108,18 @@ static __init void test_atomic(void)
TEST(, and, &=, v1);
TEST(, xor, ^=, v1);
TEST(, andnot, &= ~, v1);
+
+ RETURN_FAMILY_TEST(, add_return, +=, onestwos);
+ RETURN_FAMILY_TEST(, add_return, +=, -one);
+ RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
+ RETURN_FAMILY_TEST(, sub_return, -=, -one);
+
+ INC_RETURN_FAMILY_TEST(, v0);
+ DEC_RETURN_FAMILY_TEST(, v0);
+
+ XCHG_FAMILY_TEST(, v0, v1);
+ CMPXCHG_FAMILY_TEST(, v0, v1, onestwos);
+
}
#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
@@ -74,25 +149,10 @@ static __init void test_atomic64(void)
TEST(64, xor, ^=, v1);
TEST(64, andnot, &= ~, v1);
- INIT(v0);
- r += onestwos;
- BUG_ON(atomic64_add_return(onestwos, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r += -one;
- BUG_ON(atomic64_add_return(-one, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r -= onestwos;
- BUG_ON(atomic64_sub_return(onestwos, &v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
- r -= -one;
- BUG_ON(atomic64_sub_return(-one, &v) != r);
- BUG_ON(v.counter != r);
+ RETURN_FAMILY_TEST(64, add_return, +=, onestwos);
+ RETURN_FAMILY_TEST(64, add_return, +=, -one);
+ RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
+ RETURN_FAMILY_TEST(64, sub_return, -=, -one);
INIT(v0);
atomic64_inc(&v);
@@ -100,33 +160,15 @@ static __init void test_atomic64(void)
BUG_ON(v.counter != r);
INIT(v0);
- r += one;
- BUG_ON(atomic64_inc_return(&v) != r);
- BUG_ON(v.counter != r);
-
- INIT(v0);
atomic64_dec(&v);
r -= one;
BUG_ON(v.counter != r);
- INIT(v0);
- r -= one;
- BUG_ON(atomic64_dec_return(&v) != r);
- BUG_ON(v.counter != r);
+ INC_RETURN_FAMILY_TEST(64, v0);
+ DEC_RETURN_FAMILY_TEST(64, v0);
- INIT(v0);
- BUG_ON(atomic64_xchg(&v, v1) != v0);
- r = v1;
- BUG_ON(v.counter != r);
-
- INIT(v0);
- BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
- r = v1;
- BUG_ON(v.counter != r);
-
- INIT(v0);
- BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
- BUG_ON(v.counter != r);
+ XCHG_FAMILY_TEST(64, v0, v1);
+ CMPXCHG_FAMILY_TEST(64, v0, v1, v2);
INIT(v0);
BUG_ON(atomic64_add_unless(&v, one, v0));
diff --git a/lib/list_debug.c b/lib/list_debug.c
index c24c2f7e296f..3859bf63561c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -37,7 +37,7 @@ void __list_add(struct list_head *new,
next->prev = new;
new->next = next;
new->prev = prev;
- prev->next = new;
+ WRITE_ONCE(prev->next, new);
}
EXPORT_SYMBOL(__list_add);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 3b6380784c28..91e32bc8517f 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -33,6 +33,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
+unsigned long long max_possible_pfn;
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
diff --git a/mm/mremap.c b/mm/mremap.c
index c25bc6268e46..de824e72c3e8 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -319,6 +319,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
+ /* Tell pfnmap has moved from this vma */
+ if (unlikely(vma->vm_flags & VM_PFNMAP))
+ untrack_pfn_moved(vma);
+
if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index e57cf24babd6..99feb2b07fc5 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(contig_page_data);
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
+unsigned long long max_possible_pfn;
static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)
diff --git a/mm/shmem.c b/mm/shmem.c
index 2afcdbbdb685..5813b7fa85b6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2438,7 +2438,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
int len;
struct inode *inode;
struct page *page;
- char *kaddr;
struct shmem_inode_info *info;
len = strlen(symname) + 1;
@@ -2477,9 +2476,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
}
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
- kaddr = kmap_atomic(page);
- memcpy(kaddr, symname, len);
- kunmap_atomic(kaddr);
+ inode_nohighmem(inode);
+ memcpy(page_address(page), symname, len);
SetPageUptodate(page);
set_page_dirty(page);
unlock_page(page);
@@ -2492,23 +2490,34 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
return 0;
}
-static const char *shmem_follow_link(struct dentry *dentry, void **cookie)
+static void shmem_put_link(void *arg)
{
- struct page *page = NULL;
- int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
- if (error)
- return ERR_PTR(error);
- unlock_page(page);
- *cookie = page;
- return kmap(page);
+ mark_page_accessed(arg);
+ put_page(arg);
}
-static void shmem_put_link(struct inode *unused, void *cookie)
+static const char *shmem_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
{
- struct page *page = cookie;
- kunmap(page);
- mark_page_accessed(page);
- page_cache_release(page);
+ struct page *page = NULL;
+ int error;
+ if (!dentry) {
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+ if (!PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+ } else {
+ error = shmem_getpage(inode, 0, &page, SGP_READ, NULL);
+ if (error)
+ return ERR_PTR(error);
+ unlock_page(page);
+ }
+ set_delayed_call(done, shmem_put_link, page);
+ return page_address(page);
}
#ifdef CONFIG_TMPFS_XATTR
@@ -2555,122 +2564,74 @@ static int shmem_initxattrs(struct inode *inode,
return 0;
}
-static const struct xattr_handler *shmem_xattr_handlers[] = {
-#ifdef CONFIG_TMPFS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
- NULL
-};
-
-static int shmem_xattr_validate(const char *name)
-{
- struct { const char *prefix; size_t len; } arr[] = {
- { XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN },
- { XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN }
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(arr); i++) {
- size_t preflen = arr[i].len;
- if (strncmp(name, arr[i].prefix, preflen) == 0) {
- if (!name[preflen])
- return -EINVAL;
- return 0;
- }
- }
- return -EOPNOTSUPP;
-}
-
-static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
+static int shmem_xattr_handler_get(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
-
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+ name = xattr_full_name(handler, name);
return simple_xattr_get(&info->xattrs, name, buffer, size);
}
-static int shmem_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
+static int shmem_xattr_handler_set(const struct xattr_handler *handler,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+ name = xattr_full_name(handler, name);
return simple_xattr_set(&info->xattrs, name, value, size, flags);
}
-static int shmem_removexattr(struct dentry *dentry, const char *name)
-{
- struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- int err;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
+static const struct xattr_handler shmem_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
- err = shmem_xattr_validate(name);
- if (err)
- return err;
+static const struct xattr_handler shmem_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
- return simple_xattr_remove(&info->xattrs, name);
-}
+static const struct xattr_handler *shmem_xattr_handlers[] = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+#endif
+ &shmem_security_xattr_handler,
+ &shmem_trusted_xattr_handler,
+ NULL
+};
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(&info->xattrs, buffer, size);
+ return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
static const struct inode_operations shmem_short_symlink_operations = {
.readlink = generic_readlink,
- .follow_link = simple_follow_link,
+ .get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
};
static const struct inode_operations shmem_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = shmem_follow_link,
- .put_link = shmem_put_link,
+ .get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
};
@@ -3142,10 +3103,10 @@ static const struct inode_operations shmem_inode_operations = {
.getattr = shmem_getattr,
.setattr = shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
.set_acl = simple_set_acl,
#endif
};
@@ -3164,10 +3125,10 @@ static const struct inode_operations shmem_dir_inode_operations = {
.tmpfile = shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
.setattr = shmem_setattr,
@@ -3177,10 +3138,10 @@ static const struct inode_operations shmem_dir_inode_operations = {
static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_XATTR
- .setxattr = shmem_setxattr,
- .getxattr = shmem_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = shmem_listxattr,
- .removexattr = shmem_removexattr,
+ .removexattr = generic_removexattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
.setattr = shmem_setattr,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4ebc17d948cb..c54fd2924f25 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1483,6 +1483,7 @@ static void __init start_shepherd_timer(void)
BUG();
cpumask_copy(cpu_stat_off, cpu_online_mask);
+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
schedule_delayed_work(&shepherd,
round_jiffies_relative(sysctl_stat_interval));
}
@@ -1550,7 +1551,6 @@ static int __init setup_vmstat(void)
start_shepherd_timer();
cpu_notifier_register_done();
- vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 12045dea276c..8a7ada8bb947 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -142,7 +142,10 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
struct net_bridge_port *p;
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (net_eq(dev_net(br->dev), &init_net))
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ else
+ r = -ENOENT;
spin_lock_bh(&br->lock);
diff --git a/net/core/dst.c b/net/core/dst.c
index e6dc77252fe9..a1656e3b8d72 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -301,12 +301,13 @@ void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
+ unsigned short nocache = dst->flags & DST_NOCACHE;
newrefcnt = atomic_dec_return(&dst->__refcnt);
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
+ if (!newrefcnt && unlikely(nocache))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 63e5be0abd86..bc35f1842512 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -601,8 +601,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, &fl4);
+ if (err < 0)
+ goto done;
+ }
if (!inet->hdrincl) {
rfv.msg = msg;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2d656eef7f8e..d4c51158470f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2478,6 +2478,9 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
int newly_acked_sacked = prior_unsacked -
(tp->packets_out - tp->sacked_out);
+ if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
+ return;
+
tp->prr_delivered += newly_acked_sacked;
if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0c7b0e61b917..c43890848641 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1025,8 +1025,11 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flow_flags,
faddr, saddr, dport, inet->inet_sport);
- if (!saddr && ipc.oif)
- l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (!saddr && ipc.oif) {
+ err = l3mdev_get_saddr(net, ipc.oif, fl4);
+ if (err < 0)
+ goto out;
+ }
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e82a1ad80aa5..16bc83b2842a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -658,8 +658,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a4631477cedf..ef05cd9403d4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -953,32 +953,20 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+ struct path *res)
{
- struct dentry *dentry;
- struct path path;
- int err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- return err;
+ int err;
- /*
- * All right, let's create it.
- */
- err = security_path_mknod(&path, dentry, mode, 0);
+ err = security_path_mknod(path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path.mnt);
+ res->mnt = mntget(path->mnt);
res->dentry = dget(dentry);
}
}
- done_path_create(&path, dentry);
+
return err;
}
@@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err;
+ int err, name_err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path;
+ struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ name_err = 0;
+ dentry = NULL;
+ if (sun_path[0]) {
+ /* Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+ if (IS_ERR(dentry)) {
+ /* delay report until after 'already bound' check */
+ name_err = PTR_ERR(dentry);
+ dentry = NULL;
+ }
+ }
+
err = mutex_lock_interruptible(&u->readlock);
if (err)
- goto out;
+ goto out_path;
err = -EINVAL;
if (u->addr)
goto out_up;
+ if (name_err) {
+ err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+ goto out_up;
+ }
+
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- struct path path;
+ if (dentry) {
+ struct path u_path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
+ err = unix_mknod(dentry, &path, mode, &u_path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = path;
+ u->path = u_path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -1063,6 +1073,10 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->readlock);
+out_path:
+ if (dentry)
+ done_path_create(&path, dentry);
+
out:
return err;
}
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 301d70b0174f..e167592793a7 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -586,7 +586,7 @@ main(int argc, char *argv[])
do_file(file);
break;
case SJ_FAIL: /* error in do_file or below */
- sprintf("%s: failed\n", file);
+ fprintf(stderr, "%s: failed\n", file);
++n_error;
break;
case SJ_SUCCEED: /* premature success */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ff81026f6ddb..37fdd5416a64 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1519,8 +1519,6 @@ static int smack_inode_getsecurity(const struct inode *inode,
* @inode: the object
* @buffer: where they go
* @buffer_size: size of buffer
- *
- * Returns 0 on success, -EINVAL otherwise
*/
static int smack_inode_listsecurity(struct inode *inode, char *buffer,
size_t buffer_size)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index fe96428aa403..3a89d82f8057 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -67,6 +67,10 @@ enum {
ALC_HEADSET_TYPE_OMTP,
};
+enum {
+ ALC_KEY_MICMUTE_INDEX,
+};
+
struct alc_customize_define {
unsigned int sku_cfg;
unsigned char port_connectivity;
@@ -123,6 +127,7 @@ struct alc_spec {
unsigned int pll_coef_idx, pll_coef_bit;
unsigned int coef0;
struct input_dev *kb_dev;
+ u8 alc_mute_keycode_map[1];
};
/*
@@ -3462,12 +3467,43 @@ static void gpio2_mic_hotkey_event(struct hda_codec *codec,
/* GPIO2 just toggles on a keypress/keyrelease cycle. Therefore
send both key on and key off event for every interrupt. */
- input_report_key(spec->kb_dev, KEY_MICMUTE, 1);
+ input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 1);
input_sync(spec->kb_dev);
- input_report_key(spec->kb_dev, KEY_MICMUTE, 0);
+ input_report_key(spec->kb_dev, spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX], 0);
input_sync(spec->kb_dev);
}
+static int alc_register_micmute_input_device(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+ int i;
+
+ spec->kb_dev = input_allocate_device();
+ if (!spec->kb_dev) {
+ codec_err(codec, "Out of memory (input_allocate_device)\n");
+ return -ENOMEM;
+ }
+
+ spec->alc_mute_keycode_map[ALC_KEY_MICMUTE_INDEX] = KEY_MICMUTE;
+
+ spec->kb_dev->name = "Microphone Mute Button";
+ spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
+ spec->kb_dev->keycodesize = sizeof(spec->alc_mute_keycode_map[0]);
+ spec->kb_dev->keycodemax = ARRAY_SIZE(spec->alc_mute_keycode_map);
+ spec->kb_dev->keycode = spec->alc_mute_keycode_map;
+ for (i = 0; i < ARRAY_SIZE(spec->alc_mute_keycode_map); i++)
+ set_bit(spec->alc_mute_keycode_map[i], spec->kb_dev->keybit);
+
+ if (input_register_device(spec->kb_dev)) {
+ codec_err(codec, "input_register_device failed\n");
+ input_free_device(spec->kb_dev);
+ spec->kb_dev = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -3485,20 +3521,8 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
struct alc_spec *spec = codec->spec;
if (action == HDA_FIXUP_ACT_PRE_PROBE) {
- spec->kb_dev = input_allocate_device();
- if (!spec->kb_dev) {
- codec_err(codec, "Out of memory (input_allocate_device)\n");
+ if (alc_register_micmute_input_device(codec) != 0)
return;
- }
- spec->kb_dev->name = "Microphone Mute Button";
- spec->kb_dev->evbit[0] = BIT_MASK(EV_KEY);
- spec->kb_dev->keybit[BIT_WORD(KEY_MICMUTE)] = BIT_MASK(KEY_MICMUTE);
- if (input_register_device(spec->kb_dev)) {
- codec_err(codec, "input_register_device failed\n");
- input_free_device(spec->kb_dev);
- spec->kb_dev = NULL;
- return;
- }
snd_hda_add_verbs(codec, gpio_init);
snd_hda_codec_write_cache(codec, codec->core.afg, 0,
@@ -3528,6 +3552,47 @@ static void alc280_fixup_hp_gpio2_mic_hotkey(struct hda_codec *codec,
}
}
+static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ /* Line2 = mic mute hotkey
+ GPIO2 = mic mute LED */
+ static const struct hda_verb gpio_init[] = {
+ { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 },
+ { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 },
+ {}
+ };
+
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ if (alc_register_micmute_input_device(codec) != 0)
+ return;
+
+ snd_hda_add_verbs(codec, gpio_init);
+ snd_hda_jack_detect_enable_callback(codec, 0x1b,
+ gpio2_mic_hotkey_event);
+
+ spec->gen.cap_sync_hook = alc_fixup_gpio_mic_mute_hook;
+ spec->gpio_led = 0;
+ spec->mute_led_polarity = 0;
+ spec->gpio_mic_led_mask = 0x04;
+ return;
+ }
+
+ if (!spec->kb_dev)
+ return;
+
+ switch (action) {
+ case HDA_FIXUP_ACT_PROBE:
+ spec->init_amp = ALC_INIT_DEFAULT;
+ break;
+ case HDA_FIXUP_ACT_FREE:
+ input_unregister_device(spec->kb_dev);
+ spec->kb_dev = NULL;
+ }
+}
+
static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
@@ -4628,6 +4693,7 @@ enum {
ALC275_FIXUP_DELL_XPS,
ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
ALC293_FIXUP_LENOVO_SPK_NOISE,
+ ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -5237,6 +5303,10 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_THINKPAD_ACPI
},
+ [ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc233_fixup_lenovo_line2_mic_hotkey,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5386,6 +5456,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+ SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index b3ea24d64c50..93b400800905 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1537,7 +1537,7 @@ static int arizona_hw_params(struct snd_pcm_substream *substream,
bool reconfig;
unsigned int aif_tx_state, aif_rx_state;
- if (params_rate(params) % 8000)
+ if (params_rate(params) % 4000)
rates = &arizona_44k1_bclk_rates[0];
else
rates = &arizona_48k_bclk_rates[0];
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index ef76940f9dcb..3e3c7f6be29d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -1667,9 +1667,13 @@ static int rt5645_spk_event(struct snd_soc_dapm_widget *w,
RT5645_PWR_CLS_D_L,
RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
RT5645_PWR_CLS_D_L);
+ snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+ RT5645_DET_CLK_MASK, RT5645_DET_CLK_MODE1);
break;
case SND_SOC_DAPM_PRE_PMD:
+ snd_soc_update_bits(codec, RT5645_GEN_CTRL3,
+ RT5645_DET_CLK_MASK, RT5645_DET_CLK_DIS);
snd_soc_write(codec, RT5645_EQ_CTRL2, 0);
snd_soc_update_bits(codec, RT5645_PWR_DIG1,
RT5645_PWR_CLS_D | RT5645_PWR_CLS_D_R |
diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h
index 093e46d559fb..205e0715c99a 100644
--- a/sound/soc/codecs/rt5645.h
+++ b/sound/soc/codecs/rt5645.h
@@ -2122,6 +2122,10 @@ enum {
/* General Control3 (0xfc) */
#define RT5645_JD_PSV_MODE (0x1 << 12)
#define RT5645_IRQ_CLK_GATE_CTRL (0x1 << 11)
+#define RT5645_DET_CLK_MASK (0x3 << 9)
+#define RT5645_DET_CLK_DIS (0x0 << 9)
+#define RT5645_DET_CLK_MODE1 (0x1 << 9)
+#define RT5645_DET_CLK_MODE2 (0x2 << 9)
#define RT5645_MICINDET_MANU (0x1 << 7)
#define RT5645_RING2_SLEEVE_GND (0x1 << 5)
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index ffea427aeca8..ad4d0f82603e 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -1240,7 +1240,6 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
*/
ret = snd_soc_tplg_component_load(&platform->component,
&skl_tplg_ops, fw, 0);
- release_firmware(fw);
if (ret < 0) {
dev_err(bus->dev, "tplg component load failed%d\n", ret);
return -EINVAL;
@@ -1249,5 +1248,7 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus)
skl->resource.max_mcps = SKL_MAX_MCPS;
skl->resource.max_mem = SKL_FW_MAX_MEM;
+ skl->tplg = fw;
+
return 0;
}
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 5319529aedf7..caa69c4598a6 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -25,6 +25,7 @@
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
+#include <linux/firmware.h>
#include <sound/pcm.h>
#include "skl.h"
@@ -520,6 +521,9 @@ static void skl_remove(struct pci_dev *pci)
struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
struct skl *skl = ebus_to_skl(ebus);
+ if (skl->tplg)
+ release_firmware(skl->tplg);
+
if (pci_dev_run_wake(pci))
pm_runtime_get_noresume(&pci->dev);
pci_dev_put(pci);
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index dd2e79ae45a8..a0709e344d44 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -68,6 +68,8 @@ struct skl {
struct skl_dsp_resource resource;
struct list_head ppl_list;
struct list_head dapm_path_list;
+
+ const struct firmware *tplg;
};
#define skl_to_ebus(s) (&(s)->ebus)
diff --git a/tools/Makefile b/tools/Makefile
index 7dc820a8c1f1..0ba0df3b516f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -96,7 +96,7 @@ cgroup_install firewire_install hv_install lguest_install perf_install usb_insta
$(call descend,$(@:_install=),install)
selftests_install:
- $(call descend,testing/$(@:_clean=),install)
+ $(call descend,testing/$(@:_install=),install)
turbostat_install x86_energy_perf_policy_install:
$(call descend,power/x86/$(@:_install=),install)
diff --git a/tools/build/Makefile b/tools/build/Makefile
index a93036272d43..0d5a0e3a8fa9 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -25,7 +25,7 @@ export Q srctree CC LD
MAKEFLAGS := --no-print-directory
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-all: fixdep
+all: $(OUTPUT)fixdep
clean:
$(call QUIET_CLEAN, fixdep)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 37ff4c9f92f1..02db3cdff20f 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -7,7 +7,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
@@ -101,7 +101,6 @@ ifeq ($(feature-all), 1)
#
$(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
else
- $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1)
$(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
endif
@@ -123,13 +122,31 @@ define feature_print_text_code
MSG = $(shell printf '...%30s: %s' $(1) $(2))
endef
+#
+# generates feature value assignment for name, like:
+# $(call feature_assign,dwarf) == feature-dwarf=1
+#
+feature_assign = feature-$(1)=$(feature-$(1))
+
FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER)
-FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat))))
-FEATURE_DUMP_FILE := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
+FEATURE_DUMP := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
-ifeq ($(dwarf-post-unwind),1)
- FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text))
-endif
+feature_dump_check = $(eval $(feature_dump_check_code))
+define feature_dump_check_code
+ ifeq ($(findstring $(1),$(FEATURE_DUMP)),)
+ $(2) := 1
+ endif
+endef
+
+#
+# First check if any test from FEATURE_DISPLAY
+# and set feature_display := 1 if it does
+$(foreach feat,$(FEATURE_DISPLAY),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_display))
+
+#
+# Now also check if any other test changed,
+# so we force FEATURE-DUMP generation
+$(foreach feat,$(FEATURE_TESTS),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_dump_changed))
# The $(feature_display) controls the default detection message
# output. It's set if:
@@ -138,13 +155,13 @@ endif
# - one of the $(FEATURE_DISPLAY) is not detected
# - VF is enabled
-ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)")
- $(shell echo "$(FEATURE_DUMP)" > $(FEATURE_DUMP_FILENAME))
- feature_display := 1
+ifeq ($(feature_dump_changed),1)
+ $(shell rm -f $(FEATURE_DUMP_FILENAME))
+ $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
endif
feature_display_check = $(eval $(feature_check_display_code))
-define feature_display_check_code
+define feature_check_display_code
ifneq ($(feature-$(1)), 1)
feature_display := 1
endif
@@ -161,11 +178,6 @@ ifeq ($(feature_display),1)
$(info )
$(info Auto-detecting system features:)
$(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
-
- ifeq ($(dwarf-post-unwind),1)
- $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
- endif
-
ifneq ($(feature_verbose),1)
$(info )
endif
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
index 4e09ad617a60..be630bed66d2 100644
--- a/tools/build/Makefile.include
+++ b/tools/build/Makefile.include
@@ -4,7 +4,7 @@ ifdef CROSS_COMPILE
fixdep:
else
fixdep:
- $(Q)$(MAKE) -C $(srctree)/tools/build fixdep
+ $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
endif
.PHONY: fixdep
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cea04ce9f35c..bf8f0352264d 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -1,4 +1,3 @@
-
FILES= \
test-all.bin \
test-backtrace.bin \
@@ -38,38 +37,40 @@ FILES= \
test-bpf.bin \
test-get_cpuid.bin
+FILES := $(addprefix $(OUTPUT),$(FILES))
+
CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
-__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS)
- BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1
+__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
+ BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
###############################
-test-all.bin:
+$(OUTPUT)test-all.bin:
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
-test-hello.bin:
+$(OUTPUT)test-hello.bin:
$(BUILD)
-test-pthread-attr-setaffinity-np.bin:
+$(OUTPUT)test-pthread-attr-setaffinity-np.bin:
$(BUILD) -D_GNU_SOURCE -lpthread
-test-stackprotector-all.bin:
+$(OUTPUT)test-stackprotector-all.bin:
$(BUILD) -fstack-protector-all
-test-fortify-source.bin:
+$(OUTPUT)test-fortify-source.bin:
$(BUILD) -O2 -D_FORTIFY_SOURCE=2
-test-bionic.bin:
+$(OUTPUT)test-bionic.bin:
$(BUILD)
-test-libelf.bin:
+$(OUTPUT)test-libelf.bin:
$(BUILD) -lelf
-test-glibc.bin:
+$(OUTPUT)test-glibc.bin:
$(BUILD)
DWARFLIBS := -ldw
@@ -77,37 +78,37 @@ ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
endif
-test-dwarf.bin:
+$(OUTPUT)test-dwarf.bin:
$(BUILD) $(DWARFLIBS)
-test-libelf-mmap.bin:
+$(OUTPUT)test-libelf-mmap.bin:
$(BUILD) -lelf
-test-libelf-getphdrnum.bin:
+$(OUTPUT)test-libelf-getphdrnum.bin:
$(BUILD) -lelf
-test-libnuma.bin:
+$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
-test-numa_num_possible_cpus.bin:
+$(OUTPUT)test-numa_num_possible_cpus.bin:
$(BUILD) -lnuma
-test-libunwind.bin:
+$(OUTPUT)test-libunwind.bin:
$(BUILD) -lelf
-test-libunwind-debug-frame.bin:
+$(OUTPUT)test-libunwind-debug-frame.bin:
$(BUILD) -lelf
-test-libaudit.bin:
+$(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
-test-libslang.bin:
+$(OUTPUT)test-libslang.bin:
$(BUILD) -I/usr/include/slang -lslang
-test-gtk2.bin:
+$(OUTPUT)test-gtk2.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
-test-gtk2-infobar.bin:
+$(OUTPUT)test-gtk2-infobar.bin:
$(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
grep-libs = $(filter -l%,$(1))
@@ -119,63 +120,63 @@ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
-test-libperl.bin:
+$(OUTPUT)test-libperl.bin:
$(BUILD) $(FLAGS_PERL_EMBED)
-test-libpython.bin:
+$(OUTPUT)test-libpython.bin:
$(BUILD)
-test-libpython-version.bin:
+$(OUTPUT)test-libpython-version.bin:
$(BUILD)
-test-libbfd.bin:
+$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
-test-liberty.bin:
- $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
+$(OUTPUT)test-liberty.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
-test-liberty-z.bin:
- $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
+$(OUTPUT)test-liberty-z.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
-test-cplus-demangle.bin:
+$(OUTPUT)test-cplus-demangle.bin:
$(BUILD) -liberty
-test-backtrace.bin:
+$(OUTPUT)test-backtrace.bin:
$(BUILD)
-test-timerfd.bin:
+$(OUTPUT)test-timerfd.bin:
$(BUILD)
-test-libdw-dwarf-unwind.bin:
+$(OUTPUT)test-libdw-dwarf-unwind.bin:
$(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind)
-test-libbabeltrace.bin:
+$(OUTPUT)test-libbabeltrace.bin:
$(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
-test-sync-compare-and-swap.bin:
+$(OUTPUT)test-sync-compare-and-swap.bin:
$(BUILD)
-test-compile-32.bin:
- $(CC) -m32 -o $(OUTPUT)$@ test-compile.c
+$(OUTPUT)test-compile-32.bin:
+ $(CC) -m32 -o $@ test-compile.c
-test-compile-x32.bin:
- $(CC) -mx32 -o $(OUTPUT)$@ test-compile.c
+$(OUTPUT)test-compile-x32.bin:
+ $(CC) -mx32 -o $@ test-compile.c
-test-zlib.bin:
+$(OUTPUT)test-zlib.bin:
$(BUILD) -lz
-test-lzma.bin:
+$(OUTPUT)test-lzma.bin:
$(BUILD) -llzma
-test-get_cpuid.bin:
+$(OUTPUT)test-get_cpuid.bin:
$(BUILD)
-test-bpf.bin:
+$(OUTPUT)test-bpf.bin:
$(BUILD)
--include *.d
+-include $(OUTPUT)*.d
###############################
clean:
- rm -f $(FILES) *.d $(FILES:.bin=.make.output)
+ rm -f $(FILES) $(OUTPUT)*.d $(FILES:.bin=.make.output)
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 40bd21488032..28f5493da491 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -11,6 +11,8 @@ int __bitmap_weight(const unsigned long *bitmap, int bits);
void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, int bits);
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
((nbits) % BITS_PER_LONG) ? \
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
new file mode 100644
index 000000000000..e26223f1f287
--- /dev/null
+++ b/tools/include/linux/string.h
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_STRING_H_
+#define _TOOLS_LINUX_STRING_H_
+
+
+#include <linux/types.h> /* for size_t */
+
+void *memdup(const void *src, size_t len);
+
+int strtobool(const char *s, bool *res);
+
+#ifndef __UCLIBC__
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+#endif
+
+#endif /* _LINUX_STRING_H_ */
diff --git a/tools/perf/util/bitmap.c b/tools/lib/bitmap.c
index 0a1adc1111fd..0a1adc1111fd 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/lib/bitmap.c
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a3caaf3eafbd..919b71780710 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -71,7 +71,21 @@ FEATURE_DISPLAY = libelf bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
export prefix libdir src obj
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a6331050ab79..5bdc6eab6852 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -83,3 +83,17 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
log_buf[0] = 0;
return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
+
+int bpf_map_update_elem(int fd, void *key, void *value,
+ u64 flags)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+ return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 854b7361b784..a76465541292 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -20,4 +20,6 @@ int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
u32 kern_version, char *log_buf,
size_t log_buf_sz);
+int bpf_map_update_elem(int fd, void *key, void *value,
+ u64 flags);
#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e176bad19bcb..8334a5a9d5d7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -152,29 +152,36 @@ struct bpf_program {
} *reloc_desc;
int nr_reloc;
- int fd;
+ struct {
+ int nr;
+ int *fds;
+ } instances;
+ bpf_program_prep_t preprocessor;
struct bpf_object *obj;
void *priv;
bpf_program_clear_priv_t clear_priv;
};
+struct bpf_map {
+ int fd;
+ char *name;
+ struct bpf_map_def def;
+ void *priv;
+ bpf_map_clear_priv_t clear_priv;
+};
+
static LIST_HEAD(bpf_objects_list);
struct bpf_object {
char license[64];
u32 kern_version;
- void *maps_buf;
- size_t maps_buf_sz;
struct bpf_program *programs;
size_t nr_programs;
- int *map_fds;
- /*
- * This field is required because maps_buf will be freed and
- * maps_buf_sz will be set to 0 after loaded.
- */
- size_t nr_map_fds;
+ struct bpf_map *maps;
+ size_t nr_maps;
+
bool loaded;
/*
@@ -188,6 +195,7 @@ struct bpf_object {
Elf *elf;
GElf_Ehdr ehdr;
Elf_Data *symbols;
+ size_t strtabidx;
struct {
GElf_Shdr shdr;
Elf_Data *data;
@@ -206,10 +214,25 @@ struct bpf_object {
static void bpf_program__unload(struct bpf_program *prog)
{
+ int i;
+
if (!prog)
return;
- zclose(prog->fd);
+ /*
+ * If the object is opened but the program was never loaded,
+ * it is possible that prog->instances.nr == -1.
+ */
+ if (prog->instances.nr > 0) {
+ for (i = 0; i < prog->instances.nr; i++)
+ zclose(prog->instances.fds[i]);
+ } else if (prog->instances.nr != -1) {
+ pr_warning("Internal error: instances.nr is %d\n",
+ prog->instances.nr);
+ }
+
+ prog->instances.nr = -1;
+ zfree(&prog->instances.fds);
}
static void bpf_program__exit(struct bpf_program *prog)
@@ -260,7 +283,8 @@ bpf_program__init(void *data, size_t size, char *name, int idx,
memcpy(prog->insns, data,
prog->insns_cnt * sizeof(struct bpf_insn));
prog->idx = idx;
- prog->fd = -1;
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
return 0;
errout:
@@ -469,21 +493,77 @@ static int
bpf_object__init_maps(struct bpf_object *obj, void *data,
size_t size)
{
- if (size == 0) {
+ size_t nr_maps;
+ int i;
+
+ nr_maps = size / sizeof(struct bpf_map_def);
+ if (!data || !nr_maps) {
pr_debug("%s doesn't need map definition\n",
obj->path);
return 0;
}
- obj->maps_buf = malloc(size);
- if (!obj->maps_buf) {
- pr_warning("malloc maps failed: %s\n", obj->path);
+ pr_debug("maps in %s: %zd bytes\n", obj->path, size);
+
+ obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
+ if (!obj->maps) {
+ pr_warning("alloc maps for object failed\n");
return -ENOMEM;
}
+ obj->nr_maps = nr_maps;
- obj->maps_buf_sz = size;
- memcpy(obj->maps_buf, data, size);
- pr_debug("maps in %s: %ld bytes\n", obj->path, (long)size);
+ for (i = 0; i < nr_maps; i++) {
+ struct bpf_map_def *def = &obj->maps[i].def;
+
+ /*
+ * fill all fd with -1 so won't close incorrect
+ * fd (fd=0 is stdin) when failure (zclose won't close
+ * negative fd)).
+ */
+ obj->maps[i].fd = -1;
+
+ /* Save map definition into obj->maps */
+ *def = ((struct bpf_map_def *)data)[i];
+ }
+ return 0;
+}
+
+static int
+bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+{
+ int i;
+ Elf_Data *symbols = obj->efile.symbols;
+
+ if (!symbols || maps_shndx < 0)
+ return -EINVAL;
+
+ for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ GElf_Sym sym;
+ size_t map_idx;
+ const char *map_name;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != maps_shndx)
+ continue;
+
+ map_name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ map_idx = sym.st_value / sizeof(struct bpf_map_def);
+ if (map_idx >= obj->nr_maps) {
+ pr_warning("index of map \"%s\" is buggy: %zu > %zu\n",
+ map_name, map_idx, obj->nr_maps);
+ continue;
+ }
+ obj->maps[map_idx].name = strdup(map_name);
+ if (!obj->maps[map_idx].name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+ pr_debug("map %zu is \"%s\"\n", map_idx,
+ obj->maps[map_idx].name);
+ }
return 0;
}
@@ -492,7 +572,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
Elf_Scn *scn = NULL;
- int idx = 0, err = 0;
+ int idx = 0, err = 0, maps_shndx = -1;
/* Elf is corrupted/truncated, avoid calling elf_strptr. */
if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -542,16 +622,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
err = bpf_object__init_kversion(obj,
data->d_buf,
data->d_size);
- else if (strcmp(name, "maps") == 0)
+ else if (strcmp(name, "maps") == 0) {
err = bpf_object__init_maps(obj, data->d_buf,
data->d_size);
- else if (sh.sh_type == SHT_SYMTAB) {
+ maps_shndx = idx;
+ } else if (sh.sh_type == SHT_SYMTAB) {
if (obj->efile.symbols) {
pr_warning("bpf: multiple SYMTAB in %s\n",
obj->path);
err = -LIBBPF_ERRNO__FORMAT;
- } else
+ } else {
obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ }
} else if ((sh.sh_type == SHT_PROGBITS) &&
(sh.sh_flags & SHF_EXECINSTR) &&
(data->d_size > 0)) {
@@ -586,6 +669,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
if (err)
goto out;
}
+
+ if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
+ pr_warning("Corrupted ELF file: index of strtab invalid\n");
+ return LIBBPF_ERRNO__FORMAT;
+ }
+ if (maps_shndx >= 0)
+ err = bpf_object__init_maps_name(obj, maps_shndx);
out:
return err;
}
@@ -668,37 +758,15 @@ static int
bpf_object__create_maps(struct bpf_object *obj)
{
unsigned int i;
- size_t nr_maps;
- int *pfd;
-
- nr_maps = obj->maps_buf_sz / sizeof(struct bpf_map_def);
- if (!obj->maps_buf || !nr_maps) {
- pr_debug("don't need create maps for %s\n",
- obj->path);
- return 0;
- }
-
- obj->map_fds = malloc(sizeof(int) * nr_maps);
- if (!obj->map_fds) {
- pr_warning("realloc perf_bpf_map_fds failed\n");
- return -ENOMEM;
- }
- obj->nr_map_fds = nr_maps;
- /* fill all fd with -1 */
- memset(obj->map_fds, -1, sizeof(int) * nr_maps);
-
- pfd = obj->map_fds;
- for (i = 0; i < nr_maps; i++) {
- struct bpf_map_def def;
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map_def *def = &obj->maps[i].def;
+ int *pfd = &obj->maps[i].fd;
- def = *(struct bpf_map_def *)(obj->maps_buf +
- i * sizeof(struct bpf_map_def));
-
- *pfd = bpf_create_map(def.type,
- def.key_size,
- def.value_size,
- def.max_entries);
+ *pfd = bpf_create_map(def->type,
+ def->key_size,
+ def->value_size,
+ def->max_entries);
if (*pfd < 0) {
size_t j;
int err = *pfd;
@@ -706,22 +774,17 @@ bpf_object__create_maps(struct bpf_object *obj)
pr_warning("failed to create map: %s\n",
strerror(errno));
for (j = 0; j < i; j++)
- zclose(obj->map_fds[j]);
- obj->nr_map_fds = 0;
- zfree(&obj->map_fds);
+ zclose(obj->maps[j].fd);
return err;
}
pr_debug("create map: fd=%d\n", *pfd);
- pfd++;
}
- zfree(&obj->maps_buf);
- obj->maps_buf_sz = 0;
return 0;
}
static int
-bpf_program__relocate(struct bpf_program *prog, int *map_fds)
+bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
{
int i;
@@ -741,7 +804,7 @@ bpf_program__relocate(struct bpf_program *prog, int *map_fds)
return -LIBBPF_ERRNO__RELOC;
}
insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
- insns[insn_idx].imm = map_fds[map_idx];
+ insns[insn_idx].imm = obj->maps[map_idx].fd;
}
zfree(&prog->reloc_desc);
@@ -760,7 +823,7 @@ bpf_object__relocate(struct bpf_object *obj)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_program__relocate(prog, obj->map_fds);
+ err = bpf_program__relocate(prog, obj);
if (err) {
pr_warning("failed to relocate '%s'\n",
prog->section_name);
@@ -784,8 +847,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
Elf_Data *data = obj->efile.reloc[i].data;
int idx = shdr->sh_info;
struct bpf_program *prog;
- size_t nr_maps = obj->maps_buf_sz /
- sizeof(struct bpf_map_def);
+ size_t nr_maps = obj->nr_maps;
if (shdr->sh_type != SHT_REL) {
pr_warning("internal error at %d\n", __LINE__);
@@ -860,13 +922,73 @@ static int
bpf_program__load(struct bpf_program *prog,
char *license, u32 kern_version)
{
- int err, fd;
+ int err = 0, fd, i;
- err = load_program(prog->insns, prog->insns_cnt,
- license, kern_version, &fd);
- if (!err)
- prog->fd = fd;
+ if (prog->instances.nr < 0 || !prog->instances.fds) {
+ if (prog->preprocessor) {
+ pr_warning("Internal error: can't load program '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog->instances.fds = malloc(sizeof(int));
+ if (!prog->instances.fds) {
+ pr_warning("Not enough memory for BPF fds\n");
+ return -ENOMEM;
+ }
+ prog->instances.nr = 1;
+ prog->instances.fds[0] = -1;
+ }
+
+ if (!prog->preprocessor) {
+ if (prog->instances.nr != 1) {
+ pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
+ prog->section_name, prog->instances.nr);
+ }
+ err = load_program(prog->insns, prog->insns_cnt,
+ license, kern_version, &fd);
+ if (!err)
+ prog->instances.fds[0] = fd;
+ goto out;
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ struct bpf_prog_prep_result result;
+ bpf_program_prep_t preprocessor = prog->preprocessor;
+
+ bzero(&result, sizeof(result));
+ err = preprocessor(prog, i, prog->insns,
+ prog->insns_cnt, &result);
+ if (err) {
+ pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (!result.new_insn_ptr || !result.new_insn_cnt) {
+ pr_debug("Skip loading the %dth instance of program '%s'\n",
+ i, prog->section_name);
+ prog->instances.fds[i] = -1;
+ if (result.pfd)
+ *result.pfd = -1;
+ continue;
+ }
+
+ err = load_program(result.new_insn_ptr,
+ result.new_insn_cnt,
+ license, kern_version, &fd);
+
+ if (err) {
+ pr_warning("Loading the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+ if (result.pfd)
+ *result.pfd = fd;
+ prog->instances.fds[i] = fd;
+ }
+out:
if (err)
pr_warning("failed to load program '%s'\n",
prog->section_name);
@@ -970,10 +1092,8 @@ int bpf_object__unload(struct bpf_object *obj)
if (!obj)
return -EINVAL;
- for (i = 0; i < obj->nr_map_fds; i++)
- zclose(obj->map_fds[i]);
- zfree(&obj->map_fds);
- obj->nr_map_fds = 0;
+ for (i = 0; i < obj->nr_maps; i++)
+ zclose(obj->maps[i].fd);
for (i = 0; i < obj->nr_programs; i++)
bpf_program__unload(&obj->programs[i]);
@@ -1016,7 +1136,16 @@ void bpf_object__close(struct bpf_object *obj)
bpf_object__elf_finish(obj);
bpf_object__unload(obj);
- zfree(&obj->maps_buf);
+ for (i = 0; i < obj->nr_maps; i++) {
+ zfree(&obj->maps[i].name);
+ if (obj->maps[i].clear_priv)
+ obj->maps[i].clear_priv(&obj->maps[i],
+ obj->maps[i].priv);
+ obj->maps[i].priv = NULL;
+ obj->maps[i].clear_priv = NULL;
+ }
+ zfree(&obj->maps);
+ obj->nr_maps = 0;
if (obj->programs && obj->nr_programs) {
for (i = 0; i < obj->nr_programs; i++)
@@ -1121,5 +1250,142 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
int bpf_program__fd(struct bpf_program *prog)
{
- return prog->fd;
+ return bpf_program__nth_fd(prog, 0);
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+ bpf_program_prep_t prep)
+{
+ int *instances_fds;
+
+ if (nr_instances <= 0 || !prep)
+ return -EINVAL;
+
+ if (prog->instances.nr > 0 || prog->instances.fds) {
+ pr_warning("Can't set pre-processor after loading\n");
+ return -EINVAL;
+ }
+
+ instances_fds = malloc(sizeof(int) * nr_instances);
+ if (!instances_fds) {
+ pr_warning("alloc memory failed for fds\n");
+ return -ENOMEM;
+ }
+
+ /* fill all fd with -1 */
+ memset(instances_fds, -1, sizeof(int) * nr_instances);
+
+ prog->instances.nr = nr_instances;
+ prog->instances.fds = instances_fds;
+ prog->preprocessor = prep;
+ return 0;
+}
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n)
+{
+ int fd;
+
+ if (n >= prog->instances.nr || n < 0) {
+ pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
+ n, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ fd = prog->instances.fds[n];
+ if (fd < 0) {
+ pr_warning("%dth instance of program '%s' is invalid\n",
+ n, prog->section_name);
+ return -ENOENT;
+ }
+
+ return fd;
+}
+
+int bpf_map__get_fd(struct bpf_map *map)
+{
+ if (!map)
+ return -EINVAL;
+
+ return map->fd;
+}
+
+int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef)
+{
+ if (!map || !pdef)
+ return -EINVAL;
+
+ *pdef = map->def;
+ return 0;
+}
+
+const char *bpf_map__get_name(struct bpf_map *map)
+{
+ if (!map)
+ return NULL;
+ return map->name;
+}
+
+int bpf_map__set_private(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (map->priv) {
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ }
+
+ map->priv = priv;
+ map->clear_priv = clear_priv;
+ return 0;
+}
+
+int bpf_map__get_private(struct bpf_map *map, void **ppriv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (ppriv)
+ *ppriv = map->priv;
+ return 0;
+}
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+{
+ size_t idx;
+ struct bpf_map *s, *e;
+
+ if (!obj || !obj->maps)
+ return NULL;
+
+ s = obj->maps;
+ e = obj->maps + obj->nr_maps;
+
+ if (prev == NULL)
+ return s;
+
+ if ((prev < s) || (prev >= e)) {
+ pr_warning("error in %s: map handler doesn't belong to object\n",
+ __func__);
+ return NULL;
+ }
+
+ idx = (prev - obj->maps) + 1;
+ if (idx >= obj->nr_maps)
+ return NULL;
+ return &obj->maps[idx];
+}
+
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const char *name)
+{
+ struct bpf_map *pos;
+
+ bpf_map__for_each(pos, obj) {
+ if (pos->name && !strcmp(pos->name, name))
+ return pos;
+ }
+ return NULL;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c9a9aef2806c..a51594c7b518 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -88,6 +88,70 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
int bpf_program__fd(struct bpf_program *prog);
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before being loaded
+ * into kernel. One program in an object file can be transform into
+ * multiple variants to be attached to different code.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * are APIs for this propose.
+ *
+ * - bpf_program_prep_t:
+ * It defines 'preprocessor', which is a caller defined function
+ * passed to libbpf through bpf_program__set_prep(), and will be
+ * called before program is loaded. The processor should adjust
+ * the program one time for each instances according to the number
+ * passed to it.
+ *
+ * - bpf_program__set_prep:
+ * Attachs a preprocessor to a BPF program. The number of instances
+ * whould be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ * After the program is loaded, get resuling fds from bpf program for
+ * each instances.
+ *
+ * If bpf_program__set_prep() is not used, the program whould be loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+ /*
+ * If not NULL, load new instruction array.
+ * If set to NULL, don't load this instance.
+ */
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+
+ /* If not NULL, result fd is set to it */
+ int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ * - prog: The bpf_program being loaded.
+ * - n: Index of instance being generated.
+ * - insns: BPF instructions array.
+ * - insns_cnt:Number of instructions in insns.
+ * - res: Output parameter, result of transformation.
+ *
+ * Return value:
+ * - Zero: pre-processing success.
+ * - Non-zero: pre-processing, stop loading.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+ bpf_program_prep_t prep);
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n);
+
/*
* We don't need __attribute__((packed)) now since it is
* unnecessary for 'bpf_map_def' because they are all aligned.
@@ -101,4 +165,28 @@ struct bpf_map_def {
unsigned int max_entries;
};
+/*
+ * There is another 'struct bpf_map' in include/linux/map.h. However,
+ * it is not a uapi header so no need to consider name clash.
+ */
+struct bpf_map;
+struct bpf_map *
+bpf_object__get_map_by_name(struct bpf_object *obj, const char *name);
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
+#define bpf_map__for_each(pos, obj) \
+ for ((pos) = bpf_map__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_map__next((pos), (obj)))
+
+int bpf_map__get_fd(struct bpf_map *map);
+int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef);
+const char *bpf_map__get_name(struct bpf_map *map);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+int bpf_map__set_private(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+int bpf_map__get_private(struct bpf_map *map, void **ppriv);
+
#endif
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
new file mode 100644
index 000000000000..9122a9e80046
--- /dev/null
+++ b/tools/lib/find_bit.c
@@ -0,0 +1,84 @@
+/* bit search implementation
+ *
+ * Copied from lib/find_bit.c to tools/lib/find_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit)
+
+/*
+ * This is a common helper function for find_next_bit and
+ * find_next_zero_bit. The difference is the "invert" argument, which
+ * is XORed with each fetched word before searching it for one bits.
+ */
+static unsigned long _find_next_bit(const unsigned long *addr,
+ unsigned long nbits, unsigned long start, unsigned long invert)
+{
+ unsigned long tmp;
+
+ if (!nbits || start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+ /* Handle 1st word. */
+ tmp &= BITMAP_FIRST_WORD_MASK(start);
+ start = round_down(start, BITS_PER_LONG);
+
+ while (!tmp) {
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+ }
+
+ return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, 0UL);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long idx;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ if (addr[idx])
+ return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+ }
+
+ return size;
+}
+#endif
diff --git a/tools/lib/string.c b/tools/lib/string.c
new file mode 100644
index 000000000000..bd239bc1d557
--- /dev/null
+++ b/tools/lib/string.c
@@ -0,0 +1,89 @@
+/*
+ * linux/tools/lib/string.c
+ *
+ * Copied from linux/lib/string.c, where it is:
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * More specifically, the first copied function was strtobool, which
+ * was introduced by:
+ *
+ * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
+ * Author: Jonathan Cameron <jic23@cam.ac.uk>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+
+/**
+ * memdup - duplicate region of memory
+ *
+ * @src: memory region to duplicate
+ * @len: memory region length
+ */
+void *memdup(const void *src, size_t len)
+{
+ void *p = malloc(len);
+
+ if (p)
+ memcpy(p, src, len);
+
+ return p;
+}
+
+/**
+ * strtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0'.
+ * Otherwise it will return -EINVAL. Value pointed to by res is
+ * updated upon finding a match.
+ */
+int strtobool(const char *s, bool *res)
+{
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ break;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * strlcpy - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ *
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
diff --git a/tools/lib/subcmd/Build b/tools/lib/subcmd/Build
new file mode 100644
index 000000000000..ee31288788c1
--- /dev/null
+++ b/tools/lib/subcmd/Build
@@ -0,0 +1,7 @@
+libsubcmd-y += exec-cmd.o
+libsubcmd-y += help.o
+libsubcmd-y += pager.o
+libsubcmd-y += parse-options.o
+libsubcmd-y += run-command.o
+libsubcmd-y += sigchain.o
+libsubcmd-y += subcmd-config.o
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
new file mode 100644
index 000000000000..629cf8c14e68
--- /dev/null
+++ b/tools/lib/subcmd/Makefile
@@ -0,0 +1,48 @@
+include ../../scripts/Makefile.include
+include ../../perf/config/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+RM = rm -f
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libsubcmd.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += -I$(srctree)/tools/include/
+CFLAGS += -I$(srctree)/include/uapi
+CFLAGS += -I$(srctree)/include
+
+SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SUBCMD_IN): FORCE
+ @$(MAKE) $(build)=libsubcmd
+
+$(LIBFILE): $(SUBCMD_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+
+clean:
+ $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
new file mode 100644
index 000000000000..1ae833af1a4a
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -0,0 +1,209 @@
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "subcmd-util.h"
+#include "exec-cmd.h"
+#include "subcmd-config.h"
+
+#define MAX_ARGS 32
+#define PATH_MAX 4096
+
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env)
+{
+ subcmd_config.exec_name = exec_name;
+ subcmd_config.prefix = prefix;
+ subcmd_config.exec_path = exec_path;
+ subcmd_config.exec_path_env = exec_path_env;
+}
+
+#define is_dir_sep(c) ((c) == '/')
+
+static int is_absolute_path(const char *path)
+{
+ return path[0] == '/';
+}
+
+static const char *get_pwd_cwd(void)
+{
+ static char cwd[PATH_MAX + 1];
+ char *pwd;
+ struct stat cwd_stat, pwd_stat;
+ if (getcwd(cwd, PATH_MAX) == NULL)
+ return NULL;
+ pwd = getenv("PWD");
+ if (pwd && strcmp(pwd, cwd)) {
+ stat(cwd, &cwd_stat);
+ if (!stat(pwd, &pwd_stat) &&
+ pwd_stat.st_dev == cwd_stat.st_dev &&
+ pwd_stat.st_ino == cwd_stat.st_ino) {
+ strlcpy(cwd, pwd, PATH_MAX);
+ }
+ }
+ return cwd;
+}
+
+static const char *make_nonrelative_path(const char *path)
+{
+ static char buf[PATH_MAX + 1];
+
+ if (is_absolute_path(path)) {
+ if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ } else {
+ const char *cwd = get_pwd_cwd();
+ if (!cwd)
+ die("Cannot determine the current working directory");
+ if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ }
+ return buf;
+}
+
+char *system_path(const char *path)
+{
+ char *buf = NULL;
+
+ if (is_absolute_path(path))
+ return strdup(path);
+
+ astrcatf(&buf, "%s/%s", subcmd_config.prefix, path);
+
+ return buf;
+}
+
+const char *extract_argv0_path(const char *argv0)
+{
+ const char *slash;
+
+ if (!argv0 || !*argv0)
+ return NULL;
+ slash = argv0 + strlen(argv0);
+
+ while (argv0 <= slash && !is_dir_sep(*slash))
+ slash--;
+
+ if (slash >= argv0) {
+ argv0_path = strndup(argv0, slash - argv0);
+ return argv0_path ? slash + 1 : NULL;
+ }
+
+ return argv0;
+}
+
+void set_argv_exec_path(const char *exec_path)
+{
+ argv_exec_path = exec_path;
+ /*
+ * Propagate this setting to external programs.
+ */
+ setenv(subcmd_config.exec_path_env, exec_path, 1);
+}
+
+
+/* Returns the highest-priority location to look for subprograms. */
+char *get_argv_exec_path(void)
+{
+ char *env;
+
+ if (argv_exec_path)
+ return strdup(argv_exec_path);
+
+ env = getenv(subcmd_config.exec_path_env);
+ if (env && *env)
+ return strdup(env);
+
+ return system_path(subcmd_config.exec_path);
+}
+
+static void add_path(char **out, const char *path)
+{
+ if (path && *path) {
+ if (is_absolute_path(path))
+ astrcat(out, path);
+ else
+ astrcat(out, make_nonrelative_path(path));
+
+ astrcat(out, ":");
+ }
+}
+
+void setup_path(void)
+{
+ const char *old_path = getenv("PATH");
+ char *new_path = NULL;
+ char *tmp = get_argv_exec_path();
+
+ add_path(&new_path, tmp);
+ add_path(&new_path, argv0_path);
+ free(tmp);
+
+ if (old_path)
+ astrcat(&new_path, old_path);
+ else
+ astrcat(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+ setenv("PATH", new_path, 1);
+
+ free(new_path);
+}
+
+static const char **prepare_exec_cmd(const char **argv)
+{
+ int argc;
+ const char **nargv;
+
+ for (argc = 0; argv[argc]; argc++)
+ ; /* just counting */
+ nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+ nargv[0] = subcmd_config.exec_name;
+ for (argc = 0; argv[argc]; argc++)
+ nargv[argc + 1] = argv[argc];
+ nargv[argc + 1] = NULL;
+ return nargv;
+}
+
+int execv_cmd(const char **argv) {
+ const char **nargv = prepare_exec_cmd(argv);
+
+ /* execvp() can only ever return if it fails */
+ execvp(subcmd_config.exec_name, (char **)nargv);
+
+ free(nargv);
+ return -1;
+}
+
+
+int execl_cmd(const char *cmd,...)
+{
+ int argc;
+ const char *argv[MAX_ARGS + 1];
+ const char *arg;
+ va_list param;
+
+ va_start(param, cmd);
+ argv[0] = cmd;
+ argc = 1;
+ while (argc < MAX_ARGS) {
+ arg = argv[argc++] = va_arg(param, char *);
+ if (!arg)
+ break;
+ }
+ va_end(param);
+ if (MAX_ARGS <= argc) {
+ fprintf(stderr, " Error: too many args to run %s\n", cmd);
+ return -1;
+ }
+
+ argv[argc] = NULL;
+ return execv_cmd(argv);
+}
diff --git a/tools/lib/subcmd/exec-cmd.h b/tools/lib/subcmd/exec-cmd.h
new file mode 100644
index 000000000000..5d08bda31d90
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.h
@@ -0,0 +1,16 @@
+#ifndef __SUBCMD_EXEC_CMD_H
+#define __SUBCMD_EXEC_CMD_H
+
+extern void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env);
+
+extern void set_argv_exec_path(const char *exec_path);
+extern const char *extract_argv0_path(const char *path);
+extern void setup_path(void);
+extern int execv_cmd(const char **argv); /* NULL terminated */
+extern int execl_cmd(const char *cmd, ...);
+/* get_argv_exec_path and system_path return malloc'd string, caller must free it */
+extern char *get_argv_exec_path(void);
+extern char *system_path(const char *path);
+
+#endif /* __SUBCMD_EXEC_CMD_H */
diff --git a/tools/perf/util/help.c b/tools/lib/subcmd/help.c
index 86c37c472263..e228c3cb3716 100644
--- a/tools/perf/util/help.c
+++ b/tools/lib/subcmd/help.c
@@ -1,9 +1,15 @@
-#include "cache.h"
-#include "../builtin.h"
-#include "exec_cmd.h"
-#include "levenshtein.h"
-#include "help.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "subcmd-util.h"
+#include "help.h"
+#include "exec-cmd.h"
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
{
@@ -17,7 +23,7 @@ void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
cmds->names[cmds->cnt++] = ent;
}
-static void clean_cmdnames(struct cmdnames *cmds)
+void clean_cmdnames(struct cmdnames *cmds)
{
unsigned int i;
@@ -28,14 +34,14 @@ static void clean_cmdnames(struct cmdnames *cmds)
cmds->alloc = 0;
}
-static int cmdname_compare(const void *a_, const void *b_)
+int cmdname_compare(const void *a_, const void *b_)
{
struct cmdname *a = *(struct cmdname **)a_;
struct cmdname *b = *(struct cmdname **)b_;
return strcmp(a->name, b->name);
}
-static void uniq(struct cmdnames *cmds)
+void uniq(struct cmdnames *cmds)
{
unsigned int i, j;
@@ -71,6 +77,28 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
cmds->cnt = cj;
}
+static void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
static void pretty_print_string_list(struct cmdnames *cmds, int longest)
{
int cols = 1, rows;
@@ -114,6 +142,14 @@ static int is_executable(const char *name)
return st.st_mode & S_IXUSR;
}
+static int has_extension(const char *filename, const char *ext)
+{
+ size_t len = strlen(filename);
+ size_t extlen = strlen(ext);
+
+ return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
static void list_commands_in_dir(struct cmdnames *cmds,
const char *path,
const char *prefix)
@@ -121,8 +157,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
int prefix_len;
DIR *dir = opendir(path);
struct dirent *de;
- struct strbuf buf = STRBUF_INIT;
- int len;
+ char *buf = NULL;
if (!dir)
return;
@@ -130,8 +165,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
prefix = "perf-";
prefix_len = strlen(prefix);
- strbuf_addf(&buf, "%s/", path);
- len = buf.len;
+ astrcatf(&buf, "%s/", path);
while ((de = readdir(dir)) != NULL) {
int entlen;
@@ -139,9 +173,8 @@ static void list_commands_in_dir(struct cmdnames *cmds,
if (prefixcmp(de->d_name, prefix))
continue;
- strbuf_setlen(&buf, len);
- strbuf_addstr(&buf, de->d_name);
- if (!is_executable(buf.buf))
+ astrcat(&buf, de->d_name);
+ if (!is_executable(buf))
continue;
entlen = strlen(de->d_name) - prefix_len;
@@ -151,7 +184,7 @@ static void list_commands_in_dir(struct cmdnames *cmds,
add_cmdname(cmds, de->d_name + prefix_len, entlen);
}
closedir(dir);
- strbuf_release(&buf);
+ free(buf);
}
void load_command_list(const char *prefix,
@@ -159,7 +192,7 @@ void load_command_list(const char *prefix,
struct cmdnames *other_cmds)
{
const char *env_path = getenv("PATH");
- const char *exec_path = perf_exec_path();
+ char *exec_path = get_argv_exec_path();
if (exec_path) {
list_commands_in_dir(main_cmds, exec_path, prefix);
@@ -172,7 +205,7 @@ void load_command_list(const char *prefix,
char *paths, *path, *colon;
path = paths = strdup(env_path);
while (1) {
- if ((colon = strchr(path, PATH_SEP)))
+ if ((colon = strchr(path, ':')))
*colon = 0;
if (!exec_path || strcmp(path, exec_path))
list_commands_in_dir(other_cmds, path, prefix);
@@ -187,6 +220,7 @@ void load_command_list(const char *prefix,
sizeof(*other_cmds->names), cmdname_compare);
uniq(other_cmds);
}
+ free(exec_path);
exclude_cmds(other_cmds, main_cmds);
}
@@ -203,13 +237,14 @@ void list_commands(const char *title, struct cmdnames *main_cmds,
longest = other_cmds->names[i]->len;
if (main_cmds->cnt) {
- const char *exec_path = perf_exec_path();
+ char *exec_path = get_argv_exec_path();
printf("available %s in '%s'\n", title, exec_path);
printf("----------------");
mput_char('-', strlen(title) + strlen(exec_path));
putchar('\n');
pretty_print_string_list(main_cmds, longest);
putchar('\n');
+ free(exec_path);
}
if (other_cmds->cnt) {
@@ -231,109 +266,3 @@ int is_in_cmdlist(struct cmdnames *c, const char *s)
return 1;
return 0;
}
-
-static int autocorrect;
-static struct cmdnames aliases;
-
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
-{
- if (!strcmp(var, "help.autocorrect"))
- autocorrect = perf_config_int(var,value);
- /* Also use aliases for command lookup */
- if (!prefixcmp(var, "alias."))
- add_cmdname(&aliases, var + 6, strlen(var + 6));
-
- return perf_default_config(var, value, cb);
-}
-
-static int levenshtein_compare(const void *p1, const void *p2)
-{
- const struct cmdname *const *c1 = p1, *const *c2 = p2;
- const char *s1 = (*c1)->name, *s2 = (*c2)->name;
- int l1 = (*c1)->len;
- int l2 = (*c2)->len;
- return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
-}
-
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
-{
- unsigned int i;
-
- ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
- for (i = 0; i < old->cnt; i++)
- cmds->names[cmds->cnt++] = old->names[i];
- zfree(&old->names);
- old->cnt = 0;
-}
-
-const char *help_unknown_cmd(const char *cmd)
-{
- unsigned int i, n = 0, best_similarity = 0;
- struct cmdnames main_cmds, other_cmds;
-
- memset(&main_cmds, 0, sizeof(main_cmds));
- memset(&other_cmds, 0, sizeof(main_cmds));
- memset(&aliases, 0, sizeof(aliases));
-
- perf_config(perf_unknown_cmd_config, NULL);
-
- load_command_list("perf-", &main_cmds, &other_cmds);
-
- add_cmd_list(&main_cmds, &aliases);
- add_cmd_list(&main_cmds, &other_cmds);
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(main_cmds.names), cmdname_compare);
- uniq(&main_cmds);
-
- if (main_cmds.cnt) {
- /* This reuses cmdname->len for similarity index */
- for (i = 0; i < main_cmds.cnt; ++i)
- main_cmds.names[i]->len =
- levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
-
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(*main_cmds.names), levenshtein_compare);
-
- best_similarity = main_cmds.names[0]->len;
- n = 1;
- while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
- ++n;
- }
-
- if (autocorrect && n == 1) {
- const char *assumed = main_cmds.names[0]->name;
-
- main_cmds.names[0] = NULL;
- clean_cmdnames(&main_cmds);
- fprintf(stderr, "WARNING: You called a perf program named '%s', "
- "which does not exist.\n"
- "Continuing under the assumption that you meant '%s'\n",
- cmd, assumed);
- if (autocorrect > 0) {
- fprintf(stderr, "in %0.1f seconds automatically...\n",
- (float)autocorrect/10.0);
- poll(NULL, 0, autocorrect * 100);
- }
- return assumed;
- }
-
- fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
-
- if (main_cmds.cnt && best_similarity < 6) {
- fprintf(stderr, "\nDid you mean %s?\n",
- n < 2 ? "this": "one of these");
-
- for (i = 0; i < n; i++)
- fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
- }
-
- exit(1);
-}
-
-int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused,
- const char *prefix __maybe_unused)
-{
- printf("perf version %s\n", perf_version_string);
- return 0;
-}
diff --git a/tools/perf/util/help.h b/tools/lib/subcmd/help.h
index 7f5c6dedd714..e145a020780c 100644
--- a/tools/perf/util/help.h
+++ b/tools/lib/subcmd/help.h
@@ -1,12 +1,14 @@
-#ifndef __PERF_HELP_H
-#define __PERF_HELP_H
+#ifndef __SUBCMD_HELP_H
+#define __SUBCMD_HELP_H
+
+#include <sys/types.h>
struct cmdnames {
size_t alloc;
size_t cnt;
struct cmdname {
size_t len; /* also used for similarity index in help.c */
- char name[FLEX_ARRAY];
+ char name[];
} **names;
};
@@ -20,10 +22,13 @@ void load_command_list(const char *prefix,
struct cmdnames *main_cmds,
struct cmdnames *other_cmds);
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len);
+void clean_cmdnames(struct cmdnames *cmds);
+int cmdname_compare(const void *a, const void *b);
+void uniq(struct cmdnames *cmds);
/* Here we require that excludes is a sorted list. */
void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
int is_in_cmdlist(struct cmdnames *c, const char *s);
void list_commands(const char *title, struct cmdnames *main_cmds,
struct cmdnames *other_cmds);
-#endif /* __PERF_HELP_H */
+#endif /* __SUBCMD_HELP_H */
diff --git a/tools/perf/util/pager.c b/tools/lib/subcmd/pager.c
index 53ef006a951c..d50f3b58606b 100644
--- a/tools/perf/util/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -1,6 +1,12 @@
-#include "cache.h"
+#include <sys/select.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include "pager.h"
#include "run-command.h"
#include "sigchain.h"
+#include "subcmd-config.h"
/*
* This is split up from the rest of git so that we can do
@@ -9,6 +15,11 @@
static int spawned_pager;
+void pager_init(const char *pager_env)
+{
+ subcmd_config.pager_env = pager_env;
+}
+
static void pager_preexec(void)
{
/*
@@ -46,7 +57,7 @@ static void wait_for_pager_signal(int signo)
void setup_pager(void)
{
- const char *pager = getenv("PERF_PAGER");
+ const char *pager = getenv(subcmd_config.pager_env);
if (!isatty(1))
return;
@@ -85,11 +96,5 @@ void setup_pager(void)
int pager_in_use(void)
{
- const char *env;
-
- if (spawned_pager)
- return 1;
-
- env = getenv("PERF_PAGER_IN_USE");
- return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0;
+ return spawned_pager;
}
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
new file mode 100644
index 000000000000..8b83714ecf73
--- /dev/null
+++ b/tools/lib/subcmd/pager.h
@@ -0,0 +1,9 @@
+#ifndef __SUBCMD_PAGER_H
+#define __SUBCMD_PAGER_H
+
+extern void pager_init(const char *pager_env);
+
+extern void setup_pager(void);
+extern int pager_in_use(void);
+
+#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/perf/util/parse-options.c b/tools/lib/subcmd/parse-options.c
index 9fca09296eb3..981bb4481fd5 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -1,37 +1,66 @@
-#include "util.h"
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include "subcmd-util.h"
#include "parse-options.h"
-#include "cache.h"
-#include "header.h"
-#include <linux/string.h>
+#include "subcmd-config.h"
+#include "pager.h"
#define OPT_SHORT 1
#define OPT_UNSET 2
-static struct strbuf error_buf = STRBUF_INIT;
+char *error_buf;
static int opterror(const struct option *opt, const char *reason, int flags)
{
if (flags & OPT_SHORT)
- return error("switch `%c' %s", opt->short_name, reason);
- if (flags & OPT_UNSET)
- return error("option `no-%s' %s", opt->long_name, reason);
- return error("option `%s' %s", opt->long_name, reason);
+ fprintf(stderr, " Error: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Error: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Error: option `%s' %s", opt->long_name, reason);
+
+ return -1;
+}
+
+static const char *skip_prefix(const char *str, const char *prefix)
+{
+ size_t len = strlen(prefix);
+ return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ fprintf(stderr, " Warning: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Warning: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Warning: option `%s' %s", opt->long_name, reason);
}
static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
int flags, const char **arg)
{
+ const char *res;
+
if (p->opt) {
- *arg = p->opt;
+ res = p->opt;
p->opt = NULL;
} else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
**(p->argv + 1) == '-')) {
- *arg = (const char *)opt->defval;
+ res = (const char *)opt->defval;
} else if (p->argc > 1) {
p->argc--;
- *arg = *++p->argv;
+ res = *++p->argv;
} else
return opterror(opt, "requires a value", flags);
+ if (arg)
+ *arg = res;
return 0;
}
@@ -55,11 +84,11 @@ static int get_value(struct parse_opt_ctx_t *p,
if (((flags & OPT_SHORT) && p->excl_opt->short_name) ||
p->excl_opt->long_name == NULL) {
- scnprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
- p->excl_opt->short_name);
+ snprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
+ p->excl_opt->short_name);
} else {
- scnprintf(msg, sizeof(msg), "cannot be used with %s",
- p->excl_opt->long_name);
+ snprintf(msg, sizeof(msg), "cannot be used with %s",
+ p->excl_opt->long_name);
}
opterror(opt, msg, flags);
return -3;
@@ -91,6 +120,64 @@ static int get_value(struct parse_opt_ctx_t *p,
}
}
+ if (opt->flags & PARSE_OPT_NOBUILD) {
+ char reason[128];
+ bool noarg = false;
+
+ err = snprintf(reason, sizeof(reason),
+ opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored because %s " :
+ "is not available because %s",
+ opt->build_opt);
+ reason[sizeof(reason) - 1] = '\0';
+
+ if (err < 0)
+ strncpy(reason, opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored" :
+ "is not available",
+ sizeof(reason));
+
+ if (!(opt->flags & PARSE_OPT_CANSKIP))
+ return opterror(opt, reason, flags);
+
+ err = 0;
+ if (unset)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_NOARG)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ noarg = true;
+
+ switch (opt->type) {
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_BIT:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ noarg = true;
+ break;
+ case OPTION_CALLBACK:
+ case OPTION_STRING:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ case OPTION_LONG:
+ case OPTION_U64:
+ default:
+ break;
+ }
+
+ if (!noarg)
+ err = get_arg(p, opt, flags, NULL);
+ if (err)
+ return err;
+
+ optwarning(opt, reason, flags);
+ return 0;
+ }
+
switch (opt->type) {
case OPTION_BIT:
if (unset)
@@ -327,14 +414,16 @@ match:
return get_value(p, options, flags);
}
- if (ambiguous_option)
- return error("Ambiguous option: %s "
- "(could be --%s%s or --%s%s)",
- arg,
- (ambiguous_flags & OPT_UNSET) ? "no-" : "",
- ambiguous_option->long_name,
- (abbrev_flags & OPT_UNSET) ? "no-" : "",
- abbrev_option->long_name);
+ if (ambiguous_option) {
+ fprintf(stderr,
+ " Error: Ambiguous option: %s (could be --%s%s or --%s%s)",
+ arg,
+ (ambiguous_flags & OPT_UNSET) ? "no-" : "",
+ ambiguous_option->long_name,
+ (abbrev_flags & OPT_UNSET) ? "no-" : "",
+ abbrev_option->long_name);
+ return -1;
+ }
if (abbrev_option)
return get_value(p, abbrev_option, abbrev_flags);
return -2;
@@ -346,7 +435,7 @@ static void check_typos(const char *arg, const struct option *options)
return;
if (!prefixcmp(arg, "no-")) {
- error ("did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
exit(129);
}
@@ -354,14 +443,14 @@ static void check_typos(const char *arg, const struct option *options)
if (!options->long_name)
continue;
if (!prefixcmp(options->long_name, arg)) {
- error ("did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
exit(129);
}
}
}
-void parse_options_start(struct parse_opt_ctx_t *ctx,
- int argc, const char **argv, int flags)
+static void parse_options_start(struct parse_opt_ctx_t *ctx,
+ int argc, const char **argv, int flags)
{
memset(ctx, 0, sizeof(*ctx));
ctx->argc = argc - 1;
@@ -378,9 +467,9 @@ static int usage_with_options_internal(const char * const *,
const struct option *, int,
struct parse_opt_ctx_t *);
-int parse_options_step(struct parse_opt_ctx_t *ctx,
- const struct option *options,
- const char * const usagestr[])
+static int parse_options_step(struct parse_opt_ctx_t *ctx,
+ const struct option *options,
+ const char * const usagestr[])
{
int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
int excl_short_opt = 1;
@@ -489,7 +578,7 @@ exclusive:
return PARSE_OPT_HELP;
}
-int parse_options_end(struct parse_opt_ctx_t *ctx)
+static int parse_options_end(struct parse_opt_ctx_t *ctx)
{
memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
ctx->out[ctx->cpidx + ctx->argc] = NULL;
@@ -501,22 +590,20 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o
{
struct parse_opt_ctx_t ctx;
- perf_env__set_cmdline(&perf_env, argc, argv);
-
/* build usage string if it's not provided */
if (subcommands && !usagestr[0]) {
- struct strbuf buf = STRBUF_INIT;
+ char *buf = NULL;
+
+ astrcatf(&buf, "%s %s [<options>] {", subcmd_config.exec_name, argv[0]);
- strbuf_addf(&buf, "perf %s [<options>] {", argv[0]);
for (int i = 0; subcommands[i]; i++) {
if (i)
- strbuf_addstr(&buf, "|");
- strbuf_addstr(&buf, subcommands[i]);
+ astrcat(&buf, "|");
+ astrcat(&buf, subcommands[i]);
}
- strbuf_addstr(&buf, "}");
+ astrcat(&buf, "}");
- usagestr[0] = strdup(buf.buf);
- strbuf_release(&buf);
+ usagestr[0] = buf;
}
parse_options_start(&ctx, argc, argv, flags);
@@ -541,13 +628,11 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o
putchar('\n');
exit(130);
default: /* PARSE_OPT_UNKNOWN */
- if (ctx.argv[0][1] == '-') {
- strbuf_addf(&error_buf, "unknown option `%s'",
- ctx.argv[0] + 2);
- } else {
- strbuf_addf(&error_buf, "unknown switch `%c'",
- *ctx.opt);
- }
+ if (ctx.argv[0][1] == '-')
+ astrcatf(&error_buf, "unknown option `%s'",
+ ctx.argv[0] + 2);
+ else
+ astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt);
usage_with_options(usagestr, options);
}
@@ -647,6 +732,10 @@ static void print_option_help(const struct option *opts, int full)
pad = USAGE_OPTS_WIDTH;
}
fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+ if (opts->flags & PARSE_OPT_NOBUILD)
+ fprintf(stderr, "%*s(not built-in because %s)\n",
+ USAGE_OPTS_WIDTH + USAGE_GAP, "",
+ opts->build_opt);
}
static int option__cmp(const void *va, const void *vb)
@@ -672,16 +761,18 @@ static int option__cmp(const void *va, const void *vb)
static struct option *options__order(const struct option *opts)
{
- int nr_opts = 0;
+ int nr_opts = 0, len;
const struct option *o = opts;
struct option *ordered;
for (o = opts; o->type != OPTION_END; o++)
++nr_opts;
- ordered = memdup(opts, sizeof(*o) * (nr_opts + 1));
- if (ordered == NULL)
+ len = sizeof(*o) * (nr_opts + 1);
+ ordered = malloc(len);
+ if (!ordered)
goto out;
+ memcpy(ordered, opts, len);
qsort(ordered, nr_opts, sizeof(*o), option__cmp);
out:
@@ -719,9 +810,9 @@ static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx
return false;
}
-int usage_with_options_internal(const char * const *usagestr,
- const struct option *opts, int full,
- struct parse_opt_ctx_t *ctx)
+static int usage_with_options_internal(const char * const *usagestr,
+ const struct option *opts, int full,
+ struct parse_opt_ctx_t *ctx)
{
struct option *ordered;
@@ -730,9 +821,9 @@ int usage_with_options_internal(const char * const *usagestr,
setup_pager();
- if (strbuf_avail(&error_buf)) {
- fprintf(stderr, " Error: %s\n", error_buf.buf);
- strbuf_release(&error_buf);
+ if (error_buf) {
+ fprintf(stderr, " Error: %s\n", error_buf);
+ zfree(&error_buf);
}
fprintf(stderr, "\n Usage: %s\n", *usagestr++);
@@ -768,7 +859,6 @@ int usage_with_options_internal(const char * const *usagestr,
void usage_with_options(const char * const *usagestr,
const struct option *opts)
{
- exit_browser(false);
usage_with_options_internal(usagestr, opts, 0, NULL);
exit(129);
}
@@ -777,13 +867,15 @@ void usage_with_options_msg(const char * const *usagestr,
const struct option *opts, const char *fmt, ...)
{
va_list ap;
-
- exit_browser(false);
+ char *tmp = error_buf;
va_start(ap, fmt);
- strbuf_addv(&error_buf, fmt, ap);
+ if (vasprintf(&error_buf, fmt, ap) == -1)
+ die("vasprintf failed");
va_end(ap);
+ free(tmp);
+
usage_with_options_internal(usagestr, opts, 0, NULL);
exit(129);
}
@@ -853,15 +945,39 @@ int parse_opt_verbosity_cb(const struct option *opt,
return 0;
}
-void set_option_flag(struct option *opts, int shortopt, const char *longopt,
- int flag)
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
{
for (; opts->type != OPTION_END; opts++) {
if ((shortopt && opts->short_name == shortopt) ||
(opts->long_name && longopt &&
- !strcmp(opts->long_name, longopt))) {
- opts->flags |= flag;
- break;
- }
+ !strcmp(opts->long_name, longopt)))
+ return opts;
}
+ return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+ int flag)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (opt)
+ opt->flags |= flag;
+ return;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+ const char *longopt,
+ const char *build_opt,
+ bool can_skip)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (!opt)
+ return;
+
+ opt->flags |= PARSE_OPT_NOBUILD;
+ opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+ opt->build_opt = build_opt;
}
diff --git a/tools/perf/util/parse-options.h b/tools/lib/subcmd/parse-options.h
index a8e407bc251e..13a2cc1d6140 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -1,8 +1,8 @@
-#ifndef __PERF_PARSE_OPTIONS_H
-#define __PERF_PARSE_OPTIONS_H
+#ifndef __SUBCMD_PARSE_OPTIONS_H
+#define __SUBCMD_PARSE_OPTIONS_H
-#include <linux/kernel.h>
#include <stdbool.h>
+#include <stdint.h>
enum parse_opt_type {
/* special types */
@@ -41,6 +41,8 @@ enum parse_opt_option_flags {
PARSE_OPT_DISABLED = 32,
PARSE_OPT_EXCLUSIVE = 64,
PARSE_OPT_NOEMPTY = 128,
+ PARSE_OPT_NOBUILD = 256,
+ PARSE_OPT_CANSKIP = 512,
};
struct option;
@@ -96,6 +98,7 @@ struct option {
void *value;
const char *argh;
const char *help;
+ const char *build_opt;
int flags;
parse_opt_cb *callback;
@@ -149,6 +152,9 @@ struct option {
/* parse_options() will filter out the processed options and leave the
* non-option argments in argv[].
* Returns the number of arguments left in argv[].
+ *
+ * NOTE: parse_options() and parse_options_subcommand() may call exit() in the
+ * case of an error (or for 'special' options like --list-cmds or --list-opts).
*/
extern int parse_options(int argc, const char **argv,
const struct option *options,
@@ -195,15 +201,6 @@ extern int parse_options_usage(const char * const *usagestr,
const char *optstr,
bool short_opt);
-extern void parse_options_start(struct parse_opt_ctx_t *ctx,
- int argc, const char **argv, int flags);
-
-extern int parse_options_step(struct parse_opt_ctx_t *ctx,
- const struct option *options,
- const char * const usagestr[]);
-
-extern int parse_options_end(struct parse_opt_ctx_t *ctx);
-
/*----- some often used options -----*/
extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
@@ -226,4 +223,7 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
extern const char *parse_options_fix_filename(const char *prefix, const char *file);
void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
-#endif /* __PERF_PARSE_OPTIONS_H */
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+ const char *build_opt, bool can_skip);
+
+#endif /* __SUBCMD_PARSE_OPTIONS_H */
diff --git a/tools/perf/util/run-command.c b/tools/lib/subcmd/run-command.c
index 34622b53e733..f4f6c9eb8e59 100644
--- a/tools/perf/util/run-command.c
+++ b/tools/lib/subcmd/run-command.c
@@ -1,7 +1,15 @@
-#include "cache.h"
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "subcmd-util.h"
#include "run-command.h"
-#include "exec_cmd.h"
-#include "debug.h"
+#include "exec-cmd.h"
+
+#define STRERR_BUFSIZE 128
static inline void close_pair(int fd[2])
{
@@ -112,8 +120,8 @@ int start_command(struct child_process *cmd)
}
if (cmd->preexec_cb)
cmd->preexec_cb();
- if (cmd->perf_cmd) {
- execv_perf_cmd(cmd->argv);
+ if (cmd->exec_cmd) {
+ execv_cmd(cmd->argv);
} else {
execvp(cmd->argv[0], (char *const*) cmd->argv);
}
@@ -164,8 +172,8 @@ static int wait_or_whine(pid_t pid)
if (waiting < 0) {
if (errno == EINTR)
continue;
- error("waitpid failed (%s)",
- strerror_r(errno, sbuf, sizeof(sbuf)));
+ fprintf(stderr, " Error: waitpid failed (%s)",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
return -ERR_RUN_COMMAND_WAITPID;
}
if (waiting != pid)
@@ -207,7 +215,7 @@ static void prepare_run_command_v_opt(struct child_process *cmd,
memset(cmd, 0, sizeof(*cmd));
cmd->argv = argv;
cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
- cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0;
+ cmd->exec_cmd = opt & RUN_EXEC_CMD ? 1 : 0;
cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
}
diff --git a/tools/perf/util/run-command.h b/tools/lib/subcmd/run-command.h
index 1ef264d5069c..fe2befea1e73 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/lib/subcmd/run-command.h
@@ -1,5 +1,7 @@
-#ifndef __PERF_RUN_COMMAND_H
-#define __PERF_RUN_COMMAND_H
+#ifndef __SUBCMD_RUN_COMMAND_H
+#define __SUBCMD_RUN_COMMAND_H
+
+#include <unistd.h>
enum {
ERR_RUN_COMMAND_FORK = 10000,
@@ -41,7 +43,7 @@ struct child_process {
unsigned no_stdin:1;
unsigned no_stdout:1;
unsigned no_stderr:1;
- unsigned perf_cmd:1; /* if this is to be perf sub-command */
+ unsigned exec_cmd:1; /* if this is to be external sub-command */
unsigned stdout_to_stderr:1;
void (*preexec_cb)(void);
};
@@ -51,8 +53,8 @@ int finish_command(struct child_process *);
int run_command(struct child_process *);
#define RUN_COMMAND_NO_STDIN 1
-#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */
+#define RUN_EXEC_CMD 2 /*If this is to be external sub-command */
#define RUN_COMMAND_STDOUT_TO_STDERR 4
int run_command_v_opt(const char **argv, int opt);
-#endif /* __PERF_RUN_COMMAND_H */
+#endif /* __SUBCMD_RUN_COMMAND_H */
diff --git a/tools/perf/util/sigchain.c b/tools/lib/subcmd/sigchain.c
index ba785e9b1841..3537c348a18e 100644
--- a/tools/perf/util/sigchain.c
+++ b/tools/lib/subcmd/sigchain.c
@@ -1,5 +1,6 @@
+#include <signal.h>
+#include "subcmd-util.h"
#include "sigchain.h"
-#include "cache.h"
#define SIGCHAIN_MAX_SIGNALS 32
diff --git a/tools/perf/util/sigchain.h b/tools/lib/subcmd/sigchain.h
index 959d64eb5557..0c919f2874ca 100644
--- a/tools/perf/util/sigchain.h
+++ b/tools/lib/subcmd/sigchain.h
@@ -1,5 +1,5 @@
-#ifndef __PERF_SIGCHAIN_H
-#define __PERF_SIGCHAIN_H
+#ifndef __SUBCMD_SIGCHAIN_H
+#define __SUBCMD_SIGCHAIN_H
typedef void (*sigchain_fun)(int);
@@ -7,4 +7,4 @@ int sigchain_pop(int sig);
void sigchain_push_common(sigchain_fun f);
-#endif /* __PERF_SIGCHAIN_H */
+#endif /* __SUBCMD_SIGCHAIN_H */
diff --git a/tools/lib/subcmd/subcmd-config.c b/tools/lib/subcmd/subcmd-config.c
new file mode 100644
index 000000000000..d017c728bd1b
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.c
@@ -0,0 +1,11 @@
+#include "subcmd-config.h"
+
+#define UNDEFINED "SUBCMD_HAS_NOT_BEEN_INITIALIZED"
+
+struct subcmd_config subcmd_config = {
+ .exec_name = UNDEFINED,
+ .prefix = UNDEFINED,
+ .exec_path = UNDEFINED,
+ .exec_path_env = UNDEFINED,
+ .pager_env = UNDEFINED,
+};
diff --git a/tools/lib/subcmd/subcmd-config.h b/tools/lib/subcmd/subcmd-config.h
new file mode 100644
index 000000000000..cc8514030b5c
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.h
@@ -0,0 +1,14 @@
+#ifndef __PERF_SUBCMD_CONFIG_H
+#define __PERF_SUBCMD_CONFIG_H
+
+struct subcmd_config {
+ const char *exec_name;
+ const char *prefix;
+ const char *exec_path;
+ const char *exec_path_env;
+ const char *pager_env;
+};
+
+extern struct subcmd_config subcmd_config;
+
+#endif /* __PERF_SUBCMD_CONFIG_H */
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
new file mode 100644
index 000000000000..fc2e45d8aaf1
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -0,0 +1,91 @@
+#ifndef __SUBCMD_UTIL_H
+#define __SUBCMD_UTIL_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NORETURN __attribute__((__noreturn__))
+
+static inline void report(const char *prefix, const char *err, va_list params)
+{
+ char msg[1024];
+ vsnprintf(msg, sizeof(msg), err, params);
+ fprintf(stderr, " %s%s\n", prefix, msg);
+}
+
+static NORETURN inline void die(const char *err, ...)
+{
+ va_list params;
+
+ va_start(params, err);
+ report(" Fatal: ", err, params);
+ exit(128);
+ va_end(params);
+}
+
+#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+ do { \
+ if ((nr) > alloc) { \
+ if (alloc_nr(alloc) < (nr)) \
+ alloc = (nr); \
+ else \
+ alloc = alloc_nr(alloc); \
+ x = xrealloc((x), alloc * sizeof(*(x))); \
+ } \
+ } while(0)
+
+static inline void *xrealloc(void *ptr, size_t size)
+{
+ void *ret = realloc(ptr, size);
+ if (!ret && !size)
+ ret = realloc(ptr, 1);
+ if (!ret) {
+ ret = realloc(ptr, size);
+ if (!ret && !size)
+ ret = realloc(ptr, 1);
+ if (!ret)
+ die("Out of memory, realloc failed");
+ }
+ return ret;
+}
+
+#define astrcatf(out, fmt, ...) \
+({ \
+ char *tmp = *(out); \
+ if (asprintf((out), "%s" fmt, tmp ?: "", ## __VA_ARGS__) == -1) \
+ die("asprintf failed"); \
+ free(tmp); \
+})
+
+static inline void astrcat(char **out, const char *add)
+{
+ char *tmp = *out;
+
+ if (asprintf(out, "%s%s", tmp ?: "", add) == -1)
+ die("asprintf failed");
+
+ free(tmp);
+}
+
+static inline int prefixcmp(const char *str, const char *prefix)
+{
+ for (; ; str++, prefix++)
+ if (!*prefix)
+ return 0;
+ else if (*str != *prefix)
+ return (unsigned char)*prefix - (unsigned char)*str;
+}
+
+#endif /* __SUBCMD_UTIL_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 2a912df6771b..ea69ce35e902 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4735,73 +4735,80 @@ static int is_printable_array(char *p, unsigned int len)
return 1;
}
-static void print_event_fields(struct trace_seq *s, void *data,
- int size __maybe_unused,
- struct event_format *event)
+void pevent_print_field(struct trace_seq *s, void *data,
+ struct format_field *field)
{
- struct format_field *field;
unsigned long long val;
unsigned int offset, len, i;
-
- field = event->format.fields;
- while (field) {
- trace_seq_printf(s, " %s=", field->name);
- if (field->flags & FIELD_IS_ARRAY) {
- offset = field->offset;
- len = field->size;
- if (field->flags & FIELD_IS_DYNAMIC) {
- val = pevent_read_number(event->pevent, data + offset, len);
- offset = val;
- len = offset >> 16;
- offset &= 0xffff;
- }
- if (field->flags & FIELD_IS_STRING &&
- is_printable_array(data + offset, len)) {
- trace_seq_printf(s, "%s", (char *)data + offset);
- } else {
- trace_seq_puts(s, "ARRAY[");
- for (i = 0; i < len; i++) {
- if (i)
- trace_seq_puts(s, ", ");
- trace_seq_printf(s, "%02x",
- *((unsigned char *)data + offset + i));
- }
- trace_seq_putc(s, ']');
- field->flags &= ~FIELD_IS_STRING;
- }
+ struct pevent *pevent = field->event->pevent;
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ val = pevent_read_number(pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ trace_seq_printf(s, "%s", (char *)data + offset);
} else {
- val = pevent_read_number(event->pevent, data + field->offset,
- field->size);
- if (field->flags & FIELD_IS_POINTER) {
- trace_seq_printf(s, "0x%llx", val);
- } else if (field->flags & FIELD_IS_SIGNED) {
- switch (field->size) {
- case 4:
- /*
- * If field is long then print it in hex.
- * A long usually stores pointers.
- */
- if (field->flags & FIELD_IS_LONG)
- trace_seq_printf(s, "0x%x", (int)val);
- else
- trace_seq_printf(s, "%d", (int)val);
- break;
- case 2:
- trace_seq_printf(s, "%2d", (short)val);
- break;
- case 1:
- trace_seq_printf(s, "%1d", (char)val);
- break;
- default:
- trace_seq_printf(s, "%lld", val);
- }
- } else {
+ trace_seq_puts(s, "ARRAY[");
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_puts(s, ", ");
+ trace_seq_printf(s, "%02x",
+ *((unsigned char *)data + offset + i));
+ }
+ trace_seq_putc(s, ']');
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else {
+ val = pevent_read_number(pevent, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_POINTER) {
+ trace_seq_printf(s, "0x%llx", val);
+ } else if (field->flags & FIELD_IS_SIGNED) {
+ switch (field->size) {
+ case 4:
+ /*
+ * If field is long then print it in hex.
+ * A long usually stores pointers.
+ */
if (field->flags & FIELD_IS_LONG)
- trace_seq_printf(s, "0x%llx", val);
+ trace_seq_printf(s, "0x%x", (int)val);
else
- trace_seq_printf(s, "%llu", val);
+ trace_seq_printf(s, "%d", (int)val);
+ break;
+ case 2:
+ trace_seq_printf(s, "%2d", (short)val);
+ break;
+ case 1:
+ trace_seq_printf(s, "%1d", (char)val);
+ break;
+ default:
+ trace_seq_printf(s, "%lld", val);
}
+ } else {
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%llx", val);
+ else
+ trace_seq_printf(s, "%llu", val);
}
+ }
+}
+
+void pevent_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event)
+{
+ struct format_field *field;
+
+ field = event->format.fields;
+ while (field) {
+ trace_seq_printf(s, " %s=", field->name);
+ pevent_print_field(s, data, field);
field = field->next;
}
}
@@ -4827,7 +4834,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
if (event->flags & EVENT_FL_FAILED) {
trace_seq_printf(s, "[FAILED TO PARSE]");
- print_event_fields(s, data, size, event);
+ pevent_print_fields(s, data, size, event);
return;
}
@@ -4968,13 +4975,12 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
sizeof(long) != 8) {
char *p;
- ls = 2;
/* make %l into %ll */
- p = strchr(format, 'l');
- if (p)
+ if (ls == 1 && (p = strchr(format, 'l')))
memmove(p+1, p, strlen(p)+1);
else if (strcmp(format, "%p") == 0)
strcpy(format, "0x%llx");
+ ls = 2;
}
switch (ls) {
case -2:
@@ -5302,7 +5308,7 @@ void pevent_event_info(struct trace_seq *s, struct event_format *event,
int print_pretty = 1;
if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
- print_event_fields(s, record->data, record->size, event);
+ pevent_print_fields(s, record->data, record->size, event);
else {
if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 6fc83c7edbe9..706d9bc24066 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -705,6 +705,10 @@ struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *com
struct cmdline *next);
int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline);
+void pevent_print_field(struct trace_seq *s, void *data,
+ struct format_field *field);
+void pevent_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event);
void pevent_event_info(struct trace_seq *s, struct event_format *event,
struct pevent_record *record);
int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
diff --git a/tools/lib/util/find_next_bit.c b/tools/lib/util/find_next_bit.c
deleted file mode 100644
index 41b44f65a79e..000000000000
--- a/tools/lib/util/find_next_bit.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* find_next_bit.c: fallback find next bit implementation
- *
- * Copied from lib/find_next_bit.c to tools/lib/next_bit.c
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/bitops.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- const unsigned long *p = addr + BITOP_WORD(offset);
- unsigned long result = offset & ~(BITS_PER_LONG-1);
- unsigned long tmp;
-
- if (offset >= size)
- return size;
- size -= result;
- offset %= BITS_PER_LONG;
- if (offset) {
- tmp = *(p++);
- tmp &= (~0UL << offset);
- if (size < BITS_PER_LONG)
- goto found_first;
- if (tmp)
- goto found_middle;
- size -= BITS_PER_LONG;
- result += BITS_PER_LONG;
- }
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found_middle;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
- tmp = *p;
-
-found_first:
- tmp &= (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found_middle:
- return result + __ffs(tmp);
-}
-#endif
-
-#ifndef find_first_bit
-/*
- * Find the first set bit in a memory region.
- */
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
-{
- const unsigned long *p = addr;
- unsigned long result = 0;
- unsigned long tmp;
-
- while (size & ~(BITS_PER_LONG-1)) {
- if ((tmp = *(p++)))
- goto found;
- result += BITS_PER_LONG;
- size -= BITS_PER_LONG;
- }
- if (!size)
- return result;
-
- tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
- if (tmp == 0UL) /* Are any bits set? */
- return result + size; /* Nope. */
-found:
- return result + __ffs(tmp);
-}
-#endif
diff --git a/tools/perf/Build b/tools/perf/Build
index 72237455b400..6b67e6f4179f 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -1,5 +1,6 @@
perf-y += builtin-bench.o
perf-y += builtin-annotate.o
+perf-y += builtin-config.o
perf-y += builtin-diff.o
perf-y += builtin-evlist.o
perf-y += builtin-help.o
@@ -19,6 +20,7 @@ perf-y += builtin-kvm.o
perf-y += builtin-inject.o
perf-y += builtin-mem.o
perf-y += builtin-data.o
+perf-y += builtin-version.o
perf-$(CONFIG_AUDIT) += builtin-trace.o
perf-$(CONFIG_LIBELF) += builtin-probe.o
@@ -34,8 +36,12 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
CFLAGS_builtin-help.o += $(paths)
CFLAGS_builtin-timechart.o += $(paths)
-CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \
+ -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \
+ -DPREFIX="BUILD_STR($(prefix_SQ))" \
+ -include $(OUTPUT)PERF-VERSION-FILE
CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
+CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
libperf-y += util/
libperf-y += arch/
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
new file mode 100644
index 000000000000..b9ca1e304158
--- /dev/null
+++ b/tools/perf/Documentation/perf-config.txt
@@ -0,0 +1,103 @@
+perf-config(1)
+==============
+
+NAME
+----
+perf-config - Get and set variables in a configuration file.
+
+SYNOPSIS
+--------
+[verse]
+'perf config' -l | --list
+
+DESCRIPTION
+-----------
+You can manage variables in a configuration file with this command.
+
+OPTIONS
+-------
+
+-l::
+--list::
+ Show current config variables, name and value, for all sections.
+
+CONFIGURATION FILE
+------------------
+
+The perf configuration file contains many variables to change various
+aspects of each of its tools, including output, disk usage, etc.
+The '$HOME/.perfconfig' file is used to store a per-user configuration.
+The file '$(sysconfdir)/perfconfig' can be used to
+store a system-wide default configuration.
+
+Syntax
+~~~~~~
+
+The file consist of sections. A section starts with its name
+surrounded by square brackets and continues till the next section
+begins. Each variable must be in a section, and have the form
+'name = value', for example:
+
+ [section]
+ name1 = value1
+ name2 = value2
+
+Section names are case sensitive and can contain any characters except
+newline (double quote `"` and backslash have to be escaped as `\"` and `\\`,
+respectively). Section headers can't span multiple lines.
+
+Example
+~~~~~~~
+
+Given a $HOME/.perfconfig like this:
+
+#
+# This is the config file, and
+# a '#' and ';' character indicates a comment
+#
+
+ [colors]
+ # Color variables
+ top = red, default
+ medium = green, default
+ normal = lightgray, default
+ selected = white, lightgray
+ code = blue, default
+ addr = magenta, default
+ root = white, blue
+
+ [tui]
+ # Defaults if linked with libslang
+ report = on
+ annotate = on
+ top = on
+
+ [buildid]
+ # Default, disable using /dev/null
+ dir = ~/.debug
+
+ [annotate]
+ # Defaults
+ hide_src_code = false
+ use_offset = true
+ jump_arrows = true
+ show_nr_jumps = false
+
+ [help]
+ # Format can be man, info, web or html
+ format = man
+ autocorrect = 0
+
+ [ui]
+ show-headers = true
+
+ [call-graph]
+ # fp (framepointer), dwarf
+ record-mode = fp
+ print-type = graph
+ order = caller
+ sort-key = function
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 1ceb3700ffbb..6f7200fb85cf 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -32,6 +32,9 @@ OPTIONS
--group::
Show event group information.
+--trace-fields::
+ Show tracepoint field names.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e630a7d2c348..3a1a32f5479f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -207,11 +207,23 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-
In per-thread mode with inheritance mode on (default), samples are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
+-B::
+--no-buildid::
+Do not save the build ids of binaries in the perf.data files. This skips
+post processing after recording, which sometimes makes the final step in
+the recording process to take a long time, as it needs to process all
+events looking for mmap records. The downside is that it can misresolve
+symbols if the workload binaries used when recording get locally rebuilt
+or upgraded, because the only key available in this case is the
+pathname. You can also set the "record.build-id" config variable to
+'skip to have this behaviour permanently.
+
-N::
--no-buildid-cache::
Do not update the buildid cache. This saves some overhead in situations
where the information in the perf.data file (which includes buildids)
-is sufficient.
+is sufficient. You can also set the "record.build-id" config variable to
+'no-cache' to have the same effect.
-G name,...::
--cgroup name,...::
@@ -314,11 +326,17 @@ This option sets the time out limit. The default value is 500 ms.
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
---clang-path::
+--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
+(enabled when BPF support is on)
---clang-opt::
+--clang-opt=OPTIONS::
Options passed to clang when compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--vmlinux=PATH::
+Specify vmlinux path which has debuginfo.
+(enabled when BPF prologue is on)
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 5ce8da1e1256..8a301f6afb37 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -117,6 +117,30 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
+ If the data file has tracepoint event(s), following (dynamic) sort keys
+ are also available:
+ trace, trace_fields, [<event>.]<field>[/raw]
+
+ - trace: pretty printed trace output in a single column
+ - trace_fields: fields in tracepoints in separate columns
+ - <field name>: optional event and field name for a specific field
+
+ The last form consists of event and field names. If event name is
+ omitted, it searches all events for matching field name. The matched
+ field will be shown only for the event has the field. The event name
+ supports substring match so user doesn't need to specify full subsystem
+ and event name everytime. For example, 'sched:sched_switch' event can
+ be shortened to 'switch' as long as it's not ambiguous. Also event can
+ be specified by its index (starting from 1) preceded by the '%'.
+ So '%1' is the first event, '%2' is the second, and so on.
+
+ The field name can have '/raw' suffix which disables pretty printing
+ and shows raw field value like hex numbers. The --raw-trace option
+ has the same effect for all dynamic sort keys.
+
+ The default sort keys are changed to 'trace' if all events in the data
+ file are tracepoint.
+
-F::
--fields=::
Specify output field - multiple keys can be specified in CSV format.
@@ -170,17 +194,18 @@ OPTIONS
Dump raw trace in ASCII.
-g::
---call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>::
+--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
Display call chains using type, min percent threshold, print limit,
- call order, sort key and branch. Note that ordering of parameters is not
- fixed so any parement can be given in an arbitraty order. One exception
- is the print_limit which should be preceded by threshold.
+ call order, sort key, optional branch and value. Note that ordering of
+ parameters is not fixed so any parement can be given in an arbitraty order.
+ One exception is the print_limit which should be preceded by threshold.
print_type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates. (default)
- fractal: like graph, but displays relative rates. Each branch of
the tree is considered as a new profiled object.
+ - folded: call chains are displayed in a line, separated by semicolons
- none: disable call chain display.
threshold is a percentage value which specifies a minimum percent to be
@@ -204,6 +229,11 @@ OPTIONS
- branch: include last branch information in callgraph when available.
Usually more convenient to use --branch-history for this.
+ value can be:
+ - percent: diplay overhead percent (default)
+ - period: display event period
+ - count: display event count
+
--children::
Accumulate callchain of children to parent entry so that then can
show up in the output. The output will have a new "Children" column
@@ -365,6 +395,9 @@ include::itrace.txt[]
--socket-filter::
Only report the samples on the processor socket that match with this filter
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
include::callchain-overhead-calculation.txt[]
SEE ALSO
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4e074a660826..52ef7a9d50aa 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -10,6 +10,8 @@ SYNOPSIS
[verse]
'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' report [-i file]
DESCRIPTION
-----------
@@ -22,6 +24,11 @@ OPTIONS
<command>...::
Any command you can specify in a shell.
+record::
+ See STAT RECORD.
+
+report::
+ See STAT REPORT.
-e::
--event=::
@@ -159,6 +166,33 @@ filter out the startup phase of the program, which is often very different.
Print statistics of transactional execution if supported.
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+STAT REPORT
+-----------
+Reads and reports stat data from perf data file.
+
+-i file::
+--input file::
+Input file name.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 556cec09bf50..b0e60e17db38 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -230,6 +230,9 @@ Default is to monitor all CPUS.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
new file mode 100644
index 000000000000..a1c10e360db5
--- /dev/null
+++ b/tools/perf/Documentation/tips.txt
@@ -0,0 +1,14 @@
+For a higher level overview, try: perf report --sort comm,dso
+Sample related events with: perf record -e '{cycles,instructions}:S'
+Compare performance results with: perf diff [<old file> <new file>]
+Boolean options have negative forms, e.g.: perf report --no-children
+Customize output of perf script with: perf script -F event,ip,sym
+Generate a script for your data: perf script -g <lang>
+Save output of perf stat using: perf stat record <target workload>
+Create an archive with symtabs to analyse on other machine: perf archive
+Search options using a keyword: perf report -h <keyword>
+Use parent filter to see specific call path: perf report -p <regex>
+List events using substring match: perf list <keyword>
+To see list of saved events and attributes: perf evlist -v
+Use --symfs <dir> if your symbol files are in non-standard locations
+To see callchains in a more compact form: perf report -g folded
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 39c38cb45b00..ddf922f93aa1 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,6 +1,7 @@
tools/perf
tools/arch/alpha/include/asm/barrier.h
tools/arch/arm/include/asm/barrier.h
+tools/arch/arm64/include/asm/barrier.h
tools/arch/ia64/include/asm/barrier.h
tools/arch/mips/include/asm/barrier.h
tools/arch/powerpc/include/asm/barrier.h
@@ -20,14 +21,17 @@ tools/lib/traceevent
tools/lib/bpf
tools/lib/api
tools/lib/bpf
+tools/lib/subcmd
tools/lib/hweight.c
tools/lib/rbtree.c
+tools/lib/string.c
tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
-tools/lib/util/find_next_bit.c
+tools/lib/find_bit.c
tools/include/asm/atomic.h
tools/include/asm/barrier.h
tools/include/asm/bug.h
+tools/include/asm-generic/atomic-gcc.h
tools/include/asm-generic/barrier.h
tools/include/asm-generic/bitops/arch_hweight.h
tools/include/asm-generic/bitops/atomic.h
@@ -50,6 +54,7 @@ tools/include/linux/log2.h
tools/include/linux/poison.h
tools/include/linux/rbtree.h
tools/include/linux/rbtree_augmented.h
+tools/include/linux/string.h
tools/include/linux/types.h
tools/include/linux/err.h
include/asm-generic/bitops/arch_hweight.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0d19d5447d6c..0a22407e1d7d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -145,9 +145,10 @@ BISON = bison
STRIP = strip
AWK = awk
-LIB_DIR = $(srctree)/tools/lib/api/
+LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
-BPF_DIR = $(srctree)/tools/lib/bpf/
+BPF_DIR = $(srctree)/tools/lib/bpf/
+SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
# include config/Makefile by default and rule out
# non-config cases
@@ -184,15 +185,17 @@ strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
+ SUBCMD_PATH=$(OUTPUT)
ifneq ($(subdir),)
- LIB_PATH=$(OUTPUT)/../lib/api/
+ API_PATH=$(OUTPUT)/../lib/api/
else
- LIB_PATH=$(OUTPUT)
+ API_PATH=$(OUTPUT)
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
- LIB_PATH=$(LIB_DIR)
+ API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
+ SUBCMD_PATH=$(SUBCMD_DIR)
endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
@@ -201,11 +204,13 @@ export LIBTRACEEVENT
LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
-LIBAPI = $(LIB_PATH)libapi.a
+LIBAPI = $(API_PATH)libapi.a
export LIBAPI
LIBBPF = $(BPF_PATH)libbpf.a
+LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+
# python extension build directories
PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
@@ -257,7 +262,7 @@ export PERL_PATH
LIB_FILE=$(OUTPUT)libperf.a
-PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT)
+PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
ifndef NO_LIBBPF
PERFLIBS += $(LIBBPF)
endif
@@ -420,7 +425,7 @@ $(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
-install-traceevent-plugins: $(LIBTRACEEVENT)
+install-traceevent-plugins: libtraceevent_plugins
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
$(LIBAPI): fixdep FORCE
@@ -431,12 +436,19 @@ $(LIBAPI)-clean:
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBBPF): fixdep FORCE
- $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a
+ $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(realpath $(OUTPUT)FEATURE-DUMP)
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
+$(LIBSUBCMD): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+
+$(LIBSUBCMD)-clean:
+ $(call QUIET_CLEAN, libsubcmd)
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+
help:
@echo 'Perf make targets:'
@echo ' doc - make *all* documentation (see below)'
@@ -476,7 +488,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
$(DOC_TARGETS):
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
-TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol ../include ../lib/bpf
+TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
TAGS:
@@ -555,6 +567,9 @@ endif
$(call QUIET_INSTALL, perf_completion-script) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, perf-tip) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
+ $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
install-tests: all install-gtk
$(call QUIET_INSTALL, tests) \
@@ -582,15 +597,16 @@ $(INSTALL_DOC_TARGETS):
#
config-clean:
$(call QUIET_CLEAN, config)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean config-clean
+clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
- $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
- $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
+ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 7ed00f4b0908..b48de2f5813c 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -2,10 +2,10 @@
#define ARCH_TESTS_H
/* Tests */
-int test__rdpmc(void);
-int test__perf_time_to_tsc(void);
-int test__insn_x86(void);
-int test__intel_cqm_count_nmi_context(void);
+int test__rdpmc(int subtest);
+int test__perf_time_to_tsc(int subtest);
+int test__insn_x86(int subtest);
+int test__intel_cqm_count_nmi_context(int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index b6115dfd28f0..08d9b2bc185c 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
* verbose (-v) option to see all the instructions and whether or not they
* decoded successfuly.
*/
-int test__insn_x86(void)
+int test__insn_x86(int subtest __maybe_unused)
{
int ret = 0;
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index d28c1b6a3b54..3e89ba825f6b 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -33,7 +33,7 @@ static pid_t spawn(void)
* the last read counter value to avoid triggering a WARN_ON_ONCE() in
* smp_call_function_many() caused by sending IPIs from NMI context.
*/
-int test__intel_cqm_count_nmi_context(void)
+int test__intel_cqm_count_nmi_context(int subtest __maybe_unused)
{
struct perf_evlist *evlist = NULL;
struct perf_evsel *evsel = NULL;
@@ -54,7 +54,7 @@ int test__intel_cqm_count_nmi_context(void)
ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
if (ret) {
- pr_debug("parse_events failed\n");
+ pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;
goto out;
}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 658cd200af74..9d29ee283ac5 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -35,13 +35,12 @@
* %0 is returned, otherwise %-1 is returned. If TSC conversion is not
* supported then then the test passes but " (not supported)" is printed.
*/
-int test__perf_time_to_tsc(void)
+int test__perf_time_to_tsc(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
- .freq = 4000,
.target = {
.uses_mmap = true,
},
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
index e7688214c7cf..7bb0d13c235f 100644
--- a/tools/perf/arch/x86/tests/rdpmc.c
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -149,7 +149,7 @@ out_close:
return 0;
}
-int test__rdpmc(void)
+int test__rdpmc(int subtest __maybe_unused)
{
int status = 0;
int wret = 0;
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index ff63649fa9ac..465970370f3e 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -5,6 +5,7 @@ libperf-y += kvm-stat.o
libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 9b94ce520917..8d8150f1cf9b 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -327,7 +327,7 @@ static int intel_bts_snapshot_start(struct auxtrace_record *itr)
evlist__for_each(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__disable_event(btsr->evlist, evsel);
+ return perf_evsel__disable(evsel);
}
return -EINVAL;
}
@@ -340,7 +340,7 @@ static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
evlist__for_each(btsr->evlist, evsel) {
if (evsel->attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__enable_event(btsr->evlist, evsel);
+ return perf_evsel__enable(evsel);
}
return -EINVAL;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index b02af064f0f9..f05daacc9e78 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -26,7 +26,7 @@
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
-#include "../../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
@@ -725,7 +725,7 @@ static int intel_pt_snapshot_start(struct auxtrace_record *itr)
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__disable_event(ptr->evlist, evsel);
+ return perf_evsel__disable(evsel);
}
return -EINVAL;
}
@@ -738,7 +738,7 @@ static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
evlist__for_each(ptr->evlist, evsel) {
if (evsel->attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__enable_event(ptr->evlist, evsel);
+ return perf_evsel__enable(evsel);
}
return -EINVAL;
}
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index fc9bebd2cca0..0999ac536d86 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index bc6a16adbca8..6a18ce21f865 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -5,7 +5,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index ad0d9b5342fb..718238683013 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6d8c9fa2a16c..91aaf2a1fa90 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -10,7 +10,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index e5e41d3bdce7..f416bd705f66 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "bench.h"
#include "futex.h"
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9419b944220f..a91aa85d80ff 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -8,7 +8,7 @@
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/header.h"
#include "../util/cloexec.h"
#include "bench.h"
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 492df2752a2d..5049d6357a46 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -7,7 +7,7 @@
#include "../perf.h"
#include "../builtin.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../util/cloexec.h"
#include "bench.h"
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index d4ff1b539cfd..bfaf9503de8e 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -11,7 +11,7 @@
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../builtin.h"
#include "bench.h"
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 005cc283790c..1dc2d13cc272 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -10,7 +10,7 @@
*/
#include "../perf.h"
#include "../util/util.h"
-#include "../util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "../builtin.h"
#include "bench.h"
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 2bf9b3fd9e61..cc5c1267c738 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -21,7 +21,7 @@
#include "util/evsel.h"
#include "util/annotate.h"
#include "util/event.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/thread.h"
#include "util/sort.h"
@@ -47,7 +47,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
- struct perf_sample *sample __maybe_unused,
+ struct perf_sample *sample,
struct addr_location *al,
struct perf_annotate *ann)
{
@@ -72,7 +72,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
- he = __hists__add_entry(hists, al, NULL, NULL, NULL, 1, 1, 0, true);
+ sample->period = 1;
+ sample->weight = 1;
+
+ he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
if (he == NULL)
return -ENOMEM;
@@ -343,18 +346,19 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
return ret;
argc = parse_options(argc, argv, options, annotate_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(annotate_usage, options);
- if (annotate.use_stdio)
- use_browser = 0;
- else if (annotate.use_tui)
- use_browser = 1;
- else if (annotate.use_gtk)
- use_browser = 2;
+ annotate.sym_hist_filter = argv[0];
+ }
file.path = input_name;
- setup_browser(true);
-
annotate.session = perf_session__new(&file, false, &annotate.tool);
if (annotate.session == NULL)
return -1;
@@ -366,19 +370,17 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (ret < 0)
goto out_delete;
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
usage_with_options(annotate_usage, options);
- if (argc) {
- /*
- * Special case: if there's an argument left then assume that
- * it's a symbol filter:
- */
- if (argc > 1)
- usage_with_options(annotate_usage, options);
+ if (annotate.use_stdio)
+ use_browser = 0;
+ else if (annotate.use_tui)
+ use_browser = 1;
+ else if (annotate.use_gtk)
+ use_browser = 2;
- annotate.sym_hist_filter = argv[0];
- }
+ setup_browser(true);
ret = __cmd_annotate(&annotate);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b17aed36ca16..a1cddc6bbf0f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -16,7 +16,7 @@
*/
#include "perf.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "builtin.h"
#include "bench/bench.h"
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 7b8450cd33c2..d93bff7fc0e4 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -16,7 +16,7 @@
#include "util/cache.h"
#include "util/debug.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/build-id.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 918b4de29de4..5e914ee79eb3 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -12,7 +12,7 @@
#include "util/build-id.h"
#include "util/cache.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/session.h"
#include "util/symbol.h"
#include "util/data.h"
@@ -110,7 +110,7 @@ int cmd_buildid_list(int argc, const char **argv,
setup_pager();
if (show_kernel)
- return sysfs__fprintf_build_id(stdout);
+ return !(sysfs__fprintf_build_id(stdout) > 0);
return perf_session__list_build_ids(force, with_hits);
}
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
new file mode 100644
index 000000000000..f04e804a9fad
--- /dev/null
+++ b/tools/perf/builtin-config.c
@@ -0,0 +1,66 @@
+/*
+ * builtin-config.c
+ *
+ * Copyright (C) 2015, Taeung Song <treeze.taeung@gmail.com>
+ *
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/cache.h"
+#include <subcmd/parse-options.h>
+#include "util/util.h"
+#include "util/debug.h"
+
+static const char * const config_usage[] = {
+ "perf config [options]",
+ NULL
+};
+
+enum actions {
+ ACTION_LIST = 1
+} actions;
+
+static struct option config_options[] = {
+ OPT_SET_UINT('l', "list", &actions,
+ "show current config variables", ACTION_LIST),
+ OPT_END()
+};
+
+static int show_config(const char *key, const char *value,
+ void *cb __maybe_unused)
+{
+ if (value)
+ printf("%s=%s\n", key, value);
+ else
+ printf("%s\n", key);
+
+ return 0;
+}
+
+int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ int ret = 0;
+
+ argc = parse_options(argc, argv, config_options, config_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ switch (actions) {
+ case ACTION_LIST:
+ if (argc) {
+ pr_err("Error: takes no arguments\n");
+ parse_options_usage(config_usage, config_options, "l", 1);
+ } else {
+ ret = perf_config(show_config, NULL);
+ if (ret < 0)
+ pr_err("Nothing configured, "
+ "please check your ~/.perfconfig file\n");
+ }
+ break;
+ default:
+ usage_with_options(config_usage, config_options);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index d6525bc54d13..b97bc1518b44 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -2,7 +2,7 @@
#include "builtin.h"
#include "perf.h"
#include "debug.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "data-convert-bt.h"
typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 0b180a885ba3..36ccc2b8827f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -311,11 +311,11 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
}
static int hists__add_entry(struct hists *hists,
- struct addr_location *al, u64 period,
- u64 weight, u64 transaction)
+ struct addr_location *al,
+ struct perf_sample *sample)
{
- if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
- transaction, true) != NULL)
+ if (__hists__add_entry(hists, al, NULL, NULL, NULL,
+ sample, true) != NULL)
return 0;
return -ENOMEM;
}
@@ -336,8 +336,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- if (hists__add_entry(hists, &al, sample->period,
- sample->weight, sample->transaction)) {
+ if (hists__add_entry(hists, &al, sample)) {
pr_warning("problem incrementing symbol period, skipping event\n");
goto out_put;
}
@@ -1208,7 +1207,7 @@ static int ui_init(void)
BUG_ON(1);
}
- list_add(&fmt->sort_list, &perf_hpp__sort_list);
+ perf_hpp__register_sort_field(fmt);
return 0;
}
@@ -1280,7 +1279,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
sort__mode = SORT_MODE__DIFF;
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
usage_with_options(diff_usage, options);
setup_pager();
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index f4d62510acbb..8a31f511e1a0 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -12,7 +12,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/parse-events.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/session.h"
#include "util/data.h"
#include "util/debug.h"
@@ -26,14 +26,22 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
.mode = PERF_DATA_MODE_READ,
.force = details->force,
};
+ bool has_tracepoint = false;
session = perf_session__new(&file, 0, NULL);
if (session == NULL)
return -1;
- evlist__for_each(session->evlist, pos)
+ evlist__for_each(session->evlist, pos) {
perf_evsel__fprintf(pos, details, stdout);
+ if (pos->attr.type == PERF_TYPE_TRACEPOINT)
+ has_tracepoint = true;
+ }
+
+ if (has_tracepoint && !details->trace_fields)
+ printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
+
perf_session__delete(session);
return 0;
}
@@ -49,6 +57,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('g', "group", &details.event_group,
"Show event group information"),
OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "trace-fields", &details.trace_fields, "Show tracepoint fields"),
OPT_END()
};
const char * const evlist_usage[] = {
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index a7d588bf3cdd..96c1a4cfbbbf 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -6,11 +6,11 @@
#include "perf.h"
#include "util/cache.h"
#include "builtin.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "common-cmds.h"
-#include "util/parse-options.h"
-#include "util/run-command.h"
-#include "util/help.h"
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+#include <subcmd/help.h>
#include "util/debug.h"
static struct man_viewer_list {
@@ -407,7 +407,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page)
#ifndef open_html
static void open_html(const char *path)
{
- execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
+ execl_cmd("web--browse", "-c", "help.browser", path, NULL);
}
#endif
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 99d127fe9c35..0022e02ed31a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -18,7 +18,7 @@
#include "util/data.h"
#include "util/auxtrace.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include <linux/list.h>
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 93ce665f976f..118010553d0c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -12,7 +12,7 @@
#include "util/tool.h"
#include "util/callchain.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index dd94b4ca2213..4418d9214872 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -10,7 +10,7 @@
#include "util/header.h"
#include "util/session.h"
#include "util/intlist.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
#include "util/tool.h"
@@ -1351,7 +1351,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
disable_buildid_cache();
use_browser = 0;
- setup_browser(false);
if (argc) {
argc = parse_options(argc, argv, live_options,
@@ -1409,8 +1408,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
err = kvm_events_live_report(kvm);
out:
- exit_browser(0);
-
if (kvm->session)
perf_session__delete(kvm->session);
kvm->session = NULL;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index bf679e2c978b..5e22db4684b8 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -14,7 +14,7 @@
#include "util/parse-events.h"
#include "util/cache.h"
#include "util/pmu.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index de16aaed516e..ce3bfb48b26f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -9,7 +9,7 @@
#include "util/thread.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 80170aace5d4..390170041696 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -1,7 +1,7 @@
#include "builtin.h"
#include "perf.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tool.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 132afc97676c..9af859b28b15 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -37,7 +37,7 @@
#include "util/strfilter.h"
#include "util/symbol.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/probe-finder.h"
#include "util/probe-event.h"
#include "util/probe-file.h"
@@ -249,6 +249,9 @@ static int opt_show_vars(const struct option *opt,
return ret;
}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
#endif
static int opt_add_probe_event(const struct option *opt,
const char *str, int unset __maybe_unused)
@@ -473,7 +476,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
" with existing name"),
-#ifdef HAVE_DWARF_SUPPORT
OPT_CALLBACK('L', "line", NULL,
"FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
"Show source code lines.", opt_show_lines),
@@ -490,7 +492,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
"directory", "path to kernel source"),
OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
"Don't search inlined functions"),
-#endif
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
"Set how many probe points can be found for a probe."),
@@ -521,6 +522,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
#ifdef HAVE_DWARF_SUPPORT
set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+ set_nobuild('L', "line", false);
+ set_nobuild('V', "vars", false);
+ set_nobuild('\0', "externs", false);
+ set_nobuild('\0', "range", false);
+ set_nobuild('k', "vmlinux", true);
+ set_nobuild('s', "source", true);
+ set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
#endif
set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 199fc31e3919..dc4e0adf5c5b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -11,7 +11,7 @@
#include "util/build-id.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/callchain.h"
@@ -452,6 +452,8 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+
+ perf_header__clear_feat(&session->header, HEADER_STAT);
}
static volatile int workload_exec_errno;
@@ -813,8 +815,12 @@ int record_parse_callchain_opt(const struct option *opt,
}
ret = parse_callchain_record_opt(arg, &callchain_param);
- if (!ret)
+ if (!ret) {
+ /* Enable data address sampling for DWARF unwind. */
+ if (callchain_param.record_mode == CALLCHAIN_DWARF)
+ record->sample_address = true;
callchain_debug();
+ }
return ret;
}
@@ -837,6 +843,19 @@ int record_callchain_opt(const struct option *opt,
static int perf_record_config(const char *var, const char *value, void *cb)
{
+ struct record *rec = cb;
+
+ if (!strcmp(var, "record.build-id")) {
+ if (!strcmp(value, "cache"))
+ rec->no_buildid_cache = false;
+ else if (!strcmp(value, "no-cache"))
+ rec->no_buildid_cache = true;
+ else if (!strcmp(value, "skip"))
+ rec->no_buildid = true;
+ else
+ return -1;
+ return 0;
+ }
if (!strcmp(var, "record.call-graph"))
var = "call-graph.record-mode"; /* fall-through */
@@ -1113,12 +1132,12 @@ struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
"Record context switch events"),
-#ifdef HAVE_LIBBPF_SUPPORT
OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
"clang binary to use for compiling BPF scriptlets"),
OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
"options passed to clang when compiling BPF scriptlets"),
-#endif
+ OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
OPT_END()
};
@@ -1130,6 +1149,27 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
struct record *rec = &record;
char errbuf[BUFSIZ];
+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+ set_nobuild('\0', "clang-path", true);
+ set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
+#ifndef HAVE_BPF_PROLOGUE
+# if !defined (HAVE_DWARF_SUPPORT)
+# define REASON "NO_DWARF=1"
+# elif !defined (HAVE_LIBBPF_SUPPORT)
+# define REASON "NO_LIBBPF=1"
+# else
+# define REASON "this architecture doesn't support BPF prologue"
+# endif
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
+ set_nobuild('\0', "vmlinux", true);
+# undef set_nobuild
+# undef REASON
+#endif
+
rec->evlist = perf_evlist__new();
if (rec->evlist == NULL)
return -ENOMEM;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f256fac1e722..d5a42ee12529 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -27,7 +27,7 @@
#include "util/session.h"
#include "util/tool.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/thread.h"
@@ -45,7 +45,6 @@ struct report {
struct perf_tool tool;
struct perf_session *session;
bool use_tui, use_gtk, use_stdio;
- bool hide_unresolved;
bool dont_use_callchains;
bool show_full_info;
bool show_threads;
@@ -146,7 +145,7 @@ static int process_sample_event(struct perf_tool *tool,
struct hist_entry_iter iter = {
.evsel = evsel,
.sample = sample,
- .hide_unresolved = rep->hide_unresolved,
+ .hide_unresolved = symbol_conf.hide_unresolved,
.add_entry_cb = hist_iter__report_callback,
};
int ret = 0;
@@ -157,7 +156,7 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (rep->hide_unresolved && al.sym == NULL)
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
@@ -434,7 +433,7 @@ static int report__browse_hists(struct report *rep)
int ret;
struct perf_session *session = rep->session;
struct perf_evlist *evlist = session->evlist;
- const char *help = "For a higher level overview, try: perf report --sort comm,dso";
+ const char *help = perf_tip(TIPDIR);
switch (use_browser) {
case 1:
@@ -514,20 +513,26 @@ static int __cmd_report(struct report *rep)
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
- if (ret)
+ if (ret) {
+ ui__error("failed to set cpu bitmap\n");
return ret;
+ }
}
if (rep->show_threads)
perf_read_values_init(&rep->show_threads_values);
ret = report__setup_sample_type(rep);
- if (ret)
+ if (ret) {
+ /* report__setup_sample_type() already showed error message */
return ret;
+ }
ret = perf_session__process_events(session);
- if (ret)
+ if (ret) {
+ ui__error("failed to process sample\n");
return ret;
+ }
report__warn_kptr_restrict(rep);
@@ -625,7 +630,7 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
-#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function"
+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
CALLCHAIN_REPORT_HELP
@@ -708,7 +713,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
- "print_type,threshold[,print_limit],order,sort_key[,branch]",
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
report_callchain_help, &report_parse_callchain_opt,
callchain_default_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
@@ -740,7 +745,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
- OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved,
+ OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved,
"Only display entries resolved to a symbol"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
@@ -783,6 +788,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Show callgraph from reference event"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
OPT_END()
};
struct perf_data_file file = {
@@ -796,6 +803,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
perf_config(report__config, &report);
argc = parse_options(argc, argv, options, report_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(report_usage, options);
+
+ report.symbol_filter_str = argv[0];
+ }
if (symbol_conf.vmlinux_name &&
access(symbol_conf.vmlinux_name, R_OK)) {
@@ -882,7 +899,7 @@ repeat:
symbol_conf.cumulate_callchain = false;
}
- if (setup_sorting() < 0) {
+ if (setup_sorting(session->evlist) < 0) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
if (field_order)
@@ -941,17 +958,6 @@ repeat:
if (symbol__init(&session->header.env) < 0)
goto error;
- if (argc) {
- /*
- * Special case: if there's an argument left then assume that
- * it's a symbol filter:
- */
- if (argc > 1)
- usage_with_options(report_usage, options);
-
- report.symbol_filter_str = argv[0];
- }
-
sort__setup_elide(stdout);
ret = __cmd_report(&report);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e3d3e32c0a93..871b55ae22a4 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -12,7 +12,7 @@
#include "util/tool.h"
#include "util/cloexec.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/debug.h"
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 72b5deb4bd79..c691214d820f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3,9 +3,9 @@
#include "perf.h"
#include "util/cache.h"
#include "util/debug.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/perf_regs.h"
#include "util/session.h"
#include "util/tool.h"
@@ -18,7 +18,11 @@
#include "util/sort.h"
#include "util/data.h"
#include "util/auxtrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
#include <linux/bitmap.h>
+#include "asm/bug.h"
static char const *script_name;
static char const *generate_script_lang;
@@ -32,6 +36,7 @@ static bool print_flags;
static bool nanosecs;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+static struct perf_stat_config stat_config;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
@@ -130,6 +135,18 @@ static struct {
.invalid_fields = PERF_OUTPUT_TRACE,
},
+
+ [PERF_TYPE_BREAKPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
+ PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE,
+ },
};
static bool output_set_by_user(void)
@@ -204,6 +221,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
struct perf_event_attr *attr = &evsel->attr;
bool allow_user_set;
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
allow_user_set = perf_header__has_feat(&session->header,
HEADER_AUXTRACE);
@@ -588,8 +608,35 @@ static void print_sample_flags(u32 flags)
printf(" %-4s ", str);
}
-static void process_event(union perf_event *event, struct perf_sample *sample,
- struct perf_evsel *evsel, struct addr_location *al)
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ int name_width;
+};
+
+static int perf_evlist__max_name_len(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int max = 0;
+
+ evlist__for_each(evlist, evsel) {
+ int len = strlen(perf_evsel__name(evsel));
+
+ max = MAX(len, max);
+ }
+
+ return max;
+}
+
+static void process_event(struct perf_script *script, union perf_event *event,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct addr_location *al)
{
struct thread *thread = al->thread;
struct perf_event_attr *attr = &evsel->attr;
@@ -604,7 +651,12 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
if (PRINT_FIELD(EVNAME)) {
const char *evname = perf_evsel__name(evsel);
- printf("%s: ", evname ? evname : "[unknown]");
+
+ if (!script->name_width)
+ script->name_width = perf_evlist__max_name_len(script->session->evlist);
+
+ printf("%*s: ", script->name_width,
+ evname ? evname : "[unknown]");
}
if (print_flags)
@@ -643,65 +695,81 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
printf("\n");
}
-static int default_start_script(const char *script __maybe_unused,
- int argc __maybe_unused,
- const char **argv __maybe_unused)
-{
- return 0;
-}
+static struct scripting_ops *scripting_ops;
-static int default_flush_script(void)
+static void __process_stat(struct perf_evsel *counter, u64 tstamp)
{
- return 0;
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+ static int header_printed;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ if (!header_printed) {
+ printf("%3s %8s %15s %15s %15s %15s %s\n",
+ "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT");
+ header_printed = 1;
+ }
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ struct perf_counts_values *counts;
+
+ counts = perf_counts(counter->counts, cpu, thread);
+
+ printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
+ counter->cpus->map[cpu],
+ thread_map__pid(counter->threads, thread),
+ counts->val,
+ counts->ena,
+ counts->run,
+ tstamp,
+ perf_evsel__name(counter));
+ }
+ }
}
-static int default_stop_script(void)
+static void process_stat(struct perf_evsel *counter, u64 tstamp)
{
- return 0;
+ if (scripting_ops && scripting_ops->process_stat)
+ scripting_ops->process_stat(&stat_config, counter, tstamp);
+ else
+ __process_stat(counter, tstamp);
}
-static int default_generate_script(struct pevent *pevent __maybe_unused,
- const char *outfile __maybe_unused)
+static void process_stat_interval(u64 tstamp)
{
- return 0;
+ if (scripting_ops && scripting_ops->process_stat_interval)
+ scripting_ops->process_stat_interval(tstamp);
}
-static struct scripting_ops default_scripting_ops = {
- .start_script = default_start_script,
- .flush_script = default_flush_script,
- .stop_script = default_stop_script,
- .process_event = process_event,
- .generate_script = default_generate_script,
-};
-
-static struct scripting_ops *scripting_ops;
-
static void setup_scripting(void)
{
setup_perl_scripting();
setup_python_scripting();
-
- scripting_ops = &default_scripting_ops;
}
static int flush_scripting(void)
{
- return scripting_ops->flush_script();
+ return scripting_ops ? scripting_ops->flush_script() : 0;
}
static int cleanup_scripting(void)
{
pr_debug("\nperf script stopped\n");
- return scripting_ops->stop_script();
+ return scripting_ops ? scripting_ops->stop_script() : 0;
}
-static int process_sample_event(struct perf_tool *tool __maybe_unused,
+static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
struct addr_location al;
if (debug_mode) {
@@ -727,20 +795,16 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
goto out_put;
- scripting_ops->process_event(event, sample, evsel, &al);
+ if (scripting_ops)
+ scripting_ops->process_event(event, sample, evsel, &al);
+ else
+ process_event(scr, event, sample, evsel, &al);
+
out_put:
addr_location__put(&al);
return 0;
}
-struct perf_script {
- struct perf_tool tool;
- struct perf_session *session;
- bool show_task_events;
- bool show_mmap_events;
- bool show_switch_events;
-};
-
static int process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_evlist **pevlist)
{
@@ -1156,6 +1220,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
type = PERF_TYPE_TRACEPOINT;
else if (!strcmp(str, "raw"))
type = PERF_TYPE_RAW;
+ else if (!strcmp(str, "break"))
+ type = PERF_TYPE_BREAKPOINT;
else {
fprintf(stderr, "Invalid event type in field string.\n");
rc = -EINVAL;
@@ -1421,7 +1487,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
char first_half[BUFSIZ];
char *script_root;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir)
@@ -1542,7 +1608,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
if (!session)
return -1;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir) {
@@ -1600,7 +1666,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
char lang_path[MAXPATHLEN];
char *__script_root;
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
scripts_dir = opendir(scripts_path);
if (!scripts_dir)
@@ -1695,6 +1761,87 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+
+ evlist__for_each(session->evlist, counter) {
+ perf_stat_process_counter(&stat_config, counter);
+ process_stat(counter, round->time);
+ }
+
+ process_stat_interval(round->time);
+ return 0;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+ return 0;
+}
+
+static int set_maps(struct perf_script *script)
+{
+ struct perf_evlist *evlist = script->session->evlist;
+
+ if (!script->cpus || !script->threads)
+ return 0;
+
+ if (WARN_ONCE(script->allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evlist, script->cpus, script->threads);
+
+ if (perf_evlist__alloc_stats(evlist, true))
+ return -ENOMEM;
+
+ script->allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ script->threads = thread_map__new_event(&event->thread_map);
+ if (!script->threads)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ script->cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!script->cpus)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
{
bool show_full_info = false;
@@ -1723,6 +1870,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ .stat_config = process_stat_config_event,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -1836,7 +1988,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
scripting_max_stack = itrace_synth_opts.callchain_sz;
/* make sure PERF_EXEC_PATH is set for scripts */
- perf_set_argv_exec_path(perf_exec_path());
+ set_argv_exec_path(get_argv_exec_path());
if (argc && !script_name && !rec_script_path && !rep_script_path) {
int live_pipe[2];
@@ -2076,6 +2228,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
flush_scripting();
out_delete:
+ perf_evlist__free_stats(session->evlist);
perf_session__delete(session);
if (script_started)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e77880b5094d..7f568244662b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -45,7 +45,7 @@
#include "builtin.h"
#include "util/cgroup.h"
#include "util/util.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
@@ -59,6 +59,9 @@
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/counts.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "asm/bug.h"
#include <stdlib.h>
#include <sys/prctl.h>
@@ -126,6 +129,21 @@ static bool append_file;
static const char *output_name;
static int output_fd;
+struct perf_stat {
+ bool record;
+ struct perf_data_file file;
+ struct perf_session *session;
+ u64 bytes_written;
+ struct perf_tool tool;
+ bool maps_allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ enum aggr_mode aggr_mode;
+};
+
+static struct perf_stat perf_stat;
+#define STAT_RECORD perf_stat.record
+
static volatile int done = 0;
static struct perf_stat_config stat_config = {
@@ -161,15 +179,43 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->inherit = !no_inherit;
- if (target__has_cpu(&target))
- return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+ /*
+ * Some events get initialized with sample_(period/type) set,
+ * like tracepoints. Clear it up for counting.
+ */
+ attr->sample_period = 0;
+
+ /*
+ * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+ * while avoiding that older tools show confusing messages.
+ *
+ * However for pipe sessions we need to keep it zero,
+ * because script's perf_evsel__check_attr is triggered
+ * by attr->sample_type != 0, and we can't run it on
+ * stat sessions.
+ */
+ if (!(STAT_RECORD && perf_stat.file.is_pipe))
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
- if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
+ /*
+ * Disabling all counters initially, they will be enabled
+ * either manually by us or by kernel via enable_on_exec
+ * set later.
+ */
+ if (perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
- if (!initial_delay)
+
+ /*
+ * In case of initial_delay we enable tracee
+ * events manually.
+ */
+ if (target__none(&target) && !initial_delay)
attr->enable_on_exec = 1;
}
+ if (target__has_cpu(&target))
+ return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
@@ -185,6 +231,42 @@ static inline int nsec_counter(struct perf_evsel *evsel)
return 0;
}
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return -1;
+ }
+
+ perf_stat.bytes_written += event->header.size;
+ return 0;
+}
+
+static int write_stat_round_event(u64 tm, u64 type)
+{
+ return perf_event__synthesize_stat_round(NULL, tm, type,
+ process_synthesized_event,
+ NULL);
+}
+
+#define WRITE_STAT_ROUND_EVENT(time, interval) \
+ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
+
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static int
+perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
+{
+ struct perf_sample_id *sid = SID(counter, cpu, thread);
+
+ return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
+ process_synthesized_event, NULL);
+}
+
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
@@ -208,6 +290,13 @@ static int read_counter(struct perf_evsel *counter)
count = perf_counts(counter->counts, cpu, thread);
if (perf_evsel__read(counter, cpu, thread, count))
return -1;
+
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
+ }
+ }
}
}
@@ -241,21 +330,26 @@ static void process_interval(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
+ if (STAT_RECORD) {
+ if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
+ pr_err("failed to write stat round event\n");
+ }
+
print_counters(&rs, 0, NULL);
}
-static void handle_initial_delay(void)
+static void enable_counters(void)
{
- struct perf_evsel *counter;
-
- if (initial_delay) {
- const int ncpus = cpu_map__nr(evsel_list->cpus),
- nthreads = thread_map__nr(evsel_list->threads);
-
+ if (initial_delay)
usleep(initial_delay * 1000);
- evlist__for_each(evsel_list, counter)
- perf_evsel__enable(counter, ncpus, nthreads);
- }
+
+ /*
+ * We need to enable counters only if:
+ * - we don't have tracee (attaching to task or cpu)
+ * - we have initial delay configured
+ */
+ if (!target__none(&target) || initial_delay)
+ perf_evlist__enable(evsel_list);
}
static volatile int workload_exec_errno;
@@ -271,6 +365,135 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
+static bool has_unit(struct perf_evsel *counter)
+{
+ return counter->unit && *counter->unit;
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+ return counter->scale != 1;
+}
+
+static int perf_stat_synthesize_config(bool is_pipe)
+{
+ struct perf_evsel *counter;
+ int err;
+
+ if (is_pipe) {
+ err = perf_event__synthesize_attrs(NULL, perf_stat.session,
+ process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Synthesize other events stuff not carried within
+ * attr event - unit, scale, name
+ */
+ evlist__for_each(evsel_list, counter) {
+ if (!counter->supported)
+ continue;
+
+ /*
+ * Synthesize unit and scale only if it's defined.
+ */
+ if (has_unit(counter)) {
+ err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel unit.\n");
+ return err;
+ }
+ }
+
+ if (has_scale(counter)) {
+ err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel scale.\n");
+ return err;
+ }
+ }
+
+ if (counter->own_cpus) {
+ err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel scale.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Name is needed only for pipe output,
+ * perf.data carries event names.
+ */
+ if (is_pipe) {
+ err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel name.\n");
+ return err;
+ }
+ }
+ }
+
+ err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_stat_config(NULL, &stat_config,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize config.\n");
+ return err;
+ }
+
+ return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+static int __store_counter_ids(struct perf_evsel *counter,
+ struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int cpu, thread;
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ int fd = FD(counter, cpu, thread);
+
+ if (perf_evlist__id_add_fd(evsel_list, counter,
+ cpu, thread, fd) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int store_counter_ids(struct perf_evsel *counter)
+{
+ struct cpu_map *cpus = counter->cpus;
+ struct thread_map *threads = counter->threads;
+
+ if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
+ return -ENOMEM;
+
+ return __store_counter_ids(counter, cpus, threads);
+}
+
static int __run_perf_stat(int argc, const char **argv)
{
int interval = stat_config.interval;
@@ -281,6 +504,7 @@ static int __run_perf_stat(int argc, const char **argv)
size_t l;
int status = 0;
const bool forks = (argc > 0);
+ bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
if (interval) {
ts.tv_sec = interval / 1000;
@@ -291,7 +515,7 @@ static int __run_perf_stat(int argc, const char **argv)
}
if (forks) {
- if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
+ if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
workload_exec_failed_signal) < 0) {
perror("failed to prepare workload");
return -1;
@@ -335,6 +559,9 @@ static int __run_perf_stat(int argc, const char **argv)
l = strlen(counter->unit);
if (l > unit_width)
unit_width = l;
+
+ if (STAT_RECORD && store_counter_ids(counter))
+ return -1;
}
if (perf_evlist__apply_filters(evsel_list, &counter)) {
@@ -344,6 +571,24 @@ static int __run_perf_stat(int argc, const char **argv)
return -1;
}
+ if (STAT_RECORD) {
+ int err, fd = perf_data_file__fd(&perf_stat.file);
+
+ if (is_pipe) {
+ err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file));
+ } else {
+ err = perf_session__write_header(perf_stat.session, evsel_list,
+ fd, false);
+ }
+
+ if (err < 0)
+ return err;
+
+ err = perf_stat_synthesize_config(is_pipe);
+ if (err < 0)
+ return err;
+ }
+
/*
* Enable counters and exec the command:
*/
@@ -352,7 +597,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (forks) {
perf_evlist__start_workload(evsel_list);
- handle_initial_delay();
+ enable_counters();
if (interval) {
while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -371,7 +616,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
- handle_initial_delay();
+ enable_counters();
while (!done) {
nanosleep(&ts, NULL);
if (interval)
@@ -810,8 +1055,8 @@ static void print_header(int argc, const char **argv)
else if (target.cpu_list)
fprintf(output, "\'CPU(s) %s", target.cpu_list);
else if (!target__has_task(&target)) {
- fprintf(output, "\'%s", argv[0]);
- for (i = 1; i < argc; i++)
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
fprintf(output, " %s", argv[i]);
} else if (target.pid)
fprintf(output, "process id \'%s", target.pid);
@@ -847,6 +1092,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
struct perf_evsel *counter;
char buf[64], *prefix = NULL;
+ /* Do not print anything if we record to the pipe. */
+ if (STAT_RECORD && perf_stat.file.is_pipe)
+ return;
+
if (interval)
print_interval(prefix = buf, ts);
else
@@ -1077,6 +1326,109 @@ static int perf_stat_init_aggr_mode(void)
return cpus_aggr_map ? 0 : -ENOMEM;
}
+static void perf_stat__exit_aggr_mode(void)
+{
+ cpu_map__put(aggr_map);
+ cpu_map__put(cpus_aggr_map);
+ aggr_map = NULL;
+ cpus_aggr_map = NULL;
+}
+
+static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+{
+ int cpu;
+
+ if (idx > map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ if (cpu >= env->nr_cpus_online)
+ return -1;
+
+ return cpu;
+}
+
+static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int cpu = perf_env__get_cpu(env, map, idx);
+
+ return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
+}
+
+static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int core = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+ if (cpu != -1) {
+ int socket_id = env->cpu[cpu].socket_id;
+
+ /*
+ * Encode socket in upper 16 bits
+ * core_id is relative to socket, and
+ * we need a global id. So we combine
+ * socket + core id.
+ */
+ core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+ }
+
+ return core;
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **sockp)
+{
+ return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
+}
+
+static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **corep)
+{
+ return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
+}
+
+static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+ struct perf_env *env = &st->session->header.env;
+
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build socket map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_socket_file;
+ break;
+ case AGGR_CORE:
+ if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_core_file;
+ break;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
/*
* Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1236,6 +1588,225 @@ static int add_default_attributes(void)
return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
+static const char * const recort_usage[] = {
+ "perf stat record [<options>]",
+ NULL,
+};
+
+static void init_features(struct perf_session *session)
+{
+ int feat;
+
+ for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+ perf_header__set_feat(&session->header, feat);
+
+ perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+ perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+ perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct perf_data_file *file = &perf_stat.file;
+
+ argc = parse_options(argc, argv, stat_options, record_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (output_name)
+ file->path = output_name;
+
+ if (run_count != 1 || forever) {
+ pr_err("Cannot use -r option with perf stat record.\n");
+ return -1;
+ }
+
+ session = perf_session__new(file, false, NULL);
+ if (session == NULL) {
+ pr_err("Perf session creation failed.\n");
+ return -1;
+ }
+
+ init_features(session);
+
+ session->evlist = evsel_list;
+ perf_stat.session = session;
+ perf_stat.record = true;
+ return argc;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+ struct timespec tsh, *ts = NULL;
+ const char **argv = session->header.env.cmdline_argv;
+ int argc = session->header.env.nr_cmdline;
+
+ evlist__for_each(evsel_list, counter)
+ perf_stat_process_counter(&stat_config, counter);
+
+ if (round->type == PERF_STAT_ROUND_TYPE__FINAL)
+ update_stats(&walltime_nsecs_stats, round->time);
+
+ if (stat_config.interval && round->time) {
+ tsh.tv_sec = round->time / NSECS_PER_SEC;
+ tsh.tv_nsec = round->time % NSECS_PER_SEC;
+ ts = &tsh;
+ }
+
+ print_counters(ts, argc, argv);
+ return 0;
+}
+
+static
+int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+ if (cpu_map__empty(st->cpus)) {
+ if (st->aggr_mode != AGGR_UNSET)
+ pr_warning("warning: processing task data, aggregation mode not set\n");
+ return 0;
+ }
+
+ if (st->aggr_mode != AGGR_UNSET)
+ stat_config.aggr_mode = st->aggr_mode;
+
+ if (perf_stat.file.is_pipe)
+ perf_stat_init_aggr_mode();
+ else
+ perf_stat_init_aggr_mode_file(st);
+
+ return 0;
+}
+
+static int set_maps(struct perf_stat *st)
+{
+ if (!st->cpus || !st->threads)
+ return 0;
+
+ if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+
+ if (perf_evlist__alloc_stats(evsel_list, true))
+ return -ENOMEM;
+
+ st->maps_allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ if (st->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ st->threads = thread_map__new_event(&event->thread_map);
+ if (!st->threads)
+ return -ENOMEM;
+
+ return set_maps(st);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+ struct cpu_map *cpus;
+
+ if (st->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!cpus)
+ return -ENOMEM;
+
+ st->cpus = cpus;
+ return set_maps(st);
+}
+
+static const char * const report_usage[] = {
+ "perf stat report [<options>]",
+ NULL,
+};
+
+static struct perf_stat perf_stat = {
+ .tool = {
+ .attr = perf_event__process_attr,
+ .event_update = perf_event__process_event_update,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
+ .stat_config = process_stat_config_event,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ },
+ .aggr_mode = AGGR_UNSET,
+};
+
+static int __cmd_report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ const struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
+ "aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
+ "aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+ "disable CPU count aggregation", AGGR_NONE),
+ OPT_END()
+ };
+ struct stat st;
+ int ret;
+
+ argc = parse_options(argc, argv, options, report_usage, 0);
+
+ if (!input_name || !strlen(input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ input_name = "-";
+ else
+ input_name = "perf.data";
+ }
+
+ perf_stat.file.path = input_name;
+ perf_stat.file.mode = PERF_DATA_MODE_READ;
+
+ session = perf_session__new(&perf_stat.file, false, &perf_stat.tool);
+ if (session == NULL)
+ return -1;
+
+ perf_stat.session = session;
+ stat_config.output = stderr;
+ evsel_list = session->evlist;
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ return ret;
+
+ perf_session__delete(session);
+ return 0;
+}
+
int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
{
const char * const stat_usage[] = {
@@ -1246,6 +1817,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
const char *mode;
FILE *output = stderr;
unsigned int interval;
+ const char * const stat_subcommands[] = { "record", "report" };
setlocale(LC_ALL, "");
@@ -1253,12 +1825,30 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
if (evsel_list == NULL)
return -ENOMEM;
- argc = parse_options(argc, argv, stat_options, stat_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+ (const char **) stat_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (csv_sep) {
+ csv_output = true;
+ if (!strcmp(csv_sep, "\\t"))
+ csv_sep = "\t";
+ } else
+ csv_sep = DEFAULT_SEPARATOR;
+
+ if (argc && !strncmp(argv[0], "rec", 3)) {
+ argc = __cmd_record(argc, argv);
+ if (argc < 0)
+ return -1;
+ } else if (argc && !strncmp(argv[0], "rep", 3))
+ return __cmd_report(argc, argv);
interval = stat_config.interval;
- if (output_name && strcmp(output_name, "-"))
+ /*
+ * For record command the -o is already taken care of.
+ */
+ if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
output = NULL;
if (output_name && output_fd) {
@@ -1296,13 +1886,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
stat_config.output = output;
- if (csv_sep) {
- csv_output = true;
- if (!strcmp(csv_sep, "\\t"))
- csv_sep = "\t";
- } else
- csv_sep = DEFAULT_SEPARATOR;
-
/*
* let the spreadsheet do the pretty-printing
*/
@@ -1425,6 +2008,42 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
if (!forever && status != -1 && !interval)
print_counters(NULL, argc, argv);
+ if (STAT_RECORD) {
+ /*
+ * We synthesize the kernel mmap record just so that older tools
+ * don't emit warnings about not being able to resolve symbols
+ * due to /proc/sys/kernel/kptr_restrict settings and instear provide
+ * a saner message about no samples being in the perf.data file.
+ *
+ * This also serves to suppress a warning about f_header.data.size == 0
+ * in header.c at the moment 'perf stat record' gets introduced, which
+ * is not really needed once we start adding the stat specific PERF_RECORD_
+ * records, but the need to suppress the kptr_restrict messages in older
+ * tools remain -acme
+ */
+ int fd = perf_data_file__fd(&perf_stat.file);
+ int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+ process_synthesized_event,
+ &perf_stat.session->machines.host);
+ if (err) {
+ pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+ "older tools may produce warnings about this file\n.");
+ }
+
+ if (!interval) {
+ if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+ pr_err("failed to write stat round event\n");
+ }
+
+ if (!perf_stat.file.is_pipe) {
+ perf_stat.session->header.data_size += perf_stat.bytes_written;
+ perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+ }
+
+ perf_session__delete(perf_stat.session);
+ }
+
+ perf_stat__exit_aggr_mode();
perf_evlist__free_stats(evsel_list);
out:
perf_evlist__delete(evsel_list);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 30e59620179d..bd7a7757176f 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -30,7 +30,7 @@
#include "perf.h"
#include "util/header.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/event.h"
#include "util/session.h"
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7e2e72e6d9d1..bf01cbb0ef23 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -34,7 +34,7 @@
#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/cpumap.h"
#include "util/xyarray.h"
@@ -175,42 +175,40 @@ static void perf_top__record_precise_ip(struct perf_top *top,
int counter, u64 ip)
{
struct annotation *notes;
- struct symbol *sym;
+ struct symbol *sym = he->ms.sym;
int err = 0;
- if (he == NULL || he->ms.sym == NULL ||
- ((top->sym_filter_entry == NULL ||
- top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
+ if (sym == NULL || (use_browser == 0 &&
+ (top->sym_filter_entry == NULL ||
+ top->sym_filter_entry->ms.sym != sym)))
return;
- sym = he->ms.sym;
notes = symbol__annotation(sym);
if (pthread_mutex_trylock(&notes->lock))
return;
- ip = he->ms.map->map_ip(he->ms.map, ip);
-
- if (ui__has_annotation())
- err = hist_entry__inc_addr_samples(he, counter, ip);
+ err = hist_entry__inc_addr_samples(he, counter, ip);
pthread_mutex_unlock(&notes->lock);
- /*
- * This function is now called with he->hists->lock held.
- * Release it before going to sleep.
- */
- pthread_mutex_unlock(&he->hists->lock);
+ if (unlikely(err)) {
+ /*
+ * This function is now called with he->hists->lock held.
+ * Release it before going to sleep.
+ */
+ pthread_mutex_unlock(&he->hists->lock);
+
+ if (err == -ERANGE && !he->ms.map->erange_warned)
+ ui__warn_map_erange(he->ms.map, sym, ip);
+ else if (err == -ENOMEM) {
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ }
- if (err == -ERANGE && !he->ms.map->erange_warned)
- ui__warn_map_erange(he->ms.map, sym, ip);
- else if (err == -ENOMEM) {
- pr_err("Not enough memory for annotating '%s' symbol!\n",
- sym->name);
- sleep(1);
+ pthread_mutex_lock(&he->hists->lock);
}
-
- pthread_mutex_lock(&he->hists->lock);
}
static void perf_top__show_details(struct perf_top *top)
@@ -687,14 +685,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
- if (sort__has_sym && single) {
- u64 ip = al->addr;
-
- if (al->map)
- ip = al->map->unmap_ip(al->map, ip);
-
- perf_top__record_precise_ip(top, he, evsel->idx, ip);
- }
+ if (sort__has_sym && single)
+ perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
!(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
@@ -964,7 +956,7 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;
- if (perf_session__register_idle_thread(top->session) == NULL)
+ if (perf_session__register_idle_thread(top->session) < 0)
goto out_delete;
machine__synthesize_threads(&top->session->machines.host, &opts->target,
@@ -1218,6 +1210,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
"branch filter mask", "branch stack filter modes",
parse_branch_stack),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
OPT_END()
};
const char * const top_usage[] = {
@@ -1239,11 +1233,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(top_usage, options);
+ if (!top.evlist->nr_entries &&
+ perf_evlist__add_default(top.evlist) < 0) {
+ pr_err("Not enough memory for event selector list\n");
+ goto out_delete_evlist;
+ }
+
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
sort__need_collapse = 1;
- if (setup_sorting() < 0) {
+ if (setup_sorting(top.evlist) < 0) {
if (sort_order)
parse_options_usage(top_usage, options, "s", 1);
if (field_order)
@@ -1279,12 +1279,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (target__none(target))
target->system_wide = true;
- if (perf_evlist__create_maps(top.evlist, target) < 0)
- usage_with_options(top_usage, options);
-
- if (!top.evlist->nr_entries &&
- perf_evlist__add_default(top.evlist) < 0) {
- ui__error("Not enough memory for event selector list\n");
+ if (perf_evlist__create_maps(top.evlist, target) < 0) {
+ ui__error("Couldn't create thread/CPU maps: %s\n",
+ errno == ENOENT ? "No such process" : strerror_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c783d8fd3a80..20916dd77aac 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -22,11 +22,11 @@
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
new file mode 100644
index 000000000000..9b10cda6b6dc
--- /dev/null
+++ b/tools/perf/builtin-version.c
@@ -0,0 +1,10 @@
+#include "util/util.h"
+#include "builtin.h"
+#include "perf.h"
+
+int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused,
+ const char *prefix __maybe_unused)
+{
+ printf("perf version %s\n", perf_version_string);
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3688ad29085f..3f871b54e261 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -17,6 +17,7 @@ extern int cmd_annotate(int argc, const char **argv, const char *prefix);
extern int cmd_bench(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+extern int cmd_config(int argc, const char **argv, const char *prefix);
extern int cmd_diff(int argc, const char **argv, const char *prefix);
extern int cmd_evlist(int argc, const char **argv, const char *prefix);
extern int cmd_help(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 00fcaf8a5b8d..ab5cbaa170d0 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common
perf-buildid-list mainporcelain common
perf-data mainporcelain common
perf-diff mainporcelain common
+perf-config mainporcelain common
perf-evlist mainporcelain common
perf-inject mainporcelain common
perf-kmem mainporcelain common
@@ -25,4 +26,4 @@ perf-stat mainporcelain common
perf-test mainporcelain common
perf-timechart mainporcelain common
perf-top mainporcelain common
-perf-trace mainporcelain common
+perf-trace mainporcelain audit
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index de89ec574361..254d06e39bea 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -135,8 +135,6 @@ endif
ifeq ($(DEBUG),0)
CFLAGS += -O6
-else
- CFLAGS += $(call cc-option,-Og,-O0)
endif
ifdef PARSER_DEBUG
@@ -318,6 +316,18 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_LIBBPF_SUPPORT
$(call detected,CONFIG_LIBBPF)
endif
+
+ ifndef NO_DWARF
+ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+ CFLAGS += -DHAVE_BPF_PROLOGUE
+ $(call detected,CONFIG_BPF_PROLOGUE)
+ else
+ msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
+ endif
+ else
+ msg := $(warning DWARF support is off, BPF prologue is disabled);
+ endif
+
endif # NO_LIBBPF
endif # NO_LIBELF
@@ -681,6 +691,7 @@ sharedir = $(prefix)/share
template_dir = share/perf-core/templates
STRACE_GROUPS_DIR = share/perf-core/strace/groups
htmldir = share/doc/perf-doc
+tipdir = share/doc/perf-tip
ifeq ($(prefix),/usr)
sysconfdir = /etc
ETC_PERFCONFIG = $(sysconfdir)/perfconfig
@@ -707,6 +718,7 @@ infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
+tipdir_SQ = $(subst ','\'',$(tipdir))
prefix_SQ = $(subst ','\'',$(prefix))
sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
libdir_SQ = $(subst ','\'',$(libdir))
@@ -714,12 +726,15 @@ libdir_SQ = $(subst ','\'',$(libdir))
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
+tip_instdir = $(tipdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
+tip_instdir = $(prefix)/$(tipdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
+tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
# If we install to $(HOME) we keep the traceevent default:
# $(HOME)/.traceevent/plugins
@@ -741,6 +756,10 @@ ifeq ($(VF),1)
$(call print_var,sysconfdir)
$(call print_var,LIBUNWIND_DIR)
$(call print_var,LIBDW_DIR)
+
+ ifeq ($(dwarf-post-unwind),1)
+ $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+ endif
$(info )
endif
@@ -756,6 +775,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
$(call detected_var,STRACE_GROUPS_DIR_SQ)
$(call detected_var,prefix_SQ)
$(call detected_var,perfexecdir_SQ)
+$(call detected_var,tipdir_SQ)
$(call detected_var,LIBDIR)
$(call detected_var,GTK_CFLAGS)
$(call detected_var,PERL_EMBED_CCOPTS)
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index 0ebef09c0842..c16ce833079c 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -177,22 +177,3 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
endef
_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
-
-# try-run
-# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
-# Exit code chooses option. "$$TMP" is can be used as temporary file and
-# is automatically cleaned up.
-try-run = $(shell set -e; \
- TMP="$(TMPOUT).$$$$.tmp"; \
- TMPO="$(TMPOUT).$$$$.o"; \
- if ($(1)) >/dev/null 2>&1; \
- then echo "$(2)"; \
- else echo "$(3)"; \
- fi; \
- rm -f "$$TMP" "$$TMPO")
-
-# cc-option
-# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
-
-cc-option = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 3d4c7c09adea..a929618b8eb6 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -9,16 +9,18 @@
#include "builtin.h"
#include "util/env.h"
-#include "util/exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/cache.h"
#include "util/quote.h"
-#include "util/run-command.h"
+#include <subcmd/run-command.h>
#include "util/parse-events.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/bpf-loader.h"
#include "util/debug.h"
#include <api/fs/tracing_path.h>
#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
const char perf_usage_string[] =
"perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
@@ -39,6 +41,7 @@ struct cmd_struct {
static struct cmd_struct commands[] = {
{ "buildid-cache", cmd_buildid_cache, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
+ { "config", cmd_config, 0 },
{ "diff", cmd_diff, 0 },
{ "evlist", cmd_evlist, 0 },
{ "help", cmd_help, 0 },
@@ -118,7 +121,7 @@ static void commit_pager_choice(void)
{
switch (use_pager) {
case 0:
- setenv("PERF_PAGER", "cat", 1);
+ setenv(PERF_PAGER_ENVIRONMENT, "cat", 1);
break;
case 1:
/* setup_pager(); */
@@ -182,9 +185,9 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
if (!prefixcmp(cmd, CMD_EXEC_PATH)) {
cmd += strlen(CMD_EXEC_PATH);
if (*cmd == '=')
- perf_set_argv_exec_path(cmd + 1);
+ set_argv_exec_path(cmd + 1);
else {
- puts(perf_exec_path());
+ puts(get_argv_exec_path());
exit(0);
}
} else if (!strcmp(cmd, "--html-path")) {
@@ -383,6 +386,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
use_pager = 1;
commit_pager_choice();
+ perf_env__set_cmdline(&perf_env, argc, argv);
status = p->fn(argc, argv, prefix);
exit_browser(status);
perf_env__exit(&perf_env);
@@ -528,14 +532,20 @@ int main(int argc, const char **argv)
const char *cmd;
char sbuf[STRERR_BUFSIZE];
+ /* libsubcmd init */
+ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
+ pager_init(PERF_PAGER_ENVIRONMENT);
+
/* The page_size is placed in util object. */
page_size = sysconf(_SC_PAGE_SIZE);
cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
- cmd = perf_extract_argv0_path(argv[0]);
+ cmd = extract_argv0_path(argv[0]);
if (!cmd)
cmd = "perf-help";
+ srandom(time(NULL));
+
/* get debugfs/tracefs mount point from /proc/mounts */
tracing_path_mount();
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
new file mode 100644
index 000000000000..8b60f343dd07
--- /dev/null
+++ b/tools/perf/scripts/python/stat-cpi.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+data = {}
+times = []
+threads = []
+cpus = []
+
+def get_key(time, event, cpu, thread):
+ return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+ if (time not in times):
+ times.append(time)
+
+ if (cpu not in cpus):
+ cpus.append(cpu)
+
+ if (thread not in threads):
+ threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+ #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+ # (event, cpu, thread, time, val, ena, run)
+
+ store_key(time, cpu, thread)
+ key = get_key(time, event, cpu, thread)
+ data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+ key = get_key(time, event, cpu, thread)
+ return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+ for cpu in cpus:
+ for thread in threads:
+ cyc = get(time, "cycles", cpu, thread)
+ ins = get(time, "instructions", cpu, thread)
+ cpi = 0
+
+ if ins != 0:
+ cpi = cyc/float(ins)
+
+ print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+ pass
+# XXX trace_end callback could be used as an alternative place
+# to compute same values as in the script above:
+#
+# for time in times:
+# for cpu in cpus:
+# for thread in threads:
+# cyc = get(time, "cycles", cpu, thread)
+# ins = get(time, "instructions", cpu, thread)
+#
+# if ins != 0:
+# cpi = cyc/float(ins)
+#
+# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 489fc9ffbcb0..bf016c439fbd 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,2 +1,3 @@
llvm-src-base.c
llvm-src-kbuild.c
+llvm-src-prologue.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index f41ebf8849fe..614899b88b37 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -31,24 +31,34 @@ perf-y += sample-parsing.o
perf-y += parse-no-sample-id-all.o
perf-y += kmod-path.o
perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
perf-y += bpf.o
perf-y += topology.o
+perf-y += cpumap.o
+perf-y += stat.o
+perf-y += event_update.o
-$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c
+$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
$(Q)echo '#include <tests/llvm.h>' > $@
$(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
-$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c
+$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build
$(call rule_mkdir)
$(Q)echo '#include <tests/llvm.h>' > $@
$(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
+$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 638875a0960a..28d1605b0338 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -24,7 +24,7 @@
#include <linux/kernel.h>
#include "../perf.h"
#include "util.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "tests.h"
#define ENV "PERF_TEST_ATTR"
@@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf)
return system(cmd);
}
-int test__attr(void)
+int test__attr(int subtest __maybe_unused)
{
struct stat st;
char path_perf[PATH_MAX];
@@ -164,7 +164,7 @@ int test__attr(void)
return run_dir("./tests", "./perf");
/* Then installed path. */
- snprintf(path_dir, PATH_MAX, "%s/tests", perf_exec_path());
+ snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
if (!lstat(path_dir, &st) &&
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index a02b035fd5aa..fb80c9eb6a95 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -111,7 +111,7 @@ static long long bp_count(int fd)
return count;
}
-int test__bp_signal(void)
+int test__bp_signal(int subtest __maybe_unused)
{
struct sigaction sa;
long long count1, count2;
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
index e76537724491..89f92fa67cc4 100644
--- a/tools/perf/tests/bp_signal_overflow.c
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -58,7 +58,7 @@ static long long bp_count(int fd)
#define EXECUTIONS 10000
#define THRESHOLD 100
-int test__bp_signal_overflow(void)
+int test__bp_signal_overflow(int subtest __maybe_unused)
{
struct perf_event_attr pe;
struct sigaction sa;
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
new file mode 100644
index 000000000000..7230e62c70fc
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -0,0 +1,35 @@
+/*
+ * bpf-script-test-prologue.c
+ * Test BPF prologue
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+#define FMODE_READ 0x1
+#define FMODE_WRITE 0x2
+
+static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) 6;
+
+SEC("func=null_lseek file->f_mode offset orig")
+int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode,
+ unsigned long offset, unsigned long orig)
+{
+ if (err)
+ return 0;
+ if (f_mode & FMODE_WRITE)
+ return 0;
+ if (offset & 1)
+ return 0;
+ if (orig == SEEK_CUR)
+ return 0;
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index ec16f7812c8b..33689a0cf821 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -19,6 +19,29 @@ static int epoll_pwait_loop(void)
return 0;
}
+#ifdef HAVE_BPF_PROLOGUE
+
+static int llseek_loop(void)
+{
+ int fds[2], i;
+
+ fds[0] = open("/dev/null", O_RDONLY);
+ fds[1] = open("/dev/null", O_RDWR);
+
+ if (fds[0] < 0 || fds[1] < 0)
+ return -1;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ }
+ close(fds[0]);
+ close(fds[1]);
+ return 0;
+}
+
+#endif
+
static struct {
enum test_llvm__testcase prog_id;
const char *desc;
@@ -37,6 +60,17 @@ static struct {
&epoll_pwait_loop,
(NR_ITERS + 1) / 2,
},
+#ifdef HAVE_BPF_PROLOGUE
+ {
+ LLVM_TESTCASE_BPF_PROLOGUE,
+ "Test BPF prologue generation",
+ "[bpf_prologue_test]",
+ "fix kbuild first",
+ "check your vmlinux setting?",
+ &llseek_loop,
+ (NR_ITERS + 1) / 4,
+ },
+#endif
};
static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -68,8 +102,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
if (err || list_empty(&parse_evlist.list)) {
pr_debug("Failed to add events selected by BPF\n");
- if (!err)
- return TEST_FAIL;
+ return TEST_FAIL;
}
snprintf(pid, sizeof(pid), "%d", getpid());
@@ -123,8 +156,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
}
}
- if (count != expect)
+ if (count != expect) {
pr_debug("BPF filter result incorrect\n");
+ goto out_delete_evlist;
+ }
ret = TEST_OK;
@@ -146,7 +181,7 @@ prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
return obj;
}
-static int __test__bpf(int index)
+static int __test__bpf(int idx)
{
int ret;
void *obj_buf;
@@ -154,54 +189,72 @@ static int __test__bpf(int index)
struct bpf_object *obj;
ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
- bpf_testcase_table[index].prog_id,
+ bpf_testcase_table[idx].prog_id,
true);
if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
pr_debug("Unable to get BPF object, %s\n",
- bpf_testcase_table[index].msg_compile_fail);
- if (index == 0)
+ bpf_testcase_table[idx].msg_compile_fail);
+ if (idx == 0)
return TEST_SKIP;
else
return TEST_FAIL;
}
obj = prepare_bpf(obj_buf, obj_buf_sz,
- bpf_testcase_table[index].name);
+ bpf_testcase_table[idx].name);
if (!obj) {
ret = TEST_FAIL;
goto out;
}
ret = do_test(obj,
- bpf_testcase_table[index].target_func,
- bpf_testcase_table[index].expect_result);
+ bpf_testcase_table[idx].target_func,
+ bpf_testcase_table[idx].expect_result);
out:
bpf__clear();
return ret;
}
-int test__bpf(void)
+int test__bpf_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(bpf_testcase_table);
+}
+
+const char *test__bpf_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return NULL;
+ return bpf_testcase_table[i].desc;
+}
+
+int test__bpf(int i)
{
- unsigned int i;
int err;
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return TEST_FAIL;
+
if (geteuid() != 0) {
pr_debug("Only root can run BPF test\n");
return TEST_SKIP;
}
- for (i = 0; i < ARRAY_SIZE(bpf_testcase_table); i++) {
- err = __test__bpf(i);
+ err = __test__bpf(i);
+ return err;
+}
- if (err != TEST_OK)
- return err;
- }
+#else
+int test__bpf_subtest_get_nr(void)
+{
+ return 0;
+}
- return TEST_OK;
+const char *test__bpf_subtest_get_desc(int i __maybe_unused)
+{
+ return NULL;
}
-#else
-int test__bpf(void)
+int test__bpf(int i __maybe_unused)
{
pr_debug("Skip BPF test because BPF support is not compiled\n");
return TEST_SKIP;
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 80c442eab767..f2b1dcac45d3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -11,7 +11,7 @@
#include "tests.h"
#include "debug.h"
#include "color.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "symbol.h"
struct test __weak arch_tests[] = {
@@ -160,6 +160,11 @@ static struct test generic_tests[] = {
{
.desc = "Test LLVM searching and compiling",
.func = test__llvm,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__llvm_subtest_get_nr,
+ .get_desc = test__llvm_subtest_get_desc,
+ },
},
{
.desc = "Test topology in session",
@@ -168,6 +173,35 @@ static struct test generic_tests[] = {
{
.desc = "Test BPF filter",
.func = test__bpf,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__bpf_subtest_get_nr,
+ .get_desc = test__bpf_subtest_get_desc,
+ },
+ },
+ {
+ .desc = "Test thread map synthesize",
+ .func = test__thread_map_synthesize,
+ },
+ {
+ .desc = "Test cpu map synthesize",
+ .func = test__cpu_map_synthesize,
+ },
+ {
+ .desc = "Test stat config synthesize",
+ .func = test__synthesize_stat_config,
+ },
+ {
+ .desc = "Test stat synthesize",
+ .func = test__synthesize_stat,
+ },
+ {
+ .desc = "Test stat round synthesize",
+ .func = test__synthesize_stat_round,
+ },
+ {
+ .desc = "Test attr update synthesize",
+ .func = test__event_update,
},
{
.func = NULL,
@@ -203,7 +237,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
return false;
}
-static int run_test(struct test *test)
+static int run_test(struct test *test, int subtest)
{
int status, err = -1, child = fork();
char sbuf[STRERR_BUFSIZE];
@@ -216,7 +250,22 @@ static int run_test(struct test *test)
if (!child) {
pr_debug("test child forked, pid %d\n", getpid());
- err = test->func();
+ if (!verbose) {
+ int nullfd = open("/dev/null", O_WRONLY);
+ if (nullfd >= 0) {
+ close(STDERR_FILENO);
+ close(STDOUT_FILENO);
+
+ dup2(nullfd, STDOUT_FILENO);
+ dup2(STDOUT_FILENO, STDERR_FILENO);
+ close(nullfd);
+ }
+ } else {
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
+ }
+
+ err = test->func(subtest);
exit(err);
}
@@ -237,6 +286,40 @@ static int run_test(struct test *test)
for (j = 0; j < ARRAY_SIZE(tests); j++) \
for (t = &tests[j][0]; t->func; t++)
+static int test_and_print(struct test *t, bool force_skip, int subtest)
+{
+ int err;
+
+ if (!force_skip) {
+ pr_debug("\n--- start ---\n");
+ err = run_test(t, subtest);
+ pr_debug("---- end ----\n");
+ } else {
+ pr_debug("\n--- force skipped ---\n");
+ err = TEST_SKIP;
+ }
+
+ if (!t->subtest.get_nr)
+ pr_debug("%s:", t->desc);
+ else
+ pr_debug("%s subtest %d:", t->desc, subtest);
+
+ switch (err) {
+ case TEST_OK:
+ pr_info(" Ok\n");
+ break;
+ case TEST_SKIP:
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ break;
+ case TEST_FAIL:
+ default:
+ color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+ break;
+ }
+
+ return err;
+}
+
static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
{
struct test *t;
@@ -264,21 +347,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
- pr_debug("\n--- start ---\n");
- err = run_test(t);
- pr_debug("---- end ----\n%s:", t->desc);
-
- switch (err) {
- case TEST_OK:
- pr_info(" Ok\n");
- break;
- case TEST_SKIP:
- color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
- break;
- case TEST_FAIL:
- default:
- color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
- break;
+ if (!t->subtest.get_nr) {
+ test_and_print(t, false, -1);
+ } else {
+ int subn = t->subtest.get_nr();
+ /*
+ * minus 2 to align with normal testcases.
+ * For subtest we print additional '.x' in number.
+ * for example:
+ *
+ * 35: Test LLVM searching and compiling :
+ * 35.1: Basic BPF llvm compiling test : Ok
+ */
+ int subw = width > 2 ? width - 2 : width;
+ bool skip = false;
+ int subi;
+
+ if (subn <= 0) {
+ color_fprintf(stderr, PERF_COLOR_YELLOW,
+ " Skip (not compiled in)\n");
+ continue;
+ }
+ pr_info("\n");
+
+ for (subi = 0; subi < subn; subi++) {
+ int len = strlen(t->subtest.get_desc(subi));
+
+ if (subw < len)
+ subw = len;
+ }
+
+ for (subi = 0; subi < subn; subi++) {
+ pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ t->subtest.get_desc(subi));
+ err = test_and_print(t, skip, subi);
+ if (err != TEST_OK && t->subtest.skip_if_fail)
+ skip = true;
+ }
}
}
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index a767a6400c5c..313a48c6b2bc 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -433,7 +433,6 @@ enum {
static int do_test_code_reading(bool try_kcore)
{
- struct machines machines;
struct machine *machine;
struct thread *thread;
struct record_opts opts = {
@@ -459,8 +458,7 @@ static int do_test_code_reading(bool try_kcore)
pid = getpid();
- machines__init(&machines);
- machine = &machines.host;
+ machine = machine__new_host();
ret = machine__create_kernel_maps(machine);
if (ret < 0) {
@@ -549,6 +547,13 @@ static int do_test_code_reading(bool try_kcore)
if (ret < 0) {
if (!excl_kernel) {
excl_kernel = true;
+ /*
+ * Both cpus and threads are now owned by evlist
+ * and will be freed by following perf_evlist__set_maps
+ * call. Getting refference to keep them alive.
+ */
+ cpu_map__get(cpus);
+ thread_map__get(threads);
perf_evlist__set_maps(evlist, NULL, NULL);
perf_evlist__delete(evlist);
evlist = NULL;
@@ -594,14 +599,13 @@ out_err:
cpu_map__put(cpus);
thread_map__put(threads);
}
- machines__destroy_kernel_maps(&machines);
machine__delete_threads(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
-int test__code_reading(void)
+int test__code_reading(int subtest __maybe_unused)
{
int ret;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
new file mode 100644
index 000000000000..4cb6418a8ffc
--- /dev/null
+++ b/tools/perf/tests/cpumap.c
@@ -0,0 +1,88 @@
+#include "tests.h"
+#include "cpumap.h"
+
+static int process_event_mask(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_mask *mask;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+ int i;
+
+ data = &map_event->data;
+
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
+
+ mask = (struct cpu_map_mask *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", mask->nr == 1);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask));
+ }
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 20);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+ }
+
+ cpu_map__put(map);
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+
+ data = &map_event->data;
+
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
+
+ cpus = (struct cpu_map_entries *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", cpus->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256);
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+ TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1);
+ cpu_map__put(map);
+ return 0;
+}
+
+
+int test__cpu_map_synthesize(int subtest __maybe_unused)
+{
+ struct cpu_map *cpus;
+
+ /* This one is better stores in mask. */
+ cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
+
+ cpu_map__put(cpus);
+
+ /* This one is better stores in cpu values. */
+ cpus = cpu_map__new("1,256");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
+
+ cpu_map__put(cpus);
+ return 0;
+}
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index a218aeaf56a0..dc673ff7c437 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
return fd;
}
-int test__dso_data(void)
+int test__dso_data(int subtest __maybe_unused)
{
struct machine machine;
struct dso *dso;
@@ -245,7 +245,7 @@ static int set_fd_limit(int n)
return setrlimit(RLIMIT_NOFILE, &rlim);
}
-int test__dso_data_cache(void)
+int test__dso_data_cache(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
@@ -302,7 +302,7 @@ int test__dso_data_cache(void)
return 0;
}
-int test__dso_data_reopen(void)
+int test__dso_data_reopen(int subtest __maybe_unused)
{
struct machine machine;
long nr_end, nr = open_files_cnt();
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 07221793a3ac..1c5c0221cea2 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -51,6 +51,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
"krava_1",
"test__dwarf_unwind"
};
+ /*
+ * The funcs[MAX_STACK] array index, based on the
+ * callchain order setup.
+ */
+ int idx = callchain_param.order == ORDER_CALLER ?
+ MAX_STACK - *cnt - 1 : *cnt;
if (*cnt >= MAX_STACK) {
pr_debug("failed: crossed the max stack value %d\n", MAX_STACK);
@@ -63,8 +69,10 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return -1;
}
- pr_debug("got: %s 0x%" PRIx64 "\n", symbol, entry->ip);
- return strcmp((const char *) symbol, funcs[(*cnt)++]);
+ (*cnt)++;
+ pr_debug("got: %s 0x%" PRIx64 ", expecting %s\n",
+ symbol, entry->ip, funcs[idx]);
+ return strcmp((const char *) symbol, funcs[idx]);
}
__attribute__ ((noinline))
@@ -105,8 +113,16 @@ static int compare(void *p1, void *p2)
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
- if (global_unwind_retval == -INT_MAX)
+ if (global_unwind_retval == -INT_MAX) {
+ /* Call unwinder twice for both callchain orders. */
+ callchain_param.order = ORDER_CALLER;
+
global_unwind_retval = unwind_thread(thread);
+ if (!global_unwind_retval) {
+ callchain_param.order = ORDER_CALLEE;
+ global_unwind_retval = unwind_thread(thread);
+ }
+ }
return p1 - p2;
}
@@ -142,21 +158,23 @@ static int krava_1(struct thread *thread)
return krava_2(thread);
}
-int test__dwarf_unwind(void)
+int test__dwarf_unwind(int subtest __maybe_unused)
{
- struct machines machines;
struct machine *machine;
struct thread *thread;
int err = -1;
- machines__init(&machines);
-
- machine = machines__find(&machines, HOST_KERNEL_ID);
+ machine = machine__new_host();
if (!machine) {
pr_err("Could not get machine\n");
return -1;
}
+ if (machine__create_kernel_maps(machine)) {
+ pr_err("Failed to create kernel maps\n");
+ return -1;
+ }
+
callchain_param.record_mode = CALLCHAIN_DWARF;
if (init_live_machine(machine)) {
@@ -178,7 +196,6 @@ int test__dwarf_unwind(void)
out:
machine__delete_threads(machine);
- machine__exit(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
new file mode 100644
index 000000000000..012eab5d1df1
--- /dev/null
+++ b/tools/perf/tests/event_update.c
@@ -0,0 +1,117 @@
+#include <linux/compiler.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "tests.h"
+#include "debug.h"
+
+static int process_event_unit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT);
+ TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA"));
+ return 0;
+}
+
+static int process_event_scale(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+ struct event_update_event_scale *ev_data;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
+ TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+ return 0;
+}
+
+struct event_name {
+ struct perf_tool tool;
+ const char *name;
+};
+
+static int process_event_name(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_name *tmp = container_of(tool, struct event_name, tool);
+ struct event_update_event *ev = (struct event_update_event*) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME);
+ TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name));
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event*) event;
+ struct event_update_event_cpus *ev_data;
+ struct cpu_map *map;
+
+ ev_data = (struct event_update_event_cpus*) ev->data;
+
+ map = cpu_map__new_data(&ev_data->cpus);
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
+ TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
+ TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
+ TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+ cpu_map__put(map);
+ return 0;
+}
+
+int test__event_update(int subtest __maybe_unused)
+{
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct event_name tmp;
+
+ evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("failed to allos ids",
+ !perf_evsel__alloc_id(evsel, 1, 1));
+
+ perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+
+ evsel->unit = strdup("KRAVA");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update unit",
+ !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
+
+ evsel->scale = 0.123;
+
+ TEST_ASSERT_VAL("failed to synthesize attr update scale",
+ !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+
+ tmp.name = perf_evsel__name(evsel);
+
+ TEST_ASSERT_VAL("failed to synthesize attr update name",
+ !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
+
+ evsel->own_cpus = cpu_map__new("1,2,3");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update cpus",
+ !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
+
+ cpu_map__put(evsel->own_cpus);
+ return 0;
+}
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 3fa715987a5e..2de4a4f2c3ed 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -95,7 +95,7 @@ out_delete_evlist:
#define perf_evsel__name_array_test(names) \
__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
-int test__perf_evsel__roundtrip_name_test(void)
+int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused)
{
int err = 0, ret = 0;
@@ -103,7 +103,8 @@ int test__perf_evsel__roundtrip_name_test(void)
if (err)
ret = err;
- err = perf_evsel__name_array_test(perf_evsel__sw_names);
+ err = __perf_evsel__name_array_test(perf_evsel__sw_names,
+ PERF_COUNT_SW_DUMMY + 1);
if (err)
ret = err;
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 790e413d9a1f..1984b3bbfe15 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
return ret;
}
-int test__perf_evsel__tp_sched_test(void)
+int test__perf_evsel__tp_sched_test(int subtest __maybe_unused)
{
struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
int ret = 0;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index d24b837951d4..c809463edbe5 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
return printed + fdarray__fprintf(fda, fp);
}
-int test__fdarray__filter(void)
+int test__fdarray__filter(int subtest __maybe_unused)
{
int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
@@ -103,7 +103,7 @@ out:
return err;
}
-int test__fdarray__add(void)
+int test__fdarray__add(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct fdarray *fda = fdarray__new(2, 2);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index ce80b274b097..bcfd081ee1d2 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -87,6 +87,11 @@ struct machine *setup_fake_machine(struct machines *machines)
return NULL;
}
+ if (machine__create_kernel_maps(machine)) {
+ pr_debug("Cannot create kernel maps\n");
+ return NULL;
+ }
+
for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
struct thread *thread;
@@ -150,7 +155,6 @@ struct machine *setup_fake_machine(struct machines *machines)
out:
pr_debug("Not enough memory for machine setup\n");
machine__delete_threads(machine);
- machine__delete(machine);
return NULL;
}
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7ed737019de7..e36089212061 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -281,7 +281,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = false;
perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -428,7 +428,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = false;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -486,7 +486,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = true;
perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -670,7 +670,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
symbol_conf.cumulate_callchain = true;
perf_evsel__set_sample_bit(evsel, CALLCHAIN);
- setup_sorting();
+ setup_sorting(NULL);
callchain_register_param(&callchain_param);
err = add_hist_entries(hists, machine);
@@ -686,7 +686,7 @@ out:
return err;
}
-int test__hists_cumulate(void)
+int test__hists_cumulate(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 818acf875dd0..2a784befd9ce 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -104,7 +104,7 @@ out:
return TEST_FAIL;
}
-int test__hists_filter(void)
+int test__hists_filter(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
@@ -122,7 +122,7 @@ int test__hists_filter(void)
goto out;
/* default sort order (comm,dso,sym) will be used */
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
goto out;
machines__init(&machines);
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 8c102b011424..c764d69ac6ef 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
- struct perf_sample sample = { .period = 1, };
+ struct perf_sample sample = { .period = 1, .weight = 1, };
size_t i = 0, k;
/*
@@ -90,7 +90,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(hists, &al, NULL,
- NULL, NULL, 1, 1, 0, true);
+ NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
goto out;
@@ -116,7 +116,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
goto out;
he = __hists__add_entry(hists, &al, NULL,
- NULL, NULL, 1, 1, 0, true);
+ NULL, NULL, &sample, true);
if (he == NULL) {
addr_location__put(&al);
goto out;
@@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
-int test__hists_link(void)
+int test__hists_link(int subtest __maybe_unused)
{
int err = -1;
struct hists *hists, *first_hists;
@@ -294,7 +294,7 @@ int test__hists_link(void)
goto out;
/* default sort order (comm,dso,sym) will be used */
- if (setup_sorting() < 0)
+ if (setup_sorting(NULL) < 0)
goto out;
machines__init(&machines);
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index adbebc852cc8..ebe6cd485b5d 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -134,7 +134,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
field_order = NULL;
sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -236,7 +236,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
field_order = "overhead,cpu";
sort_order = "pid";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -292,7 +292,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
field_order = "comm,overhead,dso";
sort_order = NULL;
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -366,7 +366,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
field_order = "dso,sym,comm,overhead,dso";
sort_order = "sym";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -468,7 +468,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
field_order = "cpu,pid,comm,dso,sym";
sort_order = "dso,pid";
- setup_sorting();
+ setup_sorting(NULL);
/*
* expected output:
@@ -576,7 +576,7 @@ out:
return err;
}
-int test__hists_output(void)
+int test__hists_output(int subtest __maybe_unused)
{
int err = TEST_FAIL;
struct machines machines;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index a2e2269aa093..ddb78fae064a 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -49,13 +49,12 @@ static int find_comm(struct perf_evlist *evlist, const char *comm)
* when an event is disabled but a dummy software event is not disabled. If the
* test passes %0 is returned, otherwise %-1 is returned.
*/
-int test__keep_tracking(void)
+int test__keep_tracking(int subtest __maybe_unused)
{
struct record_opts opts = {
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
- .freq = 4000,
.target = {
.uses_mmap = true,
},
@@ -124,7 +123,7 @@ int test__keep_tracking(void)
evsel = perf_evlist__last(evlist);
- CHECK__(perf_evlist__disable_event(evlist, evsel));
+ CHECK__(perf_evsel__disable(evsel));
comm = "Test COMM 2";
CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index 08c433b4bf4f..d2af78193153 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
#define M(path, c, e) \
TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
-int test__kmod_path__parse(void)
+int test__kmod_path__parse(int subtest __maybe_unused)
{
/* path alloc_name alloc_ext kmod comp name ext */
T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index bc4cf507cde5..06f45c1d4256 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -44,13 +44,17 @@ static struct {
.source = test_llvm__bpf_test_kbuild_prog,
.desc = "Test kbuild searching",
},
+ [LLVM_TESTCASE_BPF_PROLOGUE] = {
+ .source = test_llvm__bpf_test_prologue_prog,
+ .desc = "Compile source for BPF prologue generation test",
+ },
};
int
test_llvm__fetch_bpf_obj(void **p_obj_buf,
size_t *p_obj_buf_sz,
- enum test_llvm__testcase index,
+ enum test_llvm__testcase idx,
bool force)
{
const char *source;
@@ -59,11 +63,11 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
char *tmpl_new = NULL, *clang_opt_new = NULL;
int err, old_verbose, ret = TEST_FAIL;
- if (index >= __LLVM_TESTCASE_MAX)
+ if (idx >= __LLVM_TESTCASE_MAX)
return TEST_FAIL;
- source = bpf_source_table[index].source;
- desc = bpf_source_table[index].desc;
+ source = bpf_source_table[idx].source;
+ desc = bpf_source_table[idx].desc;
perf_config(perf_config_cb, NULL);
@@ -127,44 +131,39 @@ out:
return ret;
}
-int test__llvm(void)
+int test__llvm(int subtest)
{
- enum test_llvm__testcase i;
+ int ret;
+ void *obj_buf = NULL;
+ size_t obj_buf_sz = 0;
- for (i = 0; i < __LLVM_TESTCASE_MAX; i++) {
- int ret;
- void *obj_buf = NULL;
- size_t obj_buf_sz = 0;
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return TEST_FAIL;
- ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
- i, false);
+ ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
+ subtest, false);
- if (ret == TEST_OK) {
- ret = test__bpf_parsing(obj_buf, obj_buf_sz);
- if (ret != TEST_OK)
- pr_debug("Failed to parse test case '%s'\n",
- bpf_source_table[i].desc);
- }
- free(obj_buf);
-
- switch (ret) {
- case TEST_SKIP:
- return TEST_SKIP;
- case TEST_OK:
- break;
- default:
- /*
- * Test 0 is the basic LLVM test. If test 0
- * fail, the basic LLVM support not functional
- * so the whole test should fail. If other test
- * case fail, it can be fixed by adjusting
- * config so don't report error.
- */
- if (i == 0)
- return TEST_FAIL;
- else
- return TEST_SKIP;
+ if (ret == TEST_OK) {
+ ret = test__bpf_parsing(obj_buf, obj_buf_sz);
+ if (ret != TEST_OK) {
+ pr_debug("Failed to parse test case '%s'\n",
+ bpf_source_table[subtest].desc);
}
}
- return TEST_OK;
+ free(obj_buf);
+
+ return ret;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+ return __LLVM_TESTCASE_MAX;
+}
+
+const char *test__llvm_subtest_get_desc(int subtest)
+{
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return NULL;
+
+ return bpf_source_table[subtest].desc;
}
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
index d91d8f44efee..5150b4d6ef50 100644
--- a/tools/perf/tests/llvm.h
+++ b/tools/perf/tests/llvm.h
@@ -6,10 +6,12 @@
extern const char test_llvm__bpf_base_prog[];
extern const char test_llvm__bpf_test_kbuild_prog[];
+extern const char test_llvm__bpf_test_prologue_prog[];
enum test_llvm__testcase {
LLVM_TESTCASE_BASE,
LLVM_TESTCASE_KBUILD,
+ LLVM_TESTCASE_BPF_PROLOGUE,
__LLVM_TESTCASE_MAX,
};
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 8ea3dffc5065..c1fbb8e884c0 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -259,7 +259,8 @@ $(run_O):
tarpkg:
@cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \
echo "- $@: $$cmd" && echo $$cmd > $@ && \
- ( eval $$cmd ) >> $@ 2>&1
+ ( eval $$cmd ) >> $@ 2>&1 && \
+ rm -f $@
make_kernelsrc:
@echo "- make -C <kernelsrc> tools/perf"
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 4495493c9431..359e98fcd94c 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -16,7 +16,7 @@
* Then it checks if the number of syscalls reported as perf events by
* the kernel corresponds to the number of syscalls made.
*/
-int test__basic_mmap(void)
+int test__basic_mmap(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 145050e2e544..0c5ce44f723f 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -149,7 +149,6 @@ static int synth_process(struct machine *machine)
static int mmap_events(synth_cb synth)
{
- struct machines machines;
struct machine *machine;
int err, i;
@@ -162,8 +161,7 @@ static int mmap_events(synth_cb synth)
*/
TEST_ASSERT_VAL("failed to create threads", !threads_create());
- machines__init(&machines);
- machine = &machines.host;
+ machine = machine__new_host();
dump_trace = verbose > 1 ? 1 : 0;
@@ -203,7 +201,7 @@ static int mmap_events(synth_cb synth)
}
machine__delete_threads(machine);
- machines__exit(&machines);
+ machine__delete(machine);
return err;
}
@@ -221,7 +219,7 @@ static int mmap_events(synth_cb synth)
*
* by using all thread objects.
*/
-int test__mmap_thread_lookup(void)
+int test__mmap_thread_lookup(int subtest __maybe_unused)
{
/* perf_event__synthesize_threads synthesize */
TEST_ASSERT_VAL("failed with sythesizing all",
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 2006485a2859..53c2273e8859 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -7,7 +7,7 @@
#include "debug.h"
#include "stat.h"
-int test__openat_syscall_event_on_all_cpus(void)
+int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused)
{
int err = -1, fd, cpu;
struct cpu_map *cpus;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 5e811cd8f1c3..eb99a105f31c 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -6,7 +6,7 @@
#include "tests.h"
#include "debug.h"
-int test__syscall_openat_tp_fields(void)
+int test__syscall_openat_tp_fields(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 033b54797b8a..1184f9ba6499 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -5,7 +5,7 @@
#include "debug.h"
#include "tests.h"
-int test__openat_syscall_event(void)
+int test__openat_syscall_event(int subtest __maybe_unused)
{
int err = -1, fd;
struct perf_evsel *evsel;
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 636d7b42d844..abe8849d1d70 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1765,7 +1765,7 @@ static void debug_warn(const char *warn, va_list params)
fprintf(stderr, " Warning: %s\n", msg);
}
-int test__parse_events(void)
+int test__parse_events(int subtest __maybe_unused)
{
int ret1, ret2 = 0;
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index 2c63ea658541..294c76b01b41 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -67,7 +67,7 @@ struct test_attr_event {
*
* Return: %0 on success, %-1 if the test fails.
*/
-int test__parse_no_sample_id_all(void)
+int test__parse_no_sample_id_all(int subtest __maybe_unused)
{
int err;
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7a228a2a070b..1cc78cefe399 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -32,7 +32,7 @@ realloc:
return cpu;
}
-int test__PERF_RECORD(void)
+int test__PERF_RECORD(int subtest __maybe_unused)
{
struct record_opts opts = {
.target = {
@@ -40,12 +40,11 @@ int test__PERF_RECORD(void)
.uses_mmap = true,
},
.no_buffering = true,
- .freq = 10,
.mmap_pages = 256,
};
cpu_set_t cpu_mask;
size_t cpu_mask_size = sizeof(cpu_mask);
- struct perf_evlist *evlist = perf_evlist__new_default();
+ struct perf_evlist *evlist = perf_evlist__new_dummy();
struct perf_evsel *evsel;
struct perf_sample sample;
const char *cmd = "sleep";
@@ -61,6 +60,9 @@ int test__PERF_RECORD(void)
int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
char sbuf[STRERR_BUFSIZE];
+ if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+ evlist = perf_evlist__new_default();
+
if (evlist == NULL || argv == NULL) {
pr_debug("Not enough memory to create evlist\n");
goto out;
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index faa04e9d5d5f..1e2ba2602930 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void)
return &terms;
}
-int test__pmu(void)
+int test__pmu(int subtest __maybe_unused)
{
char *format = test_format_dir_get();
LIST_HEAD(formats);
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 7760277c6def..7a52834ee0d0 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -4,11 +4,12 @@
#include <stdio.h>
#include <stdlib.h>
+#include <linux/compiler.h>
#include "tests.h"
extern int verbose;
-int test__python_use(void)
+int test__python_use(int subtest __maybe_unused)
{
char *cmd;
int ret;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 30c02181e78b..5f23710b9fee 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -290,7 +290,7 @@ out_free:
* checks sample format bits separately and together. If the test passes %0 is
* returned, otherwise %-1 is returned.
*/
-int test__sample_parsing(void)
+int test__sample_parsing(int subtest __maybe_unused)
{
const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
u64 sample_type;
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
new file mode 100644
index 000000000000..6a20ff2326bb
--- /dev/null
+++ b/tools/perf/tests/stat.c
@@ -0,0 +1,111 @@
+#include <linux/compiler.h>
+#include "event.h"
+#include "tests.h"
+#include "stat.h"
+#include "counts.h"
+#include "debug.h"
+
+static bool has_term(struct stat_config_event *config,
+ u64 tag, u64 val)
+{
+ unsigned i;
+
+ for (i = 0; i < config->nr; i++) {
+ if ((config->data[i].tag == tag) &&
+ (config->data[i].val == val))
+ return true;
+ }
+
+ return false;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_config_event *config = &event->stat_config;
+ struct perf_stat_config stat_config;
+
+#define HAS(term, val) \
+ has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
+
+ TEST_ASSERT_VAL("wrong nr", config->nr == PERF_STAT_CONFIG_TERM__MAX);
+ TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE));
+ TEST_ASSERT_VAL("wrong scale", HAS(SCALE, 1));
+ TEST_ASSERT_VAL("wrong interval", HAS(INTERVAL, 1));
+
+#undef HAS
+
+ perf_event__read_stat_config(&stat_config, config);
+
+ TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
+ TEST_ASSERT_VAL("wrong scale", stat_config.scale == 1);
+ TEST_ASSERT_VAL("wrong interval", stat_config.interval == 1);
+ return 0;
+}
+
+int test__synthesize_stat_config(int subtest __maybe_unused)
+{
+ struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_CORE,
+ .scale = 1,
+ .interval = 1,
+ };
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_event *st = &event->stat;
+
+ TEST_ASSERT_VAL("wrong cpu", st->cpu == 1);
+ TEST_ASSERT_VAL("wrong thread", st->thread == 2);
+ TEST_ASSERT_VAL("wrong id", st->id == 3);
+ TEST_ASSERT_VAL("wrong val", st->val == 100);
+ TEST_ASSERT_VAL("wrong run", st->ena == 200);
+ TEST_ASSERT_VAL("wrong ena", st->run == 300);
+ return 0;
+}
+
+int test__synthesize_stat(int subtest __maybe_unused)
+{
+ struct perf_counts_values count;
+
+ count.val = 100;
+ count.ena = 200;
+ count.run = 300;
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_round_event *stat_round = &event->stat_round;
+
+ TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef);
+ TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL);
+ return 0;
+}
+
+int test__synthesize_stat_round(int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
+ process_stat_round_event, NULL));
+
+ return 0;
+}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 5b83f56a3b6f..36e8ce1550e3 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -122,7 +122,7 @@ out_delete_evlist:
return err;
}
-int test__sw_clock_freq(void)
+int test__sw_clock_freq(int subtest __maybe_unused)
{
int ret;
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index a02af503100c..ebd80168d51e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -305,7 +305,7 @@ out_free_nodes:
* evsel->system_wide and evsel->tracking flags (respectively) with other events
* sometimes enabled or disabled.
*/
-int test__switch_tracking(void)
+int test__switch_tracking(int subtest __maybe_unused)
{
const char *sched_switch = "sched:sched_switch";
struct switch_tracking switch_tracking = { .tids = NULL, };
@@ -455,7 +455,7 @@ int test__switch_tracking(void)
perf_evlist__enable(evlist);
- err = perf_evlist__disable_event(evlist, cpu_clocks_evsel);
+ err = perf_evsel__disable(cpu_clocks_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
@@ -474,7 +474,7 @@ int test__switch_tracking(void)
goto out_err;
}
- err = perf_evlist__disable_event(evlist, cycles_evsel);
+ err = perf_evsel__disable(cycles_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
@@ -500,7 +500,7 @@ int test__switch_tracking(void)
goto out_err;
}
- err = perf_evlist__enable_event(evlist, cycles_evsel);
+ err = perf_evsel__enable(cycles_evsel);
if (err) {
pr_debug("perf_evlist__disable_event failed!\n");
goto out_err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index add16385f13e..2dfff7ac8ef3 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
* if the number of exit event reported by the kernel is 1 or not
* in order to check the kernel returns correct number of event.
*/
-int test__task_exit(void)
+int test__task_exit(int subtest __maybe_unused)
{
int err = -1;
union perf_event *event;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 3c8734a3abbc..82b2b5e6ba7c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,8 @@
#ifndef TESTS_H
#define TESTS_H
+#include <stdbool.h>
+
#define TEST_ASSERT_VAL(text, cond) \
do { \
if (!(cond)) { \
@@ -26,48 +28,63 @@ enum {
struct test {
const char *desc;
- int (*func)(void);
+ int (*func)(int subtest);
+ struct {
+ bool skip_if_fail;
+ int (*get_nr)(void);
+ const char *(*get_desc)(int subtest);
+ } subtest;
};
/* Tests */
-int test__vmlinux_matches_kallsyms(void);
-int test__openat_syscall_event(void);
-int test__openat_syscall_event_on_all_cpus(void);
-int test__basic_mmap(void);
-int test__PERF_RECORD(void);
-int test__perf_evsel__roundtrip_name_test(void);
-int test__perf_evsel__tp_sched_test(void);
-int test__syscall_openat_tp_fields(void);
-int test__pmu(void);
-int test__attr(void);
-int test__dso_data(void);
-int test__dso_data_cache(void);
-int test__dso_data_reopen(void);
-int test__parse_events(void);
-int test__hists_link(void);
-int test__python_use(void);
-int test__bp_signal(void);
-int test__bp_signal_overflow(void);
-int test__task_exit(void);
-int test__sw_clock_freq(void);
-int test__code_reading(void);
-int test__sample_parsing(void);
-int test__keep_tracking(void);
-int test__parse_no_sample_id_all(void);
-int test__dwarf_unwind(void);
-int test__hists_filter(void);
-int test__mmap_thread_lookup(void);
-int test__thread_mg_share(void);
-int test__hists_output(void);
-int test__hists_cumulate(void);
-int test__switch_tracking(void);
-int test__fdarray__filter(void);
-int test__fdarray__add(void);
-int test__kmod_path__parse(void);
-int test__thread_map(void);
-int test__llvm(void);
-int test__bpf(void);
-int test_session_topology(void);
+int test__vmlinux_matches_kallsyms(int subtest);
+int test__openat_syscall_event(int subtest);
+int test__openat_syscall_event_on_all_cpus(int subtest);
+int test__basic_mmap(int subtest);
+int test__PERF_RECORD(int subtest);
+int test__perf_evsel__roundtrip_name_test(int subtest);
+int test__perf_evsel__tp_sched_test(int subtest);
+int test__syscall_openat_tp_fields(int subtest);
+int test__pmu(int subtest);
+int test__attr(int subtest);
+int test__dso_data(int subtest);
+int test__dso_data_cache(int subtest);
+int test__dso_data_reopen(int subtest);
+int test__parse_events(int subtest);
+int test__hists_link(int subtest);
+int test__python_use(int subtest);
+int test__bp_signal(int subtest);
+int test__bp_signal_overflow(int subtest);
+int test__task_exit(int subtest);
+int test__sw_clock_freq(int subtest);
+int test__code_reading(int subtest);
+int test__sample_parsing(int subtest);
+int test__keep_tracking(int subtest);
+int test__parse_no_sample_id_all(int subtest);
+int test__dwarf_unwind(int subtest);
+int test__hists_filter(int subtest);
+int test__mmap_thread_lookup(int subtest);
+int test__thread_mg_share(int subtest);
+int test__hists_output(int subtest);
+int test__hists_cumulate(int subtest);
+int test__switch_tracking(int subtest);
+int test__fdarray__filter(int subtest);
+int test__fdarray__add(int subtest);
+int test__kmod_path__parse(int subtest);
+int test__thread_map(int subtest);
+int test__llvm(int subtest);
+const char *test__llvm_subtest_get_desc(int subtest);
+int test__llvm_subtest_get_nr(void);
+int test__bpf(int subtest);
+const char *test__bpf_subtest_get_desc(int subtest);
+int test__bpf_subtest_get_nr(void);
+int test_session_topology(int subtest);
+int test__thread_map_synthesize(int subtest);
+int test__cpu_map_synthesize(int subtest);
+int test__synthesize_stat_config(int subtest);
+int test__synthesize_stat(int subtest);
+int test__synthesize_stat_round(int subtest);
+int test__event_update(int subtest);
#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 138a0e3431fa..fccde848fe9c 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -4,7 +4,7 @@
#include "thread_map.h"
#include "debug.h"
-int test__thread_map(void)
+int test__thread_map(int subtest __maybe_unused)
{
struct thread_map *map;
@@ -40,3 +40,46 @@ int test__thread_map(void)
thread_map__put(map);
return 0;
}
+
+static int process_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct thread_map_event *map = &event->thread_map;
+ struct thread_map *threads;
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid", map->entries[0].pid == (u64) getpid());
+ TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, "perf"));
+
+ threads = thread_map__new_event(&event->thread_map);
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ TEST_ASSERT_VAL("wrong nr", threads->nr == 1);
+ TEST_ASSERT_VAL("wrong pid",
+ thread_map__pid(threads, 0) == getpid());
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(threads, 0) &&
+ !strcmp(thread_map__comm(threads, 0), "perf"));
+ TEST_ASSERT_VAL("wrong refcnt",
+ atomic_read(&threads->refcnt) == 1);
+ thread_map__put(threads);
+ return 0;
+}
+
+int test__thread_map_synthesize(int subtest __maybe_unused)
+{
+ struct thread_map *threads;
+
+ /* test map on current pid */
+ threads = thread_map__new_by_pid(getpid());
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ thread_map__read_comms(threads);
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+
+ return 0;
+}
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index 01fabb19d746..188b63140fc8 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -4,7 +4,7 @@
#include "map.h"
#include "debug.h"
-int test__thread_mg_share(void)
+int test__thread_mg_share(int subtest __maybe_unused)
{
struct machines machines;
struct machine *machine;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index f5bb096c3bd9..98fe69ac553c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
return 0;
}
-int test_session_topology(void)
+int test_session_topology(int subtest __maybe_unused)
{
char path[PATH_MAX];
struct cpu_map *map;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index d677e018e504..f0bfc9e8fd9f 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
-int test__vmlinux_matches_kallsyms(void)
+int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
{
int err = -1;
struct rb_node *nd;
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index e9703c0829f1..d37202121689 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -528,7 +528,7 @@ static struct ui_browser_colorset {
.colorset = HE_COLORSET_SELECTED,
.name = "selected",
.fg = "black",
- .bg = "lightgray",
+ .bg = "yellow",
},
{
.colorset = HE_COLORSET_CODE,
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fa9eb92c9e24..901d481e6cea 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -178,12 +178,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
return n;
}
+static int callchain_node__count_flat_rows(struct callchain_node *node)
+{
+ struct callchain_list *chain;
+ char folded_sign = 0;
+ int n = 0;
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ if (!folded_sign) {
+ /* only check first chain list entry */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ if (!folded_sign) {
+ /* node->parent_val list might be empty */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ return n;
+}
+
+static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
+{
+ return 1;
+}
+
static int callchain_node__count_rows(struct callchain_node *node)
{
struct callchain_list *chain;
bool unfolded = false;
int n = 0;
+ if (callchain_param.mode == CHAIN_FLAT)
+ return callchain_node__count_flat_rows(node);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ return callchain_node__count_folded_rows(node);
+
list_for_each_entry(chain, &node->val, list) {
++n;
unfolded = chain->unfolded;
@@ -263,7 +302,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling;
- if (!list_empty(&node->val)) {
+ if (node->val.next != node->val.prev) {
chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
@@ -279,6 +318,9 @@ static void callchain__init_have_children(struct rb_root *root)
for (nd = rb_first(root); nd; nd = rb_next(nd)) {
struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
callchain_node__init_have_children(node, has_sibling);
+ if (callchain_param.mode == CHAIN_FLAT ||
+ callchain_param.mode == CHAIN_FOLDED)
+ callchain_node__make_parent_list(node);
}
}
@@ -298,6 +340,9 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
bool has_children;
+ if (!he || !ms)
+ return false;
+
if (ms == &he->ms)
has_children = hist_entry__toggle_fold(he);
else
@@ -574,6 +619,231 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u
#define LEVEL_OFFSET_STEP 3
+static int hist_browser__show_callchain_list(struct hist_browser *browser,
+ struct callchain_node *node,
+ struct callchain_list *chain,
+ unsigned short row, u64 total,
+ bool need_percent, int offset,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg)
+{
+ char bf[1024], *alloc_str;
+ const char *str;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ return 0;
+ }
+
+ alloc_str = NULL;
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+
+ if (need_percent) {
+ char buf[64];
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf),
+ total);
+
+ if (asprintf(&alloc_str, "%s %s", buf, str) < 0)
+ str = "Not enough memory!";
+ else
+ str = alloc_str;
+ }
+
+ print(browser, chain, str, offset, row, arg);
+
+ free(alloc_str);
+ return 1;
+}
+
+static int hist_browser__show_callchain_flat(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = node && rb_next(node);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain;
+ char folded_sign = ' ';
+ int first = true;
+ int extra_offset = 0;
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ goto next;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ break;
+ }
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+out:
+ return row - first_row;
+}
+
+static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
+ struct callchain_list *chain,
+ char *value_str, char *old_str)
+{
+ char bf[1024];
+ const char *str;
+ char *new;
+
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+ if (old_str) {
+ if (asprintf(&new, "%s%s%s", old_str,
+ symbol_conf.field_sep ?: ";", str) < 0)
+ new = NULL;
+ } else {
+ if (value_str) {
+ if (asprintf(&new, "%s %s", value_str, str) < 0)
+ new = NULL;
+ } else {
+ if (asprintf(&new, "%s", str) < 0)
+ new = NULL;
+ }
+ }
+ return new;
+}
+
+static int hist_browser__show_callchain_folded(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = node && rb_next(node);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain, *first_chain = NULL;
+ int first = true;
+ char *value_str = NULL, *value_str_alloc = NULL;
+ char *chain_str = NULL, *chain_str_alloc = NULL;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ goto next;
+ }
+
+ if (need_percent) {
+ char buf[64];
+
+ callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
+ if (asprintf(&value_str, "%s", buf) < 0) {
+ value_str = (char *)"<...>";
+ goto do_print;
+ }
+ value_str_alloc = value_str;
+ }
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+do_print:
+ print(browser, first_chain, chain_str, offset, row++, arg);
+ free(value_str_alloc);
+ free(chain_str_alloc);
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+
+ return row - first_row;
+}
+
static int hist_browser__show_callchain(struct hist_browser *browser,
struct rb_root *root, int level,
unsigned short row, u64 total,
@@ -592,15 +862,12 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
- u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
int extra_offset = 0;
list_for_each_entry(chain, &child->val, list) {
- char bf[1024], *alloc_str;
- const char *str;
bool was_first = first;
if (first)
@@ -609,31 +876,16 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
extra_offset = LEVEL_OFFSET_STEP;
folded_sign = callchain_list__folded(chain);
- if (arg->row_offset != 0) {
- arg->row_offset--;
- goto do_next;
- }
-
- alloc_str = NULL;
- str = callchain_list__sym_name(chain, bf, sizeof(bf),
- browser->show_dso);
-
- if (was_first && need_percent) {
- double percent = cumul * 100.0 / total;
- if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
- str = "Not enough memory!";
- else
- str = alloc_str;
- }
-
- print(browser, chain, str, offset + extra_offset, row, arg);
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
- free(alloc_str);
-
- if (is_output_full(browser, ++row))
+ if (is_output_full(browser, row))
goto out;
-do_next:
+
if (folded_sign == '+')
break;
}
@@ -789,7 +1041,8 @@ static int hist_browser__show_entry(struct hist_browser *browser,
hist_browser__gotorc(browser, row, 0);
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll)
+ if (perf_hpp__should_skip(fmt, entry->hists) ||
+ column++ < browser->b.horiz_scroll)
continue;
if (current_entry && browser->b.navkeypressed) {
@@ -844,10 +1097,22 @@ static int hist_browser__show_entry(struct hist_browser *browser,
total = entry->stat.period;
}
- printed += hist_browser__show_callchain(browser,
+ if (callchain_param.mode == CHAIN_FLAT) {
+ printed += hist_browser__show_callchain_flat(browser,
+ &entry->sorted_chain, row, total,
+ hist_browser__show_callchain_entry, &arg,
+ hist_browser__check_output_full);
+ } else if (callchain_param.mode == CHAIN_FOLDED) {
+ printed += hist_browser__show_callchain_folded(browser,
+ &entry->sorted_chain, row, total,
+ hist_browser__show_callchain_entry, &arg,
+ hist_browser__check_output_full);
+ } else {
+ printed += hist_browser__show_callchain(browser,
&entry->sorted_chain, 1, row, total,
hist_browser__show_callchain_entry, &arg,
hist_browser__check_output_full);
+ }
if (arg.is_current_entry)
browser->he_selection = entry;
@@ -880,7 +1145,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
}
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll)
+ if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll)
continue;
ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
@@ -928,6 +1193,8 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
}
ui_browser__hists_init_top(browser);
+ hb->he_selection = NULL;
+ hb->selection = NULL;
for (nd = browser->top; nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -1033,6 +1300,9 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
* and stop when we printed enough lines to fill the screen.
*/
do_offset:
+ if (!nd)
+ return;
+
if (offset > 0) {
do {
h = rb_entry(nd, struct hist_entry, rb_node);
@@ -1145,7 +1415,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
printed += fprintf(fp, "%c ", folded_sign);
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, he->hists))
continue;
if (!first) {
@@ -1794,10 +2064,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
SLang_reset_tty();
SLang_init_tty(0, 0, 0);
- if (min_pcnt) {
+ if (min_pcnt)
browser->min_pcnt = min_pcnt;
- hist_browser__update_nr_entries(browser);
- }
+ hist_browser__update_nr_entries(browser);
browser->pstack = pstack__new(3);
if (browser->pstack == NULL)
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 4b3585eed1e8..0f8dcfdfb10f 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -89,8 +89,8 @@ void perf_gtk__init_hpp(void)
perf_gtk__hpp_color_overhead_acc;
}
-static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
- GtkTreeIter *parent, int col, u64 total)
+static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
{
struct rb_node *nd;
bool has_single_node = (rb_first(root) == rb_last(root));
@@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
struct callchain_list *chain;
GtkTreeIter iter, new_parent;
bool need_new_parent;
- double percent;
- u64 hits, child_total;
node = rb_entry(nd, struct callchain_node, rb_node);
- hits = callchain_cumul_hits(node);
- percent = 100.0 * hits / total;
+ new_parent = *parent;
+ need_new_parent = !has_single_node;
+
+ callchain_node__make_parent_list(node);
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+ }
+}
+
+static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter;
+ char buf[64];
+ char *str, *str_alloc = NULL;
+ bool first = true;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
+
+ callchain_node__make_parent_list(node);
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ gtk_tree_store_append(store, &iter, parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ gtk_tree_store_set(store, &iter, col, str, -1);
+
+ free(str_alloc);
+ }
+}
+
+static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+ bool has_single_node = (rb_first(root) == rb_last(root));
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter, new_parent;
+ bool need_new_parent;
+ u64 child_total;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
new_parent = *parent;
need_new_parent = !has_single_node && (node->val_nr > 1);
@@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
gtk_tree_store_append(store, &iter, &new_parent);
- scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
gtk_tree_store_set(store, &iter, 0, buf, -1);
callchain_list__sym_name(chain, buf, sizeof(buf), false);
@@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
child_total = total;
/* Now 'iter' contains info of the last callchain_list */
- perf_gtk__add_callchain(&node->rb_root, store, &iter, col,
- child_total);
+ perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
+ child_total);
}
}
+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ if (callchain_param.mode == CHAIN_FLAT)
+ perf_gtk__add_callchain_flat(root, store, parent, col, total);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ perf_gtk__add_callchain_folded(root, store, parent, col, total);
+ else
+ perf_gtk__add_callchain_graph(root, store, parent, col, total);
+}
+
static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
GtkTreeViewColumn *col __maybe_unused,
gpointer user_data __maybe_unused)
@@ -188,7 +318,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
col_idx = 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
/*
@@ -238,7 +368,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
col_idx = 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, h->hists))
continue;
if (fmt->color)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 5029ba2b55af..bf2a66e254ea 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -443,7 +443,6 @@ LIST_HEAD(perf_hpp__sort_list);
void perf_hpp__init(void)
{
- struct list_head *list;
int i;
for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
@@ -484,17 +483,6 @@ void perf_hpp__init(void)
if (symbol_conf.show_total_period)
hpp_dimension__add_output(PERF_HPP__PERIOD);
-
- /* prepend overhead field for backward compatiblity. */
- list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list;
- if (list_empty(list))
- list_add(list, &perf_hpp__sort_list);
-
- if (symbol_conf.cumulate_callchain) {
- list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list;
- if (list_empty(list))
- list_add(list, &perf_hpp__sort_list);
- }
}
void perf_hpp__column_register(struct perf_hpp_fmt *format)
@@ -619,7 +607,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
struct perf_hpp dummy_hpp;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (first)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index dfcbc90146ef..387110d50b00 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
return ret;
}
-static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
+static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
+ struct callchain_list *chain,
int depth, int depth_mask, int period,
- u64 total_samples, u64 hits,
- int left_margin)
+ u64 total_samples, int left_margin)
{
int i;
size_t ret = 0;
@@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
else
ret += fprintf(fp, " ");
if (!period && i == depth - 1) {
- double percent;
-
- percent = hits * 100.0 / total_samples;
- ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent);
+ ret += fprintf(fp, "--");
+ ret += callchain_node__fprintf_value(node, fp, total_samples);
+ ret += fprintf(fp, "--");
} else
ret += fprintf(fp, "%s", " ");
}
@@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
int depth_mask, int left_margin)
{
struct rb_node *node, *next;
- struct callchain_node *child;
+ struct callchain_node *child = NULL;
struct callchain_list *chain;
int new_depth_mask = depth_mask;
u64 remaining;
size_t ret = 0;
int i;
uint entries_printed = 0;
+ int cumul_count = 0;
remaining = total_samples;
@@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
child = rb_entry(node, struct callchain_node, rb_node);
cumul = callchain_cumul_hits(child);
remaining -= cumul;
+ cumul_count += callchain_cumul_counts(child);
/*
* The depth mask manages the output of pipes that show
@@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
left_margin);
i = 0;
list_for_each_entry(chain, &child->val, list) {
- ret += ipchain__fprintf_graph(fp, chain, depth,
+ ret += ipchain__fprintf_graph(fp, child, chain, depth,
new_depth_mask, i++,
total_samples,
- cumul,
left_margin);
}
@@ -143,14 +143,23 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (callchain_param.mode == CHAIN_GRAPH_REL &&
remaining && remaining != total_samples) {
+ struct callchain_node rem_node = {
+ .hit = remaining,
+ };
if (!rem_sq_bracket)
return ret;
+ if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
+ rem_node.count = child->parent->children_count - cumul_count;
+ if (rem_node.count <= 0)
+ return ret;
+ }
+
new_depth_mask &= ~(1 << (depth - 1));
- ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
+ ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
new_depth_mask, 0, total_samples,
- remaining, left_margin);
+ left_margin);
}
return ret;
@@ -243,12 +252,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
struct rb_node *rb_node = rb_first(tree);
while (rb_node) {
- double percent;
-
chain = rb_entry(rb_node, struct callchain_node, rb_node);
- percent = chain->hit * 100.0 / total_samples;
- ret = percent_color_fprintf(fp, " %6.2f%%\n", percent);
+ ret += fprintf(fp, " ");
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, "\n");
ret += __callchain__fprintf_flat(fp, chain, total_samples);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)
@@ -260,6 +268,57 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
return ret;
}
+static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+ const char *sep = symbol_conf.field_sep ?: ";";
+ struct callchain_list *chain;
+ size_t ret = 0;
+ char bf[1024];
+ bool first;
+
+ if (!node)
+ return 0;
+
+ ret += __callchain__fprintf_folded(fp, node->parent);
+
+ first = (ret == 0);
+ list_for_each_entry(chain, &node->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ ret += fprintf(fp, "%s%s", first ? "" : sep,
+ callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
+ first = false;
+ }
+
+ return ret;
+}
+
+static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
+ u64 total_samples)
+{
+ size_t ret = 0;
+ u32 entries_printed = 0;
+ struct callchain_node *chain;
+ struct rb_node *rb_node = rb_first(tree);
+
+ while (rb_node) {
+
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, " ");
+ ret += __callchain__fprintf_folded(fp, chain);
+ ret += fprintf(fp, "\n");
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+
+ rb_node = rb_next(rb_node);
+ }
+
+ return ret;
+}
+
static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
u64 total_samples, int left_margin,
FILE *fp)
@@ -278,6 +337,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
case CHAIN_FLAT:
return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
break;
+ case CHAIN_FOLDED:
+ return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
+ break;
case CHAIN_NONE:
break;
default:
@@ -323,7 +385,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
return 0;
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, he->hists))
continue;
/*
@@ -402,7 +464,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
fprintf(fp, "# ");
perf_hpp__for_each_format(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (!first)
@@ -428,7 +490,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
perf_hpp__for_each_format(fmt) {
unsigned int i;
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, hists))
continue;
if (!first)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 591b3fe3ed49..5eec53a3f4ac 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -6,24 +6,20 @@ libperf-y += config.o
libperf-y += ctype.o
libperf-y += db-export.o
libperf-y += env.o
-libperf-y += environment.o
libperf-y += event.o
libperf-y += evlist.o
libperf-y += evsel.o
-libperf-y += exec_cmd.o
-libperf-y += find_next_bit.o
-libperf-y += help.o
+libperf-y += find_bit.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
libperf-y += llvm-utils.o
-libperf-y += parse-options.o
libperf-y += parse-events.o
libperf-y += perf_regs.o
libperf-y += path.o
libperf-y += rbtree.o
+libperf-y += libstring.o
libperf-y += bitmap.o
libperf-y += hweight.o
-libperf-y += run-command.o
libperf-y += quote.o
libperf-y += strbuf.o
libperf-y += string.o
@@ -32,11 +28,9 @@ libperf-y += strfilter.o
libperf-y += top.o
libperf-y += usage.o
libperf-y += wrapper.o
-libperf-y += sigchain.o
libperf-y += dso.o
libperf-y += symbol.o
libperf-y += color.o
-libperf-y += pager.o
libperf-y += header.o
libperf-y += callchain.o
libperf-y += values.o
@@ -86,8 +80,11 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
libperf-y += parse-branch-options.o
libperf-y += parse-regs-options.o
+libperf-y += term.o
+libperf-y += help-unknown-cmd.o
libperf-$(CONFIG_LIBBPF) += bpf-loader.o
+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
libperf-$(CONFIG_LIBELF) += symbol-elf.o
libperf-$(CONFIG_LIBELF) += probe-file.o
libperf-$(CONFIG_LIBELF) += probe-event.o
@@ -110,7 +107,6 @@ libperf-$(CONFIG_ZLIB) += zlib.o
libperf-$(CONFIG_LZMA) += lzma.o
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))"
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
@@ -136,8 +132,10 @@ CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
@@ -145,7 +143,11 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
-$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
@@ -153,6 +155,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1dd1949b0e79..b795b6994144 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -65,6 +65,11 @@ static int call__parse(struct ins_operands *ops)
name++;
+#ifdef __arm__
+ if (strchr(name, '+'))
+ return -1;
+#endif
+
tok = strchr(name, '>');
if (tok == NULL)
return -1;
@@ -246,7 +251,11 @@ static int mov__parse(struct ins_operands *ops)
return -1;
target = ++s;
+#ifdef __arm__
+ comment = strchr(s, ';');
+#else
comment = strchr(s, '#');
+#endif
if (comment != NULL)
s = comment - 1;
@@ -354,6 +363,20 @@ static struct ins instructions[] = {
{ .name = "addq", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
+#ifdef __arm__
+ { .name = "b", .ops = &jump_ops, }, // might also be a call
+ { .name = "bcc", .ops = &jump_ops, },
+ { .name = "bcs", .ops = &jump_ops, },
+ { .name = "beq", .ops = &jump_ops, },
+ { .name = "bge", .ops = &jump_ops, },
+ { .name = "bgt", .ops = &jump_ops, },
+ { .name = "bhi", .ops = &jump_ops, },
+ { .name = "bl", .ops = &call_ops, },
+ { .name = "blt", .ops = &jump_ops, },
+ { .name = "bls", .ops = &jump_ops, },
+ { .name = "blx", .ops = &call_ops, },
+ { .name = "bne", .ops = &jump_ops, },
+#endif
{ .name = "bts", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 7f10430af39c..360fda01f3b0 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -45,7 +45,7 @@
#include "event.h"
#include "session.h"
#include "debug.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "intel-pt.h"
#include "intel-bts.h"
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 4c50411371db..540a7efa657e 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -5,11 +5,15 @@
* Copyright (C) 2015 Huawei Inc.
*/
+#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include <linux/err.h>
+#include <linux/string.h>
#include "perf.h"
#include "debug.h"
#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "llvm-utils.h"
#include "probe-event.h"
#include "probe-finder.h" // for MAX_PROBES
#include "llvm-utils.h"
@@ -32,6 +36,10 @@ DEFINE_PRINT_FN(debug, 1)
struct bpf_prog_priv {
struct perf_probe_event pev;
+ bool need_prologue;
+ struct bpf_insn *insns_buf;
+ int nr_types;
+ int *type_mapping;
};
static bool libbpf_initialized;
@@ -106,10 +114,178 @@ bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
struct bpf_prog_priv *priv = _priv;
cleanup_perf_probe_events(&priv->pev, 1);
+ zfree(&priv->insns_buf);
+ zfree(&priv->type_mapping);
free(priv);
}
static int
+prog_config__exec(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = true;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__module(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = false;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__bool(const char *value, bool *pbool, bool invert)
+{
+ int err;
+ bool bool_value;
+
+ if (!pbool)
+ return -EINVAL;
+
+ err = strtobool(value, &bool_value);
+ if (err)
+ return err;
+
+ *pbool = invert ? !bool_value : bool_value;
+ return 0;
+}
+
+static int
+prog_config__inlines(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.no_inlines, true);
+}
+
+static int
+prog_config__force(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.force_add, false);
+}
+
+static struct {
+ const char *key;
+ const char *usage;
+ const char *desc;
+ int (*func)(const char *, struct perf_probe_event *);
+} bpf_prog_config_terms[] = {
+ {
+ .key = "exec",
+ .usage = "exec=<full path of file>",
+ .desc = "Set uprobe target",
+ .func = prog_config__exec,
+ },
+ {
+ .key = "module",
+ .usage = "module=<module name> ",
+ .desc = "Set kprobe module",
+ .func = prog_config__module,
+ },
+ {
+ .key = "inlines",
+ .usage = "inlines=[yes|no] ",
+ .desc = "Probe at inline symbol",
+ .func = prog_config__inlines,
+ },
+ {
+ .key = "force",
+ .usage = "force=[yes|no] ",
+ .desc = "Forcibly add events with existing name",
+ .func = prog_config__force,
+ },
+};
+
+static int
+do_prog_config(const char *key, const char *value,
+ struct perf_probe_event *pev)
+{
+ unsigned int i;
+
+ pr_debug("config bpf program: %s=%s\n", key, value);
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
+ return bpf_prog_config_terms[i].func(value, pev);
+
+ pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
+ key, value);
+
+ pr_debug("\nHint: Valid options are:\n");
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
+ bpf_prog_config_terms[i].desc);
+ pr_debug("\n");
+
+ return -BPF_LOADER_ERRNO__PROGCONF_TERM;
+}
+
+static const char *
+parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+ char *text = strdup(config_str);
+ char *sep, *line;
+ const char *main_str = NULL;
+ int err = 0;
+
+ if (!text) {
+ pr_debug("No enough memory: dup config_str failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ line = text;
+ while ((sep = strchr(line, ';'))) {
+ char *equ;
+
+ *sep = '\0';
+ equ = strchr(line, '=');
+ if (!equ) {
+ pr_warning("WARNING: invalid config in BPF object: %s\n",
+ line);
+ pr_warning("\tShould be 'key=value'.\n");
+ goto nextline;
+ }
+ *equ = '\0';
+
+ err = do_prog_config(line, equ + 1, pev);
+ if (err)
+ break;
+nextline:
+ line = sep + 1;
+ }
+
+ if (!err)
+ main_str = config_str + (line - text);
+ free(text);
+
+ return err ? ERR_PTR(err) : main_str;
+}
+
+static int
+parse_prog_config(const char *config_str, struct perf_probe_event *pev)
+{
+ int err;
+ const char *main_str = parse_prog_config_kvpair(config_str, pev);
+
+ if (IS_ERR(main_str))
+ return PTR_ERR(main_str);
+
+ err = parse_perf_probe_command(main_str, pev);
+ if (err < 0) {
+ pr_debug("bpf: '%s' is not a valid config string\n",
+ config_str);
+ /* parse failed, don't need clear pev. */
+ return -BPF_LOADER_ERRNO__CONFIG;
+ }
+ return 0;
+}
+
+static int
config_bpf_program(struct bpf_program *prog)
{
struct perf_probe_event *pev = NULL;
@@ -117,6 +293,10 @@ config_bpf_program(struct bpf_program *prog)
const char *config_str;
int err;
+ /* Initialize per-program probing setting */
+ probe_conf.no_inlines = false;
+ probe_conf.force_add = false;
+
config_str = bpf_program__title(prog, false);
if (IS_ERR(config_str)) {
pr_debug("bpf: unable to get title for program\n");
@@ -131,13 +311,9 @@ config_bpf_program(struct bpf_program *prog)
pev = &priv->pev;
pr_debug("bpf: config program '%s'\n", config_str);
- err = parse_perf_probe_command(config_str, pev);
- if (err < 0) {
- pr_debug("bpf: '%s' is not a valid config string\n",
- config_str);
- err = -BPF_LOADER_ERRNO__CONFIG;
+ err = parse_prog_config(config_str, pev);
+ if (err)
goto errout;
- }
if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
@@ -197,6 +373,220 @@ static int bpf__prepare_probe(void)
return err;
}
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+ struct bpf_insn *orig_insns, int orig_insns_cnt,
+ struct bpf_prog_prep_result *res)
+{
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ struct bpf_prog_priv *priv;
+ struct bpf_insn *buf;
+ size_t prologue_cnt = 0;
+ int i, err;
+
+ err = bpf_program__get_private(prog, (void **)&priv);
+ if (err || !priv)
+ goto errout;
+
+ pev = &priv->pev;
+
+ if (n < 0 || n >= priv->nr_types)
+ goto errout;
+
+ /* Find a tev belongs to that type */
+ for (i = 0; i < pev->ntevs; i++) {
+ if (priv->type_mapping[i] == n)
+ break;
+ }
+
+ if (i >= pev->ntevs) {
+ pr_debug("Internal error: prologue type %d not found\n", n);
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ tev = &pev->tevs[i];
+
+ buf = priv->insns_buf;
+ err = bpf__gen_prologue(tev->args, tev->nargs,
+ buf, &prologue_cnt,
+ BPF_MAXINSNS - orig_insns_cnt);
+ if (err) {
+ const char *title;
+
+ title = bpf_program__title(prog, false);
+ if (!title)
+ title = "[unknown]";
+
+ pr_debug("Failed to generate prologue for program %s\n",
+ title);
+ return err;
+ }
+
+ memcpy(&buf[prologue_cnt], orig_insns,
+ sizeof(struct bpf_insn) * orig_insns_cnt);
+
+ res->new_insn_ptr = buf;
+ res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+ res->pfd = NULL;
+ return 0;
+
+errout:
+ pr_debug("Internal error in preproc_gen_prologue\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+}
+
+/*
+ * compare_tev_args is reflexive, transitive and antisymmetric.
+ * I can proof it but this margin is too narrow to contain.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+ int i, ret;
+ const struct probe_trace_event *tev1 =
+ *(const struct probe_trace_event **)ptev1;
+ const struct probe_trace_event *tev2 =
+ *(const struct probe_trace_event **)ptev2;
+
+ ret = tev2->nargs - tev1->nargs;
+ if (ret)
+ return ret;
+
+ for (i = 0; i < tev1->nargs; i++) {
+ struct probe_trace_arg *arg1, *arg2;
+ struct probe_trace_arg_ref *ref1, *ref2;
+
+ arg1 = &tev1->args[i];
+ arg2 = &tev2->args[i];
+
+ ret = strcmp(arg1->value, arg2->value);
+ if (ret)
+ return ret;
+
+ ref1 = arg1->ref;
+ ref2 = arg2->ref;
+
+ while (ref1 && ref2) {
+ ret = ref2->offset - ref1->offset;
+ if (ret)
+ return ret;
+
+ ref1 = ref1->next;
+ ref2 = ref2->next;
+ }
+
+ if (ref1 || ref2)
+ return ref2 ? 1 : -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Assign a type number to each tevs in a pev.
+ * mapping is an array with same slots as tevs in that pev.
+ * nr_types will be set to number of types.
+ */
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+ int *nr_types)
+{
+ int i, type = 0;
+ struct probe_trace_event **ptevs;
+
+ size_t array_sz = sizeof(*ptevs) * pev->ntevs;
+
+ ptevs = malloc(array_sz);
+ if (!ptevs) {
+ pr_debug("No ehough memory: alloc ptevs failed\n");
+ return -ENOMEM;
+ }
+
+ pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+ for (i = 0; i < pev->ntevs; i++)
+ ptevs[i] = &pev->tevs[i];
+
+ qsort(ptevs, pev->ntevs, sizeof(*ptevs),
+ compare_tev_args);
+
+ for (i = 0; i < pev->ntevs; i++) {
+ int n;
+
+ n = ptevs[i] - pev->tevs;
+ if (i == 0) {
+ mapping[n] = type;
+ pr_debug("mapping[%d]=%d\n", n, type);
+ continue;
+ }
+
+ if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
+ mapping[n] = type;
+ else
+ mapping[n] = ++type;
+
+ pr_debug("mapping[%d]=%d\n", n, mapping[n]);
+ }
+ free(ptevs);
+ *nr_types = type + 1;
+
+ return 0;
+}
+
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+ struct perf_probe_event *pev;
+ struct bpf_prog_priv *priv;
+ bool need_prologue = false;
+ int err, i;
+
+ err = bpf_program__get_private(prog, (void **)&priv);
+ if (err || !priv) {
+ pr_debug("Internal error when hook preprocessor\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ struct probe_trace_event *tev = &pev->tevs[i];
+
+ if (tev->nargs > 0) {
+ need_prologue = true;
+ break;
+ }
+ }
+
+ /*
+ * Since all tevs don't have argument, we don't need generate
+ * prologue.
+ */
+ if (!need_prologue) {
+ priv->need_prologue = false;
+ return 0;
+ }
+
+ priv->need_prologue = true;
+ priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+ if (!priv->insns_buf) {
+ pr_debug("No enough memory: alloc insns_buf failed\n");
+ return -ENOMEM;
+ }
+
+ priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
+ if (!priv->type_mapping) {
+ pr_debug("No enough memory: alloc type_mapping failed\n");
+ return -ENOMEM;
+ }
+ memset(priv->type_mapping, -1,
+ sizeof(int) * pev->ntevs);
+
+ err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
+ if (err)
+ return err;
+
+ err = bpf_program__set_prep(prog, priv->nr_types,
+ preproc_gen_prologue);
+ return err;
+}
+
int bpf__probe(struct bpf_object *obj)
{
int err = 0;
@@ -231,6 +621,18 @@ int bpf__probe(struct bpf_object *obj)
pr_debug("bpf_probe: failed to apply perf probe events");
goto out;
}
+
+ /*
+ * After probing, let's consider prologue, which
+ * adds program fetcher to BPF programs.
+ *
+ * hook_load_preprocessorr() hooks pre-processor
+ * to bpf_program, let it generate prologue
+ * dynamically during loading.
+ */
+ err = hook_load_preprocessor(prog);
+ if (err)
+ goto out;
}
out:
return err < 0 ? err : 0;
@@ -314,7 +716,14 @@ int bpf__foreach_tev(struct bpf_object *obj,
for (i = 0; i < pev->ntevs; i++) {
tev = &pev->tevs[i];
- fd = bpf_program__fd(prog);
+ if (priv->need_prologue) {
+ int type = priv->type_mapping[i];
+
+ fd = bpf_program__nth_fd(prog, type);
+ } else {
+ fd = bpf_program__fd(prog);
+ }
+
if (fd < 0) {
pr_debug("bpf: failed to get file descriptor\n");
return fd;
@@ -340,6 +749,10 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
[ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
[ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
+ [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string",
+ [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
+ [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
+ [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue",
};
static int
@@ -420,7 +833,11 @@ int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
int err, char *buf, size_t size)
{
bpf__strerror_head(err, buf, size);
- bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'");
+ case BPF_LOADER_ERRNO__PROGCONF_TERM: {
+ scnprintf(buf, size, "%s (add -v to see detail)", emsg);
+ break;
+ }
+ bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
bpf__strerror_entry(EACCES, "You need to be root");
bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 9caf3ae4acf3..6fdc0457e2b6 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -20,6 +20,10 @@ enum bpf_loader_errno {
BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */
BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */
BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */
+ BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
+ BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
+ BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
+ BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */
__BPF_LOADER_ERRNO__END,
};
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000000000000..6cdbee119ceb
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,455 @@
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <bpf/libbpf.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include <dwarf-regs.h>
+#include <linux/filter.h>
+
+#define BPF_REG_SIZE 8
+
+#define JMP_TO_ERROR_CODE -1
+#define JMP_TO_SUCCESS_CODE -2
+#define JMP_TO_USER_CODE -3
+
+struct bpf_insn_pos {
+ struct bpf_insn *begin;
+ struct bpf_insn *end;
+ struct bpf_insn *pos;
+};
+
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+ return pos->pos - pos->begin;
+}
+
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+ if (!pos->pos)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ if (pos->pos + 1 >= pos->end) {
+ pr_err("bpf prologue: prologue too long\n");
+ pos->pos = NULL;
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ }
+
+ *(pos->pos)++ = new_insn;
+ return 0;
+}
+
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+ if (!pos->pos || pos->pos >= pos->end)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ return 0;
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
+/*
+ * Give a register name (in 'reg'), generate instruction to
+ * load register into an eBPF register rd:
+ * 'ldd target_reg, offset(ctx_reg)', where:
+ * ctx_reg is pre initialized to pointer of 'struct pt_regs'.
+ */
+static int
+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
+ const char *reg, int target_reg)
+{
+ int offset = regs_query_register_offset(reg);
+
+ if (offset < 0) {
+ pr_err("bpf: prologue: failed to get register %s\n",
+ reg);
+ return offset;
+ }
+ ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
+
+ return check_pos(pos);
+}
+
+/*
+ * Generate a BPF_FUNC_probe_read function call.
+ *
+ * src_base_addr_reg is a register holding base address,
+ * dst_addr_reg is a register holding dest address (on stack),
+ * result is:
+ *
+ * *[dst_addr_reg] = *([src_base_addr_reg] + offset)
+ *
+ * Arguments of BPF_FUNC_probe_read:
+ * ARG1: ptr to stack (dest)
+ * ARG2: size (8)
+ * ARG3: unsafe ptr (src)
+ */
+static int
+gen_read_mem(struct bpf_insn_pos *pos,
+ int src_base_addr_reg,
+ int dst_addr_reg,
+ long offset)
+{
+ /* mov arg3, src_base_addr_reg */
+ if (src_base_addr_reg != BPF_REG_ARG3)
+ ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
+ /* add arg3, #offset */
+ if (offset)
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
+
+ /* mov arg2, #reg_size */
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
+
+ /* mov arg1, dst_addr_reg */
+ if (dst_addr_reg != BPF_REG_ARG1)
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
+
+ /* Call probe_read */
+ ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+ /*
+ * Error processing: if read fail, goto error code,
+ * will be relocated. Target should be the start of
+ * error processing code.
+ */
+ ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
+ pos);
+
+ return check_pos(pos);
+}
+
+/*
+ * Each arg should be bare register. Fetch and save them into argument
+ * registers (r3 - r5).
+ *
+ * BPF_REG_1 should have been initialized with pointer to
+ * 'struct pt_regs'.
+ */
+static int
+gen_prologue_fastpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nargs; i++) {
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
+ BPF_PROLOGUE_START_ARG_REG + i);
+ if (err)
+ goto errout;
+ }
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+/*
+ * Slow path:
+ * At least one argument has the form of 'offset($rx)'.
+ *
+ * Following code first stores them into stack, then loads all of then
+ * to r2 - r5.
+ * Before final loading, the final result should be:
+ *
+ * low address
+ * BPF_REG_FP - 24 ARG3
+ * BPF_REG_FP - 16 ARG2
+ * BPF_REG_FP - 8 ARG1
+ * BPF_REG_FP
+ * high address
+ *
+ * For each argument (described as: offn(...off2(off1(reg)))),
+ * generates following code:
+ *
+ * r7 <- fp
+ * r7 <- r7 - stack_offset // Ideal code should initialize r7 using
+ * // fp before generating args. However,
+ * // eBPF won't regard r7 as stack pointer
+ * // if it is generated by minus 8 from
+ * // another stack pointer except fp.
+ * // This is why we have to set r7
+ * // to fp for each variable.
+ * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
+ * (r7) <- r3 // skip following instructions for bare reg
+ * r3 <- r3 + off1 . // skip if off1 == 0
+ * r2 <- 8 \
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * r3 <- (r7)
+ * r3 <- r3 + off2 . // skip if off2 == 0
+ * r2 <- 8 \ // r2 may be broken by probe_read, so set again
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * ...
+ */
+static int
+gen_prologue_slowpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int err, i;
+
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg *arg = &args[i];
+ const char *reg = arg->value;
+ struct probe_trace_arg_ref *ref = NULL;
+ int stack_offset = (i + 1) * -8;
+
+ pr_debug("prologue: fetch arg %d, base reg is %s\n",
+ i, reg);
+
+ /* value of base register is stored into ARG3 */
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
+ BPF_REG_ARG3);
+ if (err) {
+ pr_err("prologue: failed to get offset of register %s\n",
+ reg);
+ goto errout;
+ }
+
+ /* Make r7 the stack pointer. */
+ ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
+ /* r7 += -8 */
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
+ /*
+ * Store r3 (base register) onto stack
+ * Ensure fp[offset] is set.
+ * fp is the only valid base register when storing
+ * into stack. We are not allowed to use r7 as base
+ * register here.
+ */
+ ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
+ stack_offset), pos);
+
+ ref = arg->ref;
+ while (ref) {
+ pr_debug("prologue: arg %d: offset %ld\n",
+ i, ref->offset);
+ err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
+ ref->offset);
+ if (err) {
+ pr_err("prologue: failed to generate probe_read function call\n");
+ goto errout;
+ }
+
+ ref = ref->next;
+ /*
+ * Load previous result into ARG3. Use
+ * BPF_REG_FP instead of r7 because verifier
+ * allows FP based addressing only.
+ */
+ if (ref)
+ ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
+ BPF_REG_FP, stack_offset), pos);
+ }
+ }
+
+ /* Final pass: read to registers */
+ for (i = 0; i < nargs; i++)
+ ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i,
+ BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
+
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+static int
+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
+ struct bpf_insn *success_code, struct bpf_insn *user_code)
+{
+ struct bpf_insn *insn;
+
+ if (check_pos(pos))
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ for (insn = pos->begin; insn < pos->pos; insn++) {
+ struct bpf_insn *target;
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode;
+
+ if (class != BPF_JMP)
+ continue;
+ opcode = BPF_OP(insn->code);
+ if (opcode == BPF_CALL)
+ continue;
+
+ switch (insn->off) {
+ case JMP_TO_ERROR_CODE:
+ target = error_code;
+ break;
+ case JMP_TO_SUCCESS_CODE:
+ target = success_code;
+ break;
+ case JMP_TO_USER_CODE:
+ target = user_code;
+ break;
+ default:
+ pr_err("bpf prologue: internal error: relocation failed\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ insn->off = target - (insn + 1);
+ }
+ return 0;
+}
+
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space)
+{
+ struct bpf_insn *success_code = NULL;
+ struct bpf_insn *error_code = NULL;
+ struct bpf_insn *user_code = NULL;
+ struct bpf_insn_pos pos;
+ bool fastpath = true;
+ int err = 0, i;
+
+ if (!new_prog || !new_cnt)
+ return -EINVAL;
+
+ if (cnt_space > BPF_MAXINSNS)
+ cnt_space = BPF_MAXINSNS;
+
+ pos.begin = new_prog;
+ pos.end = new_prog + cnt_space;
+ pos.pos = new_prog;
+
+ if (!nargs) {
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
+ &pos);
+
+ if (check_pos(&pos))
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+ }
+
+ if (nargs > BPF_PROLOGUE_MAX_ARGS) {
+ pr_warning("bpf: prologue: %d arguments are dropped\n",
+ nargs - BPF_PROLOGUE_MAX_ARGS);
+ nargs = BPF_PROLOGUE_MAX_ARGS;
+ }
+
+ /* First pass: validation */
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg_ref *ref = args[i].ref;
+
+ if (args[i].value[0] == '@') {
+ /* TODO: fetch global variable */
+ pr_err("bpf: prologue: global %s%+ld not support\n",
+ args[i].value, ref ? ref->offset : 0);
+ return -ENOTSUP;
+ }
+
+ while (ref) {
+ /* fastpath is true if all args has ref == NULL */
+ fastpath = false;
+
+ /*
+ * Instruction encodes immediate value using
+ * s32, ref->offset is long. On systems which
+ * can't fill long in s32, refuse to process if
+ * ref->offset too large (or small).
+ */
+#ifdef __LP64__
+#define OFFSET_MAX ((1LL << 31) - 1)
+#define OFFSET_MIN ((1LL << 31) * -1)
+ if (ref->offset > OFFSET_MAX ||
+ ref->offset < OFFSET_MIN) {
+ pr_err("bpf: prologue: offset out of bound: %ld\n",
+ ref->offset);
+ return -BPF_LOADER_ERRNO__PROLOGUEOOB;
+ }
+#endif
+ ref = ref->next;
+ }
+ }
+ pr_debug("prologue: pass validation\n");
+
+ if (fastpath) {
+ /* If all variables are registers... */
+ pr_debug("prologue: fast path\n");
+ err = gen_prologue_fastpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ } else {
+ pr_debug("prologue: slow path\n");
+
+ /* Initialization: move ctx to a callee saved register. */
+ ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
+
+ err = gen_prologue_slowpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ /*
+ * start of ERROR_CODE (only slow pass needs error code)
+ * mov r2 <- 1 // r2 is error number
+ * mov r3 <- 0 // r3, r4... should be touched or
+ * // verifier would complain
+ * mov r4 <- 0
+ * ...
+ * goto usercode
+ */
+ error_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
+ &pos);
+
+ for (i = 0; i < nargs; i++)
+ ins(BPF_ALU64_IMM(BPF_MOV,
+ BPF_PROLOGUE_START_ARG_REG + i,
+ 0),
+ &pos);
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
+ &pos);
+ }
+
+ /*
+ * start of SUCCESS_CODE:
+ * mov r2 <- 0
+ * goto usercode // skip
+ */
+ success_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
+
+ /*
+ * start of USER_CODE:
+ * Restore ctx to r1
+ */
+ user_code = pos.pos;
+ if (!fastpath) {
+ /*
+ * Only slow path needs restoring of ctx. In fast path,
+ * register are loaded directly from r1.
+ */
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
+ err = prologue_relocate(&pos, error_code, success_code,
+ user_code);
+ if (err)
+ goto errout;
+ }
+
+ err = check_pos(&pos);
+ if (err)
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+errout:
+ return err;
+}
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000000..d94cbea12899
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+#include <linux/compiler.h>
+#include <linux/filter.h>
+#include "probe-event.h"
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space);
+#else
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+ int nargs __maybe_unused,
+ struct bpf_insn *new_prog __maybe_unused,
+ size_t *new_cnt,
+ size_t cnt_space __maybe_unused)
+{
+ if (!new_cnt)
+ return -EINVAL;
+ *new_cnt = 0;
+ return -ENOTSUP;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 217b5a60e2ab..6a7e273a514a 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -91,7 +91,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf)
bid += 2;
}
- return raw - build_id;
+ return (bid - bf) + 1;
}
int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index c861373aaed3..07b5d63947b1 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -4,9 +4,12 @@
#include <stdbool.h>
#include "util.h"
#include "strbuf.h"
+#include <subcmd/pager.h>
#include "../perf.h"
#include "../ui/ui.h"
+#include <linux/string.h>
+
#define CMD_EXEC_PATH "--exec-path"
#define CMD_PERF_DIR "--perf-dir="
#define CMD_WORK_TREE "--work-tree="
@@ -18,6 +21,7 @@
#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
typedef int (*config_fn_t)(const char *, const char *, void *);
extern int perf_default_config(const char *, const char *, void *);
@@ -28,11 +32,6 @@ extern int perf_config_bool(const char *, const char *);
extern int config_error_nonbool(const char *);
extern const char *perf_config_dirname(const char *, const char *);
-/* pager.c */
-extern void setup_pager(void);
-extern int pager_in_use(void);
-extern int pager_use_color;
-
char *alias_lookup(const char *alias);
int split_cmdline(char *cmdline, const char ***argv);
@@ -71,9 +70,4 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2
extern char *perf_pathdup(const char *fmt, ...)
__attribute__((format (printf, 1, 2)));
-#ifndef __UCLIBC__
-/* Matches the libc/libbsd function attribute so we declare this unconditionally: */
-extern size_t strlcpy(char *dest, const char *src, size_t size);
-#endif
-
#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 735ad48e1858..53c43eb9489e 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value)
callchain_param.mode = CHAIN_GRAPH_REL;
return 0;
}
+ if (!strncmp(value, "folded", strlen(value))) {
+ callchain_param.mode = CHAIN_FOLDED;
+ return 0;
+ }
return -1;
}
@@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value)
return -1;
}
+static int parse_callchain_value(const char *value)
+{
+ if (!strncmp(value, "percent", strlen(value))) {
+ callchain_param.value = CCVAL_PERCENT;
+ return 0;
+ }
+ if (!strncmp(value, "period", strlen(value))) {
+ callchain_param.value = CCVAL_PERIOD;
+ return 0;
+ }
+ if (!strncmp(value, "count", strlen(value))) {
+ callchain_param.value = CCVAL_COUNT;
+ return 0;
+ }
+ return -1;
+}
+
static int
__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
{
@@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
if (!parse_callchain_mode(tok) ||
!parse_callchain_order(tok) ||
- !parse_callchain_sort_key(tok)) {
+ !parse_callchain_sort_key(tok) ||
+ !parse_callchain_value(tok)) {
/* parsing ok - move on to the next */
try_stack_size = false;
goto next;
@@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
switch (mode) {
case CHAIN_FLAT:
+ case CHAIN_FOLDED:
if (rnode->hit < chain->hit)
p = &(*p)->rb_left;
else
@@ -267,6 +290,7 @@ static void
sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
u64 min_hit, struct callchain_param *param __maybe_unused)
{
+ *rb_root = RB_ROOT;
__sort_chain_flat(rb_root, &root->node, min_hit);
}
@@ -338,6 +362,7 @@ int callchain_register_param(struct callchain_param *param)
param->sort = sort_chain_graph_rel;
break;
case CHAIN_FLAT:
+ case CHAIN_FOLDED:
param->sort = sort_chain_flat;
break;
case CHAIN_NONE:
@@ -363,6 +388,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
}
new->parent = parent;
INIT_LIST_HEAD(&new->val);
+ INIT_LIST_HEAD(&new->parent_val);
if (inherit_children) {
struct rb_node *n;
@@ -431,6 +457,8 @@ add_child(struct callchain_node *parent,
new->children_hit = 0;
new->hit = period;
+ new->children_count = 0;
+ new->count = 1;
return new;
}
@@ -478,6 +506,9 @@ split_add_child(struct callchain_node *parent,
parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
+ new->count = parent->count;
+ new->children_count = parent->children_count;
+ parent->children_count = callchain_cumul_counts(new);
/* create a new child for the new branch if any */
if (idx_total < cursor->nr) {
@@ -488,6 +519,8 @@ split_add_child(struct callchain_node *parent,
parent->hit = 0;
parent->children_hit += period;
+ parent->count = 0;
+ parent->children_count += 1;
node = callchain_cursor_current(cursor);
new = add_child(parent, cursor, period);
@@ -510,6 +543,7 @@ split_add_child(struct callchain_node *parent,
rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
} else {
parent->hit = period;
+ parent->count = 1;
}
}
@@ -556,6 +590,7 @@ append_chain_children(struct callchain_node *root,
inc_children_hit:
root->children_hit += period;
+ root->children_count++;
}
static int
@@ -608,6 +643,7 @@ append_chain(struct callchain_node *root,
/* we match 100% of the path, increment the hit */
if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
+ root->count++;
return 0;
}
@@ -799,12 +835,72 @@ char *callchain_list__sym_name(struct callchain_list *cl,
return bf;
}
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ scnprintf(bf, bfsize, "%"PRIu64, period);
+ break;
+ case CCVAL_COUNT:
+ scnprintf(bf, bfsize, "%u", count);
+ break;
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ scnprintf(bf, bfsize, "%.2f%%", percent);
+ break;
+ }
+ return bf;
+}
+
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ return fprintf(fp, "%"PRIu64, period);
+ case CCVAL_COUNT:
+ return fprintf(fp, "%u", count);
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ return percent_color_fprintf(fp, "%.2f%%", percent);
+ }
+ return 0;
+}
+
static void free_callchain_node(struct callchain_node *node)
{
struct callchain_list *list, *tmp;
struct callchain_node *child;
struct rb_node *n;
+ list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
+ list_del(&list->list);
+ free(list);
+ }
+
list_for_each_entry_safe(list, tmp, &node->val, list) {
list_del(&list->list);
free(list);
@@ -828,3 +924,69 @@ void free_callchain(struct callchain_root *root)
free_callchain_node(&root->node);
}
+
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+ u64 child_hits = 0;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+
+ child_hits += decay_callchain_node(child);
+ n = rb_next(n);
+ }
+
+ node->hit = (node->hit * 7) / 8;
+ node->children_hit = child_hits;
+
+ return node->hit;
+}
+
+void decay_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ decay_callchain_node(&root->node);
+}
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+ struct callchain_node *parent = node->parent;
+ struct callchain_list *chain, *new;
+ LIST_HEAD(head);
+
+ while (parent) {
+ list_for_each_entry_reverse(chain, &parent->val, list) {
+ new = malloc(sizeof(*new));
+ if (new == NULL)
+ goto out;
+ *new = *chain;
+ new->has_children = false;
+ list_add_tail(&new->list, &head);
+ }
+ parent = parent->parent;
+ }
+
+ list_for_each_entry_safe_reverse(chain, new, &head, list)
+ list_move_tail(&chain->list, &node->parent_val);
+
+ if (!list_empty(&node->parent_val)) {
+ chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+ chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+
+ chain = list_first_entry(&node->val, struct callchain_list, list);
+ chain->has_children = false;
+ }
+ return 0;
+
+out:
+ list_for_each_entry_safe(chain, new, &head, list) {
+ list_del(&chain->list);
+ free(chain);
+ }
+ return -ENOMEM;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index fce8161e54db..18dd22269764 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -24,12 +24,13 @@
#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
#define CALLCHAIN_REPORT_HELP \
- HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \
+ HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
- HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n"
+ HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+ HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
enum perf_call_graph_mode {
CALLCHAIN_NONE,
@@ -43,7 +44,8 @@ enum chain_mode {
CHAIN_NONE,
CHAIN_FLAT,
CHAIN_GRAPH_ABS,
- CHAIN_GRAPH_REL
+ CHAIN_GRAPH_REL,
+ CHAIN_FOLDED,
};
enum chain_order {
@@ -54,11 +56,14 @@ enum chain_order {
struct callchain_node {
struct callchain_node *parent;
struct list_head val;
+ struct list_head parent_val;
struct rb_node rb_node_in; /* to insert nodes in an rbtree */
struct rb_node rb_node; /* to sort nodes in an output tree */
struct rb_root rb_root_in; /* input tree of children */
struct rb_root rb_root; /* sorted output tree of children */
unsigned int val_nr;
+ unsigned int count;
+ unsigned int children_count;
u64 hit;
u64 children_hit;
};
@@ -78,6 +83,12 @@ enum chain_key {
CCKEY_ADDRESS
};
+enum chain_value {
+ CCVAL_PERCENT,
+ CCVAL_PERIOD,
+ CCVAL_COUNT,
+};
+
struct callchain_param {
bool enabled;
enum perf_call_graph_mode record_mode;
@@ -90,6 +101,7 @@ struct callchain_param {
bool order_set;
enum chain_key key;
bool branch_callstack;
+ enum chain_value value;
};
extern struct callchain_param callchain_param;
@@ -131,6 +143,7 @@ extern __thread struct callchain_cursor callchain_cursor;
static inline void callchain_init(struct callchain_root *root)
{
INIT_LIST_HEAD(&root->node.val);
+ INIT_LIST_HEAD(&root->node.parent_val);
root->node.parent = NULL;
root->node.hit = 0;
@@ -144,6 +157,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node)
return node->hit + node->children_hit;
}
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+ return node->count + node->children_count;
+}
+
int callchain_register_param(struct callchain_param *param);
int callchain_append(struct callchain_root *root,
struct callchain_cursor *cursor,
@@ -229,7 +247,13 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total);
void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 32e12ecfe9c5..90aa1b46b2e5 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -1,6 +1,6 @@
#include "util.h"
#include "../perf.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 9b9565416f90..e5fb88bab9e1 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -24,7 +24,7 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
auto_color:
if (stdout_is_tty < 0)
stdout_is_tty = isatty(1);
- if (stdout_is_tty || (pager_in_use() && pager_use_color)) {
+ if (stdout_is_tty || pager_in_use()) {
char *term = getenv("TERM");
if (term && strcmp(term, "dumb"))
return 1;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 2e452ac1353d..d3e12e30e1d5 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -10,7 +10,7 @@
*/
#include "util.h"
#include "cache.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 10af1e7524fb..fa935093a599 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -5,6 +5,7 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
+#include <linux/bitmap.h>
#include "asm/bug.h"
static struct cpu_map *cpu_map__default_new(void)
@@ -179,6 +180,56 @@ out:
return cpus;
}
+static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+{
+ struct cpu_map *map;
+
+ map = cpu_map__empty_new(cpus->nr);
+ if (map) {
+ unsigned i;
+
+ for (i = 0; i < cpus->nr; i++) {
+ /*
+ * Special treatment for -1, which is not real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (cpus->cpu[i] == (u16) -1)
+ map->map[i] = -1;
+ else
+ map->map[i] = (int) cpus->cpu[i];
+ }
+ }
+
+ return map;
+}
+
+static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask)
+{
+ struct cpu_map *map;
+ int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
+
+ nr = bitmap_weight(mask->mask, nbits);
+
+ map = cpu_map__empty_new(nr);
+ if (map) {
+ int cpu, i = 0;
+
+ for_each_set_bit(cpu, mask->mask, nbits)
+ map->map[i++] = cpu;
+ }
+ return map;
+
+}
+
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data)
+{
+ if (data->type == PERF_CPU_MAP__CPUS)
+ return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+ else
+ return cpu_map__from_mask((struct cpu_map_mask *)data->data);
+}
+
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
{
int i;
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 85f7772457fa..71c41b9efabb 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -17,6 +17,7 @@ struct cpu_map {
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__empty_new(int nr);
struct cpu_map *cpu_map__dummy_new(void);
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data);
struct cpu_map *cpu_map__read(FILE *file);
size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
int cpu_map__get_socket_id(int cpu);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5bfc1198ab46..34cd1e4039d3 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -63,6 +63,7 @@ struct ctf_writer {
struct bt_ctf_field_type *s32;
struct bt_ctf_field_type *u32;
struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
struct bt_ctf_field_type *u64_hex;
};
struct bt_ctf_field_type *array[6];
@@ -982,6 +983,7 @@ do { \
CREATE_INT_TYPE(cw->data.u64, 64, false, false);
CREATE_INT_TYPE(cw->data.s32, 32, true, false);
CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
cw->data.string = bt_ctf_field_type_string_create();
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 425df5c86c9c..e8e9a9dbf5e3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1243,6 +1243,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
if (dso != NULL) {
__dsos__add(dsos, dso);
dso__set_basename(dso);
+ /* Put dso here because __dsos_add already got it */
+ dso__put(dso);
}
return dso;
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6af4f7c36820..7dd5939dea2e 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -25,15 +25,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
{
int i;
- /*
- * If env->cmdline_argv has already been set, do not override it. This allows
- * a command to set the cmdline, parse args and then call another
- * builtin function that implements a command -- e.g, cmd_kvm calling
- * cmd_record.
- */
- if (env->cmdline_argv != NULL)
- return 0;
-
/* do not include NULL termination */
env->cmdline_argv = calloc(argc, sizeof(char *));
if (env->cmdline_argv == NULL)
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
deleted file mode 100644
index 7405123692f1..000000000000
--- a/tools/perf/util/environment.c
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- * We put all the perf config variables in this same object
- * file, so that programs can link against the config parser
- * without having to link against all the rest of perf.
- */
-#include "cache.h"
-
-int pager_use_color = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8b10621b415c..cd61bb1f3917 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -10,6 +10,8 @@
#include "thread.h"
#include "thread_map.h"
#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
static const char *perf_event__names[] = {
[0] = "TOTAL",
@@ -37,6 +39,12 @@ static const char *perf_event__names[] = {
[PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
[PERF_RECORD_AUXTRACE] = "AUXTRACE",
[PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
+ [PERF_RECORD_THREAD_MAP] = "THREAD_MAP",
+ [PERF_RECORD_CPU_MAP] = "CPU_MAP",
+ [PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG",
+ [PERF_RECORD_STAT] = "STAT",
+ [PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
+ [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
};
const char *perf_event__name(unsigned int id)
@@ -699,6 +707,274 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
return err;
}
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event *event;
+ int i, err, size;
+
+ size = sizeof(event->thread_map);
+ size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_THREAD_MAP;
+ event->header.size = size;
+ event->thread_map.nr = threads->nr;
+
+ for (i = 0; i < threads->nr; i++) {
+ struct thread_map_event_entry *entry = &event->thread_map.entries[i];
+ char *comm = thread_map__comm(threads, i);
+
+ if (!comm)
+ comm = (char *) "";
+
+ entry->pid = thread_map__pid(threads, i);
+ strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+ }
+
+ err = process(tool, event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+ struct cpu_map *map)
+{
+ int i;
+
+ cpus->nr = map->nr;
+
+ for (i = 0; i < map->nr; i++)
+ cpus->cpu[i] = map->map[i];
+}
+
+static void synthesize_mask(struct cpu_map_mask *mask,
+ struct cpu_map *map, int max)
+{
+ int i;
+
+ mask->nr = BITS_TO_LONGS(max);
+ mask->long_size = sizeof(long);
+
+ for (i = 0; i < map->nr; i++)
+ set_bit(map->map[i], mask->mask);
+}
+
+static size_t cpus_size(struct cpu_map *map)
+{
+ return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+static size_t mask_size(struct cpu_map *map, int *max)
+{
+ int i;
+
+ *max = 0;
+
+ for (i = 0; i < map->nr; i++) {
+ /* bit possition of the cpu is + 1 */
+ int bit = map->map[i] + 1;
+
+ if (bit > *max)
+ *max = bit;
+ }
+
+ return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(*max) * sizeof(long);
+}
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
+{
+ size_t size_cpus, size_mask;
+ bool is_dummy = cpu_map__empty(map);
+
+ /*
+ * Both array and mask data have variable size based
+ * on the number of cpus and their actual values.
+ * The size of the 'struct cpu_map_data' is:
+ *
+ * array = size of 'struct cpu_map_entries' +
+ * number of cpus * sizeof(u64)
+ *
+ * mask = size of 'struct cpu_map_mask' +
+ * maximum cpu bit converted to size of longs
+ *
+ * and finaly + the size of 'struct cpu_map_data'.
+ */
+ size_cpus = cpus_size(map);
+ size_mask = mask_size(map, max);
+
+ if (is_dummy || (size_cpus < size_mask)) {
+ *size += size_cpus;
+ *type = PERF_CPU_MAP__CPUS;
+ } else {
+ *size += size_mask;
+ *type = PERF_CPU_MAP__MASK;
+ }
+
+ *size += sizeof(struct cpu_map_data);
+ return zalloc(*size);
+}
+
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max)
+{
+ data->type = type;
+
+ switch (type) {
+ case PERF_CPU_MAP__CPUS:
+ synthesize_cpus((struct cpu_map_entries *) data->data, map);
+ break;
+ case PERF_CPU_MAP__MASK:
+ synthesize_mask((struct cpu_map_mask *) data->data, map, max);
+ default:
+ break;
+ };
+}
+
+static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map)
+{
+ size_t size = sizeof(struct cpu_map_event);
+ struct cpu_map_event *event;
+ int max;
+ u16 type;
+
+ event = cpu_map_data__alloc(map, &size, &type, &max);
+ if (!event)
+ return NULL;
+
+ event->header.type = PERF_RECORD_CPU_MAP;
+ event->header.size = size;
+ event->data.type = type;
+
+ cpu_map_data__synthesize(&event->data, map, type, max);
+ return event;
+}
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *map,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct cpu_map_event *event;
+ int err;
+
+ event = cpu_map_event__new(map);
+ if (!event)
+ return -ENOMEM;
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_config_event *event;
+ int size, i = 0, err;
+
+ size = sizeof(*event);
+ size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_STAT_CONFIG;
+ event->header.size = size;
+ event->nr = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val) \
+ event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \
+ event->data[i].val = __val; \
+ i++;
+
+ ADD(AGGR_MODE, config->aggr_mode)
+ ADD(INTERVAL, config->interval)
+ ADD(SCALE, config->scale)
+
+ WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+ "stat config terms unbalanced\n");
+#undef ADD
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_event event;
+
+ event.header.type = PERF_RECORD_STAT;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.id = id;
+ event.cpu = cpu;
+ event.thread = thread;
+ event.val = count->val;
+ event.ena = count->ena;
+ event.run = count->run;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 evtime, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_round_event event;
+
+ event.header.type = PERF_RECORD_STAT_ROUND;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.time = evtime;
+ event.type = type;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event)
+{
+ unsigned i;
+
+ for (i = 0; i < event->nr; i++) {
+
+ switch (event->data[i].tag) {
+#define CASE(__term, __val) \
+ case PERF_STAT_CONFIG_TERM__##__term: \
+ config->__val = event->data[i].val; \
+ break;
+
+ CASE(AGGR_MODE, aggr_mode)
+ CASE(SCALE, scale)
+ CASE(INTERVAL, interval)
+#undef CASE
+ default:
+ pr_warning("unknown stat config term %" PRIu64 "\n",
+ event->data[i].tag);
+ }
+ }
+}
+
size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
{
const char *s;
@@ -783,6 +1059,38 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
event->mmap2.filename);
}
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+ struct thread_map *threads = thread_map__new_event(&event->thread_map);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (threads)
+ ret += thread_map__fprintf(threads, fp);
+ else
+ ret += fprintf(fp, "failed to get threads from event\n");
+
+ thread_map__put(threads);
+ return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+ struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (cpus)
+ ret += cpu_map__fprintf(cpus, fp);
+ else
+ ret += fprintf(fp, "failed to get cpumap from event\n");
+
+ cpu_map__put(cpus);
+ return ret;
+}
+
int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a0dbcbd4f6d8..b7ffb7ee9971 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -226,6 +226,12 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_AUXTRACE_INFO = 70,
PERF_RECORD_AUXTRACE = 71,
PERF_RECORD_AUXTRACE_ERROR = 72,
+ PERF_RECORD_THREAD_MAP = 73,
+ PERF_RECORD_CPU_MAP = 74,
+ PERF_RECORD_STAT_CONFIG = 75,
+ PERF_RECORD_STAT = 76,
+ PERF_RECORD_STAT_ROUND = 77,
+ PERF_RECORD_EVENT_UPDATE = 78,
PERF_RECORD_HEADER_MAX
};
@@ -270,12 +276,61 @@ struct events_stats {
u32 nr_proc_map_timeout;
};
+enum {
+ PERF_CPU_MAP__CPUS = 0,
+ PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {
+ u16 nr;
+ u16 cpu[];
+};
+
+struct cpu_map_mask {
+ u16 nr;
+ u16 long_size;
+ unsigned long mask[];
+};
+
+struct cpu_map_data {
+ u16 type;
+ char data[];
+};
+
+struct cpu_map_event {
+ struct perf_event_header header;
+ struct cpu_map_data data;
+};
+
struct attr_event {
struct perf_event_header header;
struct perf_event_attr attr;
u64 id[];
};
+enum {
+ PERF_EVENT_UPDATE__UNIT = 0,
+ PERF_EVENT_UPDATE__SCALE = 1,
+ PERF_EVENT_UPDATE__NAME = 2,
+ PERF_EVENT_UPDATE__CPUS = 3,
+};
+
+struct event_update_event_cpus {
+ struct cpu_map_data cpus;
+};
+
+struct event_update_event_scale {
+ double scale;
+};
+
+struct event_update_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 id;
+
+ char data[];
+};
+
#define MAX_EVENT_NAME 64
struct perf_trace_event_type {
@@ -356,6 +411,63 @@ struct context_switch_event {
u32 next_prev_tid;
};
+struct thread_map_event_entry {
+ u64 pid;
+ char comm[16];
+};
+
+struct thread_map_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct thread_map_event_entry entries[];
+};
+
+enum {
+ PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
+ PERF_STAT_CONFIG_TERM__INTERVAL = 1,
+ PERF_STAT_CONFIG_TERM__SCALE = 2,
+ PERF_STAT_CONFIG_TERM__MAX = 3,
+};
+
+struct stat_config_event_entry {
+ u64 tag;
+ u64 val;
+};
+
+struct stat_config_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct stat_config_event_entry data[];
+};
+
+struct stat_event {
+ struct perf_event_header header;
+
+ u64 id;
+ u32 cpu;
+ u32 thread;
+
+ union {
+ struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+ };
+ u64 values[3];
+ };
+};
+
+enum {
+ PERF_STAT_ROUND_TYPE__INTERVAL = 0,
+ PERF_STAT_ROUND_TYPE__FINAL = 1,
+};
+
+struct stat_round_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 time;
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -368,6 +480,7 @@ union perf_event {
struct throttle_event throttle;
struct sample_event sample;
struct attr_event attr;
+ struct event_update_event event_update;
struct event_type_event event_type;
struct tracing_data_event tracing_data;
struct build_id_event build_id;
@@ -378,12 +491,20 @@ union perf_event {
struct aux_event aux;
struct itrace_start_event itrace_start;
struct context_switch_event context_switch;
+ struct thread_map_event thread_map;
+ struct cpu_map_event cpu_map;
+ struct stat_config_event stat_config;
+ struct stat_event stat;
+ struct stat_round_event stat_round;
};
void perf_event__print_totals(void);
struct perf_tool;
struct thread_map;
+struct cpu_map;
+struct perf_stat_config;
+struct perf_counts_values;
typedef int (*perf_event__handler_t)(struct perf_tool *tool,
union perf_event *event,
@@ -395,6 +516,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
unsigned int proc_map_timeout);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *cpus,
+ perf_event__handler_t process,
+ struct machine *machine);
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
@@ -402,7 +531,21 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
-
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine);
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event);
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 time, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine);
int perf_event__synthesize_modules(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine);
@@ -499,9 +642,14 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
u64 kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name);
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max);
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1392194a9a9..d81f13de2476 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,7 +18,7 @@
#include <unistd.h>
#include "parse-events.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include <sys/mman.h>
@@ -68,6 +68,18 @@ struct perf_evlist *perf_evlist__new_default(void)
return evlist;
}
+struct perf_evlist *perf_evlist__new_dummy(void)
+{
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist && perf_evlist__add_dummy(evlist)) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
/**
* perf_evlist__set_id_pos - set the positions of event ids.
* @evlist: selected event list
@@ -248,6 +260,22 @@ error:
return -ENOMEM;
}
+int perf_evlist__add_dummy(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ struct perf_evsel *evsel = perf_evsel__new(&attr);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs)
{
@@ -336,20 +364,12 @@ static int perf_evlist__nr_threads(struct perf_evlist *evlist,
void perf_evlist__disable(struct perf_evlist *evlist)
{
- int cpu, thread;
struct perf_evsel *pos;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- evlist__for_each(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
- continue;
- nr_threads = perf_evlist__nr_threads(evlist, pos);
- for (thread = 0; thread < nr_threads; thread++)
- ioctl(FD(pos, cpu, thread),
- PERF_EVENT_IOC_DISABLE, 0);
- }
+ evlist__for_each(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__disable(pos);
}
evlist->enabled = false;
@@ -357,20 +377,12 @@ void perf_evlist__disable(struct perf_evlist *evlist)
void perf_evlist__enable(struct perf_evlist *evlist)
{
- int cpu, thread;
struct perf_evsel *pos;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- evlist__for_each(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
- continue;
- nr_threads = perf_evlist__nr_threads(evlist, pos);
- for (thread = 0; thread < nr_threads; thread++)
- ioctl(FD(pos, cpu, thread),
- PERF_EVENT_IOC_ENABLE, 0);
- }
+ evlist__for_each(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__enable(pos);
}
evlist->enabled = true;
@@ -381,48 +393,6 @@ void perf_evlist__toggle_enable(struct perf_evlist *evlist)
(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}
-int perf_evlist__disable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- int cpu, thread, err;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads = perf_evlist__nr_threads(evlist, evsel);
-
- if (!evsel->fd)
- return 0;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < nr_threads; thread++) {
- err = ioctl(FD(evsel, cpu, thread),
- PERF_EVENT_IOC_DISABLE, 0);
- if (err)
- return err;
- }
- }
- return 0;
-}
-
-int perf_evlist__enable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel)
-{
- int cpu, thread, err;
- int nr_cpus = cpu_map__nr(evlist->cpus);
- int nr_threads = perf_evlist__nr_threads(evlist, evsel);
-
- if (!evsel->fd)
- return -EINVAL;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < nr_threads; thread++) {
- err = ioctl(FD(evsel, cpu, thread),
- PERF_EVENT_IOC_ENABLE, 0);
- if (err)
- return err;
- }
- }
- return 0;
-}
-
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
struct perf_evsel *evsel, int cpu)
{
@@ -550,9 +520,9 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
evsel->id[evsel->ids++] = id;
}
-static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
- struct perf_evsel *evsel,
- int cpu, int thread, int fd)
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd)
{
u64 read_data[4] = { 0, };
int id_idx = 1; /* The first entry is the counter value */
@@ -1486,7 +1456,7 @@ int perf_evlist__open(struct perf_evlist *evlist)
perf_evlist__update_id_pos(evlist);
evlist__for_each(evlist, evsel) {
- err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
+ err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
if (err < 0)
goto out_err;
}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a459fe71b452..7c4d9a206776 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -67,6 +67,7 @@ struct perf_evsel_str_handler {
struct perf_evlist *perf_evlist__new(void);
struct perf_evlist *perf_evlist__new_default(void);
+struct perf_evlist *perf_evlist__new_dummy(void);
void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
struct thread_map *threads);
void perf_evlist__exit(struct perf_evlist *evlist);
@@ -81,6 +82,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
#define perf_evlist__add_default_attrs(evlist, array) \
__perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+int perf_evlist__add_dummy(struct perf_evlist *evlist);
+
int perf_evlist__add_newtp(struct perf_evlist *evlist,
const char *sys, const char *name, void *handler);
@@ -97,6 +100,9 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id);
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
@@ -149,10 +155,6 @@ void perf_evlist__disable(struct perf_evlist *evlist);
void perf_evlist__enable(struct perf_evlist *evlist);
void perf_evlist__toggle_enable(struct perf_evlist *evlist);
-int perf_evlist__disable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
-int perf_evlist__enable_event(struct perf_evlist *evlist,
- struct perf_evsel *evsel);
int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
struct perf_evsel *evsel, int idx);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 397fb4ed3c97..cdbaf9b51e42 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -36,6 +36,7 @@ static struct {
bool cloexec;
bool clockid;
bool clockid_wrong;
+ bool lbr_flags;
} perf_missing_features;
static clockid_t clockid;
@@ -574,7 +575,9 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel,
} else {
perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK;
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -981,13 +984,26 @@ int perf_evsel__append_filter(struct perf_evsel *evsel,
return -1;
}
-int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
+int perf_evsel__enable(struct perf_evsel *evsel)
{
+ int nthreads = thread_map__nr(evsel->threads);
+ int ncpus = cpu_map__nr(evsel->cpus);
+
return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
PERF_EVENT_IOC_ENABLE,
0);
}
+int perf_evsel__disable(struct perf_evsel *evsel)
+{
+ int nthreads = thread_map__nr(evsel->threads);
+ int ncpus = cpu_map__nr(evsel->cpus);
+
+ return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+ PERF_EVENT_IOC_DISABLE,
+ 0);
+}
+
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
if (ncpus == 0 || nthreads == 0)
@@ -1192,6 +1208,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+ bit_name(WEIGHT),
{ .name = NULL, }
};
#undef bit_name
@@ -1323,6 +1340,9 @@ fallback_missing_features:
evsel->attr.mmap2 = 0;
if (perf_missing_features.exclude_guest)
evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+ if (perf_missing_features.lbr_flags)
+ evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES);
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1441,6 +1461,12 @@ try_fallback:
} else if (!perf_missing_features.sample_id_all) {
perf_missing_features.sample_id_all = true;
goto retry_sample_id;
+ } else if (!perf_missing_features.lbr_flags &&
+ (evsel->attr.branch_sample_type &
+ (PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+ perf_missing_features.lbr_flags = true;
+ goto fallback_missing_features;
}
out_close:
@@ -2272,6 +2298,29 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
term, (u64)evsel->attr.sample_freq);
}
+
+ if (details->trace_fields) {
+ struct format_field *field;
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+ goto out;
+ }
+
+ field = evsel->tp_format->format.fields;
+ if (field == NULL) {
+ printed += comma_fprintf(fp, &first, " (no trace field)");
+ goto out;
+ }
+
+ printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+ field = field->next;
+ while (field) {
+ printed += comma_fprintf(fp, &first, "%s", field->name);
+ field = field->next;
+ }
+ }
out:
fputc('\n', fp);
return ++printed;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 0e49bd742c63..8e75434bd01c 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -227,7 +227,8 @@ int perf_evsel__append_filter(struct perf_evsel *evsel,
const char *op, const char *filter);
int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
const char *filter);
-int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__enable(struct perf_evsel *evsel);
+int perf_evsel__disable(struct perf_evsel *evsel);
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
struct cpu_map *cpus);
@@ -368,6 +369,7 @@ struct perf_attr_details {
bool verbose;
bool event_group;
bool force;
+ bool trace_fields;
};
int perf_evsel__fprintf(struct perf_evsel *evsel,
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
deleted file mode 100644
index 7adf4ad15d8f..000000000000
--- a/tools/perf/util/exec_cmd.c
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "cache.h"
-#include "exec_cmd.h"
-#include "quote.h"
-
-#include <string.h>
-
-#define MAX_ARGS 32
-
-static const char *argv_exec_path;
-static const char *argv0_path;
-
-const char *system_path(const char *path)
-{
- static const char *prefix = PREFIX;
- struct strbuf d = STRBUF_INIT;
-
- if (is_absolute_path(path))
- return path;
-
- strbuf_addf(&d, "%s/%s", prefix, path);
- path = strbuf_detach(&d, NULL);
- return path;
-}
-
-const char *perf_extract_argv0_path(const char *argv0)
-{
- const char *slash;
-
- if (!argv0 || !*argv0)
- return NULL;
- slash = argv0 + strlen(argv0);
-
- while (argv0 <= slash && !is_dir_sep(*slash))
- slash--;
-
- if (slash >= argv0) {
- argv0_path = strndup(argv0, slash - argv0);
- return argv0_path ? slash + 1 : NULL;
- }
-
- return argv0;
-}
-
-void perf_set_argv_exec_path(const char *exec_path)
-{
- argv_exec_path = exec_path;
- /*
- * Propagate this setting to external programs.
- */
- setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
-}
-
-
-/* Returns the highest-priority, location to look for perf programs. */
-const char *perf_exec_path(void)
-{
- const char *env;
-
- if (argv_exec_path)
- return argv_exec_path;
-
- env = getenv(EXEC_PATH_ENVIRONMENT);
- if (env && *env) {
- return env;
- }
-
- return system_path(PERF_EXEC_PATH);
-}
-
-static void add_path(struct strbuf *out, const char *path)
-{
- if (path && *path) {
- if (is_absolute_path(path))
- strbuf_addstr(out, path);
- else
- strbuf_addstr(out, make_nonrelative_path(path));
-
- strbuf_addch(out, PATH_SEP);
- }
-}
-
-void setup_path(void)
-{
- const char *old_path = getenv("PATH");
- struct strbuf new_path = STRBUF_INIT;
-
- add_path(&new_path, perf_exec_path());
- add_path(&new_path, argv0_path);
-
- if (old_path)
- strbuf_addstr(&new_path, old_path);
- else
- strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin");
-
- setenv("PATH", new_path.buf, 1);
-
- strbuf_release(&new_path);
-}
-
-static const char **prepare_perf_cmd(const char **argv)
-{
- int argc;
- const char **nargv;
-
- for (argc = 0; argv[argc]; argc++)
- ; /* just counting */
- nargv = malloc(sizeof(*nargv) * (argc + 2));
-
- nargv[0] = "perf";
- for (argc = 0; argv[argc]; argc++)
- nargv[argc + 1] = argv[argc];
- nargv[argc + 1] = NULL;
- return nargv;
-}
-
-int execv_perf_cmd(const char **argv) {
- const char **nargv = prepare_perf_cmd(argv);
-
- /* execvp() can only ever return if it fails */
- execvp("perf", (char **)nargv);
-
- free(nargv);
- return -1;
-}
-
-
-int execl_perf_cmd(const char *cmd,...)
-{
- int argc;
- const char *argv[MAX_ARGS + 1];
- const char *arg;
- va_list param;
-
- va_start(param, cmd);
- argv[0] = cmd;
- argc = 1;
- while (argc < MAX_ARGS) {
- arg = argv[argc++] = va_arg(param, char *);
- if (!arg)
- break;
- }
- va_end(param);
- if (MAX_ARGS <= argc)
- return error("too many args to run %s", cmd);
-
- argv[argc] = NULL;
- return execv_perf_cmd(argv);
-}
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h
deleted file mode 100644
index bc4b915963f5..000000000000
--- a/tools/perf/util/exec_cmd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __PERF_EXEC_CMD_H
-#define __PERF_EXEC_CMD_H
-
-extern void perf_set_argv_exec_path(const char *exec_path);
-extern const char *perf_extract_argv0_path(const char *path);
-extern const char *perf_exec_path(void);
-extern void setup_path(void);
-extern int execv_perf_cmd(const char **argv); /* NULL terminated */
-extern int execl_perf_cmd(const char *cmd, ...);
-extern const char *system_path(const char *path);
-
-#endif /* __PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index 36a885d2cd22..0ac2037c970c 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -36,4 +36,19 @@ do
}' "Documentation/perf-$cmd.txt"
done
echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#ifdef HAVE_LIBAUDIT_SUPPORT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
echo "};"
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 43838003c1a1..f50b7235ecb6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -724,7 +724,7 @@ static int write_numa_topology(int fd, struct perf_header *h __maybe_unused,
done:
free(buf);
fclose(fp);
- free(node_map);
+ cpu_map__put(node_map);
return ret;
}
@@ -868,6 +868,13 @@ static int write_auxtrace(int fd, struct perf_header *h,
return err;
}
+static int write_stat(int fd __maybe_unused,
+ struct perf_header *h __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -1159,6 +1166,12 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused,
fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
}
+static void print_stat(struct perf_header *ph __maybe_unused,
+ int fd __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains stat data\n");
+}
+
static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
FILE *fp)
{
@@ -1948,6 +1961,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings),
FEAT_OPP(HEADER_GROUP_DESC, group_desc),
FEAT_OPP(HEADER_AUXTRACE, auxtrace),
+ FEAT_OPA(HEADER_STAT, stat),
};
struct header_print_data {
@@ -2686,6 +2700,152 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
return err;
}
+static struct event_update_event *
+event_update_event__new(size_t size, u64 type, u64 id)
+{
+ struct event_update_event *ev;
+
+ size += sizeof(*ev);
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ ev = zalloc(size);
+ if (ev) {
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = type;
+ ev->id = id;
+ }
+ return ev;
+}
+
+int
+perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t size = strlen(evsel->unit);
+ int err;
+
+ ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strncpy(ev->data, evsel->unit, size);
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ struct event_update_event_scale *ev_data;
+ int err;
+
+ ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+ ev_data->scale = evsel->scale;
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t len = strlen(evsel->name);
+ int err;
+
+ ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strncpy(ev->data, evsel->name, len);
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ size_t size = sizeof(struct event_update_event);
+ struct event_update_event *ev;
+ int max, err;
+ u16 type;
+
+ if (!evsel->own_cpus)
+ return 0;
+
+ ev = cpu_map_data__alloc(evsel->own_cpus, &size, &type, &max);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = PERF_EVENT_UPDATE__CPUS;
+ ev->id = evsel->id[0];
+
+ cpu_map_data__synthesize((struct cpu_map_data *) ev->data,
+ evsel->own_cpus,
+ type, max);
+
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct cpu_map *map;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id: %" PRIu64 "\n", ev->id);
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+ break;
+ case PERF_EVENT_UPDATE__UNIT:
+ ret += fprintf(fp, "... unit: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ ret += fprintf(fp, "... name: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+ ret += fprintf(fp, "... ");
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ ret += cpu_map__fprintf(map, fp);
+ else
+ ret += fprintf(fp, "failed to get cpus\n");
+ break;
+ default:
+ ret += fprintf(fp, "... unknown type\n");
+ break;
+ }
+
+ return ret;
+}
+
int perf_event__synthesize_attrs(struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process)
@@ -2745,6 +2905,51 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
return 0;
}
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct cpu_map *map;
+
+ if (!pevlist || *pevlist == NULL)
+ return -EINVAL;
+
+ evlist = *pevlist;
+
+ evsel = perf_evlist__id2evsel(evlist, ev->id);
+ if (evsel == NULL)
+ return -EINVAL;
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__UNIT:
+ evsel->unit = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ evsel->name = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ evsel->scale = ev_scale->scale;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ evsel->own_cpus = map;
+ else
+ pr_err("failed to get event_update cpus\n");
+ default:
+ break;
+ }
+
+ return 0;
+}
+
int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
struct perf_evlist *evlist,
perf_event__handler_t process)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 05f27cb6b7e3..cff9892452ee 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -31,6 +31,7 @@ enum {
HEADER_PMU_MAPPINGS,
HEADER_GROUP_DESC,
HEADER_AUXTRACE,
+ HEADER_STAT,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
@@ -105,8 +106,24 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
int perf_event__synthesize_attrs(struct perf_tool *tool,
struct perf_session *session,
perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
int perf_event__synthesize_tracing_data(struct perf_tool *tool,
int fd, struct perf_evlist *evlist,
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
new file mode 100644
index 000000000000..dc1e41c9b054
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -0,0 +1,103 @@
+#include "cache.h"
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+
+static int autocorrect;
+static struct cmdnames aliases;
+
+static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+{
+ if (!strcmp(var, "help.autocorrect"))
+ autocorrect = perf_config_int(var,value);
+ /* Also use aliases for command lookup */
+ if (!prefixcmp(var, "alias."))
+ add_cmdname(&aliases, var + 6, strlen(var + 6));
+
+ return perf_default_config(var, value, cb);
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+ const struct cmdname *const *c1 = p1, *const *c2 = p2;
+ const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+ int l1 = (*c1)->len;
+ int l2 = (*c2)->len;
+ return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+ unsigned int i;
+
+ ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
+
+ for (i = 0; i < old->cnt; i++)
+ cmds->names[cmds->cnt++] = old->names[i];
+ zfree(&old->names);
+ old->cnt = 0;
+}
+
+const char *help_unknown_cmd(const char *cmd)
+{
+ unsigned int i, n = 0, best_similarity = 0;
+ struct cmdnames main_cmds, other_cmds;
+
+ memset(&main_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(main_cmds));
+ memset(&aliases, 0, sizeof(aliases));
+
+ perf_config(perf_unknown_cmd_config, NULL);
+
+ load_command_list("perf-", &main_cmds, &other_cmds);
+
+ add_cmd_list(&main_cmds, &aliases);
+ add_cmd_list(&main_cmds, &other_cmds);
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(main_cmds.names), cmdname_compare);
+ uniq(&main_cmds);
+
+ if (main_cmds.cnt) {
+ /* This reuses cmdname->len for similarity index */
+ for (i = 0; i < main_cmds.cnt; ++i)
+ main_cmds.names[i]->len =
+ levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), levenshtein_compare);
+
+ best_similarity = main_cmds.names[0]->len;
+ n = 1;
+ while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ ++n;
+ }
+
+ if (autocorrect && n == 1) {
+ const char *assumed = main_cmds.names[0]->name;
+
+ main_cmds.names[0] = NULL;
+ clean_cmdnames(&main_cmds);
+ fprintf(stderr, "WARNING: You called a perf program named '%s', "
+ "which does not exist.\n"
+ "Continuing under the assumption that you meant '%s'\n",
+ cmd, assumed);
+ if (autocorrect > 0) {
+ fprintf(stderr, "in %0.1f seconds automatically...\n",
+ (float)autocorrect/10.0);
+ poll(NULL, 0, autocorrect * 100);
+ }
+ return assumed;
+ }
+
+ fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+ if (main_cmds.cnt && best_similarity < 6) {
+ fprintf(stderr, "\nDid you mean %s?\n",
+ n < 2 ? "this": "one of these");
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ }
+
+ exit(1);
+}
diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.h
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 4fd37d6708cb..c226303e3da0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -254,6 +254,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
he_stat__decay(&he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__decay(he->stat_acc);
+ decay_callchain(he->callchain);
diff = prev_period - he->stat.period;
@@ -270,6 +271,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
if (sort__need_collapse)
rb_erase(&he->rb_node_in, &hists->entries_collapsed);
+ else
+ rb_erase(&he->rb_node_in, hists->entries_in);
--hists->nr_entries;
if (!he->filtered)
@@ -367,6 +370,25 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
if (symbol_conf.use_callchain)
callchain_init(he->callchain);
+ if (he->raw_data) {
+ he->raw_data = memdup(he->raw_data, he->raw_size);
+
+ if (he->raw_data == NULL) {
+ map__put(he->ms.map);
+ if (he->branch_info) {
+ map__put(he->branch_info->from.map);
+ map__put(he->branch_info->to.map);
+ free(he->branch_info);
+ }
+ if (he->mem_info) {
+ map__put(he->mem_info->iaddr.map);
+ map__put(he->mem_info->daddr.map);
+ }
+ free(he->stat_acc);
+ free(he);
+ return NULL;
+ }
+ }
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
}
@@ -459,7 +481,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct symbol *sym_parent,
struct branch_info *bi,
struct mem_info *mi,
- u64 period, u64 weight, u64 transaction,
+ struct perf_sample *sample,
bool sample_self)
{
struct hist_entry entry = {
@@ -476,15 +498,17 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
.level = al->level,
.stat = {
.nr_events = 1,
- .period = period,
- .weight = weight,
+ .period = sample->period,
+ .weight = sample->weight,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
.hists = hists,
.branch_info = bi,
.mem_info = mi,
- .transaction = transaction,
+ .transaction = sample->transaction,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
};
return hists__findnew_entry(hists, &entry, al, sample_self);
@@ -524,12 +548,13 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al
u64 cost;
struct mem_info *mi = iter->priv;
struct hists *hists = evsel__hists(iter->evsel);
+ struct perf_sample *sample = iter->sample;
struct hist_entry *he;
if (mi == NULL)
return -EINVAL;
- cost = iter->sample->weight;
+ cost = sample->weight;
if (!cost)
cost = 1;
@@ -540,8 +565,10 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al
* and this is indirectly achieved by passing period=weight here
* and the he_stat__add_period() function.
*/
+ sample->period = cost;
+
he = __hists__add_entry(hists, al, iter->parent, NULL, mi,
- cost, cost, 0, true);
+ sample, true);
if (!he)
return -ENOMEM;
@@ -628,6 +655,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
struct branch_info *bi;
struct perf_evsel *evsel = iter->evsel;
struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
struct hist_entry *he = NULL;
int i = iter->curr;
int err = 0;
@@ -641,9 +669,11 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
* The report shows the percentage of total branches captured
* and not events sampled. Thus we use a pseudo period of 1.
*/
+ sample->period = 1;
+ sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
+
he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
- 1, bi->flags.cycles ? bi->flags.cycles : 1,
- 0, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -680,8 +710,7 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location
struct hist_entry *he;
he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -742,8 +771,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
int err = 0;
he = __hists__add_entry(hists, al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, true);
+ sample, true);
if (he == NULL)
return -ENOMEM;
@@ -795,6 +823,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
.sym = al->sym,
},
.parent = iter->parent,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
};
int i;
struct callchain_cursor cursor;
@@ -816,8 +846,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
}
he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction, false);
+ sample, false);
if (he == NULL)
return -ENOMEM;
@@ -924,9 +953,6 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
- continue;
-
cmp = fmt->cmp(fmt, left, right);
if (cmp)
break;
@@ -942,9 +968,6 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
- continue;
-
cmp = fmt->collapse(fmt, left, right);
if (cmp)
break;
@@ -975,6 +998,8 @@ void hist_entry__delete(struct hist_entry *he)
if (he->srcfile && he->srcfile[0])
free(he->srcfile);
free_callchain(he->callchain);
+ free(he->trace_output);
+ free(he->raw_data);
free(he);
}
@@ -982,9 +1007,8 @@ void hist_entry__delete(struct hist_entry *he)
* collapse the histogram
*/
-static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
- struct rb_root *root,
- struct hist_entry *he)
+bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+ struct rb_root *root, struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -1024,7 +1048,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
return true;
}
-static struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
{
struct rb_root *root;
@@ -1088,7 +1112,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
int64_t cmp = 0;
perf_hpp__for_each_sort_list(fmt) {
- if (perf_hpp__should_skip(fmt))
+ if (perf_hpp__should_skip(fmt, a->hists))
continue;
cmp = fmt->sort(fmt, a, b);
@@ -1559,10 +1583,8 @@ int perf_hist_config(const char *var, const char *value)
return 0;
}
-static int hists_evsel__init(struct perf_evsel *evsel)
+int __hists__init(struct hists *hists)
{
- struct hists *hists = evsel__hists(evsel);
-
memset(hists, 0, sizeof(*hists));
hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
hists->entries_in = &hists->entries_in_array[0];
@@ -1573,6 +1595,43 @@ static int hists_evsel__init(struct perf_evsel *evsel)
return 0;
}
+static void hists__delete_remaining_entries(struct rb_root *root)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ while (!RB_EMPTY_ROOT(root)) {
+ node = rb_first(root);
+ rb_erase(node, root);
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ hist_entry__delete(he);
+ }
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+ hists__delete_entries(hists);
+ hists__delete_remaining_entries(&hists->entries_in_array[0]);
+ hists__delete_remaining_entries(&hists->entries_in_array[1]);
+ hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+static void hists_evsel__exit(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ hists__delete_all_entries(hists);
+}
+
+static int hists_evsel__init(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ __hists__init(hists);
+ return 0;
+}
+
/*
* XXX We probably need a hists_evsel__exit() to free the hist_entries
* stored in the rbtree...
@@ -1581,7 +1640,8 @@ static int hists_evsel__init(struct perf_evsel *evsel)
int hists__init(void)
{
int err = perf_evsel__object_config(sizeof(struct hists_evsel),
- hists_evsel__init, NULL);
+ hists_evsel__init,
+ hists_evsel__exit);
if (err)
fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a48a2078d288..d4ec4822a103 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,6 +52,7 @@ enum hist_column {
HISTC_MEM_IADDR_SYMBOL,
HISTC_TRANSACTION,
HISTC_CYCLES,
+ HISTC_TRACE,
HISTC_NR_COLS, /* Last entry */
};
@@ -114,8 +115,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
struct addr_location *al,
struct symbol *parent,
struct branch_info *bi,
- struct mem_info *mi, u64 period,
- u64 weight, u64 transaction,
+ struct mem_info *mi,
+ struct perf_sample *sample,
bool sample_self);
int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
int max_stack_depth, void *arg);
@@ -184,6 +185,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
}
int hists__init(void);
+int __hists__init(struct hists *hists);
+
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
+bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+ struct rb_root *root, struct hist_entry *he);
struct perf_hpp {
char *buf;
@@ -261,10 +267,20 @@ void perf_hpp__append_sort_keys(void);
bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
-static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format)
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+ struct hists *hists)
{
- return format->elide;
+ if (format->elide)
+ return true;
+
+ if (perf_hpp__is_dynamic_entry(format) &&
+ !perf_hpp__defined_dynamic_entry(format, hists))
+ return true;
+
+ return false;
}
void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
deleted file mode 100644
index 6f19c548ecc0..000000000000
--- a/tools/perf/util/include/linux/string.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <string.h>
-
-void *memdup(const void *src, size_t len);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 97f963a3dcb9..81a2eb77ba7f 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1744,7 +1744,7 @@ static void intel_pt_free(struct perf_session *session)
auxtrace_heap__free(&pt->heap);
intel_pt_free_events(session);
session->auxtrace = NULL;
- thread__delete(pt->unknown_thread);
+ thread__put(pt->unknown_thread);
free(pt);
}
@@ -2153,7 +2153,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
- thread__delete(pt->unknown_thread);
+ thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
auxtrace_queues__free(&pt->queues);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8b303ff20289..ad79297c76c8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos)
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
@@ -44,6 +45,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
+ memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
return -ENOMEM;
@@ -122,6 +125,7 @@ void machine__delete_threads(struct machine *machine)
void machine__exit(struct machine *machine)
{
+ machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
@@ -348,13 +352,18 @@ static void machine__update_thread_pid(struct machine *machine,
}
th->mg = map_groups__get(leader->mg);
-
+out_put:
+ thread__put(leader);
return;
-
out_err:
pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+ goto out_put;
}
+/*
+ * Caller must eventually drop thread->refcnt returned with a successfull
+ * lookup/new thread inserted.
+ */
static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
@@ -372,7 +381,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
machine->last_match = NULL;
@@ -385,7 +394,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th->tid == tid) {
machine->last_match = th;
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
if (tid < th->tid)
@@ -413,7 +422,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (thread__init_map_groups(th, machine)) {
rb_erase_init(&th->rb_node, &machine->threads);
RB_CLEAR_NODE(&th->rb_node);
- thread__delete(th);
+ thread__put(th);
return NULL;
}
/*
@@ -437,7 +446,7 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
struct thread *th;
pthread_rwlock_wrlock(&machine->threads_lock);
- th = thread__get(__machine__findnew_thread(machine, pid, tid));
+ th = __machine__findnew_thread(machine, pid, tid);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -447,7 +456,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
{
struct thread *th;
pthread_rwlock_rdlock(&machine->threads_lock);
- th = thread__get(____machine__findnew_thread(machine, pid, tid, false));
+ th = ____machine__findnew_thread(machine, pid, tid, false);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -560,11 +569,29 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
+{
+ const char *dup_filename;
+
+ if (!filename || !dso || !dso->long_name)
+ return;
+ if (dso->long_name[0] != '[')
+ return;
+ if (!strchr(filename, '/'))
+ return;
+
+ dup_filename = strdup(filename);
+ if (!dup_filename)
+ return;
+
+ dso__set_long_name(dso, dup_filename, true);
+}
+
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
struct map *map = NULL;
- struct dso *dso;
+ struct dso *dso = NULL;
struct kmod_path m;
if (kmod_path__parse_name(&m, filename))
@@ -572,8 +599,15 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
m.name);
- if (map)
+ if (map) {
+ /*
+ * If the map's dso is an offline module, give dso__load()
+ * a chance to find the file path of that module by fixing
+ * long_name.
+ */
+ dso__adjust_kmod_long_name(map->dso, filename);
goto out;
+ }
dso = machine__findnew_module_dso(machine, &m, filename);
if (dso == NULL)
@@ -585,7 +619,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map_groups__insert(&machine->kmaps, map);
+ /* Put the map here because map_groups__insert alread got it */
+ map__put(map);
out:
+ /* put the dso here, corresponding to machine__findnew_module_dso */
+ dso__put(dso);
free(m.name);
return map;
}
@@ -740,6 +778,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
enum map_type type;
u64 start = machine__get_running_kernel_start(machine, NULL);
+ /* In case of renewal the kernel map, destroy previous one */
+ machine__destroy_kernel_maps(machine);
+
for (type = 0; type < MAP__NR_TYPES; ++type) {
struct kmap *kmap;
struct map *map;
@@ -788,6 +829,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
kmap->ref_reloc_sym = NULL;
}
+ map__put(machine->vmlinux_maps[type]);
machine->vmlinux_maps[type] = NULL;
}
}
@@ -1084,11 +1126,14 @@ int machine__create_kernel_maps(struct machine *machine)
struct dso *kernel = machine__get_kernel(machine);
const char *name;
u64 addr = machine__get_running_kernel_start(machine, &name);
- if (!addr)
+ int ret;
+
+ if (!addr || kernel == NULL)
return -1;
- if (kernel == NULL ||
- __machine__create_kernel_maps(machine, kernel) < 0)
+ ret = __machine__create_kernel_maps(machine, kernel);
+ dso__put(kernel);
+ if (ret < 0)
return -1;
if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
@@ -1609,6 +1654,8 @@ static int add_callchain_ip(struct thread *thread,
}
}
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ return 0;
return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
}
@@ -1863,6 +1910,9 @@ check_calls:
static int unwind_entry(struct unwind_entry *entry, void *arg)
{
struct callchain_cursor *cursor = arg;
+
+ if (symbol_conf.hide_unresolved && entry->sym == NULL)
+ return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym);
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index afc6b56cf749..171b6d10a04b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -26,8 +26,8 @@ const char *map_type__name[MAP__NR_TYPES] = {
static inline int is_anon_memory(const char *filename)
{
return !strcmp(filename, "//anon") ||
- !strcmp(filename, "/dev/zero (deleted)") ||
- !strcmp(filename, "/anon_hugepage (deleted)");
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
}
static inline int is_no_dso_memory(const char *filename)
@@ -691,6 +691,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
__map_groups__insert(pos->groups, before);
if (verbose >= 2)
map__fprintf(before, fp);
+ map__put(before);
}
if (map->end < pos->end) {
@@ -705,6 +706,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
__map_groups__insert(pos->groups, after);
if (verbose >= 2)
map__fprintf(after, fp);
+ map__put(after);
}
put_map:
map__put(pos);
@@ -742,6 +744,7 @@ int map_groups__clone(struct map_groups *mg,
if (new == NULL)
goto out_unlock;
map_groups__insert(mg, new);
+ map__put(new);
}
err = 0;
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index 355eecf6bf59..afc088dd7d20 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -1,7 +1,7 @@
#include "perf.h"
#include "util/util.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-branch-options.h"
#define BRANCH_OPT(n, m) \
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e48d9da75707..4f7b0efdde2f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -4,9 +4,9 @@
#include "../perf.h"
#include "evlist.h"
#include "evsel.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "parse-events.h"
-#include "exec_cmd.h"
+#include <subcmd/exec-cmd.h>
#include "string.h"
#include "symbol.h"
#include "cache.h"
@@ -124,6 +124,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
.symbol = "dummy",
.alias = "",
},
+ [PERF_COUNT_SW_BPF_OUTPUT] = {
+ .symbol = "bpf-output",
+ .alias = "",
+ },
};
#define __PERF_EVENT_FIELD(config, name) \
@@ -1879,7 +1883,7 @@ restart:
for (i = 0; i < max; i++, syms++) {
- if (event_glob != NULL &&
+ if (event_glob != NULL && syms->symbol != NULL &&
!(strglobmatch(syms->symbol, event_glob) ||
(syms->alias && strglobmatch(syms->alias, event_glob))))
continue;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index 4f2c1c255d81..646ecf736aad 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -1,7 +1,7 @@
#include "perf.h"
#include "util/util.h"
#include "util/debug.h"
-#include "util/parse-options.h"
+#include <subcmd/parse-options.h>
#include "util/parse-regs-options.h"
int
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 5d13cb45b317..3654d964e49d 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -22,24 +22,6 @@ static const char *get_perf_dir(void)
return ".";
}
-/*
- * If libc has strlcpy() then that version will override this
- * implementation:
- */
-size_t __weak strlcpy(char *dest, const char *src, size_t size)
-{
- size_t ret = strlen(src);
-
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
-
- memcpy(dest, src, len);
- dest[len] = '\0';
- }
-
- return ret;
-}
-
static char *get_pathname(void)
{
static char pathname_array[4][PATH_MAX];
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e4b173dec4b9..b597bcc8fc78 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -220,6 +220,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
alias->scale = 1.0;
alias->unit[0] = '\0';
alias->per_pkg = false;
+ alias->snapshot = false;
ret = parse_events_terms(&alias->terms, val);
if (ret) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 03875f9154e7..93996ec4bbe3 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
goto out;
if (!allow_suffix) {
- pr_warning("Error: event \"%s\" already exists. "
- "(Use -f to force duplicates.)\n", buf);
+ pr_warning("Error: event \"%s\" already exists.\n"
+ " Hint: Remove existing event by 'perf probe -d'\n"
+ " or force duplicates by 'perf probe -f'\n"
+ " or set 'force=yes' in BPF source.\n",
+ buf);
ret = -EEXIST;
goto out;
}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 05012bb178d7..2be10fb27172 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -654,6 +654,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
{
Dwarf_Attribute fb_attr;
+ Dwarf_Frame *frame = NULL;
size_t nops;
int ret;
@@ -686,11 +687,11 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
#if _ELFUTILS_PREREQ(0, 142)
} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
pf->cfi != NULL) {
- Dwarf_Frame *frame;
if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
pr_warning("Failed to get call frame on 0x%jx\n",
(uintmax_t)pf->addr);
+ free(frame);
return -ENOENT;
}
#endif
@@ -699,7 +700,8 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
/* Call finder's callback handler */
ret = pf->callback(sc_die, pf);
- /* *pf->fb_ops will be cached in libdw. Don't free it. */
+ /* Since *pf->fb_ops can be a part of frame. we should free it here. */
+ free(frame);
pf->fb_ops = NULL;
return ret;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 51be28b1bca2..8162ba0e2e57 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -10,6 +10,8 @@ util/ctype.c
util/evlist.c
util/evsel.c
util/cpumap.c
+../lib/bitmap.c
+../lib/find_bit.c
../lib/hweight.c
util/thread_map.c
util/util.c
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index a8e825fca42a..d72fafc1c800 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -41,6 +41,9 @@
#include "../thread-stack.h"
#include "../trace-event.h"
#include "../machine.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "stat.h"
PyMODINIT_FUNC initperf_trace_context(void);
@@ -859,6 +862,104 @@ static void python_process_event(union perf_event *event,
}
}
+static void get_handler_name(char *str, size_t size,
+ struct perf_evsel *evsel)
+{
+ char *p = str;
+
+ scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+
+ while ((p = strchr(p, ':'))) {
+ *p = '_';
+ p++;
+ }
+}
+
+static void
+process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp,
+ struct perf_counts_values *count)
+{
+ PyObject *handler, *t;
+ static char handler_name[256];
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ get_handler_name(handler_name, sizeof(handler_name),
+ counter);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
+ PyTuple_SetItem(t, n++, PyInt_FromLong(thread));
+
+ tuple_set_u64(t, n++, tstamp);
+ tuple_set_u64(t, n++, count->val);
+ tuple_set_u64(t, n++, count->ena);
+ tuple_set_u64(t, n++, count->run);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+ struct perf_evsel *counter, u64 tstamp)
+{
+ struct thread_map *threads = counter->threads;
+ struct cpu_map *cpus = counter->cpus;
+ int cpu, thread;
+
+ if (config->aggr_mode == AGGR_GLOBAL) {
+ process_stat(counter, -1, -1, tstamp,
+ &counter->counts->aggr);
+ return;
+ }
+
+ for (thread = 0; thread < threads->nr; thread++) {
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ process_stat(counter, cpus->map[cpu],
+ thread_map__pid(threads, thread), tstamp,
+ perf_counts(counter->counts, cpu, thread));
+ }
+ }
+}
+
+static void python_process_stat_interval(u64 tstamp)
+{
+ PyObject *handler, *t;
+ static const char handler_name[] = "stat__interval";
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ tuple_set_u64(t, n++, tstamp);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
static int run_start_sub(void)
{
main_module = PyImport_AddModule("__main__");
@@ -1201,10 +1302,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
}
struct scripting_ops python_scripting_ops = {
- .name = "Python",
- .start_script = python_start_script,
- .flush_script = python_flush_script,
- .stop_script = python_stop_script,
- .process_event = python_process_event,
- .generate_script = python_generate_script,
+ .name = "Python",
+ .start_script = python_start_script,
+ .flush_script = python_flush_script,
+ .stop_script = python_stop_script,
+ .process_event = python_process_event,
+ .process_stat = python_process_stat,
+ .process_stat_interval = python_process_stat_interval,
+ .generate_script = python_generate_script,
};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c35ffdd360fe..d5636ba94b20 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -17,6 +17,7 @@
#include "asm/bug.h"
#include "auxtrace.h"
#include "thread-stack.h"
+#include "stat.h"
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
@@ -36,6 +37,9 @@ static int perf_session__open(struct perf_session *session)
if (perf_data_file__is_pipe(file))
return 0;
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
if (!perf_evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n");
return -1;
@@ -205,6 +209,18 @@ static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
return 0;
}
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_evlist **pevlist
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
@@ -296,6 +312,67 @@ int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
return 0;
}
+
+static
+int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_config(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_round(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
void perf_tool__fill_defaults(struct perf_tool *tool)
{
if (tool->sample == NULL)
@@ -328,6 +405,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->unthrottle = process_event_stub;
if (tool->attr == NULL)
tool->attr = process_event_synth_attr_stub;
+ if (tool->event_update == NULL)
+ tool->event_update = process_event_synth_event_update_stub;
if (tool->tracing_data == NULL)
tool->tracing_data = process_event_synth_tracing_data_stub;
if (tool->build_id == NULL)
@@ -346,6 +425,16 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->auxtrace = process_event_auxtrace_stub;
if (tool->auxtrace_error == NULL)
tool->auxtrace_error = process_event_auxtrace_error_stub;
+ if (tool->thread_map == NULL)
+ tool->thread_map = process_event_thread_map_stub;
+ if (tool->cpu_map == NULL)
+ tool->cpu_map = process_event_cpu_map_stub;
+ if (tool->stat_config == NULL)
+ tool->stat_config = process_event_stat_config_stub;
+ if (tool->stat == NULL)
+ tool->stat = process_stat_stub;
+ if (tool->stat_round == NULL)
+ tool->stat_round = process_stat_round_stub;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -569,6 +658,13 @@ static void perf_event__hdr_attr_swap(union perf_event *event,
mem_bswap_64(event->attr.id, size);
}
+static void perf_event__event_update_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_update.type = bswap_64(event->event_update.type);
+ event->event_update.id = bswap_64(event->event_update.id);
+}
+
static void perf_event__event_type_swap(union perf_event *event,
bool sample_id_all __maybe_unused)
{
@@ -616,6 +712,81 @@ static void perf_event__auxtrace_error_swap(union perf_event *event,
event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
}
+static void perf_event__thread_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ unsigned i;
+
+ event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+ for (i = 0; i < event->thread_map.nr; i++)
+ event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct cpu_map_data *data = &event->cpu_map.data;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_mask *mask;
+ unsigned i;
+
+ data->type = bswap_64(data->type);
+
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ cpus = (struct cpu_map_entries *)data->data;
+
+ cpus->nr = bswap_16(cpus->nr);
+
+ for (i = 0; i < cpus->nr; i++)
+ cpus->cpu[i] = bswap_16(cpus->cpu[i]);
+ break;
+ case PERF_CPU_MAP__MASK:
+ mask = (struct cpu_map_mask *) data->data;
+
+ mask->nr = bswap_16(mask->nr);
+ mask->long_size = bswap_16(mask->long_size);
+
+ switch (mask->long_size) {
+ case 4: mem_bswap_32(&mask->mask, mask->nr); break;
+ case 8: mem_bswap_64(&mask->mask, mask->nr); break;
+ default:
+ pr_err("cpu_map swap: unsupported long size\n");
+ }
+ default:
+ break;
+ }
+}
+
+static void perf_event__stat_config_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ u64 size;
+
+ size = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+ size += 1; /* nr item itself */
+ mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat.id = bswap_64(event->stat.id);
+ event->stat.thread = bswap_32(event->stat.thread);
+ event->stat.cpu = bswap_32(event->stat.cpu);
+ event->stat.val = bswap_64(event->stat.val);
+ event->stat.ena = bswap_64(event->stat.ena);
+ event->stat.run = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat_round.type = bswap_64(event->stat_round.type);
+ event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
typedef void (*perf_event__swap_op)(union perf_event *event,
bool sample_id_all);
@@ -643,6 +814,12 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
[PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
[PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
+ [PERF_RECORD_THREAD_MAP] = perf_event__thread_map_swap,
+ [PERF_RECORD_CPU_MAP] = perf_event__cpu_map_swap,
+ [PERF_RECORD_STAT_CONFIG] = perf_event__stat_config_swap,
+ [PERF_RECORD_STAT] = perf_event__stat_swap,
+ [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
+ [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
[PERF_RECORD_HEADER_MAX] = NULL,
};
@@ -1154,6 +1331,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
perf_session__set_comm_exec(session);
}
return err;
+ case PERF_RECORD_EVENT_UPDATE:
+ return tool->event_update(tool, event, &session->evlist);
case PERF_RECORD_HEADER_EVENT_TYPE:
/*
* Depreceated, but we need to handle it for sake
@@ -1179,6 +1358,16 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
return tool->auxtrace_error(tool, event, session);
+ case PERF_RECORD_THREAD_MAP:
+ return tool->thread_map(tool, event, session);
+ case PERF_RECORD_CPU_MAP:
+ return tool->cpu_map(tool, event, session);
+ case PERF_RECORD_STAT_CONFIG:
+ return tool->stat_config(tool, event, session);
+ case PERF_RECORD_STAT:
+ return tool->stat(tool, event, session);
+ case PERF_RECORD_STAT_ROUND:
+ return tool->stat_round(tool, event, session);
default:
return -EINVAL;
}
@@ -1311,17 +1500,20 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
-struct thread *perf_session__register_idle_thread(struct perf_session *session)
+int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;
+ int err = 0;
thread = machine__findnew_thread(&session->machines.host, 0, 0);
if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
pr_err("problem inserting idle task.\n");
- thread = NULL;
+ err = -1;
}
- return thread;
+ /* machine__findnew_thread() got the thread, so put it */
+ thread__put(thread);
+ return err;
}
static void perf_session__warn_about_errors(const struct perf_session *session)
@@ -1676,7 +1868,7 @@ int perf_session__process_events(struct perf_session *session)
u64 size = perf_data_file__size(session->file);
int err;
- if (perf_session__register_idle_thread(session) == NULL)
+ if (perf_session__register_idle_thread(session) < 0)
return -ENOMEM;
if (!perf_data_file__is_pipe(session->file))
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 3e900c0efc73..5f792e35d4c1 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -89,7 +89,7 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_
}
struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
-struct thread *perf_session__register_idle_thread(struct perf_session *session);
+int perf_session__register_idle_thread(struct perf_session *session);
size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2d8ccd4d9e1b..ec722346e6ff 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -4,6 +4,8 @@
#include "comm.h"
#include "symbol.h"
#include "evsel.h"
+#include "evlist.h"
+#include <traceevent/event-parse.h>
regex_t parent_regex;
const char default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -13,6 +15,7 @@ const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cy
const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
const char default_top_sort_order[] = "dso,symbol";
const char default_diff_sort_order[] = "dso,symbol";
+const char default_tracepoint_sort_order[] = "trace";
const char *sort_order;
const char *field_order;
regex_t ignore_callees_regex;
@@ -443,6 +446,70 @@ struct sort_entry sort_socket = {
.se_width_idx = HISTC_SOCKET,
};
+/* --sort trace */
+
+static char *get_trace_output(struct hist_entry *he)
+{
+ struct trace_seq seq;
+ struct perf_evsel *evsel;
+ struct pevent_record rec = {
+ .data = he->raw_data,
+ .size = he->raw_size,
+ };
+
+ evsel = hists_to_evsel(he->hists);
+
+ trace_seq_init(&seq);
+ if (symbol_conf.raw_trace) {
+ pevent_print_fields(&seq, he->raw_data, he->raw_size,
+ evsel->tp_format);
+ } else {
+ pevent_event_info(&seq, evsel->tp_format, &rec);
+ }
+ return seq.buffer;
+}
+
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(left->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return 0;
+
+ if (left->trace_output == NULL)
+ left->trace_output = get_trace_output(left);
+ if (right->trace_output == NULL)
+ right->trace_output = get_trace_output(right);
+
+ hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
+ hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
+
+ return strcmp(right->trace_output, left->trace_output);
+}
+
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(he->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+
+ if (he->trace_output == NULL)
+ he->trace_output = get_trace_output(he);
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+}
+
+struct sort_entry sort_trace = {
+ .se_header = "Trace output",
+ .se_cmp = sort__trace_cmp,
+ .se_snprintf = hist_entry__trace_snprintf,
+ .se_width_idx = HISTC_TRACE,
+};
+
/* sort keys for branch stacks */
static int64_t
@@ -1312,6 +1379,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+ DIM(SORT_TRACE, "trace", sort_trace),
};
#undef DIM
@@ -1529,6 +1597,455 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
return 0;
}
+struct hpp_dynamic_entry {
+ struct perf_hpp_fmt hpp;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ unsigned dynamic_len;
+ bool raw_trace;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+ if (!hde->hpp.len) {
+ int len = hde->dynamic_len;
+ int namelen = strlen(hde->field->name);
+ int fieldlen = hde->field->size;
+
+ if (namelen > len)
+ len = namelen;
+
+ if (!(hde->field->flags & FIELD_IS_STRING)) {
+ /* length for print hex numbers */
+ fieldlen = hde->field->size * 2 + 2;
+ }
+ if (fieldlen > len)
+ len = fieldlen;
+
+ hde->hpp.len = len;
+ }
+ return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,
+ struct hist_entry *he)
+{
+ char *str, *pos;
+ struct format_field *field = hde->field;
+ size_t namelen;
+ bool last = false;
+
+ if (hde->raw_trace)
+ return;
+
+ /* parse pretty print result and update max length */
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ size_t len;
+
+ str += namelen + 1;
+ len = pos - str;
+
+ if (len > hde->dynamic_len)
+ hde->dynamic_len = len;
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return len;
+}
+
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ return hists_to_evsel(hists) == hde->evsel;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+ char *str, *pos;
+ struct format_field *field;
+ size_t namelen;
+ bool last = false;
+ int ret;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ if (hde->raw_trace)
+ goto raw_field;
+
+ field = hde->field;
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ str += namelen + 1;
+ str = strndup(str, pos - str);
+
+ if (str == NULL)
+ return scnprintf(hpp->buf, hpp->size,
+ "%*.*s", len, len, "ERROR");
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+
+ if (str == NULL) {
+ struct trace_seq seq;
+raw_field:
+ trace_seq_init(&seq);
+ pevent_print_field(&seq, he->raw_data, hde->field);
+ str = seq.buffer;
+ }
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
+ free(str);
+ return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_dynamic_entry *hde;
+ struct format_field *field;
+ unsigned offset, size;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ field = hde->field;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ unsigned long long dyn;
+
+ pevent_read_number_field(field, a->raw_data, &dyn);
+ offset = dyn & 0xffff;
+ size = (dyn >> 16) & 0xffff;
+
+ /* record max width for output */
+ if (size > hde->dynamic_len)
+ hde->dynamic_len = size;
+ } else {
+ offset = field->offset;
+ size = field->size;
+
+ update_dynamic_len(hde, a);
+ update_dynamic_len(hde, b);
+ }
+
+ return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+ return fmt->cmp == __sort__hde_cmp;
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = malloc(sizeof(*hde));
+ if (hde == NULL) {
+ pr_debug("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hde->evsel = evsel;
+ hde->field = field;
+ hde->dynamic_len = 0;
+
+ hde->hpp.name = field->name;
+ hde->hpp.header = __sort__hde_header;
+ hde->hpp.width = __sort__hde_width;
+ hde->hpp.entry = __sort__hde_entry;
+ hde->hpp.color = NULL;
+
+ hde->hpp.cmp = __sort__hde_cmp;
+ hde->hpp.collapse = __sort__hde_cmp;
+ hde->hpp.sort = __sort__hde_cmp;
+
+ INIT_LIST_HEAD(&hde->hpp.list);
+ INIT_LIST_HEAD(&hde->hpp.sort_list);
+ hde->hpp.elide = false;
+ hde->hpp.len = 0;
+ hde->hpp.user_len = 0;
+
+ return hde;
+}
+
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+ char *event_name, *field_name, *opt_name;
+
+ event_name = str;
+ field_name = strchr(str, '.');
+
+ if (field_name) {
+ *field_name++ = '\0';
+ } else {
+ event_name = NULL;
+ field_name = str;
+ }
+
+ opt_name = strchr(field_name, '/');
+ if (opt_name)
+ *opt_name++ = '\0';
+
+ *event = event_name;
+ *field = field_name;
+ *opt = opt_name;
+
+ return 0;
+}
+
+/* find match evsel using a given event name. The event name can be:
+ * 1. '%' + event index (e.g. '%1' for first event)
+ * 2. full event name (e.g. sched:sched_switch)
+ * 3. partial event name (should not contain ':')
+ */
+static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name)
+{
+ struct perf_evsel *evsel = NULL;
+ struct perf_evsel *pos;
+ bool full_name;
+
+ /* case 1 */
+ if (event_name[0] == '%') {
+ int nr = strtol(event_name+1, NULL, 0);
+
+ if (nr > evlist->nr_entries)
+ return NULL;
+
+ evsel = perf_evlist__first(evlist);
+ while (--nr > 0)
+ evsel = perf_evsel__next(evsel);
+
+ return evsel;
+ }
+
+ full_name = !!strchr(event_name, ':');
+ evlist__for_each(evlist, pos) {
+ /* case 2 */
+ if (full_name && !strcmp(pos->name, event_name))
+ return pos;
+ /* case 3 */
+ if (!full_name && strstr(pos->name, event_name)) {
+ if (evsel) {
+ pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+ event_name, evsel->name, pos->name);
+ return NULL;
+ }
+ evsel = pos;
+ }
+ }
+
+ return evsel;
+}
+
+static int __dynamic_dimension__add(struct perf_evsel *evsel,
+ struct format_field *field,
+ bool raw_trace)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = __alloc_dynamic_entry(evsel, field);
+ if (hde == NULL)
+ return -ENOMEM;
+
+ hde->raw_trace = raw_trace;
+
+ perf_hpp__register_sort_field(&hde->hpp);
+ return 0;
+}
+
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+{
+ int ret;
+ struct format_field *field;
+
+ field = evsel->tp_format->format.fields;
+ while (field) {
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ if (ret < 0)
+ return ret;
+
+ field = field->next;
+ }
+ return 0;
+}
+
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+{
+ int ret;
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ ret = add_evsel_fields(evsel, raw_trace);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int add_all_matching_fields(struct perf_evlist *evlist,
+ char *field_name, bool raw_trace)
+{
+ int ret = -ESRCH;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ field = pevent_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL)
+ continue;
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+{
+ char *str, *event_name, *field_name, *opt_name;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ bool raw_trace = symbol_conf.raw_trace;
+ int ret = 0;
+
+ if (evlist == NULL)
+ return -ENOENT;
+
+ str = strdup(tok);
+ if (str == NULL)
+ return -ENOMEM;
+
+ if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (opt_name) {
+ if (strcmp(opt_name, "raw")) {
+ pr_debug("unsupported field option %s\n", opt_name);
+ ret = -EINVAL;
+ goto out;
+ }
+ raw_trace = true;
+ }
+
+ if (!strcmp(field_name, "trace_fields")) {
+ ret = add_all_dynamic_fields(evlist, raw_trace);
+ goto out;
+ }
+
+ if (event_name == NULL) {
+ ret = add_all_matching_fields(evlist, field_name, raw_trace);
+ goto out;
+ }
+
+ evsel = find_evsel(evlist, event_name);
+ if (evsel == NULL) {
+ pr_debug("Cannot find event: %s\n", event_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ pr_debug("%s is not a tracepoint event\n", event_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!strcmp(field_name, "*")) {
+ ret = add_evsel_fields(evsel, raw_trace);
+ } else {
+ field = pevent_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL) {
+ pr_debug("Cannot find event field for %s.%s\n",
+ event_name, field_name);
+ return -ENOENT;
+ }
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace);
+ }
+
+out:
+ free(str);
+ return ret;
+}
+
static int __sort_dimension__add(struct sort_dimension *sd)
{
if (sd->taken)
@@ -1583,7 +2100,8 @@ int hpp_dimension__add_output(unsigned col)
return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
}
-int sort_dimension__add(const char *tok)
+static int sort_dimension__add(const char *tok,
+ struct perf_evlist *evlist __maybe_unused)
{
unsigned int i;
@@ -1664,10 +2182,13 @@ int sort_dimension__add(const char *tok)
return 0;
}
+ if (!add_dynamic_entry(evlist, tok))
+ return 0;
+
return -ESRCH;
}
-static const char *get_default_sort_order(void)
+static const char *get_default_sort_order(struct perf_evlist *evlist)
{
const char *default_sort_orders[] = {
default_sort_order,
@@ -1675,14 +2196,33 @@ static const char *get_default_sort_order(void)
default_mem_sort_order,
default_top_sort_order,
default_diff_sort_order,
+ default_tracepoint_sort_order,
};
+ bool use_trace = true;
+ struct perf_evsel *evsel;
BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+ if (evlist == NULL)
+ goto out_no_evlist;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ use_trace = false;
+ break;
+ }
+ }
+
+ if (use_trace) {
+ sort__mode = SORT_MODE__TRACEPOINT;
+ if (symbol_conf.raw_trace)
+ return "trace_fields";
+ }
+out_no_evlist:
return default_sort_orders[sort__mode];
}
-static int setup_sort_order(void)
+static int setup_sort_order(struct perf_evlist *evlist)
{
char *new_sort_order;
@@ -1703,7 +2243,7 @@ static int setup_sort_order(void)
* because it's checked over the rest of the code.
*/
if (asprintf(&new_sort_order, "%s,%s",
- get_default_sort_order(), sort_order + 1) < 0) {
+ get_default_sort_order(evlist), sort_order + 1) < 0) {
error("Not enough memory to set up --sort");
return -ENOMEM;
}
@@ -1712,13 +2252,41 @@ static int setup_sort_order(void)
return 0;
}
-static int __setup_sorting(void)
+/*
+ * Adds 'pre,' prefix into 'str' is 'pre' is
+ * not already part of 'str'.
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+ char *n;
+
+ if (!str || strstr(str, pre))
+ return str;
+
+ if (asprintf(&n, "%s,%s", pre, str) < 0)
+ return NULL;
+
+ free(str);
+ return n;
+}
+
+static char *setup_overhead(char *keys)
+{
+ keys = prefix_if_not_in("overhead", keys);
+
+ if (symbol_conf.cumulate_callchain)
+ keys = prefix_if_not_in("overhead_children", keys);
+
+ return keys;
+}
+
+static int __setup_sorting(struct perf_evlist *evlist)
{
char *tmp, *tok, *str;
const char *sort_keys;
int ret = 0;
- ret = setup_sort_order();
+ ret = setup_sort_order(evlist);
if (ret)
return ret;
@@ -1732,7 +2300,7 @@ static int __setup_sorting(void)
return 0;
}
- sort_keys = get_default_sort_order();
+ sort_keys = get_default_sort_order(evlist);
}
str = strdup(sort_keys);
@@ -1741,9 +2309,20 @@ static int __setup_sorting(void)
return -ENOMEM;
}
+ /*
+ * Prepend overhead fields for backward compatibility.
+ */
+ if (!is_strict_order(field_order)) {
+ str = setup_overhead(str);
+ if (str == NULL) {
+ error("Not enough memory to setup overhead keys");
+ return -ENOMEM;
+ }
+ }
+
for (tok = strtok_r(str, ", ", &tmp);
tok; tok = strtok_r(NULL, ", ", &tmp)) {
- ret = sort_dimension__add(tok);
+ ret = sort_dimension__add(tok, evlist);
if (ret == -EINVAL) {
error("Invalid --sort key: `%s'", tok);
break;
@@ -1954,16 +2533,16 @@ out:
return ret;
}
-int setup_sorting(void)
+int setup_sorting(struct perf_evlist *evlist)
{
int err;
- err = __setup_sorting();
+ err = __setup_sorting(evlist);
if (err < 0)
return err;
if (parent_pattern != default_parent_pattern) {
- err = sort_dimension__add("parent");
+ err = sort_dimension__add("parent", evlist);
if (err < 0)
return err;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 31228851e397..687bbb124428 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -18,7 +18,7 @@
#include "debug.h"
#include "header.h"
-#include "parse-options.h"
+#include <subcmd/parse-options.h>
#include "parse-events.h"
#include "hist.h"
#include "thread.h"
@@ -122,6 +122,9 @@ struct hist_entry {
struct branch_info *branch_info;
struct hists *hists;
struct mem_info *mem_info;
+ void *raw_data;
+ u32 raw_size;
+ void *trace_output;
struct callchain_root callchain[0]; /* must be last member */
};
@@ -164,6 +167,7 @@ enum sort_mode {
SORT_MODE__MEMORY,
SORT_MODE__TOP,
SORT_MODE__DIFF,
+ SORT_MODE__TRACEPOINT,
};
enum sort_type {
@@ -180,6 +184,7 @@ enum sort_type {
SORT_LOCAL_WEIGHT,
SORT_GLOBAL_WEIGHT,
SORT_TRANSACTION,
+ SORT_TRACE,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -209,8 +214,6 @@ enum sort_type {
*/
struct sort_entry {
- struct list_head list;
-
const char *se_header;
int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
@@ -224,10 +227,11 @@ struct sort_entry {
extern struct sort_entry sort_thread;
extern struct list_head hist_entry__sort_list;
-int setup_sorting(void);
+struct perf_evlist;
+struct pevent;
+int setup_sorting(struct perf_evlist *evlist);
int setup_output_field(void);
void reset_output_field(void);
-extern int sort_dimension__add(const char *);
void sort__setup_elide(FILE *fp);
void perf_hpp__set_elide(int idx, bool elide);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2d9d8306dbd3..2f901d15e063 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -341,3 +341,65 @@ int perf_stat_process_counter(struct perf_stat_config *config,
return 0;
}
+
+int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_counts_values count;
+ struct stat_event *st = &event->stat;
+ struct perf_evsel *counter;
+
+ count.val = st->val;
+ count.ena = st->ena;
+ count.run = st->run;
+
+ counter = perf_evlist__id2evsel(session->evlist, st->id);
+ if (!counter) {
+ pr_err("Failed to resolve counter for stat event.\n");
+ return -EINVAL;
+ }
+
+ *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ counter->supported = true;
+ return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+ struct stat_event *st = (struct stat_event *) event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+ st->id, st->cpu, st->thread);
+ ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+ st->val, st->ena, st->run);
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+ struct stat_round_event *rd = (struct stat_round_event *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+ rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+ struct perf_stat_config sc;
+ size_t ret;
+
+ perf_event__read_stat_config(&sc, &event->stat_config);
+
+ ret = fprintf(fp, "\n");
+ ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+ ret += fprintf(fp, "... scale %d\n", sc.scale);
+ ret += fprintf(fp, "... interval %u\n", sc.interval);
+
+ return ret;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index da1d11c4f8c1..086f4e128d63 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -90,4 +90,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct perf_evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+int perf_event__process_stat_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index fc8781de62db..7f7e072be746 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -342,22 +342,6 @@ char *rtrim(char *s)
return s;
}
-/**
- * memdup - duplicate region of memory
- * @src: memory region to duplicate
- * @len: memory region length
- */
-void *memdup(const void *src, size_t len)
-{
- void *p;
-
- p = malloc(len);
- if (p)
- memcpy(p, src, len);
-
- return p;
-}
-
char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
{
/*
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 475d88d0a1c9..562b8ebeae5b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1026,8 +1026,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
curr_dso->long_name_len = dso->long_name_len;
curr_map = map__new2(start, curr_dso,
map->type);
+ dso__put(curr_dso);
if (curr_map == NULL) {
- dso__put(curr_dso);
goto out_elf_end;
}
if (adjust_kernel_syms) {
@@ -1042,7 +1042,14 @@ int dso__load_sym(struct dso *dso, struct map *map,
}
curr_dso->symtab_type = dso->symtab_type;
map_groups__insert(kmaps, curr_map);
+ /*
+ * Add it before we drop the referece to curr_map,
+ * i.e. while we still are sure to have a reference
+ * to this DSO via curr_map->dso.
+ */
dsos__add(&map->groups->machine->dsos, curr_dso);
+ /* kmaps already got it */
+ map__put(curr_map);
dso__set_loaded(curr_dso, map->type);
} else
curr_dso = curr_map->dso;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index cd08027a6d2c..3b2de6eb3376 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -39,6 +39,7 @@ struct symbol_conf symbol_conf = {
.cumulate_callchain = true,
.show_hist_headers = true,
.symfs = "",
+ .event_group = true,
};
static enum dso_binary_type binary_type_symtab[] = {
@@ -1860,24 +1861,44 @@ static void vmlinux_path__exit(void)
zfree(&vmlinux_path);
}
+static const char * const vmlinux_paths[] = {
+ "vmlinux",
+ "/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = {
+ "/boot/vmlinux-%s",
+ "/usr/lib/debug/boot/vmlinux-%s",
+ "/lib/modules/%s/build/vmlinux",
+ "/usr/lib/debug/lib/modules/%s/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ return -1;
+ ++vmlinux_path__nr_entries;
+
+ return 0;
+}
+
static int vmlinux_path__init(struct perf_env *env)
{
struct utsname uts;
char bf[PATH_MAX];
char *kernel_version;
+ unsigned int i;
- vmlinux_path = malloc(sizeof(char *) * 6);
+ vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+ ARRAY_SIZE(vmlinux_paths_upd)));
if (vmlinux_path == NULL)
return -1;
- vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+ if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+ goto out_fail;
/* only try kernel version if no symfs was given */
if (symbol_conf.symfs[0] != 0)
@@ -1892,28 +1913,11 @@ static int vmlinux_path__init(struct perf_env *env)
kernel_version = uts.release;
}
- snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/usr/lib/debug/boot/vmlinux-%s",
- kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
- snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
- kernel_version);
- vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
- if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
- goto out_fail;
- ++vmlinux_path__nr_entries;
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+ snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version);
+ if (vmlinux_path__add(bf) < 0)
+ goto out_fail;
+ }
return 0;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index dcd786e364f2..ccd1caa40e11 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -108,7 +108,9 @@ struct symbol_conf {
show_hist_headers,
branch_callstack,
has_filter,
- show_ref_callgraph;
+ show_ref_callgraph,
+ hide_unresolved,
+ raw_trace;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c
new file mode 100644
index 000000000000..90b47d8aa19c
--- /dev/null
+++ b/tools/perf/util/term.c
@@ -0,0 +1,35 @@
+#include "util.h"
+
+void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+ struct termios tc;
+
+ tcgetattr(0, old);
+ tc = *old;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+ tcsetattr(0, TCSANOW, &tc);
+}
diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h
new file mode 100644
index 000000000000..2c06a61846a1
--- /dev/null
+++ b/tools/perf/util/term.h
@@ -0,0 +1,10 @@
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 0a9ae8014729..dfd00c6dad6e 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -19,8 +19,10 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
thread->mg = map_groups__new(machine);
} else {
leader = __machine__findnew_thread(machine, pid, pid);
- if (leader)
+ if (leader) {
thread->mg = map_groups__get(leader->mg);
+ thread__put(leader);
+ }
}
return thread->mg ? 0 : -1;
@@ -53,7 +55,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
goto err_thread;
list_add(&comm->list, &thread->comm_list);
- atomic_set(&thread->refcnt, 0);
+ atomic_set(&thread->refcnt, 1);
RB_CLEAR_NODE(&thread->rb_node);
}
@@ -95,6 +97,10 @@ struct thread *thread__get(struct thread *thread)
void thread__put(struct thread *thread)
{
if (thread && atomic_dec_and_test(&thread->refcnt)) {
+ /*
+ * Remove it from the dead_threads list, as last reference
+ * is gone.
+ */
list_del_init(&thread->node);
thread__delete(thread);
}
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 6ec3c5ca438f..08afc6909953 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -13,6 +13,7 @@
#include "thread_map.h"
#include "util.h"
#include "debug.h"
+#include "event.h"
/* Skip "." and ".." directories */
static int filter(const struct dirent *dir)
@@ -304,6 +305,7 @@ out:
out_free_threads:
zfree(&threads);
+ strlist__delete(slist);
goto out;
}
@@ -408,3 +410,29 @@ void thread_map__read_comms(struct thread_map *threads)
for (i = 0; i < threads->nr; ++i)
comm_init(threads, i);
}
+
+static void thread_map__copy_event(struct thread_map *threads,
+ struct thread_map_event *event)
+{
+ unsigned i;
+
+ threads->nr = (int) event->nr;
+
+ for (i = 0; i < event->nr; i++) {
+ thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+ threads->map[i].comm = strndup(event->entries[i].comm, 16);
+ }
+
+ atomic_set(&threads->refcnt, 1);
+}
+
+struct thread_map *thread_map__new_event(struct thread_map_event *event)
+{
+ struct thread_map *threads;
+
+ threads = thread_map__alloc(event->nr);
+ if (threads)
+ thread_map__copy_event(threads, event);
+
+ return threads;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index af679d8a50f8..85e4c7c4fbde 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -16,11 +16,14 @@ struct thread_map {
struct thread_map_data map[];
};
+struct thread_map_event;
+
struct thread_map *thread_map__new_dummy(void);
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
struct thread_map *thread_map__new_by_uid(uid_t uid);
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__new_event(struct thread_map_event *event);
struct thread_map *thread_map__get(struct thread_map *map);
void thread_map__put(struct thread_map *map);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index cab8cc24831b..55de4cffcd4e 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -50,12 +50,18 @@ struct perf_tool {
throttle,
unthrottle;
event_attr_op attr;
+ event_attr_op event_update;
event_op2 tracing_data;
event_oe finished_round;
event_op2 build_id,
id_index,
auxtrace_info,
- auxtrace_error;
+ auxtrace_error,
+ thread_map,
+ cpu_map,
+ stat_config,
+ stat,
+ stat_round;
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index b85ee55cca0c..bce5b1dac268 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -65,6 +65,7 @@ int tracing_data_put(struct tracing_data *tdata);
struct addr_location;
struct perf_session;
+struct perf_stat_config;
struct scripting_ops {
const char *name;
@@ -75,6 +76,9 @@ struct scripting_ops {
struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al);
+ void (*process_stat)(struct perf_stat_config *config,
+ struct perf_evsel *evsel, u64 tstamp);
+ void (*process_stat_interval)(u64 tstamp);
int (*generate_script) (struct pevent *pevent, const char *outfile);
};
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 2dcfe9a7c8d0..cf5e250bc78e 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include "event.h"
#include "perf_regs.h"
+#include "callchain.h"
static char *debuginfo_path;
@@ -52,25 +53,28 @@ static int report_module(u64 ip, struct unwind_info *ui)
return __report_module(&al, ip, ui);
}
+/*
+ * Store all entries within entries array,
+ * we will process it after we finish unwind.
+ */
static int entry(u64 ip, struct unwind_info *ui)
{
- struct unwind_entry e;
+ struct unwind_entry *e = &ui->entries[ui->idx++];
struct addr_location al;
if (__report_module(&al, ip, ui))
return -1;
- e.ip = ip;
- e.map = al.map;
- e.sym = al.sym;
+ e->ip = ip;
+ e->map = al.map;
+ e->sym = al.sym;
pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
al.sym ? al.sym->name : "''",
ip,
al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
-
- return ui->cb(&e, ui->arg);
+ return 0;
}
static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
@@ -92,6 +96,16 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, addr, &al);
if (!al.map) {
+ /*
+ * We've seen cases (softice) where DWARF unwinder went
+ * through non executable mmaps, which we need to lookup
+ * in MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, addr, &al);
+ }
+
+ if (!al.map) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
return -1;
}
@@ -168,7 +182,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct perf_sample *data,
int max_stack)
{
- struct unwind_info ui = {
+ struct unwind_info *ui, ui_buf = {
.sample = data,
.thread = thread,
.machine = thread->mg->machine,
@@ -177,35 +191,54 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
.max_stack = max_stack,
};
Dwarf_Word ip;
- int err = -EINVAL;
+ int err = -EINVAL, i;
if (!data->user_regs.regs)
return -EINVAL;
- ui.dwfl = dwfl_begin(&offline_callbacks);
- if (!ui.dwfl)
+ ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+ if (!ui)
+ return -ENOMEM;
+
+ *ui = ui_buf;
+
+ ui->dwfl = dwfl_begin(&offline_callbacks);
+ if (!ui->dwfl)
goto out;
err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
if (err)
goto out;
- err = report_module(ip, &ui);
+ err = report_module(ip, ui);
if (err)
goto out;
- if (!dwfl_attach_state(ui.dwfl, EM_NONE, thread->tid, &callbacks, &ui))
+ if (!dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui))
goto out;
- err = dwfl_getthread_frames(ui.dwfl, thread->tid, frame_callback, &ui);
+ err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
- if (err && !ui.max_stack)
+ if (err && !ui->max_stack)
err = 0;
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < ui->idx && !err; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = ui->idx - i - 1;
+
+ err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+ }
+
out:
if (err)
pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
- dwfl_end(ui.dwfl);
+ dwfl_end(ui->dwfl);
+ free(ui);
return 0;
}
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
index 417a1426f3ad..58328669ed16 100644
--- a/tools/perf/util/unwind-libdw.h
+++ b/tools/perf/util/unwind-libdw.h
@@ -16,6 +16,8 @@ struct unwind_info {
unwind_entry_cb_t cb;
void *arg;
int max_stack;
+ int idx;
+ struct unwind_entry entries[];
};
#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index c83832b555e5..ee7e372297e5 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -319,6 +319,15 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
MAP__FUNCTION, ip, &al);
+ if (!al.map) {
+ /*
+ * We've seen cases (softice) where DWARF unwinder went
+ * through non executable mmaps, which we need to lookup
+ * in MAP__VARIABLE tree.
+ */
+ thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
+ MAP__VARIABLE, ip, &al);
+ }
return al.map;
}
@@ -416,20 +425,19 @@ get_proc_name(unw_addr_space_t __maybe_unused as,
static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
unw_word_t *data)
{
- struct addr_location al;
+ struct map *map;
ssize_t size;
- thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
- MAP__FUNCTION, addr, &al);
- if (!al.map) {
+ map = find_map(addr, ui);
+ if (!map) {
pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
return -1;
}
- if (!al.map->dso)
+ if (!map->dso)
return -1;
- size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+ size = dso__data_read_addr(map->dso, map, ui->machine,
addr, (u8 *) data, sizeof(*data));
return !(size == sizeof(*data));
@@ -614,23 +622,48 @@ void unwind__finish_access(struct thread *thread)
static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
void *arg, int max_stack)
{
+ u64 val;
+ unw_word_t ips[max_stack];
unw_addr_space_t addr_space;
unw_cursor_t c;
- int ret;
-
- addr_space = thread__priv(ui->thread);
- if (addr_space == NULL)
- return -1;
+ int ret, i = 0;
- ret = unw_init_remote(&c, addr_space, ui);
+ ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP);
if (ret)
- display_error(ret);
+ return ret;
- while (!ret && (unw_step(&c) > 0) && max_stack--) {
- unw_word_t ip;
+ ips[i++] = (unw_word_t) val;
- unw_get_reg(&c, UNW_REG_IP, &ip);
- ret = ip ? entry(ip, ui->thread, cb, arg) : 0;
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ addr_space = thread__priv(ui->thread);
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
}
return ret;
@@ -640,24 +673,17 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct thread *thread,
struct perf_sample *data, int max_stack)
{
- u64 ip;
struct unwind_info ui = {
.sample = data,
.thread = thread,
.machine = thread->mg->machine,
};
- int ret;
if (!data->user_regs.regs)
return -EINVAL;
- ret = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
- if (ret)
- return ret;
-
- ret = entry(ip, thread, cb, arg);
- if (ret)
- return -ENOMEM;
+ if (max_stack <= 0)
+ return -EINVAL;
- return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0;
+ return get_entries(&ui, cb, arg, max_stack);
}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 47b1e36c7ea0..88b8f8d21f58 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -16,12 +16,15 @@
#include <linux/kernel.h>
#include <unistd.h>
#include "callchain.h"
+#include "strlist.h"
+#include <subcmd/exec-cmd.h>
struct callchain_param callchain_param = {
.mode = CHAIN_GRAPH_ABS,
.min_percent = 0.5,
.order = ORDER_CALLEE,
- .key = CCKEY_FUNCTION
+ .key = CCKEY_FUNCTION,
+ .value = CCVAL_PERCENT,
};
/*
@@ -351,41 +354,8 @@ void sighandler_dump_stack(int sig)
{
psignal(sig, "perf");
dump_stack();
- exit(sig);
-}
-
-void get_term_dimensions(struct winsize *ws)
-{
- char *s = getenv("LINES");
-
- if (s != NULL) {
- ws->ws_row = atoi(s);
- s = getenv("COLUMNS");
- if (s != NULL) {
- ws->ws_col = atoi(s);
- if (ws->ws_row && ws->ws_col)
- return;
- }
- }
-#ifdef TIOCGWINSZ
- if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
- ws->ws_row && ws->ws_col)
- return;
-#endif
- ws->ws_row = 25;
- ws->ws_col = 80;
-}
-
-void set_term_quiet_input(struct termios *old)
-{
- struct termios tc;
-
- tcgetattr(0, old);
- tc = *old;
- tc.c_lflag &= ~(ICANON | ECHO);
- tc.c_cc[VMIN] = 0;
- tc.c_cc[VTIME] = 0;
- tcsetattr(0, TCSANOW, &tc);
+ signal(sig, SIG_DFL);
+ raise(sig);
}
int parse_nsec_time(const char *str, u64 *ptime)
@@ -695,3 +665,28 @@ fetch_kernel_version(unsigned int *puint, char *str,
*puint = (version << 16) + (patchlevel << 8) + sublevel;
return 0;
}
+
+const char *perf_tip(const char *dirpath)
+{
+ struct strlist *tips;
+ struct str_node *node;
+ char *tip = NULL;
+ struct strlist_config conf = {
+ .dirname = system_path(dirpath) ,
+ };
+
+ tips = strlist__new("tips.txt", &conf);
+ if (tips == NULL || strlist__nr_entries(tips) == 1) {
+ tip = (char *)"Cannot find tips.txt file";
+ goto out;
+ }
+
+ node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+ if (asprintf(&tip, "Tip: %s", node->s) < 0)
+ tip = (char *)"Tip: get more memory! ;-)";
+
+out:
+ strlist__delete(tips);
+
+ return tip;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index dcc659017976..fe915e616f9b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -53,6 +53,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
+#include <term.h>
#include <errno.h>
#include <limits.h>
#include <sys/param.h>
@@ -150,12 +151,6 @@ extern void set_warning_routine(void (*routine)(const char *err, va_list params)
extern int prefixcmp(const char *str, const char *prefix);
extern void set_buildid_dir(const char *dir);
-static inline const char *skip_prefix(const char *str, const char *prefix)
-{
- size_t len = strlen(prefix);
- return strncmp(str, prefix, len) ? NULL : str + len;
-}
-
#ifdef __GLIBC_PREREQ
#if __GLIBC_PREREQ(2, 1)
#define HAVE_STRCHRNUL
@@ -186,14 +181,6 @@ static inline void *zalloc(size_t size)
#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
-static inline int has_extension(const char *filename, const char *ext)
-{
- size_t len = strlen(filename);
- size_t extlen = strlen(ext);
-
- return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
-}
-
/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
@@ -282,9 +269,6 @@ void sighandler_dump_stack(int sig);
extern unsigned int page_size;
extern int cacheline_size;
-void get_term_dimensions(struct winsize *ws);
-void set_term_quiet_input(struct termios *old);
-
struct parse_tag {
char tag;
int mult;
@@ -358,4 +342,6 @@ int fetch_kernel_version(unsigned int *puint,
#define KVER_FMT "%d.%d.%d"
#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+const char *perf_tip(const char *dirpath);
+
#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5236e073919d..0f80eefb0bfd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -38,8 +38,6 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-grace=120
-
T=/tmp/kvm-test-1-run.sh.$$
trap 'rm -rf $T' 0
touch $T
@@ -152,7 +150,7 @@ fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
# Generate architecture-specific and interaction-specific qemu arguments
-qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$builddir/console.log"`"
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
# Generate qemu -append arguments
qemu_append="`identify_qemu_append "$QEMU"`"
@@ -168,7 +166,7 @@ then
touch $resdir/buildonly
exit 0
fi
-echo "NOTE: $QEMU either did not run or was interactive" > $builddir/console.log
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
qemu_pid=$!
@@ -214,7 +212,7 @@ then
else
break
fi
- if test $kruntime -ge $((seconds + grace))
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
@@ -224,6 +222,5 @@ then
done
fi
-cp $builddir/console.log $resdir
parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index f6483609ebc2..4a431767f77a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -42,6 +42,7 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KMAKE_ARG=""
+TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
resdir=""
configs=""
@@ -149,6 +150,11 @@ do
resdir=$2
shift
;;
+ --shutdown-grace)
+ checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+ TORTURE_SHUTDOWN_GRACE=$2
+ shift
+ ;;
--torture)
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
TORTURE_SUITE=$2
@@ -266,6 +272,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
if ! test -e $resdir
then
@@ -307,10 +314,10 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast)
+function dump(first, pastlast, batchnum)
{
- print "echo ----Start batch: `date`";
- print "echo ----Start batch: `date` >> " rd "/log";
+ print "echo ----Start batch " batchnum ": `date`";
+ print "echo ----Start batch " batchnum ": `date` >> " rd "/log";
jn=1
for (j = first; j < pastlast; j++) {
builddir=KVM "/b" jn
@@ -371,25 +378,28 @@ END {
njobs = i;
nc = ncpus;
first = 0;
+ batchnum = 1;
# Each pass through the following loop considers one test.
for (i = 0; i < njobs; i++) {
if (ncpus == 0) {
# Sequential test specified, each test its own batch.
- dump(i, i + 1);
+ dump(i, i + 1, batchnum);
first = i;
+ batchnum++;
} else if (nc < cpus[i] && i != 0) {
# Out of CPUs, dump out a batch.
- dump(first, i);
+ dump(first, i, batchnum);
first = i;
nc = ncpus;
+ batchnum++;
}
# Account for the CPUs needed by the current test.
nc -= cpus[i];
}
# Dump the last batch.
if (ncpus != 0)
- dump(first, i);
+ dump(first, i, batchnum);
}' >> $T/script
cat << ___EOF___ >> $T/script
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index d8f35cf116be..844787a0d7be 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,9 +24,6 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-T=/tmp/abat-chk-badness.sh.$$
-trap 'rm -f $T' 0
-
file="$1"
title="$2"
@@ -36,9 +33,41 @@ if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T
-if test -s $T
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
+if test -s $1.diags
then
print_warning Assertion failure in $file $title
- cat $T
+ # cat $1.diags
+ summary=""
+ n_badness=`grep -c Badness $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary Badness: $n_badness"
+ fi
+ n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+ if test "$n_warn" -ne 0
+ then
+ summary="$summary Warnings: $n_warn"
+ fi
+ n_bugs=`egrep -c 'BUG|Oops:' $1`
+ if test "$n_bugs" -ne 0
+ then
+ summary="$summary Bugs: $n_bugs"
+ fi
+ n_calltrace=`grep -c 'Call Trace:' $1`
+ if test "$n_calltrace" -ne 0
+ then
+ summary="$summary Call Traces: $n_calltrace"
+ fi
+ n_lockdep=`grep -c =========== $1`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary lockdep: $n_badness"
+ fi
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|Stall ended before state dump start' $1`
+ if test "$n_stalls" -ne 0
+ then
+ summary="$summary Stalls: $n_stalls"
+ fi
+ print_warning Summary: $summary
fi
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 9ef33a743b73..24396ae8355b 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -20,7 +20,6 @@ CONFIG_PROVE_RCU
CONFIG_NO_HZ_FULL_SYSIDLE
CONFIG_RCU_NOCB_CPU
-CONFIG_RCU_USER_QS
Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 657f3a035488..4e2b1893d40d 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -72,10 +72,6 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
Always used in KVM testing.
-CONFIG_RCU_USER_QS
-
- Redundant with CONFIG_NO_HZ_FULL.
-
CONFIG_PREEMPT_RCU
CONFIG_TREE_RCU
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index 627ec7425f78..fd88e3025bed 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -97,7 +97,7 @@ int get_cur_clocksource(char *buf, size_t size)
int change_clocksource(char *clocksource)
{
int fd;
- size_t size;
+ ssize_t size;
fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);