summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/debugfs-wilco-ec45
-rw-r--r--Documentation/ABI/testing/sysfs-class-power51
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu4
-rw-r--r--Documentation/accounting/psi.txt107
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst13
-rw-r--r--Documentation/admin-guide/hw-vuln/l1tf.rst (renamed from Documentation/admin-guide/l1tf.rst)1
-rw-r--r--Documentation/admin-guide/hw-vuln/mds.rst308
-rw-r--r--Documentation/admin-guide/index.rst6
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt54
-rw-r--r--Documentation/arm64/perf.txt85
-rw-r--r--Documentation/arm64/pointer-authentication.txt22
-rw-r--r--Documentation/core-api/kernel-api.rst4
-rw-r--r--Documentation/dev-tools/gcov.rst18
-rw-r--r--Documentation/device-mapper/dm-dust.txt272
-rw-r--r--Documentation/device-mapper/dm-integrity.txt32
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-system.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.txt1
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-sysregs.txt5
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt29
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.yaml36
-rw-r--r--Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml22
-rw-r--r--Documentation/devicetree/bindings/arm/omap/omap.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.yaml25
-rw-r--r--Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml807
-rw-r--r--Documentation/devicetree/bindings/bus/ti-sysc.txt6
-rw-r--r--Documentation/devicetree/bindings/clock/xlnx,zynqmp-clk.txt63
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt1
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml44
-rw-r--r--Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt54
-rw-r--r--Documentation/devicetree/bindings/fpga/xlnx,zynqmp-pcap-fpga.txt25
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca953x.txt2
-rw-r--r--Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt14
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.txt2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt1
-rw-r--r--Documentation/devicetree/bindings/input/gpio-vibrator.yaml37
-rw-r--r--Documentation/devicetree/bindings/input/lpc32xx-key.txt5
-rw-r--r--Documentation/devicetree/bindings/input/max77650-onkey.txt26
-rw-r--r--Documentation/devicetree/bindings/input/microchip,qt1050.txt78
-rw-r--r--Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt6
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix.txt3
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/iqs5xx.txt80
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml54
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt7
-rw-r--r--Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml129
-rw-r--r--Documentation/devicetree/bindings/leds/leds-max77650.txt57
-rw-r--r--Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.txt16
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt1
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.txt35
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt1
-rw-r--r--Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt17
-rw-r--r--Documentation/devicetree/bindings/mfd/max77620.txt9
-rw-r--r--Documentation/devicetree/bindings/mfd/max77650.txt46
-rw-r--r--Documentation/devicetree/bindings/mfd/stmfx.txt28
-rw-r--r--Documentation/devicetree/bindings/mfd/ti-lmu.txt4
-rw-r--r--Documentation/devicetree/bindings/misc/intel,ixp4xx-queue-manager.yaml49
-rw-r--r--Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt2
-rw-r--r--Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml97
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-nand.txt1
-rw-r--r--Documentation/devicetree/bindings/mtd/denali-nand.txt40
-rw-r--r--Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt32
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.txt16
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-controller.yaml143
-rw-r--r--Documentation/devicetree/bindings/mtd/nand.txt75
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.txt17
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-cfe-nor-partitions.txt24
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-imagetag.txt45
-rw-r--r--Documentation/devicetree/bindings/mtd/sunxi-nand.txt48
-rw-r--r--Documentation/devicetree/bindings/net/keystone-netcp.txt6
-rw-r--r--Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt4
-rw-r--r--Documentation/devicetree/bindings/pci/designware-pcie.txt7
-rw-r--r--Documentation/devicetree/bindings/pci/pci-keystone.txt58
-rw-r--r--Documentation/devicetree/bindings/pci/pci.txt50
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt116
-rw-r--r--Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt4
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.txt11
-rw-r--r--Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt1
-rw-r--r--Documentation/devicetree/bindings/power/supply/gpio-charger.txt8
-rw-r--r--Documentation/devicetree/bindings/power/supply/ingenic,battery.txt31
-rw-r--r--Documentation/devicetree/bindings/power/supply/lt3651-charger.txt (renamed from Documentation/devicetree/bindings/power/supply/ltc3651-charger.txt)10
-rw-r--r--Documentation/devicetree/bindings/power/supply/max77650-charger.txt28
-rw-r--r--Documentation/devicetree/bindings/power/supply/microchip,ucs1002.txt27
-rw-r--r--Documentation/devicetree/bindings/power/supply/olpc_battery.txt2
-rw-r--r--Documentation/devicetree/bindings/pps/pps-gpio.txt7
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt22
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-meson.txt3
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt1
-rw-r--r--Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt7
-rw-r--r--Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt51
-rw-r--r--Documentation/devicetree/bindings/serial/mtk-uart.txt4
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/pwrap.txt1
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/scpsys.txt5
-rw-r--r--Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt33
-rw-r--r--Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt62
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.txt14
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.txt1
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt10
-rw-r--r--Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml42
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt1
-rw-r--r--Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt1
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt468
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.yaml977
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/mtk-wdt.txt1
-rw-r--r--Documentation/driver-api/gpio/driver.rst361
-rw-r--r--Documentation/filesystems/autofs-mount-control.txt6
-rw-r--r--Documentation/filesystems/autofs.txt66
-rw-r--r--Documentation/firmware-guide/acpi/dsd/data-node-references.rst6
-rw-r--r--Documentation/firmware-guide/acpi/dsd/graph.rst120
-rw-r--r--Documentation/gpio/index.rst17
-rw-r--r--Documentation/gpio/sysfs.rst (renamed from Documentation/gpio/sysfs.txt)39
-rw-r--r--Documentation/index.rst2
-rw-r--r--Documentation/media/uapi/v4l/field-order.rst16
-rw-r--r--Documentation/networking/rxrpc.txt21
-rw-r--r--Documentation/sysctl/vm.txt12
-rw-r--r--Documentation/trace/ftrace.rst31
-rw-r--r--Documentation/trace/histogram.rst27
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl7
-rw-r--r--Documentation/translations/it_IT/process/license-rules.rst60
-rw-r--r--Documentation/virtual/kvm/api.txt225
-rw-r--r--Documentation/virtual/kvm/devices/vm.txt3
-rw-r--r--Documentation/virtual/kvm/devices/xive.txt197
-rw-r--r--Documentation/vm/hmm.rst94
-rw-r--r--Documentation/x86/amd-memory-encryption.rst (renamed from Documentation/x86/amd-memory-encryption.txt)13
-rw-r--r--Documentation/x86/boot.rst (renamed from Documentation/x86/boot.txt)528
-rw-r--r--Documentation/x86/conf.py10
-rw-r--r--Documentation/x86/earlyprintk.rst (renamed from Documentation/x86/earlyprintk.txt)122
-rw-r--r--Documentation/x86/entry_64.rst (renamed from Documentation/x86/entry_64.txt)12
-rw-r--r--Documentation/x86/exception-tables.rst (renamed from Documentation/x86/exception-tables.txt)247
-rw-r--r--Documentation/x86/i386/IO-APIC.rst (renamed from Documentation/x86/i386/IO-APIC.txt)28
-rw-r--r--Documentation/x86/i386/index.rst10
-rw-r--r--Documentation/x86/index.rst31
-rw-r--r--Documentation/x86/intel_mpx.rst (renamed from Documentation/x86/intel_mpx.txt)120
-rw-r--r--Documentation/x86/kernel-stacks.rst (renamed from Documentation/x86/kernel-stacks)20
-rw-r--r--Documentation/x86/mds.rst193
-rw-r--r--Documentation/x86/microcode.rst (renamed from Documentation/x86/microcode.txt)62
-rw-r--r--Documentation/x86/mtrr.rst354
-rw-r--r--Documentation/x86/mtrr.txt329
-rw-r--r--Documentation/x86/orc-unwinder.rst (renamed from Documentation/x86/orc-unwinder.txt)27
-rw-r--r--Documentation/x86/pat.rst242
-rw-r--r--Documentation/x86/pat.txt230
-rw-r--r--Documentation/x86/protection-keys.rst (renamed from Documentation/x86/protection-keys.txt)33
-rw-r--r--Documentation/x86/pti.rst (renamed from Documentation/x86/pti.txt)17
-rw-r--r--Documentation/x86/resctrl_ui.rst (renamed from Documentation/x86/resctrl_ui.txt)916
-rw-r--r--Documentation/x86/tlb.rst (renamed from Documentation/x86/tlb.txt)30
-rw-r--r--Documentation/x86/topology.rst (renamed from Documentation/x86/topology.txt)92
-rw-r--r--Documentation/x86/usb-legacy-support.rst (renamed from Documentation/x86/usb-legacy-support.txt)40
-rw-r--r--Documentation/x86/x86_64/5level-paging.rst (renamed from Documentation/x86/x86_64/5level-paging.txt)16
-rw-r--r--Documentation/x86/x86_64/boot-options.rst335
-rw-r--r--Documentation/x86/x86_64/boot-options.txt278
-rw-r--r--Documentation/x86/x86_64/cpu-hotplug-spec.rst (renamed from Documentation/x86/x86_64/cpu-hotplug-spec)5
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets.rst (renamed from Documentation/x86/x86_64/fake-numa-for-cpusets)25
-rw-r--r--Documentation/x86/x86_64/index.rst16
-rw-r--r--Documentation/x86/x86_64/machinecheck.rst (renamed from Documentation/x86/x86_64/machinecheck)12
-rw-r--r--Documentation/x86/x86_64/mm.rst161
-rw-r--r--Documentation/x86/x86_64/mm.txt153
-rw-r--r--Documentation/x86/x86_64/uefi.rst (renamed from Documentation/x86/x86_64/uefi.txt)30
-rw-r--r--Documentation/x86/zero-page.rst45
-rw-r--r--Documentation/x86/zero-page.txt40
-rw-r--r--Documentation/xilinx/eemi.txt4
162 files changed, 8514 insertions, 3152 deletions
diff --git a/Documentation/ABI/testing/debugfs-wilco-ec b/Documentation/ABI/testing/debugfs-wilco-ec
index f814f112e213..73a5a66ddca6 100644
--- a/Documentation/ABI/testing/debugfs-wilco-ec
+++ b/Documentation/ABI/testing/debugfs-wilco-ec
@@ -1,23 +1,46 @@
+What: /sys/kernel/debug/wilco_ec/h1_gpio
+Date: April 2019
+KernelVersion: 5.2
+Description:
+ As part of Chrome OS's FAFT (Fully Automated Firmware Testing)
+ tests, we need to ensure that the H1 chip is properly setting
+ some GPIO lines. The h1_gpio attribute exposes the state
+ of the lines:
+ - ENTRY_TO_FACT_MODE in BIT(0)
+ - SPI_CHROME_SEL in BIT(1)
+
+ Output will formatted with "0x%02x\n".
+
What: /sys/kernel/debug/wilco_ec/raw
Date: January 2019
KernelVersion: 5.1
Description:
Write and read raw mailbox commands to the EC.
- For writing:
- Bytes 0-1 indicate the message type:
- 00 F0 = Execute Legacy Command
- 00 F2 = Read/Write NVRAM Property
- Byte 2 provides the command code
- Bytes 3+ consist of the data passed in the request
+ You can write a hexadecimal sentence to raw, and that series of
+ bytes will be sent to the EC. Then, you can read the bytes of
+ response by reading from raw.
- At least three bytes are required, for the msg type and command,
- with additional bytes optional for additional data.
+ For writing, bytes 0-1 indicate the message type, one of enum
+ wilco_ec_msg_type. Byte 2+ consist of the data passed in the
+ request, starting at MBOX[0]
+
+ At least three bytes are required for writing, two for the type
+ and at least a single byte of data. Only the first
+ EC_MAILBOX_DATA_SIZE bytes of MBOX will be used.
Example:
// Request EC info type 3 (EC firmware build date)
- $ echo 00 f0 38 00 03 00 > raw
+ // Corresponds with sending type 0x00f0 with
+ // MBOX = [38, 00, 03, 00]
+ $ echo 00 f0 38 00 03 00 > /sys/kernel/debug/wilco_ec/raw
// View the result. The decoded ASCII result "12/21/18" is
// included after the raw hex.
- $ cat raw
- 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00 .12/21/18.8...
+ // Corresponds with MBOX = [00, 00, 31, 32, 2f, 32, 31, 38, ...]
+ $ cat /sys/kernel/debug/wilco_ec/raw
+ 00 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00 ..12/21/18.8...
+
+ Note that the first 32 bytes of the received MBOX[] will be
+ printed, even if some of the data is junk. It is up to you to
+ know how many of the first bytes of data are the actual
+ response.
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index 5e23e22dce1b..b77e30b9014e 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -114,15 +114,60 @@ Description:
Access: Read
Valid values: Represented in microamps
+What: /sys/class/power_supply/<supply_name>/charge_control_limit
+Date: Oct 2012
+Contact: linux-pm@vger.kernel.org
+Description:
+ Maximum allowable charging current. Used for charge rate
+ throttling for thermal cooling or improving battery health.
+
+ Access: Read, Write
+ Valid values: Represented in microamps
+
+What: /sys/class/power_supply/<supply_name>/charge_control_limit_max
+Date: Oct 2012
+Contact: linux-pm@vger.kernel.org
+Description:
+ Maximum legal value for the charge_control_limit property.
+
+ Access: Read
+ Valid values: Represented in microamps
+
+What: /sys/class/power_supply/<supply_name>/charge_control_start_threshold
+Date: April 2019
+Contact: linux-pm@vger.kernel.org
+Description:
+ Represents a battery percentage level, below which charging will
+ begin.
+
+ Access: Read, Write
+ Valid values: 0 - 100 (percent)
+
+What: /sys/class/power_supply/<supply_name>/charge_control_end_threshold
+Date: April 2019
+Contact: linux-pm@vger.kernel.org
+Description:
+ Represents a battery percentage level, above which charging will
+ stop.
+
+ Access: Read, Write
+ Valid values: 0 - 100 (percent)
+
What: /sys/class/power_supply/<supply_name>/charge_type
Date: July 2009
Contact: linux-pm@vger.kernel.org
Description:
Represents the type of charging currently being applied to the
- battery.
+ battery. "Trickle", "Fast", and "Standard" all mean different
+ charging speeds. "Adaptive" means that the charger uses some
+ algorithm to adjust the charge rate dynamically, without
+ any user configuration required. "Custom" means that the charger
+ uses the charge_control_* properties as configuration for some
+ different algorithm.
- Access: Read
- Valid values: "Unknown", "N/A", "Trickle", "Fast"
+ Access: Read, Write
+ Valid values: "Unknown", "N/A", "Trickle", "Fast", "Standard",
+ "Adaptive", "Custom"
What: /sys/class/power_supply/<supply_name>/charge_term_current
Date: July 2014
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 4fb76c0e8d30..1528239f69b2 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -484,6 +484,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
+ /sys/devices/system/cpu/vulnerabilities/mds
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
@@ -496,8 +497,7 @@ Description: Information about CPU vulnerabilities
"Vulnerable" CPU is affected and no mitigation in effect
"Mitigation: $M" CPU is affected and mitigation $M is in effect
- Details about the l1tf file can be found in
- Documentation/admin-guide/l1tf.rst
+ See also: Documentation/admin-guide/hw-vuln/index.rst
What: /sys/devices/system/cpu/smt
/sys/devices/system/cpu/smt/active
diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
index 7e71c9c1d8e9..5cbe5659e3b7 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.txt
@@ -63,6 +63,110 @@ as well as medium and long term trends. The total absolute stall time
spikes which wouldn't necessarily make a dent in the time averages,
or to average trends over custom time frames.
+Monitoring for pressure thresholds
+==================================
+
+Users can register triggers and use poll() to be woken up when resource
+pressure exceeds certain thresholds.
+
+A trigger describes the maximum cumulative stall time over a specific
+time window, e.g. 100ms of total stall time within any 500ms window to
+generate a wakeup event.
+
+To register a trigger user has to open psi interface file under
+/proc/pressure/ representing the resource to be monitored and write the
+desired threshold and time window. The open file descriptor should be
+used to wait for trigger events using select(), poll() or epoll().
+The following format is used:
+
+<some|full> <stall amount in us> <time window in us>
+
+For example writing "some 150000 1000000" into /proc/pressure/memory
+would add 150ms threshold for partial memory stall measured within
+1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
+would add 50ms threshold for full io stall measured within 1sec time window.
+
+Triggers can be set on more than one psi metric and more than one trigger
+for the same psi metric can be specified. However for each trigger a separate
+file descriptor is required to be able to poll it separately from others,
+therefore for each trigger a separate open() syscall should be made even
+when opening the same psi interface file.
+
+Monitors activate only when system enters stall state for the monitored
+psi metric and deactivates upon exit from the stall state. While system is
+in the stall state psi signal growth is monitored at a rate of 10 times per
+tracking window.
+
+The kernel accepts window sizes ranging from 500ms to 10s, therefore min
+monitoring update interval is 50ms and max is 1s. Min limit is set to
+prevent overly frequent polling. Max limit is chosen as a high enough number
+after which monitors are most likely not needed and psi averages can be used
+instead.
+
+When activated, psi monitor stays active for at least the duration of one
+tracking window to avoid repeated activations/deactivations when system is
+bouncing in and out of the stall state.
+
+Notifications to the userspace are rate-limited to one per tracking window.
+
+The trigger will de-register when the file descriptor used to define the
+trigger is closed.
+
+Userspace monitor usage example
+===============================
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <poll.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Monitor memory partial stall with 1s tracking window size
+ * and 150ms threshold.
+ */
+int main() {
+ const char trig[] = "some 150000 1000000";
+ struct pollfd fds;
+ int n;
+
+ fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+ if (fds.fd < 0) {
+ printf("/proc/pressure/memory open error: %s\n",
+ strerror(errno));
+ return 1;
+ }
+ fds.events = POLLPRI;
+
+ if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
+ printf("/proc/pressure/memory write error: %s\n",
+ strerror(errno));
+ return 1;
+ }
+
+ printf("waiting for events...\n");
+ while (1) {
+ n = poll(&fds, 1, -1);
+ if (n < 0) {
+ printf("poll error: %s\n", strerror(errno));
+ return 1;
+ }
+ if (fds.revents & POLLERR) {
+ printf("got POLLERR, event source is gone\n");
+ return 0;
+ }
+ if (fds.revents & POLLPRI) {
+ printf("event triggered!\n");
+ } else {
+ printf("unknown event received: 0x%x\n", fds.revents);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
Cgroup2 interface
=================
@@ -71,3 +175,6 @@ mounted, pressure stall information is also tracked for tasks grouped
into cgroups. Each subdirectory in the cgroupfs mountpoint contains
cpu.pressure, memory.pressure, and io.pressure files; the format is
the same as the /proc/pressure/ files.
+
+Per-cgroup psi monitors can be specified and used the same way as
+system-wide ones.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
new file mode 100644
index 000000000000..ffc064c1ec68
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -0,0 +1,13 @@
+========================
+Hardware vulnerabilities
+========================
+
+This section describes CPU vulnerabilities and provides an overview of the
+possible mitigations along with guidance for selecting mitigations if they
+are configurable at compile, boot or run time.
+
+.. toctree::
+ :maxdepth: 1
+
+ l1tf
+ mds
diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
index 9af977384168..31653a9f0e1b 100644
--- a/Documentation/admin-guide/l1tf.rst
+++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
@@ -445,6 +445,7 @@ The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
module parameter is ignored and writes to the sysfs file are rejected.
+.. _mitigation_selection:
Mitigation selection guide
--------------------------
diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
new file mode 100644
index 000000000000..e3a796c0d3a2
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/mds.rst
@@ -0,0 +1,308 @@
+MDS - Microarchitectural Data Sampling
+======================================
+
+Microarchitectural Data Sampling is a hardware vulnerability which allows
+unprivileged speculative access to data which is available in various CPU
+internal buffers.
+
+Affected processors
+-------------------
+
+This vulnerability affects a wide range of Intel processors. The
+vulnerability is not present on:
+
+ - Processors from AMD, Centaur and other non Intel vendors
+
+ - Older processor models, where the CPU family is < 6
+
+ - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
+
+ - Intel processors which have the ARCH_CAP_MDS_NO bit set in the
+ IA32_ARCH_CAPABILITIES MSR.
+
+Whether a processor is affected or not can be read out from the MDS
+vulnerability file in sysfs. See :ref:`mds_sys_info`.
+
+Not all processors are affected by all variants of MDS, but the mitigation
+is identical for all of them so the kernel treats them as a single
+vulnerability.
+
+Related CVEs
+------------
+
+The following CVE entries are related to the MDS vulnerability:
+
+ ============== ===== ===================================================
+ CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
+ CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
+ CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
+ CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
+ ============== ===== ===================================================
+
+Problem
+-------
+
+When performing store, load, L1 refill operations, processors write data
+into temporary microarchitectural structures (buffers). The data in the
+buffer can be forwarded to load operations as an optimization.
+
+Under certain conditions, usually a fault/assist caused by a load
+operation, data unrelated to the load memory address can be speculatively
+forwarded from the buffers. Because the load operation causes a fault or
+assist and its result will be discarded, the forwarded data will not cause
+incorrect program execution or state changes. But a malicious operation
+may be able to forward this speculative data to a disclosure gadget which
+allows in turn to infer the value via a cache side channel attack.
+
+Because the buffers are potentially shared between Hyper-Threads cross
+Hyper-Thread attacks are possible.
+
+Deeper technical information is available in the MDS specific x86
+architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the MDS vulnerabilities can be mounted from malicious non
+priviledged user space applications running on hosts or guest. Malicious
+guest OSes can obviously mount attacks as well.
+
+Contrary to other speculation based vulnerabilities the MDS vulnerability
+does not allow the attacker to control the memory target address. As a
+consequence the attacks are purely sampling based, but as demonstrated with
+the TLBleed attack samples can be postprocessed successfully.
+
+Web-Browsers
+^^^^^^^^^^^^
+
+ It's unclear whether attacks through Web-Browsers are possible at
+ all. The exploitation through Java-Script is considered very unlikely,
+ but other widely used web technologies like Webassembly could possibly be
+ abused.
+
+
+.. _mds_sys_info:
+
+MDS system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current MDS
+status of the system: whether the system is vulnerable, and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/mds
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable but microcode is not updated.
+
+ The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+If the processor is vulnerable then the following information is appended
+to the above information:
+
+ ======================== ============================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT mitigated' SMT is enabled and mitigated
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ============================================
+
+.. _vmwerv:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ If the processor is vulnerable, but the availability of the microcode based
+ mitigation mechanism is not advertised via CPUID the kernel selects a best
+ effort mitigation mode. This mode invokes the mitigation instructions
+ without a guarantee that they clear the CPU buffers.
+
+ This is done to address virtualization scenarios where the host has the
+ microcode update applied, but the hypervisor is not yet updated to expose
+ the CPUID to the guest. If the host has updated microcode the protection
+ takes effect otherwise a few cpu cycles are wasted pointlessly.
+
+ The state in the mds sysfs file reflects this situation accordingly.
+
+
+Mitigation mechanism
+-------------------------
+
+The kernel detects the affected CPUs and the presence of the microcode
+which is required.
+
+If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default. The mitigation can be controlled at boot
+time via a kernel command line option. See
+:ref:`mds_mitigation_control_command_line`.
+
+.. _cpu_buffer_clear:
+
+CPU buffer clearing
+^^^^^^^^^^^^^^^^^^^
+
+ The mitigation for MDS clears the affected CPU buffers on return to user
+ space and when entering a guest.
+
+ If SMT is enabled it also clears the buffers on idle entry when the CPU
+ is only affected by MSBDS and not any other MDS variant, because the
+ other variants cannot be protected against cross Hyper-Thread attacks.
+
+ For CPUs which are only affected by MSBDS the user space, guest and idle
+ transition mitigations are sufficient and SMT is not affected.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The protection for host to guest transition depends on the L1TF
+ vulnerability of the CPU:
+
+ - CPU is affected by L1TF:
+
+ If the L1D flush mitigation is enabled and up to date microcode is
+ available, the L1D flush mitigation is automatically protecting the
+ guest transition.
+
+ If the L1D flush mitigation is disabled then the MDS mitigation is
+ invoked explicit when the host MDS mitigation is enabled.
+
+ For details on L1TF and virtualization see:
+ :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst <mitigation_control_kvm>`.
+
+ - CPU is not affected by L1TF:
+
+ CPU buffers are flushed before entering the guest when the host MDS
+ mitigation is enabled.
+
+ The resulting MDS protection matrix for the host to guest transition:
+
+ ============ ===== ============= ============ =================
+ L1TF MDS VMX-L1FLUSH Host MDS MDS-State
+
+ Don't care No Don't care N/A Not affected
+
+ Yes Yes Disabled Off Vulnerable
+
+ Yes Yes Disabled Full Mitigated
+
+ Yes Yes Enabled Don't care Mitigated
+
+ No Yes N/A Off Vulnerable
+
+ No Yes N/A Full Mitigated
+ ============ ===== ============= ============ =================
+
+ This only covers the host to guest transition, i.e. prevents leakage from
+ host to guest, but does not protect the guest internally. Guests need to
+ have their own protections.
+
+.. _xeon_phi:
+
+XEON PHI specific considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The XEON PHI processor family is affected by MSBDS which can be exploited
+ cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
+ to use MWAIT in user space (Ring 3) which opens an potential attack vector
+ for malicious user space. The exposure can be disabled on the kernel
+ command line with the 'ring3mwait=disable' command line option.
+
+ XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
+ before the CPU enters a idle state. As XEON PHI is not affected by L1TF
+ either disabling SMT is not required for full protection.
+
+.. _mds_smt_control:
+
+SMT control
+^^^^^^^^^^^
+
+ All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
+ means on CPUs which are affected by MFBDS or MLPDS it is necessary to
+ disable SMT for full protection. These are most of the affected CPUs; the
+ exception is XEON PHI, see :ref:`xeon_phi`.
+
+ Disabling SMT can have a significant performance impact, but the impact
+ depends on the type of workloads.
+
+ See the relevant chapter in the L1TF mitigation documentation for details:
+ :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
+
+
+.. _mds_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the MDS mitigations at boot
+time with the option "mds=". The valid arguments for this option are:
+
+ ============ =============================================================
+ full If the CPU is vulnerable, enable all available mitigations
+ for the MDS vulnerability, CPU buffer clearing on exit to
+ userspace and when entering a VM. Idle transitions are
+ protected as well if SMT is enabled.
+
+ It does not automatically disable SMT.
+
+ full,nosmt The same as mds=full, with SMT disabled on vulnerable
+ CPUs. This is the complete mitigation.
+
+ off Disables MDS mitigations completely.
+
+ ============ =============================================================
+
+Not specifying this option is equivalent to "mds=full".
+
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace
+^^^^^^^^^^^^^^^^^^^^
+
+ If all userspace applications are from a trusted source and do not
+ execute untrusted code which is supplied externally, then the mitigation
+ can be disabled.
+
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The same considerations as above versus trusted user space apply.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The protection depends on the state of the L1TF mitigations.
+ See :ref:`virt_mechanism`.
+
+ If the MDS mitigation is enabled and SMT is disabled, guest to host and
+ guest to guest attacks are prevented.
+
+.. _mds_default_mitigations:
+
+Default mitigations
+-------------------
+
+ The kernel default mitigations for vulnerable processors are:
+
+ - Enable CPU buffer clearing
+
+ The kernel does not by default enforce the disabling of SMT, which leaves
+ SMT systems vulnerable when running untrusted code. The same rationale as
+ for L1TF applies.
+ See :ref:`Documentation/admin-guide/hw-vuln//l1tf.rst <default_mitigations>`.
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5b8286fdd91b..8001917ee012 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -17,14 +17,12 @@ etc.
kernel-parameters
devices
-This section describes CPU vulnerabilities and provides an overview of the
-possible mitigations along with guidance for selecting mitigations if they
-are configurable at compile, boot or run time.
+This section describes CPU vulnerabilities and their mitigations.
.. toctree::
:maxdepth: 1
- l1tf
+ hw-vuln/index
Here is a set of documents aimed at users who are trying to track down
problems and bugs in particular.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 08df58805703..52e6fbb042cc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1830,6 +1830,9 @@
ip= [IP_PNP]
See Documentation/filesystems/nfs/nfsroot.txt.
+ ipcmni_extend [KNL] Extend the maximum number of unique System V
+ IPC identifiers from 32,768 to 16,777,216.
+
irqaffinity= [SMP] Set the default irq affinity mask
The argument is a cpu list, as described above.
@@ -2143,7 +2146,7 @@
Default is 'flush'.
- For details see: Documentation/admin-guide/l1tf.rst
+ For details see: Documentation/admin-guide/hw-vuln/l1tf.rst
l2cr= [PPC]
@@ -2389,6 +2392,32 @@
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
+ mds= [X86,INTEL]
+ Control mitigation for the Micro-architectural Data
+ Sampling (MDS) vulnerability.
+
+ Certain CPUs are vulnerable to an exploit against CPU
+ internal buffers which can forward information to a
+ disclosure gadget under certain conditions.
+
+ In vulnerable processors, the speculatively
+ forwarded data can be used in a cache side channel
+ attack, to access data to which the attacker does
+ not have direct access.
+
+ This parameter controls the MDS mitigation. The
+ options are:
+
+ full - Enable MDS mitigation on vulnerable CPUs
+ full,nosmt - Enable MDS mitigation and disable
+ SMT on vulnerable CPUs
+ off - Unconditionally disable MDS mitigation
+
+ Not specifying this option is equivalent to
+ mds=full.
+
+ For details see: Documentation/admin-guide/hw-vuln/mds.rst
+
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
@@ -2565,6 +2594,7 @@
spec_store_bypass_disable=off [X86,PPC]
ssbd=force-off [ARM64]
l1tf=off [X86]
+ mds=off [X86]
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
@@ -2579,6 +2609,7 @@
if needed. This is for users who always want to
be fully mitigated, even if it means losing SMT.
Equivalent to: l1tf=flush,nosmt [X86]
+ mds=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -3146,6 +3177,16 @@
This will also cause panics on machine check exceptions.
Useful together with panic=30 to trigger a reboot.
+ page_alloc.shuffle=
+ [KNL] Boolean flag to control whether the page allocator
+ should randomize its free lists. The randomization may
+ be automatically enabled if the kernel detects it is
+ running on a platform with a direct-mapped memory-side
+ cache, and this parameter can be used to
+ override/disable that behavior. The state of the flag
+ can be read from sysfs at:
+ /sys/module/page_alloc/parameters/shuffle.
+
page_owner= [KNL] Boot-time page_owner enabling option.
Storage of the information about who allocated
each page is disabled in default. With this switch,
@@ -4026,7 +4067,9 @@
[[,]s[mp]#### \
[[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \
[[,]f[orce]
- Where reboot_mode is one of warm (soft) or cold (hard) or gpio,
+ Where reboot_mode is one of warm (soft) or cold (hard) or gpio
+ (prefix with 'panic_' to set mode for panic
+ reboot only),
reboot_type is one of bios, acpi, kbd, triple, efi, or pci,
reboot_force is either force or not specified,
reboot_cpu is s[mp]#### with #### being the processor
@@ -5217,6 +5260,13 @@
with /sys/devices/system/xen_memory/xen_memory0/scrub_pages.
Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT.
+ xen_timer_slop= [X86-64,XEN]
+ Set the timer slop (in nanoseconds) for the virtual Xen
+ timers (default is 100000). This adjusts the minimum
+ delta of virtualized Xen timers, where lower values
+ improve timer resolution at the expense of processing
+ more timer interrupts.
+
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
diff --git a/Documentation/arm64/perf.txt b/Documentation/arm64/perf.txt
new file mode 100644
index 000000000000..0d6a7d87d49e
--- /dev/null
+++ b/Documentation/arm64/perf.txt
@@ -0,0 +1,85 @@
+Perf Event Attributes
+=====================
+
+Author: Andrew Murray <andrew.murray@arm.com>
+Date: 2019-03-06
+
+exclude_user
+------------
+
+This attribute excludes userspace.
+
+Userspace always runs at EL0 and thus this attribute will exclude EL0.
+
+
+exclude_kernel
+--------------
+
+This attribute excludes the kernel.
+
+The kernel runs at EL2 with VHE and EL1 without. Guest kernels always run
+at EL1.
+
+For the host this attribute will exclude EL1 and additionally EL2 on a VHE
+system.
+
+For the guest this attribute will exclude EL1. Please note that EL2 is
+never counted within a guest.
+
+
+exclude_hv
+----------
+
+This attribute excludes the hypervisor.
+
+For a VHE host this attribute is ignored as we consider the host kernel to
+be the hypervisor.
+
+For a non-VHE host this attribute will exclude EL2 as we consider the
+hypervisor to be any code that runs at EL2 which is predominantly used for
+guest/host transitions.
+
+For the guest this attribute has no effect. Please note that EL2 is
+never counted within a guest.
+
+
+exclude_host / exclude_guest
+----------------------------
+
+These attributes exclude the KVM host and guest, respectively.
+
+The KVM host may run at EL0 (userspace), EL1 (non-VHE kernel) and EL2 (VHE
+kernel or non-VHE hypervisor).
+
+The KVM guest may run at EL0 (userspace) and EL1 (kernel).
+
+Due to the overlapping exception levels between host and guests we cannot
+exclusively rely on the PMU's hardware exception filtering - therefore we
+must enable/disable counting on the entry and exit to the guest. This is
+performed differently on VHE and non-VHE systems.
+
+For non-VHE systems we exclude EL2 for exclude_host - upon entering and
+exiting the guest we disable/enable the event as appropriate based on the
+exclude_host and exclude_guest attributes.
+
+For VHE systems we exclude EL1 for exclude_guest and exclude both EL0,EL2
+for exclude_host. Upon entering and exiting the guest we modify the event
+to include/exclude EL0 as appropriate based on the exclude_host and
+exclude_guest attributes.
+
+The statements above also apply when these attributes are used within a
+non-VHE guest however please note that EL2 is never counted within a guest.
+
+
+Accuracy
+--------
+
+On non-VHE hosts we enable/disable counters on the entry/exit of host/guest
+transition at EL2 - however there is a period of time between
+enabling/disabling the counters and entering/exiting the guest. We are
+able to eliminate counters counting host events on the boundaries of guest
+entry/exit when counting guest events by filtering out EL2 for
+exclude_host. However when using !exclude_hv there is a small blackout
+window at the guest entry/exit where host events are not captured.
+
+On VHE systems there are no blackout windows.
diff --git a/Documentation/arm64/pointer-authentication.txt b/Documentation/arm64/pointer-authentication.txt
index 5baca42ba146..fc71b33de87e 100644
--- a/Documentation/arm64/pointer-authentication.txt
+++ b/Documentation/arm64/pointer-authentication.txt
@@ -87,7 +87,21 @@ used to get and set the keys for a thread.
Virtualization
--------------
-Pointer authentication is not currently supported in KVM guests. KVM
-will mask the feature bits from ID_AA64ISAR1_EL1, and attempted use of
-the feature will result in an UNDEFINED exception being injected into
-the guest.
+Pointer authentication is enabled in KVM guest when each virtual cpu is
+initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
+requesting these two separate cpu features to be enabled. The current KVM
+guest implementation works by enabling both features together, so both
+these userspace flags are checked before enabling pointer authentication.
+The separate userspace flag will allow to have no userspace ABI changes
+if support is added in the future to allow these two features to be
+enabled independently of one another.
+
+As Arm Architecture specifies that Pointer Authentication feature is
+implemented along with the VHE feature so KVM arm64 ptrauth code relies
+on VHE mode to be present.
+
+Additionally, when these vcpu feature flags are not set then KVM will
+filter out the Pointer Authentication system key registers from
+KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
+register. Any attempt to use the Pointer Authentication instructions will
+result in an UNDEFINED exception being injected into the guest.
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 71f5d2fe39b7..a29c99d13331 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -147,10 +147,10 @@ Division Functions
.. kernel-doc:: include/linux/math64.h
:internal:
-.. kernel-doc:: lib/div64.c
+.. kernel-doc:: lib/math/div64.c
:functions: div_s64_rem div64_u64_rem div64_u64 div64_s64
-.. kernel-doc:: lib/gcd.c
+.. kernel-doc:: lib/math/gcd.c
:export:
UUID/GUID
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
index 69a7d90c320a..46aae52a41d0 100644
--- a/Documentation/dev-tools/gcov.rst
+++ b/Documentation/dev-tools/gcov.rst
@@ -34,10 +34,6 @@ Configure the kernel with::
CONFIG_DEBUG_FS=y
CONFIG_GCOV_KERNEL=y
-select the gcc's gcov format, default is autodetect based on gcc version::
-
- CONFIG_GCOV_FORMAT_AUTODETECT=y
-
and to get coverage data for the entire kernel::
CONFIG_GCOV_PROFILE_ALL=y
@@ -169,6 +165,20 @@ b) gcov is run on the BUILD machine
[user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+Note on compilers
+-----------------
+
+GCC and LLVM gcov tools are not necessarily compatible. Use gcov_ to work with
+GCC-generated .gcno and .gcda files, and use llvm-cov_ for Clang.
+
+.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
+
+Build differences between GCC and Clang gcov are handled by Kconfig. It
+automatically selects the appropriate gcov format depending on the detected
+toolchain.
+
+
Troubleshooting
---------------
diff --git a/Documentation/device-mapper/dm-dust.txt b/Documentation/device-mapper/dm-dust.txt
new file mode 100644
index 000000000000..954d402a1f6a
--- /dev/null
+++ b/Documentation/device-mapper/dm-dust.txt
@@ -0,0 +1,272 @@
+dm-dust
+=======
+
+This target emulates the behavior of bad sectors at arbitrary
+locations, and the ability to enable the emulation of the failures
+at an arbitrary time.
+
+This target behaves similarly to a linear target. At a given time,
+the user can send a message to the target to start failing read
+requests on specific blocks (to emulate the behavior of a hard disk
+drive with bad sectors).
+
+When the failure behavior is enabled (i.e.: when the output of
+"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
+in the "bad block list" will fail with EIO ("Input/output error").
+
+Writes of blocks in the "bad block list will result in the following:
+
+1. Remove the block from the "bad block list".
+2. Successfully complete the write.
+
+This emulates the "remapped sector" behavior of a drive with bad
+sectors.
+
+Normally, a drive that is encountering bad sectors will most likely
+encounter more bad sectors, at an unknown time or location.
+With dm-dust, the user can use the "addbadblock" and "removebadblock"
+messages to add arbitrary bad blocks at new locations, and the
+"enable" and "disable" messages to modulate the state of whether the
+configured "bad blocks" will be treated as bad, or bypassed.
+This allows the pre-writing of test data and metadata prior to
+simulating a "failure" event where bad sectors start to appear.
+
+Table parameters:
+-----------------
+<device_path> <offset> <blksz>
+
+Mandatory parameters:
+ <device_path>: path to the block device.
+ <offset>: offset to data area from start of device_path
+ <blksz>: block size in bytes
+ (minimum 512, maximum 1073741824, must be a power of 2)
+
+Usage instructions:
+-------------------
+
+First, find the size (in 512-byte sectors) of the device to be used:
+
+$ sudo blockdev --getsz /dev/vdb1
+33552384
+
+Create the dm-dust device:
+(For a device with a block size of 512 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
+
+(For a device with a block size of 4096 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
+
+Check the status of the read behavior ("bypass" indicates that all I/O
+will be passed through to the underlying device):
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
+128+0 records in
+128+0 records out
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+Adding and removing bad blocks:
+-------------------------------
+
+At any time (i.e.: whether the device has the "bad block" emulation
+enabled or disabled), bad blocks may be added or removed from the
+device via the "addbadblock" and "removebadblock" messages:
+
+$ sudo dmsetup message dust1 0 addbadblock 60
+kernel: device-mapper: dust: badblock added at block 60
+
+$ sudo dmsetup message dust1 0 addbadblock 67
+kernel: device-mapper: dust: badblock added at block 67
+
+$ sudo dmsetup message dust1 0 addbadblock 72
+kernel: device-mapper: dust: badblock added at block 72
+
+These bad blocks will be stored in the "bad block list".
+While the device is in "bypass" mode, reads and writes will succeed:
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+Enabling block read failures:
+-----------------------------
+
+To enable the "fail read on bad block" behavior, send the "enable" message:
+
+$ sudo dmsetup message dust1 0 enable
+kernel: device-mapper: dust: enabling read failures on bad sectors
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block
+
+With the device in "fail read on bad block" mode, attempting to read a
+block will encounter an "Input/output error":
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
+dd: error reading '/dev/mapper/dust1': Input/output error
+0+0 records in
+0+0 records out
+0 bytes copied, 0.00040651 s, 0.0 kB/s
+
+...and writing to the bad blocks will remove the blocks from the list,
+therefore emulating the "remap" behavior of hard disk drives:
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+kernel: device-mapper: dust: block 60 removed from badblocklist by write
+kernel: device-mapper: dust: block 67 removed from badblocklist by write
+kernel: device-mapper: dust: block 72 removed from badblocklist by write
+kernel: device-mapper: dust: block 87 removed from badblocklist by write
+
+Bad block add/remove error handling:
+------------------------------------
+
+Attempting to add a bad block that already exists in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 addbadblock 88
+device-mapper: message ioctl on dust1 failed: Invalid argument
+kernel: device-mapper: dust: block 88 already in badblocklist
+
+Attempting to remove a bad block that doesn't exist in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 removebadblock 87
+device-mapper: message ioctl on dust1 failed: Invalid argument
+kernel: device-mapper: dust: block 87 not found in badblocklist
+
+Counting the number of bad blocks in the bad block list:
+--------------------------------------------------------
+
+To count the number of bad blocks configured in the device, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 countbadblocks
+
+A message will print with the number of bad blocks currently
+configured on the device:
+
+kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
+
+Querying for specific bad blocks:
+---------------------------------
+
+To find out if a specific block is in the bad block list, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 queryblock 72
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 found in badblocklist
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 not found in badblocklist
+
+The "queryblock" message command will work in both the "enabled"
+and "disabled" modes, allowing the verification of whether a block
+will be treated as "bad" without having to issue I/O to the device,
+or having to "enable" the bad block emulation.
+
+Clearing the bad block list:
+----------------------------
+
+To clear the bad block list (without needing to individually run
+a "removebadblock" message command for every block), run the
+following message command:
+
+$ sudo dmsetup message dust1 0 clearbadblocks
+
+After clearing the bad block list, the following message will appear:
+
+kernel: device-mapper: dust: clearbadblocks: badblocks cleared
+
+If there were no bad blocks to clear, the following message will
+appear:
+
+kernel: device-mapper: dust: clearbadblocks: no badblocks found
+
+Message commands list:
+----------------------
+
+Below is a list of the messages that can be sent to a dust device:
+
+Operations on blocks (requires a <blknum> argument):
+
+addbadblock <blknum>
+queryblock <blknum>
+removebadblock <blknum>
+
+...where <blknum> is a block number within range of the device
+ (corresponding to the block size of the device.)
+
+Single argument message commands:
+
+countbadblocks
+clearbadblocks
+disable
+enable
+quiet
+
+Device removal:
+---------------
+
+When finished, remove the device via the "dmsetup remove" command:
+
+$ sudo dmsetup remove dust1
+
+Quiet mode:
+-----------
+
+On test runs with many bad blocks, it may be desirable to avoid
+excessive logging (from bad blocks added, removed, or "remapped").
+This can be done by enabling "quiet mode" via the following message:
+
+$ sudo dmsetup message dust1 0 quiet
+
+This will suppress log messages from add / remove / removed by write
+operations. Log messages from "countbadblocks" or "queryblock"
+message commands will still print in quiet mode.
+
+The status of quiet mode can be seen by running "dmsetup status":
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block quiet
+
+To disable quiet mode, send the "quiet" message again:
+
+$ sudo dmsetup message dust1 0 quiet
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block verbose
+
+(The presence of "verbose" indicates normal logging.)
+
+"Why not...?"
+-------------
+
+scsi_debug has a "medium error" mode that can fail reads on one
+specified sector (sector 0x1234, hardcoded in the source code), but
+it uses RAM for the persistent storage, which drastically decreases
+the potential device size.
+
+dm-flakey fails all I/O from all block locations at a specified time
+frequency, and not a given point in time.
+
+When a bad sector occurs on a hard disk drive, reads to that sector
+are failed by the device, usually resulting in an error code of EIO
+("I/O error") or ENODATA ("No data available"). However, a write to
+the sector may succeed, and result in the sector becoming readable
+after the device controller no longer experiences errors reading the
+sector (or after a reallocation of the sector). However, there may
+be bad sectors that occur on the device in the future, in a different,
+unpredictable location.
+
+This target seeks to provide a device that can exhibit the behavior
+of a bad sector at a known sector location, at a known time, based
+on a large storage device (at least tens of gigabytes, not occupying
+system memory).
diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
index 297251b0d2d5..d63d78ffeb73 100644
--- a/Documentation/device-mapper/dm-integrity.txt
+++ b/Documentation/device-mapper/dm-integrity.txt
@@ -21,6 +21,13 @@ mode it calculates and verifies the integrity tag internally. In this
mode, the dm-integrity target can be used to detect silent data
corruption on the disk or in the I/O path.
+There's an alternate mode of operation where dm-integrity uses bitmap
+instead of a journal. If a bit in the bitmap is 1, the corresponding
+region's data and integrity tags are not synchronized - if the machine
+crashes, the unsynchronized regions will be recalculated. The bitmap mode
+is faster than the journal mode, because we don't have to write the data
+twice, but it is also less reliable, because if data corruption happens
+when the machine crashes, it may not be detected.
When loading the target for the first time, the kernel driver will format
the device. But it will only format the device if the superblock contains
@@ -59,6 +66,10 @@ Target arguments:
either both data and tag or none of them are written. The
journaled mode degrades write throughput twice because the
data have to be written twice.
+ B - bitmap mode - data and metadata are written without any
+ synchronization, the driver maintains a bitmap of dirty
+ regions where data and metadata don't match. This mode can
+ only be used with internal hash.
R - recovery mode - in this mode, journal is not replayed,
checksums are not checked and writes to the device are not
allowed. This mode is useful for data recovery if the
@@ -79,6 +90,10 @@ interleave_sectors:number
a power of two. If the device is already formatted, the value from
the superblock is used.
+meta_device:device
+ Don't interleave the data and metadata on on device. Use a
+ separate device for metadata.
+
buffer_sectors:number
The number of sectors in one buffer. The value is rounded down to
a power of two.
@@ -146,6 +161,15 @@ block_size:number
Supported values are 512, 1024, 2048 and 4096 bytes. If not
specified the default block size is 512 bytes.
+sectors_per_bit:number
+ In the bitmap mode, this parameter specifies the number of
+ 512-byte sectors that corresponds to one bitmap bit.
+
+bitmap_flush_interval:number
+ The bitmap flush interval in milliseconds. The metadata buffers
+ are synchronized when this interval expires.
+
+
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
be changed when reloading the target (load an inactive table and swap the
tables with suspend and resume). The other arguments should not be changed
@@ -167,7 +191,13 @@ The layout of the formatted block device:
provides (i.e. the size of the device minus the size of all
metadata and padding). The user of this target should not send
bios that access data beyond the "provided data sectors" limit.
- * flags - a flag is set if journal_mac is used
+ * flags
+ SB_FLAG_HAVE_JOURNAL_MAC - a flag is set if journal_mac is used
+ SB_FLAG_RECALCULATING - recalculating is in progress
+ SB_FLAG_DIRTY_BITMAP - journal area contains the bitmap of dirty
+ blocks
+ * log2(sectors per block)
+ * a position where recalculating finished
* journal
The journal is divided into sections, each section contains:
* metadata area (4kiB), it contains journal entries
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
index f4d04a067282..82edbaaa3f85 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
@@ -11,3 +11,15 @@ Example:
reg = <0xffd08000 0x1000>;
cpu1-start-addr = <0xffd080c4>;
};
+
+ARM64 - Stratix10
+Required properties:
+- compatible : "altr,sys-mgr-s10"
+- reg : Should contain 1 register range(address and length)
+ for system manager register.
+
+Example:
+ sysmgr@ffd12000 {
+ compatible = "altr,sys-mgr-s10";
+ reg = <0xffd12000 0x228>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
index 7f40cb5f490b..061f7b98a07f 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.txt
+++ b/Documentation/devicetree/bindings/arm/amlogic.txt
@@ -110,6 +110,7 @@ Board compatible values (alphabetically, grouped by SoC):
- "amlogic,u200" (Meson g12a s905d2)
- "amediatech,x96-max" (Meson g12a s905x2)
+ - "seirobotics,sei510" (Meson g12a s905x2)
Amlogic Meson Firmware registers Interface
------------------------------------------
diff --git a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
index e61d00e25b95..9fbde401a090 100644
--- a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
@@ -84,7 +84,7 @@ SHDWC SAMA5D2-Compatible Shutdown Controller
1) shdwc node
required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
+- compatible: should be "atmel,sama5d2-shdwc" or "microchip,sam9x60-shdwc".
- reg: should contain registers location and length
- clocks: phandle to input clock.
- #address-cells: should be one. The cell is the wake-up input index.
@@ -96,6 +96,9 @@ optional properties:
microseconds. It's usually a board-related property.
- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+optional microchip,sam9x60-shdwc properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
The node contains child nodes for each wake-up input that the platform uses.
2) input nodes
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
index 72d481c8dd48..5d7dbabbb784 100644
--- a/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,scu.txt
@@ -22,9 +22,11 @@ Required properties:
-------------------
- compatible: should be "fsl,imx-scu".
- mbox-names: should include "tx0", "tx1", "tx2", "tx3",
- "rx0", "rx1", "rx2", "rx3".
-- mboxes: List of phandle of 4 MU channels for tx and 4 MU channels
- for rx. All 8 MU channels must be in the same MU instance.
+ "rx0", "rx1", "rx2", "rx3";
+ include "gip3" if want to support general MU interrupt.
+- mboxes: List of phandle of 4 MU channels for tx, 4 MU channels for
+ rx, and 1 optional MU channel for general interrupt.
+ All MU channels must be in the same MU instance.
Cross instances are not allowed. The MU instance can only
be one of LSIO MU0~M4 for imx8qxp and imx8qm. Users need
to make sure use the one which is not conflict with other
@@ -34,6 +36,7 @@ Required properties:
Channel 1 must be "tx1" or "rx1".
Channel 2 must be "tx2" or "rx2".
Channel 3 must be "tx3" or "rx3".
+ General interrupt rx channel must be "gip3".
e.g.
mboxes = <&lsio_mu1 0 0
&lsio_mu1 0 1
@@ -42,10 +45,18 @@ Required properties:
&lsio_mu1 1 0
&lsio_mu1 1 1
&lsio_mu1 1 2
- &lsio_mu1 1 3>;
+ &lsio_mu1 1 3
+ &lsio_mu1 3 3>;
See Documentation/devicetree/bindings/mailbox/fsl,mu.txt
for detailed mailbox binding.
+Note: Each mu which supports general interrupt should have an alias correctly
+numbered in "aliases" node.
+e.g.
+aliases {
+ mu1 = &lsio_mu1;
+};
+
i.MX SCU Client Device Node:
============================================================
@@ -124,6 +135,10 @@ Required properties:
Example (imx8qxp):
-------------
+aliases {
+ mu1 = &lsio_mu1;
+};
+
lsio_mu1: mailbox@5d1c0000 {
...
#mbox-cells = <2>;
@@ -133,7 +148,8 @@ firmware {
scu {
compatible = "fsl,imx-scu";
mbox-names = "tx0", "tx1", "tx2", "tx3",
- "rx0", "rx1", "rx2", "rx3";
+ "rx0", "rx1", "rx2", "rx3",
+ "gip3";
mboxes = <&lsio_mu1 0 0
&lsio_mu1 0 1
&lsio_mu1 0 2
@@ -141,7 +157,8 @@ firmware {
&lsio_mu1 1 0
&lsio_mu1 1 1
&lsio_mu1 1 2
- &lsio_mu1 1 3>;
+ &lsio_mu1 1 3
+ &lsio_mu1 3 3>;
clk: clk {
compatible = "fsl,imx8qxp-clk", "fsl,scu-clk";
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index 7e2cd6ad26bd..407138ebc0d0 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -51,6 +51,13 @@ properties:
- const: i2se,duckbill-2
- const: fsl,imx28
+ - description: i.MX50 based Boards
+ items:
+ - enum:
+ - fsl,imx50-evk
+ - kobo,aura
+ - const: fsl,imx50
+
- description: i.MX51 Babbage Board
items:
- enum:
@@ -67,6 +74,7 @@ properties:
- fsl,imx53-evk
- fsl,imx53-qsb
- fsl,imx53-smd
+ - menlo,m53menlo
- const: fsl,imx53
- description: i.MX6Q based Boards
@@ -90,6 +98,7 @@ properties:
- description: i.MX6DL based Boards
items:
- enum:
+ - eckelmann,imx6dl-ci4x10
- fsl,imx6dl-sabreauto # i.MX6 DualLite/Solo SABRE Automotive Board
- fsl,imx6dl-sabresd # i.MX6 DualLite SABRE Smart Device Board
- technologic,imx6dl-ts4900
@@ -137,10 +146,18 @@ properties:
- const: fsl,imx6ull # This seems odd. Should be last?
- const: fsl,imx6ulz
+ - description: i.MX7S based Boards
+ items:
+ - enum:
+ - tq,imx7s-mba7 # i.MX7S TQ MBa7 with TQMa7S SoM
+ - const: fsl,imx7s
+
- description: i.MX7D based Boards
items:
- enum:
- fsl,imx7d-sdb # i.MX7 SabreSD Board
+ - tq,imx7d-mba7 # i.MX7D TQ MBa7 with TQMa7D SoM
+ - zii,imx7d-rpu2 # ZII RPU2 Board
- const: fsl,imx7d
- description:
@@ -154,6 +171,12 @@ properties:
- const: compulab,cl-som-imx7
- const: fsl,imx7d
+ - description: i.MX8MM based Boards
+ items:
+ - enum:
+ - fsl,imx8mm-evk # i.MX8MM EVK Board
+ - const: fsl,imx8mm
+
- description: i.MX8QXP based Boards
items:
- enum:
@@ -176,6 +199,19 @@ properties:
- fsl,vf610
- fsl,vf610m4
+ - description: ZII's VF610 based Boards
+ items:
+ - enum:
+ - zii,vf610cfu1 # ZII VF610 CFU1 Board
+ - zii,vf610dev-c # ZII VF610 Development Board, Rev C
+ - zii,vf610dev-b # ZII VF610 Development Board, Rev B
+ - zii,vf610scu4-aib # ZII VF610 SCU4 AIB
+ - zii,vf610dtu # ZII VF610 SSMB DTU Board
+ - zii,vf610spu3 # ZII VF610 SSMB SPU3 Board
+ - zii,vf610spb4 # ZII VF610 SPB4 Board
+ - const: zii,vf610dev
+ - const: fsl,vf610
+
- description: LS1012A based Boards
items:
- enum:
diff --git a/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
new file mode 100644
index 000000000000..f4f7451e5e8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/intel-ixp4xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel IXP4xx Device Tree Bindings
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - linksys,nslu2
+ - const: intel,ixp42x
+ - items:
+ - enum:
+ - gateworks,gw2358
+ - const: intel,ixp43x
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index 2ecc712bf707..1c1e48fd94b5 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -92,6 +92,9 @@ SoCs:
- DRA718
compatible = "ti,dra718", "ti,dra722", "ti,dra72", "ti,dra7"
+- AM5748
+ compatible = "ti,am5748", "ti,dra762", "ti,dra7"
+
- AM5728
compatible = "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
@@ -184,6 +187,9 @@ Boards:
- AM57XX SBC-AM57x
compatible = "compulab,sbc-am57x", "compulab,cl-som-am57x", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
+- AM5748 IDK
+ compatible = "ti,am5748-idk", "ti,am5748", "ti,dra762", "ti,dra7";
+
- AM5728 IDK
compatible = "ti,am5728-idk", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index 061a03edf9c8..5c6bbf10abc9 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -97,6 +97,7 @@ properties:
- enum:
- friendlyarm,nanopc-t4
- friendlyarm,nanopi-m4
+ - friendlyarm,nanopi-neo4
- const: rockchip,rk3399
- description: GeekBuying GeekBox
@@ -146,7 +147,7 @@ properties:
- const: google,gru
- const: rockchip,rk3399
- - description: Google Jaq (Haier Chromebook 11 and more)
+ - description: Google Jaq (Haier Chromebook 11 and more w/ uSD)
items:
- const: google,veyron-jaq-rev5
- const: google,veyron-jaq-rev4
@@ -159,6 +160,12 @@ properties:
- description: Google Jerry (Hisense Chromebook C11 and more)
items:
+ - const: google,veyron-jerry-rev15
+ - const: google,veyron-jerry-rev14
+ - const: google,veyron-jerry-rev13
+ - const: google,veyron-jerry-rev12
+ - const: google,veyron-jerry-rev11
+ - const: google,veyron-jerry-rev10
- const: google,veyron-jerry-rev7
- const: google,veyron-jerry-rev6
- const: google,veyron-jerry-rev5
@@ -199,6 +206,17 @@ properties:
- const: google,veyron
- const: rockchip,rk3288
+ - description: Google Mighty (Haier Chromebook 11 and more w/ SD)
+ items:
+ - const: google,veyron-mighty-rev5
+ - const: google,veyron-mighty-rev4
+ - const: google,veyron-mighty-rev3
+ - const: google,veyron-mighty-rev2
+ - const: google,veyron-mighty-rev1
+ - const: google,veyron-mighty
+ - const: google,veyron
+ - const: rockchip,rk3288
+
- description: Google Minnie (Asus Chromebook Flip C100P)
items:
- const: google,veyron-minnie-rev4
@@ -308,6 +326,11 @@ properties:
- const: netxeon,r89
- const: rockchip,rk3288
+ - description: Orange Pi RK3399 board
+ items:
+ - const: rockchip,rk3399-orangepi
+ - const: rockchip,rk3399
+
- description: Phytec phyCORE-RK3288 Rapid Development Kit
items:
- const: phytec,rk3288-pcm-947
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt b/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
index 99980aee26e5..c92d411fd023 100644
--- a/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32-syscon.txt
@@ -5,10 +5,12 @@ Properties:
- " st,stm32mp157-syscfg " - for stm32mp157 based SoCs,
second value must be always "syscon".
- reg : offset and length of the register set.
+ - clocks: phandle to the syscfg clock
Example:
syscfg: syscon@50020000 {
compatible = "st,stm32mp157-syscfg", "syscon";
reg = <0x50020000 0x400>;
+ clocks = <&rcc SYSCFG>;
};
diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt
deleted file mode 100644
index 9254cbe7d516..000000000000
--- a/Documentation/devicetree/bindings/arm/sunxi.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Allwinner sunXi Platforms Device Tree Bindings
-
-Each device tree must specify which Allwinner SoC it uses,
-using one of the following compatible strings:
-
- allwinner,sun4i-a10
- allwinner,sun5i-a10s
- allwinner,sun5i-a13
- allwinner,sun5i-r8
- allwinner,sun6i-a31
- allwinner,sun7i-a20
- allwinner,sun8i-a23
- allwinner,sun8i-a33
- allwinner,sun8i-a83t
- allwinner,sun8i-h2-plus
- allwinner,sun8i-h3
- allwinner,sun8i-r40
- allwinner,sun8i-t3
- allwinner,sun8i-v3s
- allwinner,sun9i-a80
- allwinner,sun50i-a64
- allwinner,suniv-f1c100s
- nextthing,gr8
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
new file mode 100644
index 000000000000..285f4fc8519d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -0,0 +1,807 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR X11)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/sunxi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner platforms device tree bindings
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+
+ - description: Allwinner A23 Evaluation Board
+ items:
+ - const: allwinner,sun8i-a23-evb
+ - const: allwinner,sun8i-a23
+
+ - description: Allwinner A31 APP4 Evaluation Board
+ items:
+ - const: allwinner,app4-evb1
+ - const: allwinner,sun6i-a31
+
+ - description: Allwinner A83t Homlet Evaluation Board v2
+ items:
+ - const: allwinner,h8homlet-v2
+ - const: allwinner,sun8i-a83t
+
+ - description: Allwinner GA10H Quad Core Tablet v1.1
+ items:
+ - const: allwinner,ga10h-v1.1
+ - const: allwinner,sun8i-a33
+
+ - description: Allwinner GT90H Tablet v4
+ items:
+ - const: allwinner,gt90h-v4
+ - const: allwinner,sun8i-a23
+
+ - description: Allwinner R16 EVB (Parrot)
+ items:
+ - const: allwinner,parrot
+ - const: allwinner,sun8i-a33
+
+ - description: Amarula A64 Relic
+ items:
+ - const: amarula,a64-relic
+ - const: allwinner,sun50i-a64
+
+ - description: Auxtek T003 A10s HDMI TV Stick
+ items:
+ - const: allwinner,auxtek-t003
+ - const: allwinner,sun5i-a10s
+
+ - description: Auxtek T004 A10s HDMI TV Stick
+ items:
+ - const: allwinner,auxtek-t004
+ - const: allwinner,sun5i-a10s
+
+ - description: BA10 TV Box
+ items:
+ - const: allwinner,ba10-tvbox
+ - const: allwinner,sun4i-a10
+
+ - description: BananaPi
+ items:
+ - const: lemaker,bananapi
+ - const: allwinner,sun7i-a20
+
+ - description: BananaPi M1 Plus
+ items:
+ - const: sinovoip,bpi-m1-plus
+ - const: allwinner,sun7i-a20
+
+ - description: BananaPi M2
+ items:
+ - const: sinovoip,bpi-m2
+ - const: allwinner,sun6i-a31s
+
+ - description: BananaPi M2 Berry
+ items:
+ - const: sinovoip,bpi-m2-berry
+ - const: allwinner,sun8i-r40
+
+ - description: BananaPi M2 Plus
+ items:
+ - const: sinovoip,bpi-m2-plus
+ - const: allwinner,sun8i-h3
+
+ - description: BananaPi M2 Plus
+ items:
+ - const: sinovoip,bpi-m2-plus
+ - const: allwinner,sun50i-h5
+
+ - description: BananaPi M2 Plus v1.2
+ items:
+ - const: bananapi,bpi-m2-plus-v1.2
+ - const: allwinner,sun8i-h3
+
+ - description: BananaPi M2 Plus v1.2
+ items:
+ - const: bananapi,bpi-m2-plus-v1.2
+ - const: allwinner,sun50i-h5
+
+ - description: BananaPi M2 Magic
+ items:
+ - const: sinovoip,bananapi-m2m
+ - const: allwinner,sun8i-a33
+
+ - description: BananaPi M2 Ultra
+ items:
+ - const: sinovoip,bpi-m2-ultra
+ - const: allwinner,sun8i-r40
+
+ - description: BananaPi M2 Zero
+ items:
+ - const: sinovoip,bpi-m2-zero
+ - const: allwinner,sun8i-h2-plus
+
+ - description: BananaPi M3
+ items:
+ - const: sinovoip,bpi-m3
+ - const: allwinner,sun8i-a83t
+
+ - description: BananaPi M64
+ items:
+ - const: sinovoip,bananapi-m64
+ - const: allwinner,sun50i-a64
+
+ - description: BananaPro
+ items:
+ - const: lemaker,bananapro
+ - const: allwinner,sun7i-a20
+
+ - description: Beelink GS1
+ items:
+ - const: azw,beelink-gs1
+ - const: allwinner,sun50i-h6
+
+ - description: Beelink X2
+ items:
+ - const: roofull,beelink-x2
+ - const: allwinner,sun8i-h3
+
+ - description: Chuwi V7 CW0825
+ items:
+ - const: chuwi,v7-cw0825
+ - const: allwinner,sun4i-a10
+
+ - description: Colorfly E708 Q1 Tablet
+ items:
+ - const: colorfly,e708-q1
+ - const: allwinner,sun6i-a31s
+
+ - description: CSQ CS908 Set Top Box
+ items:
+ - const: csq,cs908
+ - const: allwinner,sun6i-a31s
+
+ - description: Cubietech Cubieboard
+ items:
+ - const: cubietech,a10-cubieboard
+ - const: allwinner,sun4i-a10
+
+ - description: Cubietech Cubieboard2
+ items:
+ - const: cubietech,cubieboard2
+ - const: allwinner,sun7i-a20
+
+ - description: Cubietech Cubieboard4
+ items:
+ - const: cubietech,a80-cubieboard4
+ - const: allwinner,sun9i-a80
+
+ - description: Cubietech Cubietruck
+ items:
+ - const: cubietech,cubietruck
+ - const: allwinner,sun7i-a20
+
+ - description: Cubietech Cubietruck Plus
+ items:
+ - const: cubietech,cubietruck-plus
+ - const: allwinner,sun8i-a83t
+
+ - description: Difrnce DIT4350
+ items:
+ - const: difrnce,dit4350
+ - const: allwinner,sun5i-a13
+
+ - description: Dserve DSRV9703C
+ items:
+ - const: dserve,dsrv9703c
+ - const: allwinner,sun4i-a10
+
+ - description: Empire Electronix D709 Tablet
+ items:
+ - const: empire-electronix,d709
+ - const: allwinner,sun5i-a13
+
+ - description: Empire Electronix M712 Tablet
+ items:
+ - const: empire-electronix,m712
+ - const: allwinner,sun5i-a13
+
+ - description: FriendlyARM NanoPi A64
+ items:
+ - const: friendlyarm,nanopi-a64
+ - const: allwinner,sun50i-a64
+
+ - description: FriendlyARM NanoPi M1
+ items:
+ - const: friendlyarm,nanopi-m1
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi M1 Plus
+ items:
+ - const: friendlyarm,nanopi-m1-plus
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo
+ items:
+ - const: friendlyarm,nanopi-neo
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo 2
+ items:
+ - const: friendlyarm,nanopi-neo2
+ - const: allwinner,sun50i-h5
+
+ - description: FriendlyARM NanoPi Neo Air
+ items:
+ - const: friendlyarm,nanopi-neo-air
+ - const: allwinner,sun8i-h3
+
+ - description: FriendlyARM NanoPi Neo Plus2
+ items:
+ - const: friendlyarm,nanopi-neo-plus2
+ - const: allwinner,sun50i-h5
+
+ - description: Gemei G9 Tablet
+ items:
+ - const: gemei,g9
+ - const: allwinner,sun4i-a10
+
+ - description: Hyundai A7HD
+ items:
+ - const: hyundai,a7hd
+ - const: allwinner,sun4i-a10
+
+ - description: HSG H702
+ items:
+ - const: hsg,h702
+ - const: allwinner,sun5i-a13
+
+ - description: I12 TV Box
+ items:
+ - const: allwinner,i12-tvbox
+ - const: allwinner,sun7i-a20
+
+ - description: ICNova A20 SWAC
+ items:
+ - const: swac,icnova-a20-swac
+ - const: incircuit,icnova-a20
+ - const: allwinner,sun7i-a20
+
+ - description: INet-1
+ items:
+ - const: inet-tek,inet1
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-86DZ Rev 01
+ items:
+ - const: primux,inet86dz
+ - const: allwinner,sun8i-a23
+
+ - description: iNet-9F Rev 03
+ items:
+ - const: inet-tek,inet9f-rev03
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-97F Rev 02
+ items:
+ - const: primux,inet97fv2
+ - const: allwinner,sun4i-a10
+
+ - description: iNet-98V Rev 02
+ items:
+ - const: primux,inet98v-rev2
+ - const: allwinner,sun5i-a13
+
+ - description: iNet D978 Rev 02 Tablet
+ items:
+ - const: primux,inet-d978-rev2
+ - const: allwinner,sun8i-a33
+
+ - description: iNet Q972 Tablet
+ items:
+ - const: inet-tek,inet-q972
+ - const: allwinner,sun6i-a31s
+
+ - description: Itead Ibox A20
+ items:
+ - const: itead,itead-ibox-a20
+ - const: allwinner,sun7i-a20
+
+ - description: Itead Iteaduino Plus A10
+ items:
+ - const: itead,iteaduino-plus-a10
+ - const: allwinner,sun4i-a10
+
+ - description: Jesurun Q5
+ items:
+ - const: jesurun,q5
+ - const: allwinner,sun4i-a10
+
+ - description: Lamobo R1
+ items:
+ - const: lamobo,lamobo-r1
+ - const: allwinner,sun7i-a20
+
+ - description: Libre Computer Board ALL-H3-CC H2+
+ items:
+ - const: libretech,all-h3-cc-h2-plus
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Libre Computer Board ALL-H3-CC H3
+ items:
+ - const: libretech,all-h3-cc-h3
+ - const: allwinner,sun8i-h3
+
+ - description: Libre Computer Board ALL-H3-CC H5
+ items:
+ - const: libretech,all-h3-cc-h5
+ - const: allwinner,sun50i-h5
+
+ - description: Lichee Pi One
+ items:
+ - const: licheepi,licheepi-one
+ - const: allwinner,sun5i-a13
+
+ - description: Lichee Pi Zero
+ items:
+ - const: licheepi,licheepi-zero
+ - const: allwinner,sun8i-v3s
+
+ - description: Lichee Pi Zero (with Dock)
+ items:
+ - const: licheepi,licheepi-zero-dock
+ - const: licheepi,licheepi-zero
+ - const: allwinner,sun8i-v3s
+
+ - description: Linksprite PCDuino
+ items:
+ - const: linksprite,a10-pcduino
+ - const: allwinner,sun4i-a10
+
+ - description: Linksprite PCDuino2
+ items:
+ - const: linksprite,a10-pcduino2
+ - const: allwinner,sun4i-a10
+
+ - description: Linksprite PCDuino3
+ items:
+ - const: linksprite,pcduino3
+ - const: allwinner,sun7i-a20
+
+ - description: Linksprite PCDuino3 Nano
+ items:
+ - const: linksprite,pcduino3-nano
+ - const: allwinner,sun7i-a20
+
+ - description: HAOYU Electronics Marsboard A10
+ items:
+ - const: haoyu,a10-marsboard
+ - const: allwinner,sun4i-a10
+
+ - description: MapleBoard MP130
+ items:
+ - const: mapleboard,mp130
+ - const: allwinner,sun8i-h3
+
+ - description: Mele A1000
+ items:
+ - const: mele,a1000
+ - const: allwinner,sun4i-a10
+
+ - description: Mele A1000G Quad Set Top Box
+ items:
+ - const: mele,a1000g-quad
+ - const: allwinner,sun6i-a31
+
+ - description: Mele I7 Quad Set Top Box
+ items:
+ - const: mele,i7
+ - const: allwinner,sun6i-a31
+
+ - description: Mele M3
+ items:
+ - const: mele,m3
+ - const: allwinner,sun7i-a20
+
+ - description: Mele M9 Set Top Box
+ items:
+ - const: mele,m9
+ - const: allwinner,sun6i-a31
+
+ - description: Merrii A20 Hummingboard
+ items:
+ - const: merrii,a20-hummingbird
+ - const: allwinner,sun7i-a20
+
+ - description: Merrii A31 Hummingboard
+ items:
+ - const: merrii,a31-hummingbird
+ - const: allwinner,sun6i-a31
+
+ - description: Merrii A80 Optimus
+ items:
+ - const: merrii,a80-optimus
+ - const: allwinner,sun9i-a80
+
+ - description: Miniand Hackberry
+ items:
+ - const: miniand,hackberry
+ - const: allwinner,sun4i-a10
+
+ - description: MK802
+ items:
+ - const: allwinner,mk802
+ - const: allwinner,sun4i-a10
+
+ - description: MK802-A10s
+ items:
+ - const: allwinner,a10s-mk802
+ - const: allwinner,sun5i-a10s
+
+ - description: MK802-II
+ items:
+ - const: allwinner,mk802ii
+ - const: allwinner,sun4i-a10
+
+ - description: MK808c
+ items:
+ - const: allwinner,mk808c
+ - const: allwinner,sun7i-a20
+
+ - description: MSI Primo81 Tablet
+ items:
+ - const: msi,primo81
+ - const: allwinner,sun6i-a31s
+
+ - description: Emlid Neutis N5 Developper Board
+ items:
+ - const: emlid,neutis-n5-devboard
+ - const: emlid,neutis-n5
+ - const: allwinner,sun50i-h5
+
+ - description: NextThing Co. CHIP
+ items:
+ - const: nextthing,chip
+ - const: allwinner,sun5i-r8
+ - const: allwinner,sun5i-a13
+
+ - description: NextThing Co. CHIP Pro
+ items:
+ - const: nextthing,chip-pro
+ - const: nextthing,gr8
+
+ - description: NextThing Co. GR8 Evaluation Board
+ items:
+ - const: nextthing,gr8-evb
+ - const: nextthing,gr8
+
+ - description: Nintendo NES Classic
+ items:
+ - const: nintendo,nes-classic
+ - const: allwinner,sun8i-r16
+ - const: allwinner,sun8i-a33
+
+ - description: Nintendo Super NES Classic
+ items:
+ - const: nintendo,super-nes-classic
+ - const: nintendo,nes-classic
+ - const: allwinner,sun8i-r16
+ - const: allwinner,sun8i-a33
+
+ - description: Oceanic 5inMFD (5205)
+ items:
+ - const: oceanic,5205-5inmfd
+ - const: allwinner,sun50i-a64
+
+ - description: Olimex A10-OlinuXino LIME
+ items:
+ - const: olimex,a10-olinuxino-lime
+ - const: allwinner,sun4i-a10
+
+ - description: Olimex A10s-OlinuXino Micro
+ items:
+ - const: olimex,a10s-olinuxino-micro
+ - const: allwinner,sun5i-a10s
+
+ - description: Olimex A13-OlinuXino
+ items:
+ - const: olimex,a13-olinuxino
+ - const: allwinner,sun5i-a13
+
+ - description: Olimex A13-OlinuXino Micro
+ items:
+ - const: olimex,a13-olinuxino-micro
+ - const: allwinner,sun5i-a13
+
+ - description: Olimex A20-Olimex SOM Evaluation Board
+ items:
+ - const: olimex,a20-olimex-som-evb
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-Olimex SOM Evaluation Board (with eMMC)
+ items:
+ - const: olimex,a20-olimex-som-evb-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME
+ items:
+ - const: olimex,a20-olinuxino-lime
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME2
+ items:
+ - const: olimex,a20-olinuxino-lime2
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino LIME2 (with eMMC)
+ items:
+ - const: olimex,a20-olinuxino-lime2-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino Micro
+ items:
+ - const: olimex,a20-olinuxino-micro
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-OlinuXino Micro (with eMMC)
+ items:
+ - const: olimex,a20-olinuxino-micro-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-SOM204 Evaluation Board
+ items:
+ - const: olimex,a20-olimex-som204-evb
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A20-SOM204 Evaluation Board (with eMMC)
+ items:
+ - const: olimex,a20-olimex-som204-evb-emmc
+ - const: allwinner,sun7i-a20
+
+ - description: Olimex A33-OlinuXino
+ items:
+ - const: olimex,a33-olinuxino
+ - const: allwinner,sun8i-a33
+
+ - description: Olimex A64-OlinuXino
+ items:
+ - const: olimex,a64-olinuxino
+ - const: allwinner,sun50i-a64
+
+ - description: Olimex A64 Teres-I
+ items:
+ - const: olimex,a64-teres-i
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64
+ items:
+ - const: pine64,pine64
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64+
+ items:
+ - const: pine64,pine64-plus
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 PineH64
+ items:
+ - const: pine64,pine-h64
+ - const: allwinner,sun50i-h6
+
+ - description: Pine64 LTS
+ items:
+ - const: pine64,pine64-lts
+ - const: allwinner,sun50i-r18
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 Pinebook
+ items:
+ - const: pine64,pinebook
+ - const: allwinner,sun50i-a64
+
+ - description: Pine64 SoPine Baseboard
+ items:
+ - const: pine64,sopine-baseboard
+ - const: pine64,sopine
+ - const: allwinner,sun50i-a64
+
+ - description: PineRiver Mini X-Plus
+ items:
+ - const: pineriver,mini-xplus
+ - const: allwinner,sun4i-a10
+
+ - description: Point of View Protab2-IPS9
+ items:
+ - const: pov,protab2-ips9
+ - const: allwinner,sun4i-a10
+
+ - description: Polaroid MID2407PXE03 Tablet
+ items:
+ - const: polaroid,mid2407pxe03
+ - const: allwinner,sun8i-a23
+
+ - description: Polaroid MID2809PXE04 Tablet
+ items:
+ - const: polaroid,mid2809pxe04
+ - const: allwinner,sun8i-a23
+
+ - description: Q8 A13 Tablet
+ items:
+ - const: allwinner,q8-a13
+ - const: allwinner,sun5i-a13
+
+ - description: Q8 A23 Tablet
+ items:
+ - const: allwinner,q8-a23
+ - const: allwinner,sun8i-a23
+
+ - description: Q8 A33 Tablet
+ items:
+ - const: allwinner,q8-a33
+ - const: allwinner,sun8i-a33
+
+ - description: Qihua CQA3T BV3
+ items:
+ - const: qihua,t3-cqa3t-bv3
+ - const: allwinner,sun8i-t3
+ - const: allwinner,sun8i-r40
+
+ - description: R7 A10s HDMI TV Stick
+ items:
+ - const: allwinner,r7-tv-dongle
+ - const: allwinner,sun5i-a10s
+
+ - description: RerVision H3-DVK
+ items:
+ - const: rervision,h3-dvk
+ - const: allwinner,sun8i-h3
+
+ - description: Sinlinx SinA31s Core Board
+ items:
+ - const: sinlinx,sina31s
+ - const: allwinner,sun6i-a31s
+
+ - description: Sinlinx SinA31s Development Board
+ items:
+ - const: sinlinx,sina31s-sdk
+ - const: allwinner,sun6i-a31s
+
+ - description: Sinlinx SinA33
+ items:
+ - const: sinlinx,sina33
+ - const: allwinner,sun8i-a33
+
+ - description: TBS A711 Tablet
+ items:
+ - const: tbs-biometrics,a711
+ - const: allwinner,sun8i-a83t
+
+ - description: Utoo P66
+ items:
+ - const: utoo,p66
+ - const: allwinner,sun5i-a13
+
+ - description: Wexler TAB7200
+ items:
+ - const: wexler,tab7200
+ - const: allwinner,sun7i-a20
+
+ - description: WITS A31 Colombus Evaluation Board
+ items:
+ - const: wits,colombus
+ - const: allwinner,sun6i-a31
+
+ - description: WITS Pro A20 DKT
+ items:
+ - const: wits,pro-a20-dkt
+ - const: allwinner,sun7i-a20
+
+ - description: Wobo i5
+ items:
+ - const: wobo,a10s-wobo-i5
+ - const: allwinner,sun5i-a10s
+
+ - description: Yones TopTech BS1078 v2 Tablet
+ items:
+ - const: yones-toptech,bs1078-v2
+ - const: allwinner,sun6i-a31s
+
+ - description: Xunlong OrangePi
+ items:
+ - const: xunlong,orangepi
+ - const: allwinner,sun7i-a20
+
+ - description: Xunlong OrangePi 2
+ items:
+ - const: xunlong,orangepi-2
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi 3
+ items:
+ - const: xunlong,orangepi-3
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi Lite
+ items:
+ - const: xunlong,orangepi-lite
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Lite2
+ items:
+ - const: xunlong,orangepi-lite2
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi Mini
+ items:
+ - const: xunlong,orangepi-mini
+ - const: allwinner,sun7i-a20
+
+ - description: Xunlong OrangePi One
+ items:
+ - const: xunlong,orangepi-one
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi One Plus
+ items:
+ - const: xunlong,orangepi-one-plus
+ - const: allwinner,sun50i-h6
+
+ - description: Xunlong OrangePi PC
+ items:
+ - const: xunlong,orangepi-pc
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi PC 2
+ items:
+ - const: xunlong,orangepi-pc2
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi PC Plus
+ items:
+ - const: xunlong,orangepi-pc-plus
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Plus
+ items:
+ - const: xunlong,orangepi-plus
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Plus 2E
+ items:
+ - const: xunlong,orangepi-plus2e
+ - const: allwinner,sun8i-h3
+
+ - description: Xunlong OrangePi Prime
+ items:
+ - const: xunlong,orangepi-prime
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi R1
+ items:
+ - const: xunlong,orangepi-r1
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Xunlong OrangePi Win
+ items:
+ - const: xunlong,orangepi-win
+ - const: allwinner,sun50i-a64
+
+ - description: Xunlong OrangePi Zero
+ items:
+ - const: xunlong,orangepi-zero
+ - const: allwinner,sun8i-h2-plus
+
+ - description: Xunlong OrangePi Zero Plus
+ items:
+ - const: xunlong,orangepi-zero-plus
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi Zero Plus2
+ items:
+ - const: xunlong,orangepi-zero-plus2
+ - const: allwinner,sun50i-h5
+
+ - description: Xunlong OrangePi Zero Plus2
+ items:
+ - const: xunlong,orangepi-zero-plus2-h3
+ - const: allwinner,sun8i-h3
diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.txt b/Documentation/devicetree/bindings/bus/ti-sysc.txt
index 85a23f551f02..233eb8294204 100644
--- a/Documentation/devicetree/bindings/bus/ti-sysc.txt
+++ b/Documentation/devicetree/bindings/bus/ti-sysc.txt
@@ -94,6 +94,8 @@ Optional properties:
- ti,no-idle-on-init interconnect target module should not be idled at init
+- ti,no-idle interconnect target module should not be idled
+
Example: Single instance of MUSB controller on omap4 using interconnect ranges
using offsets from l4_cfg second segment (0x4a000000 + 0x80000 = 0x4a0ab000):
@@ -131,6 +133,6 @@ using offsets from l4_cfg second segment (0x4a000000 + 0x80000 = 0x4a0ab000):
};
};
-Note that other SoCs, such as am335x can have multipe child devices. On am335x
+Note that other SoCs, such as am335x can have multiple child devices. On am335x
there are two MUSB instances, two USB PHY instances, and a single CPPI41 DMA
-instance as children of a single interconnet target module.
+instance as children of a single interconnect target module.
diff --git a/Documentation/devicetree/bindings/clock/xlnx,zynqmp-clk.txt b/Documentation/devicetree/bindings/clock/xlnx,zynqmp-clk.txt
new file mode 100644
index 000000000000..391ee1a60bed
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/xlnx,zynqmp-clk.txt
@@ -0,0 +1,63 @@
+--------------------------------------------------------------------------
+Device Tree Clock bindings for the Zynq Ultrascale+ MPSoC controlled using
+Zynq MPSoC firmware interface
+--------------------------------------------------------------------------
+The clock controller is a h/w block of Zynq Ultrascale+ MPSoC clock
+tree. It reads required input clock frequencies from the devicetree and acts
+as clock provider for all clock consumers of PS clocks.
+
+See clock_bindings.txt for more information on the generic clock bindings.
+
+Required properties:
+ - #clock-cells: Must be 1
+ - compatible: Must contain: "xlnx,zynqmp-clk"
+ - clocks: List of clock specifiers which are external input
+ clocks to the given clock controller. Please refer
+ the next section to find the input clocks for a
+ given controller.
+ - clock-names: List of clock names which are exteral input clocks
+ to the given clock controller. Please refer to the
+ clock bindings for more details.
+
+Input clocks for zynqmp Ultrascale+ clock controller:
+
+The Zynq UltraScale+ MPSoC has one primary and four alternative reference clock
+inputs. These required clock inputs are:
+ - pss_ref_clk (PS reference clock)
+ - video_clk (reference clock for video system )
+ - pss_alt_ref_clk (alternative PS reference clock)
+ - aux_ref_clk
+ - gt_crx_ref_clk (transceiver reference clock)
+
+The following strings are optional parameters to the 'clock-names' property in
+order to provide an optional (E)MIO clock source:
+ - swdt0_ext_clk
+ - swdt1_ext_clk
+ - gem0_emio_clk
+ - gem1_emio_clk
+ - gem2_emio_clk
+ - gem3_emio_clk
+ - mio_clk_XX # with XX = 00..77
+ - mio_clk_50_or_51 #for the mux clock to gem tsu from 50 or 51
+
+
+Output clocks are registered based on clock information received
+from firmware. Output clocks indexes are mentioned in
+include/dt-bindings/clock/xlnx-zynqmp-clk.h.
+
+-------
+Example
+-------
+
+firmware {
+ zynqmp_firmware: zynqmp-firmware {
+ compatible = "xlnx,zynqmp-firmware";
+ method = "smc";
+ zynqmp_clk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "xlnx,zynqmp-clk";
+ clocks = <&pss_ref_clk>, <&video_clk>, <&pss_alt_ref_clk>, <&aux_ref_clk>, <&gt_crx_ref_clk>;
+ clock-names = "pss_ref_clk", "video_clk", "pss_alt_ref_clk","aux_ref_clk", "gt_crx_ref_clk";
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 3c9a57a8443b..9d8bbac27d8b 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -9,6 +9,7 @@ Required properties:
"fsl,imx53-sdma"
"fsl,imx6q-sdma"
"fsl,imx7d-sdma"
+ "fsl,imx8mq-sdma"
The -to variants should be preferred since they allow to determine the
correct ROM script addresses needed for the driver to work without additional
firmware.
diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
new file mode 100644
index 000000000000..8cb136c376fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/firmware/intel-ixp4xx-network-processing-engine.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx Network Processing Engine
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ On the IXP4xx SoCs, the Network Processing Engine (NPE) is a small
+ processor that can load a firmware to perform offloading of networking
+ and crypto tasks. It also manages the MDIO bus to the ethernet PHYs
+ on the IXP4xx platform. All IXP4xx platforms have three NPEs at
+ consecutive memory locations. They are all included in the same
+ device node since they are not independent of each other.
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: intel,ixp4xx-network-processing-engine
+
+ reg:
+ minItems: 3
+ maxItems: 3
+ items:
+ - description: NPE0 register range
+ - description: NPE1 register range
+ - description: NPE2 register range
+
+required:
+ - compatible
+ - reg
+
+examples:
+ - |
+ npe@c8006000 {
+ compatible = "intel,ixp4xx-network-processing-engine";
+ reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
index 614bac55df86..a4fe136be2ba 100644
--- a/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
+++ b/Documentation/devicetree/bindings/firmware/xilinx/xlnx,zynqmp-firmware.txt
@@ -17,53 +17,6 @@ Required properties:
- "smc" : SMC #0, following the SMCCC
- "hvc" : HVC #0, following the SMCCC
---------------------------------------------------------------------------
-Device Tree Clock bindings for the Zynq Ultrascale+ MPSoC controlled using
-Zynq MPSoC firmware interface
---------------------------------------------------------------------------
-The clock controller is a h/w block of Zynq Ultrascale+ MPSoC clock
-tree. It reads required input clock frequencies from the devicetree and acts
-as clock provider for all clock consumers of PS clocks.
-
-See clock_bindings.txt for more information on the generic clock bindings.
-
-Required properties:
- - #clock-cells: Must be 1
- - compatible: Must contain: "xlnx,zynqmp-clk"
- - clocks: List of clock specifiers which are external input
- clocks to the given clock controller. Please refer
- the next section to find the input clocks for a
- given controller.
- - clock-names: List of clock names which are exteral input clocks
- to the given clock controller. Please refer to the
- clock bindings for more details.
-
-Input clocks for zynqmp Ultrascale+ clock controller:
-
-The Zynq UltraScale+ MPSoC has one primary and four alternative reference clock
-inputs. These required clock inputs are:
- - pss_ref_clk (PS reference clock)
- - video_clk (reference clock for video system )
- - pss_alt_ref_clk (alternative PS reference clock)
- - aux_ref_clk
- - gt_crx_ref_clk (transceiver reference clock)
-
-The following strings are optional parameters to the 'clock-names' property in
-order to provide an optional (E)MIO clock source:
- - swdt0_ext_clk
- - swdt1_ext_clk
- - gem0_emio_clk
- - gem1_emio_clk
- - gem2_emio_clk
- - gem3_emio_clk
- - mio_clk_XX # with XX = 00..77
- - mio_clk_50_or_51 #for the mux clock to gem tsu from 50 or 51
-
-
-Output clocks are registered based on clock information received
-from firmware. Output clocks indexes are mentioned in
-include/dt-bindings/clock/xlnx,zynqmp-clk.h.
-
-------
Example
-------
@@ -72,11 +25,6 @@ firmware {
zynqmp_firmware: zynqmp-firmware {
compatible = "xlnx,zynqmp-firmware";
method = "smc";
- zynqmp_clk: clock-controller {
- #clock-cells = <1>;
- compatible = "xlnx,zynqmp-clk";
- clocks = <&pss_ref_clk>, <&video_clk>, <&pss_alt_ref_clk>, <&aux_ref_clk>, <&gt_crx_ref_clk>;
- clock-names = "pss_ref_clk", "video_clk", "pss_alt_ref_clk","aux_ref_clk", "gt_crx_ref_clk";
- };
+ ...
};
};
diff --git a/Documentation/devicetree/bindings/fpga/xlnx,zynqmp-pcap-fpga.txt b/Documentation/devicetree/bindings/fpga/xlnx,zynqmp-pcap-fpga.txt
new file mode 100644
index 000000000000..3052bf619dd5
--- /dev/null
+++ b/Documentation/devicetree/bindings/fpga/xlnx,zynqmp-pcap-fpga.txt
@@ -0,0 +1,25 @@
+Devicetree bindings for Zynq Ultrascale MPSoC FPGA Manager.
+The ZynqMP SoC uses the PCAP (Processor configuration Port) to configure the
+Programmable Logic (PL). The configuration uses the firmware interface.
+
+Required properties:
+- compatible: should contain "xlnx,zynqmp-pcap-fpga"
+
+Example for full FPGA configuration:
+
+ fpga-region0 {
+ compatible = "fpga-region";
+ fpga-mgr = <&zynqmp_pcap>;
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ };
+
+ firmware {
+ zynqmp_firmware: zynqmp-firmware {
+ compatible = "xlnx,zynqmp-firmware";
+ method = "smc";
+ zynqmp_pcap: pcap {
+ compatible = "xlnx,zynqmp-pcap-fpga";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
index fb144e2b6522..dab537c20def 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
@@ -2,6 +2,7 @@
Required properties:
- compatible: Has to contain one of the following:
+ nxp,pca6416
nxp,pca9505
nxp,pca9534
nxp,pca9535
@@ -30,6 +31,7 @@ Required properties:
ti,tca6424
ti,tca9539
ti,tca9554
+ onnn,cat9554
onnn,pca9654
exar,xra1202
- gpio-controller: if used as gpio expander.
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
index 18a2cde2e5f3..1b1a74129141 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
@@ -37,6 +37,20 @@ Optional properties:
- operating-points-v2 : Refer to Documentation/devicetree/bindings/opp/opp.txt
for details.
+- resets : Phandle of the GPU reset line.
+
+Vendor-specific bindings
+------------------------
+
+The Mali GPU is integrated very differently from one SoC to
+another. In order to accomodate those differences, you have the option
+to specify one more vendor-specific compatible, among:
+
+- "amlogic,meson-gxm-mali"
+ Required properties:
+ - resets : Should contain phandles of :
+ + GPU reset line
+ + GPU APB glue reset line
Example for a Mali-T760:
diff --git a/Documentation/devicetree/bindings/hwmon/pwm-fan.txt b/Documentation/devicetree/bindings/hwmon/pwm-fan.txt
index 6ced829b0e58..41b76762953a 100644
--- a/Documentation/devicetree/bindings/hwmon/pwm-fan.txt
+++ b/Documentation/devicetree/bindings/hwmon/pwm-fan.txt
@@ -21,8 +21,6 @@ Optional properties:
Example:
fan0: pwm-fan {
compatible = "pwm-fan";
- cooling-min-state = <0>;
- cooling-max-state = <3>;
#cooling-cells = <2>;
pwms = <&pwm 0 10000 0>;
cooling-levels = <0 102 170 230>;
diff --git a/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt b/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt
index 5c184b940669..f1f3a552459b 100644
--- a/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/imx7d-adc.txt
@@ -10,6 +10,7 @@ Required properties:
- clocks: The root clock of the ADC controller
- clock-names: Must contain "adc", matching entry in the clocks property
- vref-supply: The regulator supply ADC reference voltage
+- #io-channel-cells: Must be 1 as per ../iio-bindings.txt
Example:
adc1: adc@30610000 {
@@ -19,4 +20,5 @@ adc1: adc@30610000 {
clocks = <&clks IMX7D_ADC_ROOT_CLK>;
clock-names = "adc";
vref-supply = <&reg_vcc_3v3_mcu>;
+ #io-channel-cells = <1>;
};
diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt
index c81993f8d8c3..c8787688122a 100644
--- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt
@@ -13,6 +13,7 @@ VADC node:
Definition: Should contain "qcom,spmi-vadc".
Should contain "qcom,spmi-adc5" for PMIC5 ADC driver.
Should contain "qcom,spmi-adc-rev2" for PMIC rev2 ADC driver.
+ Should contain "qcom,pms405-adc" for PMS405 PMIC
- reg:
Usage: required
diff --git a/Documentation/devicetree/bindings/input/gpio-vibrator.yaml b/Documentation/devicetree/bindings/input/gpio-vibrator.yaml
new file mode 100644
index 000000000000..903475f52dbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-vibrator.yaml
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/input/gpio-vibrator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO vibrator
+
+maintainers:
+ - Luca Weiss <luca@z3ntu.xyz>
+
+description: |+
+ Registers a GPIO device as vibrator, where the on/off capability is controlled by a GPIO.
+
+properties:
+ compatible:
+ const: gpio-vibrator
+
+ enable-gpios:
+ maxItems: 1
+
+ vcc-supply:
+ description: Regulator that provides power
+
+required:
+ - compatible
+ - enable-gpios
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ vibrator {
+ compatible = "gpio-vibrator";
+ enable-gpios = <&msmgpio 86 GPIO_ACTIVE_HIGH>;
+ vcc-supply = <&pm8941_l18>;
+ };
diff --git a/Documentation/devicetree/bindings/input/lpc32xx-key.txt b/Documentation/devicetree/bindings/input/lpc32xx-key.txt
index bcf62f856358..2b075a080d30 100644
--- a/Documentation/devicetree/bindings/input/lpc32xx-key.txt
+++ b/Documentation/devicetree/bindings/input/lpc32xx-key.txt
@@ -8,6 +8,7 @@ Required Properties:
- reg: Physical base address of the controller and length of memory mapped
region.
- interrupts: The interrupt number to the cpu.
+- clocks: phandle to clock controller plus clock-specifier pair
- nxp,debounce-delay-ms: Debounce delay in ms
- nxp,scan-delay-ms: Repeated scan period in ms
- linux,keymap: the key-code to be reported when the key is pressed
@@ -22,7 +23,9 @@ Example:
key@40050000 {
compatible = "nxp,lpc3220-key";
reg = <0x40050000 0x1000>;
- interrupts = <54 0>;
+ clocks = <&clk LPC32XX_CLK_KEY>;
+ interrupt-parent = <&sic1>;
+ interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
keypad,num-rows = <1>;
keypad,num-columns = <1>;
nxp,debounce-delay-ms = <3>;
diff --git a/Documentation/devicetree/bindings/input/max77650-onkey.txt b/Documentation/devicetree/bindings/input/max77650-onkey.txt
new file mode 100644
index 000000000000..477dc74f452a
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/max77650-onkey.txt
@@ -0,0 +1,26 @@
+Onkey driver for MAX77650 PMIC from Maxim Integrated.
+
+This module is part of the MAX77650 MFD device. For more details
+see Documentation/devicetree/bindings/mfd/max77650.txt.
+
+The onkey controller is represented as a sub-node of the PMIC node on
+the device tree.
+
+Required properties:
+--------------------
+- compatible: Must be "maxim,max77650-onkey".
+
+Optional properties:
+- linux,code: The key-code to be reported when the key is pressed.
+ Defaults to KEY_POWER.
+- maxim,onkey-slide: The system's button is a slide switch, not the default
+ push button.
+
+Example:
+--------
+
+ onkey {
+ compatible = "maxim,max77650-onkey";
+ linux,code = <KEY_END>;
+ maxim,onkey-slide;
+ };
diff --git a/Documentation/devicetree/bindings/input/microchip,qt1050.txt b/Documentation/devicetree/bindings/input/microchip,qt1050.txt
new file mode 100644
index 000000000000..80e75f96252b
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/microchip,qt1050.txt
@@ -0,0 +1,78 @@
+Microchip AT42QT1050 Five-channel Touch Sensor IC
+
+The AT42QT1050 (QT1050) is a QTouchADC sensor device. The device can sense from
+one to five keys, dependent on mode. The QT1050 includes all signal processing
+functions necessary to provide stable sensing under a wide variety of changing
+conditions, and the outputs are fully debounced.
+
+The touchkey device node should be placed inside an I2C bus node.
+
+Required properties:
+- compatible: Must be "microchip,qt1050"
+- reg: The I2C address of the device
+- interrupts: The sink for the touchpad's IRQ output,
+ see ../interrupt-controller/interrupts.txt
+
+Optional properties:
+- wakeup-source: touch keys can be used as a wakeup source
+
+Each button (key) is represented as a sub-node:
+
+Each not specified key or key with linux,code set to KEY_RESERVED gets disabled
+in HW.
+
+Subnode properties:
+- linux,code: Keycode to emit.
+- reg: The key number. Valid values: 0, 1, 2, 3, 4.
+
+Optional subnode-properties:
+
+If a optional property is missing or has a invalid value the default value is
+taken.
+
+- microchip,pre-charge-time-ns:
+ Each touchpad need some time to precharge. The value depends on the mechanical
+ layout.
+ Valid value range: 0 - 637500; values must be a multiple of 2500;
+ default is 0.
+- microchip,average-samples:
+ Number of data samples which are averaged for each read.
+ Valid values: 1, 4, 16, 64, 256, 1024, 4096, 16384; default is 1.
+- microchip,average-scaling:
+ The scaling factor which is used to scale the average-samples.
+ Valid values: 1, 2, 4, 8, 16, 32, 64, 128; default is 1.
+- microchip,threshold:
+ Number of counts to register a touch detection.
+ Valid value range: 0 - 255; default is 20.
+
+Example:
+QT1050 with 3 non continuous keys, key2 and key4 are disabled.
+
+touchkeys@41 {
+ compatible = "microchip,qt1050";
+ reg = <0x41>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
+
+ up@0 {
+ reg = <0>;
+ linux,code = <KEY_UP>;
+ microchip,average-samples = <64>;
+ microchip,average-scaling = <16>;
+ microchip,pre-charge-time-ns = <10000>;
+ };
+
+ right@1 {
+ reg = <1>;
+ linux,code = <KEY_RIGHT>;
+ microchip,average-samples = <64>;
+ microchip,average-scaling = <8>;
+ };
+
+ down@3 {
+ reg = <3>;
+ linux,code = <KEY_DOWN>;
+ microchip,average-samples = <256>;
+ microchip,average-scaling = <16>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
index 1458c3179a63..496125c6bfb7 100644
--- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
+++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
@@ -2,12 +2,14 @@ Allwinner sun4i low res adc attached tablet keys
------------------------------------------------
Required properties:
- - compatible: "allwinner,sun4i-a10-lradc-keys"
+ - compatible: should be one of the following string:
+ "allwinner,sun4i-a10-lradc-keys"
+ "allwinner,sun8i-a83t-r-lradc"
- reg: mmio address range of the chip
- interrupts: interrupt to which the chip is connected
- vref-supply: powersupply for the lradc reference voltage
-Each key is represented as a sub-node of "allwinner,sun4i-a10-lradc-keys":
+Each key is represented as a sub-node of the compatible mentioned above:
Required subnode-properties:
- label: Descriptive name of the key.
diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt
index 8cf0b4d38a7e..fc03ea4cf5ab 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt
@@ -3,6 +3,7 @@ Device tree bindings for Goodix GT9xx series touchscreen controller
Required properties:
- compatible : Should be "goodix,gt1151"
+ or "goodix,gt5663"
or "goodix,gt5688"
or "goodix,gt911"
or "goodix,gt9110"
@@ -19,6 +20,8 @@ Optional properties:
- irq-gpios : GPIO pin used for IRQ. The driver uses the
interrupt gpio pin as output to reset the device.
- reset-gpios : GPIO pin used for reset
+ - AVDD28-supply : Analog power supply regulator on AVDD28 pin
+ - VDDIO-supply : GPIO power supply regulator on VDDIO pin
- touchscreen-inverted-x
- touchscreen-inverted-y
- touchscreen-size-x
diff --git a/Documentation/devicetree/bindings/input/touchscreen/iqs5xx.txt b/Documentation/devicetree/bindings/input/touchscreen/iqs5xx.txt
new file mode 100644
index 000000000000..efa0820e2469
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/iqs5xx.txt
@@ -0,0 +1,80 @@
+Azoteq IQS550/572/525 Trackpad/Touchscreen Controller
+
+Required properties:
+
+- compatible : Must be equal to one of the following:
+ "azoteq,iqs550"
+ "azoteq,iqs572"
+ "azoteq,iqs525"
+
+- reg : I2C slave address for the device.
+
+- interrupts : GPIO to which the device's active-high RDY
+ output is connected (see [0]).
+
+- reset-gpios : GPIO to which the device's active-low NRST
+ input is connected (see [1]).
+
+Optional properties:
+
+- touchscreen-min-x : See [2].
+
+- touchscreen-min-y : See [2].
+
+- touchscreen-size-x : See [2]. If this property is omitted, the
+ maximum x-coordinate is specified by the
+ device's "X Resolution" register.
+
+- touchscreen-size-y : See [2]. If this property is omitted, the
+ maximum y-coordinate is specified by the
+ device's "Y Resolution" register.
+
+- touchscreen-max-pressure : See [2]. Pressure is expressed as the sum of
+ the deltas across all channels impacted by a
+ touch event. A channel's delta is calculated
+ as its count value minus a reference, where
+ the count value is inversely proportional to
+ the channel's capacitance.
+
+- touchscreen-fuzz-x : See [2].
+
+- touchscreen-fuzz-y : See [2].
+
+- touchscreen-fuzz-pressure : See [2].
+
+- touchscreen-inverted-x : See [2]. Inversion is applied relative to that
+ which may already be specified by the device's
+ FLIP_X and FLIP_Y register fields.
+
+- touchscreen-inverted-y : See [2]. Inversion is applied relative to that
+ which may already be specified by the device's
+ FLIP_X and FLIP_Y register fields.
+
+- touchscreen-swapped-x-y : See [2]. Swapping is applied relative to that
+ which may already be specified by the device's
+ SWITCH_XY_AXIS register field.
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/gpio/gpio.txt
+[2]: Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
+
+Example:
+
+ &i2c1 {
+ /* ... */
+
+ touchscreen@74 {
+ compatible = "azoteq,iqs550";
+ reg = <0x74>;
+ interrupt-parent = <&gpio>;
+ interrupts = <17 4>;
+ reset-gpios = <&gpio 27 1>;
+
+ touchscreen-size-x = <640>;
+ touchscreen-size-y = <480>;
+
+ touchscreen-max-pressure = <16000>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml b/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
new file mode 100644
index 000000000000..bae10e261fa9
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/interrupt/intel-ixp4xx-interrupt.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx XScale Networking Processors Interrupt Controller
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ This interrupt controller is found in the Intel IXP4xx processors.
+ Some processors have 32 interrupts, some have up to 64 interrupts.
+ The exact number of interrupts is determined from the compatible
+ string.
+
+ The distinct IXP4xx families with different interrupt controller
+ variations are IXP42x, IXP43x, IXP45x and IXP46x. Those four
+ families were the only ones to reach the developer and consumer
+ market.
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - intel,ixp42x-interrupt
+ - intel,ixp43x-interrupt
+ - intel,ixp45x-interrupt
+ - intel,ixp46x-interrupt
+
+ reg:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - interrupt-controller
+ - '#interrupt-cells'
+
+examples:
+ - |
+ intcon: interrupt-controller@c8003000 {
+ compatible = "intel,ixp43x-interrupt";
+ reg = <0xc8003000 0x100>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
index c5d589108a94..0e312fea2a5d 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
@@ -1,15 +1,18 @@
-+Mediatek MT65xx/MT67xx/MT81xx sysirq
+MediaTek sysirq
-Mediatek SOCs sysirq support controllable irq inverter for each GIC SPI
+MediaTek SOCs sysirq support controllable irq inverter for each GIC SPI
interrupt.
Required properties:
- compatible: should be
+ "mediatek,mt8516-sysirq", "mediatek,mt6577-sysirq": for MT8516
+ "mediatek,mt8183-sysirq", "mediatek,mt6577-sysirq": for MT8183
"mediatek,mt8173-sysirq", "mediatek,mt6577-sysirq": for MT8173
"mediatek,mt8135-sysirq", "mediatek,mt6577-sysirq": for MT8135
"mediatek,mt8127-sysirq", "mediatek,mt6577-sysirq": for MT8127
"mediatek,mt7622-sysirq", "mediatek,mt6577-sysirq": for MT7622
"mediatek,mt7623-sysirq", "mediatek,mt6577-sysirq": for MT7623
+ "mediatek,mt7629-sysirq", "mediatek,mt6577-sysirq": for MT7629
"mediatek,mt6795-sysirq", "mediatek,mt6577-sysirq": for MT6795
"mediatek,mt6797-sysirq", "mediatek,mt6577-sysirq": for MT6797
"mediatek,mt6765-sysirq", "mediatek,mt6577-sysirq": for MT6765
diff --git a/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
new file mode 100644
index 000000000000..4d61fe0a98a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/backlight/lm3630a-backlight.yaml
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/backlight/lm3630a-backlight.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI LM3630A High-Efficiency Dual-String White LED
+
+maintainers:
+ - Lee Jones <lee.jones@linaro.org>
+ - Daniel Thompson <daniel.thompson@linaro.org>
+ - Jingoo Han <jingoohan1@gmail.com>
+
+description: |
+ The LM3630A is a current-mode boost converter which supplies the power and
+ controls the current in up to two strings of 10 LEDs per string.
+ https://www.ti.com/product/LM3630A
+
+properties:
+ compatible:
+ const: ti,lm3630a
+
+ reg:
+ maxItems: 1
+
+ ti,linear-mapping-mode:
+ description: |
+ Enable linear mapping mode. If disabled, then it will use exponential
+ mapping mode in which the ramp up/down appears to have a more uniform
+ transition to the human eye.
+ type: boolean
+
+required:
+ - compatible
+ - reg
+
+patternProperties:
+ "^led@[01]$":
+ type: object
+ description: |
+ Properties for a string of connected LEDs.
+
+ properties:
+ reg:
+ description: |
+ The control bank that is used to program the two current sinks. The
+ LM3630A has two control banks (A and B) and are represented as 0 or 1
+ in this property. The two current sinks can be controlled
+ independently with both banks, or bank A can be configured to control
+ both sinks with the led-sources property.
+ maxItems: 1
+ minimum: 0
+ maximum: 1
+
+ label:
+ maxItems: 1
+
+ led-sources:
+ allOf:
+ - minItems: 1
+ maxItems: 2
+ items:
+ minimum: 0
+ maximum: 1
+
+ default-brightness:
+ description: Default brightness level on boot.
+ minimum: 0
+ maximum: 255
+
+ max-brightness:
+ description: Maximum brightness that is allowed during runtime.
+ minimum: 0
+ maximum: 255
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+additionalProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led-controller@38 {
+ compatible = "ti,lm3630a";
+ reg = <0x38>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ led-sources = <0 1>;
+ label = "lcd-backlight";
+ default-brightness = <200>;
+ max-brightness = <255>;
+ };
+ };
+ };
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led-controller@38 {
+ compatible = "ti,lm3630a";
+ reg = <0x38>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ default-brightness = <150>;
+ ti,linear-mapping-mode;
+ };
+
+ led@1 {
+ reg = <1>;
+ default-brightness = <225>;
+ ti,linear-mapping-mode;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/leds/leds-max77650.txt b/Documentation/devicetree/bindings/leds/leds-max77650.txt
new file mode 100644
index 000000000000..3a67115cc1da
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-max77650.txt
@@ -0,0 +1,57 @@
+LED driver for MAX77650 PMIC from Maxim Integrated.
+
+This module is part of the MAX77650 MFD device. For more details
+see Documentation/devicetree/bindings/mfd/max77650.txt.
+
+The LED controller is represented as a sub-node of the PMIC node on
+the device tree.
+
+This device has three current sinks.
+
+Required properties:
+--------------------
+- compatible: Must be "maxim,max77650-led"
+- #address-cells: Must be <1>.
+- #size-cells: Must be <0>.
+
+Each LED is represented as a sub-node of the LED-controller node. Up to
+three sub-nodes can be defined.
+
+Required properties of the sub-node:
+------------------------------------
+
+- reg: Must be <0>, <1> or <2>.
+
+Optional properties of the sub-node:
+------------------------------------
+
+- label: See Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger: See Documentation/devicetree/bindings/leds/common.txt
+
+For more details, please refer to the generic GPIO DT binding document
+<devicetree/bindings/gpio/gpio.txt>.
+
+Example:
+--------
+
+ leds {
+ compatible = "maxim,max77650-led";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ label = "blue:usr0";
+ };
+
+ led@1 {
+ reg = <1>;
+ label = "red:usr1";
+ linux,default-trigger = "heartbeat";
+ };
+
+ led@2 {
+ reg = <2>;
+ label = "green:usr2";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.txt b/Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.txt
new file mode 100644
index 000000000000..282ab81a4ea6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/marvell,armada-3700-rwtm-mailbox.txt
@@ -0,0 +1,16 @@
+* rWTM BIU Mailbox driver for Armada 37xx
+
+Required properties:
+- compatible: must be "marvell,armada-3700-rwtm-mailbox"
+- reg: physical base address of the mailbox and length of memory mapped
+ region
+- interrupts: the IRQ line for the mailbox
+- #mbox-cells: must be 1
+
+Example:
+ rwtm: mailbox@b0000 {
+ compatible = "marvell,armada-3700-rwtm-mailbox";
+ reg = <0xb0000 0x100>;
+ interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
+ #mbox-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt b/Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt
index 9bb5f57e2066..94bf7896a688 100644
--- a/Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/atmel,ebi.txt
@@ -15,6 +15,7 @@ Required properties:
"atmel,at91sam9g45-ebi"
"atmel,at91sam9x5-ebi"
"atmel,sama5d3-ebi"
+ "microchip,sam9x60-ebi"
- reg: Contains offset/length value for EBI memory mapping.
This property might contain several entries if the EBI
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.txt b/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.txt
new file mode 100644
index 000000000000..bcc36c5b543c
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/mmdc.txt
@@ -0,0 +1,35 @@
+Freescale Multi Mode DDR controller (MMDC)
+
+Required properties :
+- compatible : should be one of following:
+ for i.MX6Q/i.MX6DL:
+ - "fsl,imx6q-mmdc";
+ for i.MX6QP:
+ - "fsl,imx6qp-mmdc", "fsl,imx6q-mmdc";
+ for i.MX6SL:
+ - "fsl,imx6sl-mmdc", "fsl,imx6q-mmdc";
+ for i.MX6SLL:
+ - "fsl,imx6sll-mmdc", "fsl,imx6q-mmdc";
+ for i.MX6SX:
+ - "fsl,imx6sx-mmdc", "fsl,imx6q-mmdc";
+ for i.MX6UL/i.MX6ULL/i.MX6ULZ:
+ - "fsl,imx6ul-mmdc", "fsl,imx6q-mmdc";
+ for i.MX7ULP:
+ - "fsl,imx7ulp-mmdc", "fsl,imx6q-mmdc";
+- reg : address and size of MMDC DDR controller registers
+
+Optional properties :
+- clocks : the clock provided by the SoC to access the MMDC registers
+
+Example :
+ mmdc0: memory-controller@21b0000 { /* MMDC0 */
+ compatible = "fsl,imx6q-mmdc";
+ reg = <0x021b0000 0x4000>;
+ clocks = <&clks IMX6QDL_CLK_MMDC_P0_IPG>;
+ };
+
+ mmdc1: memory-controller@21b4000 { /* MMDC1 */
+ compatible = "fsl,imx6q-mmdc";
+ reg = <0x021b4000 0x4000>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
index 3f643ef121ff..5f8880cc757e 100644
--- a/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
+++ b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
@@ -7,6 +7,7 @@ Required properties:
"atmel,sama5d2-hlcdc"
"atmel,sama5d3-hlcdc"
"atmel,sama5d4-hlcdc"
+ "microchip,sam9x60-hlcdc"
- reg: base address and size of the HLCDC device registers.
- clock-names: the name of the 3 clocks requested by the HLCDC device.
Should contain "periph_clk", "sys_clk" and "slow_clk".
diff --git a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
index 004b0158cf4d..3bf92ad37fa1 100644
--- a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
+++ b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
@@ -19,6 +19,8 @@ And these documents for the required sub-node binding details:
[4] Clock: ../clock/cirrus,lochnagar.txt
[5] Pinctrl: ../pinctrl/cirrus,lochnagar.txt
[6] Regulator: ../regulator/cirrus,lochnagar.txt
+ [7] Sound: ../sound/cirrus,lochnagar.txt
+ [8] Hardware Monitor: ../hwmon/cirrus,lochnagar.txt
Required properties:
@@ -41,6 +43,11 @@ Optional sub-nodes:
- Bindings for the regulator components, see [6]. Only available on
Lochnagar 2.
+ - lochnagar-sc : Binding for the sound card components, see [7].
+ Only available on Lochnagar 2.
+ - lochnagar-hwmon : Binding for the hardware monitor components, see [8].
+ Only available on Lochnagar 2.
+
Optional properties:
- present-gpios : Host present line, indicating the presence of a
@@ -65,4 +72,14 @@ lochnagar: lochnagar@22 {
compatible = "cirrus,lochnagar-pinctrl";
...
};
+
+ lochnagar-sc {
+ compatible = "cirrus,lochnagar2-soundcard";
+ ...
+ };
+
+ lochnagar-hwmon {
+ compatible = "cirrus,lochnagar2-hwmon";
+ ...
+ };
};
diff --git a/Documentation/devicetree/bindings/mfd/max77620.txt b/Documentation/devicetree/bindings/mfd/max77620.txt
index 9c16d51cc15b..5a642a51d58e 100644
--- a/Documentation/devicetree/bindings/mfd/max77620.txt
+++ b/Documentation/devicetree/bindings/mfd/max77620.txt
@@ -4,7 +4,8 @@ Required properties:
-------------------
- compatible: Must be one of
"maxim,max77620"
- "maxim,max20024".
+ "maxim,max20024"
+ "maxim,max77663"
- reg: I2C device address.
Optional properties:
@@ -17,6 +18,11 @@ Optional properties:
IRQ numbers for different interrupt source of MAX77620
are defined at dt-bindings/mfd/max77620.h.
+- system-power-controller: Indicates that this PMIC is controlling the
+ system power, see [1] for more details.
+
+[1] Documentation/devicetree/bindings/power/power-controller.txt
+
Optional subnodes and their properties:
=======================================
@@ -105,6 +111,7 @@ Optional properties:
Here supported time periods by device in microseconds are as follows:
MAX77620 supports 40, 80, 160, 320, 640, 1280, 2560 and 5120 microseconds.
MAX20024 supports 20, 40, 80, 160, 320, 640, 1280 and 2540 microseconds.
+MAX77663 supports 20, 40, 80, 160, 320, 640, 1280 and 2540 microseconds.
-maxim,power-ok-control: configure map power ok bit
1: Enables POK(Power OK) to control nRST_IO and GPIO1
diff --git a/Documentation/devicetree/bindings/mfd/max77650.txt b/Documentation/devicetree/bindings/mfd/max77650.txt
new file mode 100644
index 000000000000..b529d8d19335
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max77650.txt
@@ -0,0 +1,46 @@
+MAX77650 ultra low-power PMIC from Maxim Integrated.
+
+Required properties:
+-------------------
+- compatible: Must be "maxim,max77650"
+- reg: I2C device address.
+- interrupts: The interrupt on the parent the controller is
+ connected to.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Must be <2>.
+
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Must be <2>. The first cell is the pin number and
+ the second cell is used to specify the gpio active
+ state.
+
+Optional properties:
+--------------------
+gpio-line-names: Single string containing the name of the GPIO line.
+
+The GPIO-controller module is represented as part of the top-level PMIC
+node. The device exposes a single GPIO line.
+
+For device-tree bindings of other sub-modules (regulator, power supply,
+LEDs and onkey) refer to the binding documents under the respective
+sub-system directories.
+
+For more details on GPIO bindings, please refer to the generic GPIO DT
+binding document <devicetree/bindings/gpio/gpio.txt>.
+
+Example:
+--------
+
+ pmic@48 {
+ compatible = "maxim,max77650";
+ reg = <0x48>;
+
+ interrupt-controller;
+ interrupt-parent = <&gpio2>;
+ #interrupt-cells = <2>;
+ interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names = "max77650-charger";
+ };
diff --git a/Documentation/devicetree/bindings/mfd/stmfx.txt b/Documentation/devicetree/bindings/mfd/stmfx.txt
new file mode 100644
index 000000000000..f0c2f7fcf5c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/stmfx.txt
@@ -0,0 +1,28 @@
+STMicroelectonics Multi-Function eXpander (STMFX) Core bindings
+
+ST Multi-Function eXpander (STMFX) is a slave controller using I2C for
+communication with the main MCU. Its main features are GPIO expansion, main
+MCU IDD measurement (IDD is the amount of current that flows through VDD) and
+resistive touchscreen controller.
+
+Required properties:
+- compatible: should be "st,stmfx-0300".
+- reg: I2C slave address of the device.
+- interrupts: interrupt specifier triggered by MFX_IRQ_OUT signal.
+ Please refer to ../interrupt-controller/interrupt.txt
+
+Optional properties:
+- drive-open-drain: configure MFX_IRQ_OUT as open drain.
+- vdd-supply: phandle of the regulator supplying STMFX.
+
+Example:
+
+ stmfx: stmfx@42 {
+ compatible = "st,stmfx-0300";
+ reg = <0x42>;
+ interrupts = <8 IRQ_TYPE_EDGE_RISING>;
+ interrupt-parent = <&gpioi>;
+ vdd-supply = <&v3v3>;
+ };
+
+Please refer to ../pinctrl/pinctrl-stmfx.txt for STMFX GPIO expander function bindings.
diff --git a/Documentation/devicetree/bindings/mfd/ti-lmu.txt b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
index 980394d701a7..86ca786d54fc 100644
--- a/Documentation/devicetree/bindings/mfd/ti-lmu.txt
+++ b/Documentation/devicetree/bindings/mfd/ti-lmu.txt
@@ -104,8 +104,8 @@ lm3632@11 {
regulators {
compatible = "ti,lm363x-regulator";
- ti,lcm-en1-gpio = <&pioC 0 GPIO_ACTIVE_HIGH>; /* PC0 */
- ti,lcm-en2-gpio = <&pioC 1 GPIO_ACTIVE_HIGH>; /* PC1 */
+ enable-gpios = <&pioC 0 GPIO_ACTIVE_HIGH>,
+ <&pioC 1 GPIO_ACTIVE_HIGH>;
vboost {
regulator-name = "lcd_boost";
diff --git a/Documentation/devicetree/bindings/misc/intel,ixp4xx-queue-manager.yaml b/Documentation/devicetree/bindings/misc/intel,ixp4xx-queue-manager.yaml
new file mode 100644
index 000000000000..d2313b1d9405
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/intel,ixp4xx-queue-manager.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/misc/intel-ixp4xx-ahb-queue-manager.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx AHB Queue Manager
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The IXP4xx AHB Queue Manager maintains queues as circular buffers in
+ an 8KB embedded SRAM along with hardware pointers. It is used by both
+ the XScale processor and the NPEs (Network Processing Units) in the
+ IXP4xx for accelerating queues, especially for networking. Clients pick
+ queues from the queue manager with foo-queue = <&qmgr N> where the
+ &qmgr is a phandle to the queue manager and N is the queue resource
+ number. The queue resources available and their specific purpose
+ on a certain IXP4xx system will vary.
+
+properties:
+ compatible:
+ items:
+ - const: intel,ixp4xx-ahb-queue-manager
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Interrupt for queues 0-31
+ - description: Interrupt for queues 32-63
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ qmgr: queue-manager@60000000 {
+ compatible = "intel,ixp4xx-ahb-queue-manager";
+ reg = <0x60000000 0x4000>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>, <4 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
index 07242d141773..36c4bea675d5 100644
--- a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
@@ -13,6 +13,8 @@ Required Properties:
* compatible: should be one of the following.
- "hisilicon,hi3660-dw-mshc": for controllers with hi3660 specific extensions.
+ - "hisilicon,hi3670-dw-mshc", "hisilicon,hi3660-dw-mshc": for controllers
+ with hi3670 specific extensions.
- "hisilicon,hi4511-dw-mshc": for controllers with hi4511 specific extensions.
- "hisilicon,hi6220-dw-mshc": for controllers with hi6220 specific extensions.
diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
new file mode 100644
index 000000000000..fbd4da3684fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/allwinner,sun4i-a10-nand.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 NAND Controller Device Tree Bindings
+
+allOf:
+ - $ref: "nand-controller.yaml"
+
+maintainers:
+ - Chen-Yu Tsai <wens@csie.org>
+ - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+ "#address-cells": true
+ "#size-cells": true
+
+ compatible:
+ enum:
+ - allwinner,sun4i-a10-nand
+ - allwinner,sun8i-a23-nand-controller
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: Bus Clock
+ - description: Module Clock
+
+ clock-names:
+ items:
+ - const: ahb
+ - const: mod
+
+ resets:
+ maxItems: 1
+
+ reset-names:
+ const: ahb
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ const: rxtx
+
+ pinctrl-names: true
+
+patternProperties:
+ "^pinctrl-[0-9]+$": true
+
+ "^nand@[a-f0-9]+$":
+ properties:
+ reg:
+ maxItems: 1
+ minimum: 0
+ maximum: 7
+
+ nand-ecc-mode: true
+
+ nand-ecc-algo:
+ const: bch
+
+ nand-ecc-step-size:
+ enum: [ 512, 1024 ]
+
+ nand-ecc-strength:
+ maximum: 80
+
+ allwinner,rb:
+ description:
+ Contains the native Ready/Busy IDs.
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ - minItems: 1
+ maxItems: 2
+ items:
+ minimum: 0
+ maximum: 1
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index 9bb66e476672..68b51dc58816 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -14,6 +14,7 @@ Required properties:
"atmel,at91sam9261-nand-controller"
"atmel,at91sam9g45-nand-controller"
"atmel,sama5d3-nand-controller"
+ "microchip,sam9x60-nand-controller"
- ranges: empty ranges property to forward EBI ranges definitions.
- #address-cells: should be set to 2.
- #size-cells: should be set to 1.
diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index f33da8782741..b14b6751c2f3 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -7,34 +7,48 @@ Required properties:
"socionext,uniphier-denali-nand-v5b" - for Socionext UniPhier (v5b)
- reg : should contain registers location and length for data and reg.
- reg-names: Should contain the reg names "nand_data" and "denali_reg"
+ - #address-cells: should be 1. The cell encodes the chip select connection.
+ - #size-cells : should be 0.
- interrupts : The interrupt number.
- clocks: should contain phandle of the controller core clock, the bus
interface clock, and the ECC circuit clock.
- clock-names: should contain "nand", "nand_x", "ecc"
-Optional properties:
- - nand-ecc-step-size: see nand.txt for details. If present, the value must be
- 512 for "altr,socfpga-denali-nand"
- 1024 for "socionext,uniphier-denali-nand-v5a"
- 1024 for "socionext,uniphier-denali-nand-v5b"
- - nand-ecc-strength: see nand.txt for details. Valid values are:
- 8, 15 for "altr,socfpga-denali-nand"
- 8, 16, 24 for "socionext,uniphier-denali-nand-v5a"
- 8, 16 for "socionext,uniphier-denali-nand-v5b"
- - nand-ecc-maximize: see nand.txt for details
-
-The device tree may optionally contain sub-nodes describing partitions of the
+Sub-nodes:
+ Sub-nodes represent available NAND chips.
+
+ Required properties:
+ - reg: should contain the bank ID of the controller to which each chip
+ select is connected.
+
+ Optional properties:
+ - nand-ecc-step-size: see nand.txt for details.
+ If present, the value must be
+ 512 for "altr,socfpga-denali-nand"
+ 1024 for "socionext,uniphier-denali-nand-v5a"
+ 1024 for "socionext,uniphier-denali-nand-v5b"
+ - nand-ecc-strength: see nand.txt for details. Valid values are:
+ 8, 15 for "altr,socfpga-denali-nand"
+ 8, 16, 24 for "socionext,uniphier-denali-nand-v5a"
+ 8, 16 for "socionext,uniphier-denali-nand-v5b"
+ - nand-ecc-maximize: see nand.txt for details
+
+The chip nodes may optionally contain sub-nodes describing partitions of the
address space. See partition.txt for more detail.
Examples:
nand: nand@ff900000 {
#address-cells = <1>;
- #size-cells = <1>;
+ #size-cells = <0>;
compatible = "altr,socfpga-denali-nand";
reg = <0xff900000 0x20>, <0xffb80000 0x1000>;
reg-names = "nand_data", "denali_reg";
clocks = <&nand_clk>, <&nand_x_clk>, <&nand_ecc_clk>;
clock-names = "nand", "nand_x", "ecc";
interrupts = <0 144 4>;
+
+ nand@0 {
+ reg = <0>;
+ }
};
diff --git a/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt b/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt
index 29ea5853ca91..c02259353327 100644
--- a/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/ingenic,jz4780-nand.txt
@@ -1,4 +1,4 @@
-* Ingenic JZ4780 NAND/BCH
+* Ingenic JZ4780 NAND/ECC
This file documents the device tree bindings for NAND flash devices on the
JZ4780. NAND devices are connected to the NEMC controller (described in
@@ -6,15 +6,18 @@ memory-controllers/ingenic,jz4780-nemc.txt), and thus NAND device nodes must
be children of the NEMC node.
Required NAND controller device properties:
-- compatible: Should be set to "ingenic,jz4780-nand".
+- compatible: Should be one of:
+ * ingenic,jz4740-nand
+ * ingenic,jz4725b-nand
+ * ingenic,jz4780-nand
- reg: For each bank with a NAND chip attached, should specify a bank number,
an offset of 0 and a size of 0x1000000 (i.e. the whole NEMC bank).
Optional NAND controller device properties:
-- ingenic,bch-controller: To make use of the hardware BCH controller, this
- property must contain a phandle for the BCH controller node. The required
+- ecc-engine: To make use of the hardware ECC controller, this
+ property must contain a phandle for the ECC controller node. The required
properties for this node are described below. If this is not specified,
- software BCH will be used instead.
+ software ECC will be used instead.
Optional children nodes:
- Individual NAND chips are children of the NAND controller node.
@@ -45,7 +48,7 @@ nemc: nemc@13410000 {
#address-cells = <1>;
#size-cells = <0>;
- ingenic,bch-controller = <&bch>;
+ ecc-engine = <&bch>;
nand@1 {
reg = <1>;
@@ -67,14 +70,17 @@ nemc: nemc@13410000 {
};
};
-The BCH controller is a separate SoC component used for error correction on
+The ECC controller is a separate SoC component used for error correction on
NAND devices. The following is a description of the device properties for a
-BCH controller.
-
-Required BCH properties:
-- compatible: Should be set to "ingenic,jz4780-bch".
-- reg: Should specify the BCH controller registers location and length.
-- clocks: Clock for the BCH controller.
+ECC controller.
+
+Required ECC properties:
+- compatible: Should be one of:
+ * ingenic,jz4740-ecc
+ * ingenic,jz4725b-bch
+ * ingenic,jz4780-bch
+- reg: Should specify the ECC controller registers location and length.
+- clocks: Clock for the ECC controller.
Example:
diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
index 7df0dcaccb7d..c69f4f065d23 100644
--- a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
@@ -96,3 +96,19 @@ An example using SRAM:
bank-width = <2>;
};
+An example using gpio-addrs
+
+ flash@20000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash", "jedec-flash";
+ reg = <0x20000000 0x02000000>;
+ ranges = <0 0x00000000 0x02000000
+ 1 0x02000000 0x02000000>;
+ bank-width = <2>;
+ addr-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ partition@0 {
+ label = "test-part1";
+ reg = <0 0x04000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
new file mode 100644
index 000000000000..199ba5ac2a06
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -0,0 +1,143 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/nand-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NAND Chip and NAND Controller Generic Binding
+
+maintainers:
+ - Miquel Raynal <miquel.raynal@bootlin.com>
+ - Richard Weinberger <richard@nod.at>
+
+description: |
+ The NAND controller should be represented with its own DT node, and
+ all NAND chips attached to this controller should be defined as
+ children nodes of the NAND controller. This representation should be
+ enforced even for simple controllers supporting only one chip.
+
+ The ECC strength and ECC step size properties define the user
+ desires in terms of correction capability of a controller. Together,
+ they request the ECC engine to correct {strength} bit errors per
+ {size} bytes.
+
+ The interpretation of these parameters is implementation-defined, so
+ not all implementations must support all possible
+ combinations. However, implementations are encouraged to further
+ specify the value(s) they support.
+
+properties:
+ $nodename:
+ pattern: "^nand-controller(@.*)?"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ ranges: true
+
+patternProperties:
+ "^nand@[a-f0-9]$":
+ properties:
+ reg:
+ description:
+ Contains the native Ready/Busy IDs.
+
+ nand-ecc-mode:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/string
+ - enum: [ none, soft, hw, hw_syndrome, hw_oob_first, on-die ]
+ description:
+ Desired ECC engine, either hardware (most of the time
+ embedded in the NAND controller) or software correction
+ (Linux will handle the calculations). soft_bch is deprecated
+ and should be replaced by soft and nand-ecc-algo.
+
+ nand-ecc-algo:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/string
+ - enum: [ hamming, bch, rs ]
+ description:
+ Desired ECC algorithm.
+
+ nand-bus-width:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - enum: [ 8, 16 ]
+ - default: 8
+ description:
+ Bus width to the NAND chip
+
+ nand-on-flash-bbt:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ With this property, the OS will search the device for a Bad
+ Block Table (BBT). If not found, it will create one, reserve
+ a few blocks at the end of the device to store it and update
+ it as the device ages. Otherwise, the out-of-band area of a
+ few pages of all the blocks will be scanned at boot time to
+ find Bad Block Markers (BBM). These markers will help to
+ build a volatile BBT in RAM.
+
+ nand-ecc-strength:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - minimum: 1
+ description:
+ Maximum number of bits that can be corrected per ECC step.
+
+ nand-ecc-step-size:
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
+ - minimum: 1
+ description:
+ Number of data bytes covered by a single ECC step.
+
+ nand-ecc-maximize:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Whether or not the ECC strength should be maximized. The
+ maximum ECC strength is both controller and chip
+ dependent. The ECC engine has to select the ECC config
+ providing the best strength and taking the OOB area size
+ constraint into account. This is particularly useful when
+ only the in-band area is used by the upper layers, and you
+ want to make your NAND as reliable as possible.
+
+ nand-is-boot-medium:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Whether or not the NAND chip is a boot medium. Drivers might
+ use this information to select ECC algorithms supported by
+ the boot ROM or similar restrictions.
+
+ nand-rb:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ description:
+ Contains the native Ready/Busy IDs.
+
+ required:
+ - reg
+
+required:
+ - "#address-cells"
+ - "#size-cells"
+
+examples:
+ - |
+ nand-controller {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* controller specific properties */
+
+ nand@0 {
+ reg = <0>;
+ nand-ecc-mode = "soft";
+ nand-ecc-algo = "bch";
+
+ /* controller specific properties */
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
deleted file mode 100644
index e949c778e983..000000000000
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-* NAND chip and NAND controller generic binding
-
-NAND controller/NAND chip representation:
-
-The NAND controller should be represented with its own DT node, and all
-NAND chips attached to this controller should be defined as children nodes
-of the NAND controller. This representation should be enforced even for
-simple controllers supporting only one chip.
-
-Mandatory NAND controller properties:
-- #address-cells: depends on your controller. Should at least be 1 to
- encode the CS line id.
-- #size-cells: depends on your controller. Put zero unless you need a
- mapping between CS lines and dedicated memory regions
-
-Optional NAND controller properties
-- ranges: only needed if you need to define a mapping between CS lines and
- memory regions
-
-Optional NAND chip properties:
-
-- nand-ecc-mode : String, operation mode of the NAND ecc mode.
- Supported values are: "none", "soft", "hw", "hw_syndrome",
- "hw_oob_first", "on-die".
- Deprecated values:
- "soft_bch": use "soft" and nand-ecc-algo instead
-- nand-ecc-algo: string, algorithm of NAND ECC.
- Valid values are: "hamming", "bch", "rs".
-- nand-bus-width : 8 or 16 bus width if not present 8
-- nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
-
-- nand-ecc-strength: integer representing the number of bits to correct
- per ECC step.
-
-- nand-ecc-step-size: integer representing the number of data bytes
- that are covered by a single ECC step.
-
-- nand-ecc-maximize: boolean used to specify that you want to maximize ECC
- strength. The maximum ECC strength is both controller and
- chip dependent. The controller side has to select the ECC
- config providing the best strength and taking the OOB area
- size constraint into account.
- This is particularly useful when only the in-band area is
- used by the upper layers, and you want to make your NAND
- as reliable as possible.
-- nand-is-boot-medium: Whether the NAND chip is a boot medium. Drivers might use
- this information to select ECC algorithms supported by
- the boot ROM or similar restrictions.
-
-- nand-rb: shall contain the native Ready/Busy ids.
-
-The ECC strength and ECC step size properties define the correction capability
-of a controller. Together, they say a controller can correct "{strength} bit
-errors per {size} bytes".
-
-The interpretation of these parameters is implementation-defined, so not all
-implementations must support all possible combinations. However, implementations
-are encouraged to further specify the value(s) they support.
-
-Example:
-
- nand-controller {
- #address-cells = <1>;
- #size-cells = <0>;
-
- /* controller specific properties */
-
- nand@0 {
- reg = <0>;
- nand-ecc-mode = "soft";
- nand-ecc-algo = "bch";
-
- /* controller specific properties */
- };
- };
diff --git a/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.txt b/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.txt
new file mode 100644
index 000000000000..d5c5616f6db5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/arm,arm-firmware-suite.txt
@@ -0,0 +1,17 @@
+ARM AFS - ARM Firmware Suite Partitions
+=======================================
+
+The ARM Firmware Suite is a flash partitioning system found on the
+ARM reference designs: Integrator AP, Integrator CP, Versatile AB,
+Versatile PB, the RealView family, Versatile Express and Juno.
+
+Required properties:
+- compatible : (required) must be "arm,arm-firmware-suite"
+
+Example:
+
+flash@0 {
+ partitions {
+ compatible = "arm,arm-firmware-suite";
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-cfe-nor-partitions.txt b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-cfe-nor-partitions.txt
new file mode 100644
index 000000000000..9f630e95f180
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-cfe-nor-partitions.txt
@@ -0,0 +1,24 @@
+Broadcom BCM963XX CFE Loader NOR Flash Partitions
+=================================================
+
+Most Broadcom BCM63XX SoC based devices follow the Broadcom reference layout for
+NOR. The first erase block used for the CFE bootloader, the last for an
+NVRAM partition, and the remainder in-between for one to two firmware partitions
+at fixed offsets. A valid firmware partition is identified by the ImageTag
+header found at beginning of the second erase block, containing the rootfs and
+kernel offsets and sizes within the firmware partition.
+
+Required properties:
+- compatible : must be "brcm,bcm963xx-cfe-nor-partitions"
+
+Example:
+
+flash@1fc00000 {
+ compatible = "cfi-flash";
+ reg = <0x1fc00000 0x400000>;
+ bank-width = <2>;
+
+ partitions {
+ compatible = "brcm,bcm963xx-cfe-nor-partitions";
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-imagetag.txt b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-imagetag.txt
new file mode 100644
index 000000000000..f8b7418ed817
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partitions/brcm,bcm963xx-imagetag.txt
@@ -0,0 +1,45 @@
+Broadcom BCM963XX ImageTag Partition Container
+==============================================
+
+Some Broadcom BCM63XX SoC based devices contain additional, non discoverable
+partitions or non standard bootloader partition sizes. For these a mixed layout
+needs to be used with an explicit firmware partition.
+
+The BCM963XX ImageTag is a simple firmware header describing the offsets and
+sizes of the rootfs and kernel parts contained in the firmware.
+
+Required properties:
+- compatible : must be "brcm,bcm963xx-imagetag"
+
+Example:
+
+flash@1e000000 {
+ compatible = "cfi-flash";
+ reg = <0x1e000000 0x2000000>;
+ bank-width = <2>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cfe@0 {
+ reg = <0x0 0x10000>;
+ read-only;
+ };
+
+ firmware@10000 {
+ reg = <0x10000 0x7d0000>;
+ compatible = "brcm,bcm963xx-imagetag";
+ };
+
+ caldata@7e0000 {
+ reg = <0x7e0000 0x10000>;
+ read-only;
+ };
+
+ nvram@7f0000 {
+ reg = <0x7f0000 0x10000>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
deleted file mode 100644
index dcd5a5d80dc0..000000000000
--- a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Allwinner NAND Flash Controller (NFC)
-
-Required properties:
-- compatible : "allwinner,sun4i-a10-nand".
-- reg : shall contain registers location and length for data and reg.
-- interrupts : shall define the nand controller interrupt.
-- #address-cells: shall be set to 1. Encode the nand CS.
-- #size-cells : shall be set to 0.
-- clocks : shall reference nand controller clocks.
-- clock-names : nand controller internal clock names. Shall contain :
- * "ahb" : AHB gating clock
- * "mod" : nand controller clock
-
-Optional properties:
-- dmas : shall reference DMA channel associated to the NAND controller.
-- dma-names : shall be "rxtx".
-
-Optional children nodes:
-Children nodes represent the available nand chips.
-
-Optional properties:
-- reset : phandle + reset specifier pair
-- reset-names : must contain "ahb"
-- allwinner,rb : shall contain the native Ready/Busy ids.
-- nand-ecc-mode : one of the supported ECC modes ("hw", "soft", "soft_bch" or
- "none")
-
-see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings.
-
-
-Examples:
-nfc: nand@1c03000 {
- compatible = "allwinner,sun4i-a10-nand";
- reg = <0x01c03000 0x1000>;
- interrupts = <0 37 1>;
- clocks = <&ahb_gates 13>, <&nand_clk>;
- clock-names = "ahb", "mod";
- #address-cells = <1>;
- #size-cells = <0>;
- pinctrl-names = "default";
- pinctrl-0 = <&nand_pins_a &nand_cs0_pins_a &nand_rb0_pins_a>;
-
- nand@0 {
- reg = <0>;
- allwinner,rb = <0>;
- nand-ecc-mode = "soft_bch";
- };
-};
diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
index 3a65aabc76a2..6262c2f293b0 100644
--- a/Documentation/devicetree/bindings/net/keystone-netcp.txt
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -139,9 +139,9 @@ Optional properties:
sub-module attached to this interface.
The MAC address will be determined using the optional properties defined in
-ethernet.txt, as provided by the of_get_mac_address API and only if efuse-mac
-is set to 0. If any of the optional MAC address properties are not present,
-then the driver will use random MAC address.
+ethernet.txt and only if efuse-mac is set to 0. If all of the optional MAC
+address properties are not present, then the driver will use a random MAC
+address.
Example binding:
diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
index 74665502f4cf..7e675dafc256 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
@@ -16,8 +16,8 @@ Optional properties:
- ieee80211-freq-limit: See ieee80211.txt
- mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data
-The driver is using of_get_mac_address API, so the MAC address can be as well
-be set with corresponding optional properties defined in net/ethernet.txt.
+The MAC address can as well be set with corresponding optional properties
+defined in net/ethernet.txt.
Optional nodes:
- led: Properties for a connected LED
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
index c124f9bc11f3..5561a1c060d0 100644
--- a/Documentation/devicetree/bindings/pci/designware-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -4,8 +4,11 @@ Required properties:
- compatible:
"snps,dw-pcie" for RC mode;
"snps,dw-pcie-ep" for EP mode;
-- reg: Should contain the configuration address space.
-- reg-names: Must be "config" for the PCIe configuration space.
+- reg: For designware cores version < 4.80 contains the configuration
+ address space. For designware core version >= 4.80, contains
+ the configuration and ATU address space
+- reg-names: Must be "config" for the PCIe configuration space and "atu" for
+ the ATU address space.
(The old way of getting the configuration address space from "ranges"
is deprecated and should be avoided.)
- num-lanes: number of lanes to use
diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt
index 2030ee0dc4f9..47202a2938f2 100644
--- a/Documentation/devicetree/bindings/pci/pci-keystone.txt
+++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt
@@ -11,16 +11,24 @@ described here as well as properties that are not applicable.
Required Properties:-
-compatibility: "ti,keystone-pcie"
-reg: index 1 is the base address and length of DW application registers.
- index 2 is the base address and length of PCI device ID register.
+compatibility: Should be "ti,keystone-pcie" for RC on Keystone2 SoC
+ Should be "ti,am654-pcie-rc" for RC on AM654x SoC
+reg: Three register ranges as listed in the reg-names property
+reg-names: "dbics" for the DesignWare PCIe registers, "app" for the
+ TI specific application registers, "config" for the
+ configuration space address
pcie_msi_intc : Interrupt controller device node for MSI IRQ chip
interrupt-cells: should be set to 1
interrupts: GIC interrupt lines connected to PCI MSI interrupt lines
+ (required if the compatible is "ti,keystone-pcie")
+msi-map: As specified in Documentation/devicetree/bindings/pci/pci-msi.txt
+ (required if the compatible is "ti,am654-pcie-rc".
ti,syscon-pcie-id : phandle to the device control module required to set device
id and vendor id.
+ti,syscon-pcie-mode : phandle to the device control module required to configure
+ PCI in either RC mode or EP mode.
Example:
pcie_msi_intc: msi-interrupt-controller {
@@ -61,3 +69,47 @@ Optional properties:-
DesignWare DT Properties not applicable for Keystone PCI
1. pcie_bus clock-names not used. Instead, a phandle to phys is used.
+
+AM654 PCIe Endpoint
+===================
+
+Required Properties:-
+
+compatibility: Should be "ti,am654-pcie-ep" for EP on AM654x SoC
+reg: Four register ranges as listed in the reg-names property
+reg-names: "dbics" for the DesignWare PCIe registers, "app" for the
+ TI specific application registers, "atu" for the
+ Address Translation Unit configuration registers and
+ "addr_space" used to map remote RC address space
+num-ib-windows: As specified in
+ Documentation/devicetree/bindings/pci/designware-pcie.txt
+num-ob-windows: As specified in
+ Documentation/devicetree/bindings/pci/designware-pcie.txt
+num-lanes: As specified in
+ Documentation/devicetree/bindings/pci/designware-pcie.txt
+power-domains: As documented by the generic PM domain bindings in
+ Documentation/devicetree/bindings/power/power_domain.txt.
+ti,syscon-pcie-mode: phandle to the device control module required to configure
+ PCI in either RC mode or EP mode.
+
+Optional properties:-
+
+phys: list of PHY specifiers (used by generic PHY framework)
+phy-names: must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the
+ number of lanes as specified in *num-lanes* property.
+("phys" and "phy-names" DT bindings are specified in
+Documentation/devicetree/bindings/phy/phy-bindings.txt)
+interrupts: platform interrupt for error interrupts.
+
+pcie-ep {
+ compatible = "ti,am654-pcie-ep";
+ reg = <0x5500000 0x1000>, <0x5501000 0x1000>,
+ <0x10000000 0x8000000>, <0x5506000 0x1000>;
+ reg-names = "app", "dbics", "addr_space", "atu";
+ power-domains = <&k3_pds 120>;
+ ti,syscon-pcie-mode = <&pcie0_mode>;
+ num-lanes = <1>;
+ num-ib-windows = <16>;
+ num-ob-windows = <16>;
+ interrupts = <GIC_SPI 340 IRQ_TYPE_EDGE_RISING>;
+};
diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt
index c77981c5dd18..92c01db610df 100644
--- a/Documentation/devicetree/bindings/pci/pci.txt
+++ b/Documentation/devicetree/bindings/pci/pci.txt
@@ -24,3 +24,53 @@ driver implementation may support the following properties:
unsupported link speed, for instance, trying to do training for
unsupported link speed, etc. Must be '4' for gen4, '3' for gen3, '2'
for gen2, and '1' for gen1. Any other values are invalid.
+
+PCI-PCI Bridge properties
+-------------------------
+
+PCIe root ports and switch ports may be described explicitly in the device
+tree, as children of the host bridge node. Even though those devices are
+discoverable by probing, it might be necessary to describe properties that
+aren't provided by standard PCIe capabilities.
+
+Required properties:
+
+- reg:
+ Identifies the PCI-PCI bridge. As defined in the IEEE Std 1275-1994
+ document, it is a five-cell address encoded as (phys.hi phys.mid
+ phys.lo size.hi size.lo). phys.hi should contain the device's BDF as
+ 0b00000000 bbbbbbbb dddddfff 00000000. The other cells should be zero.
+
+ The bus number is defined by firmware, through the standard bridge
+ configuration mechanism. If this port is a switch port, then firmware
+ allocates the bus number and writes it into the Secondary Bus Number
+ register of the bridge directly above this port. Otherwise, the bus
+ number of a root port is the first number in the bus-range property,
+ defaulting to zero.
+
+ If firmware leaves the ARI Forwarding Enable bit set in the bridge
+ above this port, then phys.hi contains the 8-bit function number as
+ 0b00000000 bbbbbbbb ffffffff 00000000. Note that the PCIe specification
+ recommends that firmware only leaves ARI enabled when it knows that the
+ OS is ARI-aware.
+
+Optional properties:
+
+- external-facing:
+ When present, the port is external-facing. All bridges and endpoints
+ downstream of this port are external to the machine. The OS can, for
+ example, use this information to identify devices that cannot be
+ trusted with relaxed DMA protection, as users could easily attach
+ malicious devices to this port.
+
+Example:
+
+pcie@10000000 {
+ compatible = "pci-host-ecam-generic";
+ ...
+ pcie@0008 {
+ /* Root port 00:01.0 is external-facing */
+ reg = <0x00000800 0 0 0 0>;
+ external-facing;
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt
new file mode 100644
index 000000000000..c1b4c1819b84
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-stmfx.txt
@@ -0,0 +1,116 @@
+STMicroelectronics Multi-Function eXpander (STMFX) GPIO expander bindings
+
+ST Multi-Function eXpander (STMFX) offers up to 24 GPIOs expansion.
+Please refer to ../mfd/stmfx.txt for STMFX Core bindings.
+
+Required properties:
+- compatible: should be "st,stmfx-0300-pinctrl".
+- #gpio-cells: should be <2>, the first cell is the GPIO number and the second
+ cell is the gpio flags in accordance with <dt-bindings/gpio/gpio.h>.
+- gpio-controller: marks the device as a GPIO controller.
+- #interrupt-cells: should be <2>, the first cell is the GPIO number and the
+ second cell is the interrupt flags in accordance with
+ <dt-bindings/interrupt-controller/irq.h>.
+- interrupt-controller: marks the device as an interrupt controller.
+- gpio-ranges: specifies the mapping between gpio controller and pin
+ controller pins. Check "Concerning gpio-ranges property" below.
+Please refer to ../gpio/gpio.txt.
+
+Please refer to pinctrl-bindings.txt for pin configuration.
+
+Required properties for pin configuration sub-nodes:
+- pins: list of pins to which the configuration applies.
+
+Optional properties for pin configuration sub-nodes (pinconf-generic ones):
+- bias-disable: disable any bias on the pin.
+- bias-pull-up: the pin will be pulled up.
+- bias-pull-pin-default: use the pin-default pull state.
+- bias-pull-down: the pin will be pulled down.
+- drive-open-drain: the pin will be driven with open drain.
+- drive-push-pull: the pin will be driven actively high and low.
+- output-high: the pin will be configured as an output driving high level.
+- output-low: the pin will be configured as an output driving low level.
+
+Note that STMFX pins[15:0] are called "gpio[15:0]", and STMFX pins[23:16] are
+called "agpio[7:0]". Example, to refer to pin 18 of STMFX, use "agpio2".
+
+Concerning gpio-ranges property:
+- if all STMFX pins[24:0] are available (no other STMFX function in use), you
+ should use gpio-ranges = <&stmfx_pinctrl 0 0 24>;
+- if agpio[3:0] are not available (STMFX Touchscreen function in use), you
+ should use gpio-ranges = <&stmfx_pinctrl 0 0 16>, <&stmfx_pinctrl 20 20 4>;
+- if agpio[7:4] are not available (STMFX IDD function in use), you
+ should use gpio-ranges = <&stmfx_pinctrl 0 0 20>;
+
+
+Example:
+
+ stmfx: stmfx@42 {
+ ...
+
+ stmfx_pinctrl: stmfx-pin-controller {
+ compatible = "st,stmfx-0300-pinctrl";
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ gpio-ranges = <&stmfx_pinctrl 0 0 24>;
+
+ joystick_pins: joystick {
+ pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4";
+ drive-push-pull;
+ bias-pull-up;
+ };
+ };
+ };
+
+Example of STMFX GPIO consumers:
+
+ joystick {
+ compatible = "gpio-keys";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-0 = <&joystick_pins>;
+ pinctrl-names = "default";
+ button-0 {
+ label = "JoySel";
+ linux,code = <KEY_ENTER>;
+ interrupt-parent = <&stmfx_pinctrl>;
+ interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+ };
+ button-1 {
+ label = "JoyDown";
+ linux,code = <KEY_DOWN>;
+ interrupt-parent = <&stmfx_pinctrl>;
+ interrupts = <1 IRQ_TYPE_EDGE_RISING>;
+ };
+ button-2 {
+ label = "JoyLeft";
+ linux,code = <KEY_LEFT>;
+ interrupt-parent = <&stmfx_pinctrl>;
+ interrupts = <2 IRQ_TYPE_EDGE_RISING>;
+ };
+ button-3 {
+ label = "JoyRight";
+ linux,code = <KEY_RIGHT>;
+ interrupt-parent = <&stmfx_pinctrl>;
+ interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+ };
+ button-4 {
+ label = "JoyUp";
+ linux,code = <KEY_UP>;
+ interrupt-parent = <&stmfx_pinctrl>;
+ interrupts = <4 IRQ_TYPE_EDGE_RISING>;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+ orange {
+ gpios = <&stmfx_pinctrl 17 1>;
+ };
+
+ blue {
+ gpios = <&stmfx_pinctrl 19 1>;
+ };
+ }
diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt b/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
index 1cd050b4054c..0fdc3dd1125e 100644
--- a/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
+++ b/Documentation/devicetree/bindings/power/amlogic,meson-gx-pwrc.txt
@@ -16,7 +16,9 @@ Device Tree Bindings:
---------------------
Required properties:
-- compatible: should be "amlogic,meson-gx-pwrc-vpu" for the Meson GX SoCs
+- compatible: should be one of the following :
+ - "amlogic,meson-gx-pwrc-vpu" for the Meson GX SoCs
+ - "amlogic,meson-g12a-pwrc-vpu" for the Meson G12A SoCs
- #power-domain-cells: should be 0
- amlogic,hhi-sysctrl: phandle to the HHI sysctrl node
- resets: phandles to the reset lines needed for this power demain sequence
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt b/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
index 11906316b43d..e23dea8344f8 100644
--- a/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
@@ -3,13 +3,20 @@ Generic SYSCON mapped register reset driver
This is a generic reset driver using syscon to map the reset register.
The reset is generally performed with a write to the reset register
defined by the register map pointed by syscon reference plus the offset
-with the mask defined in the reboot node.
+with the value and mask defined in the reboot node.
Required properties:
- compatible: should contain "syscon-reboot"
- regmap: this is phandle to the register map node
- offset: offset in the register map for the reboot register (in bytes)
-- mask: the reset value written to the reboot register (32 bit access)
+- value: the reset value written to the reboot register (32 bit access)
+
+Optional properties:
+- mask: update only the register bits defined by the mask (32 bit)
+
+Legacy usage:
+If a node doesn't contain a value property but contains a mask property, the
+mask property is used as the value.
Default will be little endian mode, 32 bit access only.
diff --git a/Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt b/Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt
index ba8d35f66cbe..b2d4968fde7d 100644
--- a/Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt
+++ b/Documentation/devicetree/bindings/power/supply/axp20x_usb_power.txt
@@ -4,6 +4,7 @@ Required Properties:
-compatible: One of: "x-powers,axp202-usb-power-supply"
"x-powers,axp221-usb-power-supply"
"x-powers,axp223-usb-power-supply"
+ "x-powers,axp813-usb-power-supply"
The AXP223 PMIC shares most of its behaviour with the AXP221 but has slight
variations such as the former being able to set the VBUS power supply max
diff --git a/Documentation/devicetree/bindings/power/supply/gpio-charger.txt b/Documentation/devicetree/bindings/power/supply/gpio-charger.txt
index adbb5dc5b6e9..0fb33b2c62a6 100644
--- a/Documentation/devicetree/bindings/power/supply/gpio-charger.txt
+++ b/Documentation/devicetree/bindings/power/supply/gpio-charger.txt
@@ -14,13 +14,17 @@ Required properties :
usb-cdp (USB charging downstream port)
usb-aca (USB accessory charger adapter)
+Optional properties:
+ - charge-status-gpios: GPIO indicating whether a battery is charging.
+
Example:
usb_charger: charger {
compatible = "gpio-charger";
charger-type = "usb-sdp";
- gpios = <&gpf0 2 0 0 0>;
- }
+ gpios = <&gpd 28 GPIO_ACTIVE_LOW>;
+ charge-status-gpios = <&gpc 27 GPIO_ACTIVE_LOW>;
+ };
battery {
power-supplies = <&usb_charger>;
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt b/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
new file mode 100644
index 000000000000..66430bf73815
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.txt
@@ -0,0 +1,31 @@
+* Ingenic JZ47xx battery bindings
+
+Required properties:
+
+- compatible: Must be "ingenic,jz4740-battery".
+- io-channels: phandle and IIO specifier pair to the IIO device.
+ Format described in iio-bindings.txt.
+- monitored-battery: phandle to a "simple-battery" compatible node.
+
+The "monitored-battery" property must be a phandle to a node using the format
+described in battery.txt, with the following properties being required:
+
+- voltage-min-design-microvolt: Drained battery voltage.
+- voltage-max-design-microvolt: Fully charged battery voltage.
+
+Example:
+
+#include <dt-bindings/iio/adc/ingenic,adc.h>
+
+simple_battery: battery {
+ compatible = "simple-battery";
+ voltage-min-design-microvolt = <3600000>;
+ voltage-max-design-microvolt = <4200000>;
+};
+
+ingenic_battery {
+ compatible = "ingenic,jz4740-battery";
+ io-channels = <&adc INGENIC_ADC_BATTERY>;
+ io-channel-names = "battery";
+ monitored-battery = <&simple_battery>;
+};
diff --git a/Documentation/devicetree/bindings/power/supply/ltc3651-charger.txt b/Documentation/devicetree/bindings/power/supply/lt3651-charger.txt
index 71f2840e8209..40811ff8de10 100644
--- a/Documentation/devicetree/bindings/power/supply/ltc3651-charger.txt
+++ b/Documentation/devicetree/bindings/power/supply/lt3651-charger.txt
@@ -1,14 +1,16 @@
-ltc3651-charger
+Analog Devices LT3651 Charger Power Supply bindings: lt3651-charger
Required properties:
- - compatible: "lltc,ltc3651-charger"
+- compatible: Should contain one of the following:
+ * "lltc,ltc3651-charger", (DEPRECATED: Use "lltc,lt3651-charger")
+ * "lltc,lt3651-charger"
- lltc,acpr-gpios: Connect to ACPR output. See remark below.
Optional properties:
- lltc,fault-gpios: Connect to FAULT output. See remark below.
- lltc,chrg-gpios: Connect to CHRG output. See remark below.
-The ltc3651 outputs are open-drain type and active low. The driver assumes the
+The lt3651 outputs are open-drain type and active low. The driver assumes the
GPIO reports "active" when the output is asserted, so if the pins have been
connected directly, the GPIO flags should be set to active low also.
@@ -20,7 +22,7 @@ attributes to detect changes.
Example:
charger: battery-charger {
- compatible = "lltc,ltc3651-charger";
+ compatible = "lltc,lt3651-charger";
lltc,acpr-gpios = <&gpio0 68 GPIO_ACTIVE_LOW>;
lltc,fault-gpios = <&gpio0 64 GPIO_ACTIVE_LOW>;
lltc,chrg-gpios = <&gpio0 63 GPIO_ACTIVE_LOW>;
diff --git a/Documentation/devicetree/bindings/power/supply/max77650-charger.txt b/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
new file mode 100644
index 000000000000..e6d0fb6ff94e
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/max77650-charger.txt
@@ -0,0 +1,28 @@
+Battery charger driver for MAX77650 PMIC from Maxim Integrated.
+
+This module is part of the MAX77650 MFD device. For more details
+see Documentation/devicetree/bindings/mfd/max77650.txt.
+
+The charger is represented as a sub-node of the PMIC node on the device tree.
+
+Required properties:
+--------------------
+- compatible: Must be "maxim,max77650-charger"
+
+Optional properties:
+--------------------
+- input-voltage-min-microvolt: Minimum CHGIN regulation voltage. Must be one
+ of: 4000000, 4100000, 4200000, 4300000,
+ 4400000, 4500000, 4600000, 4700000.
+- input-current-limit-microamp: CHGIN input current limit (in microamps). Must
+ be one of: 95000, 190000, 285000, 380000,
+ 475000.
+
+Example:
+--------
+
+ charger {
+ compatible = "maxim,max77650-charger";
+ input-voltage-min-microvolt = <4200000>;
+ input-current-limit-microamp = <285000>;
+ };
diff --git a/Documentation/devicetree/bindings/power/supply/microchip,ucs1002.txt b/Documentation/devicetree/bindings/power/supply/microchip,ucs1002.txt
new file mode 100644
index 000000000000..1d284ad816bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/supply/microchip,ucs1002.txt
@@ -0,0 +1,27 @@
+Microchip UCS1002 USB Port Power Controller
+
+Required properties:
+- compatible : Should be "microchip,ucs1002";
+- reg : I2C slave address
+
+Optional properties:
+- interrupts : A list of interrupts lines present (could be either
+ corresponding to A_DET# pin, ALERT# pin, or both)
+- interrupt-names : A list of interrupt names. Should contain (if
+ present):
+ - "a_det" for line connected to A_DET# pin
+ - "alert" for line connected to ALERT# pin
+ Both are expected to be IRQ_TYPE_EDGE_BOTH
+Example:
+
+&i2c3 {
+ charger@32 {
+ compatible = "microchip,ucs1002";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ucs1002_pins>;
+ reg = <0x32>;
+ interrupts-extended = <&gpio5 2 IRQ_TYPE_EDGE_BOTH>,
+ <&gpio3 21 IRQ_TYPE_EDGE_BOTH>;
+ interrupt-names = "a_det", "alert";
+ };
+};
diff --git a/Documentation/devicetree/bindings/power/supply/olpc_battery.txt b/Documentation/devicetree/bindings/power/supply/olpc_battery.txt
index c8901b3992d9..8d87d6b35a98 100644
--- a/Documentation/devicetree/bindings/power/supply/olpc_battery.txt
+++ b/Documentation/devicetree/bindings/power/supply/olpc_battery.txt
@@ -2,4 +2,4 @@ OLPC battery
~~~~~~~~~~~~
Required properties:
- - compatible : "olpc,xo1-battery"
+ - compatible : "olpc,xo1-battery" or "olpc,xo1.5-battery"
diff --git a/Documentation/devicetree/bindings/pps/pps-gpio.txt b/Documentation/devicetree/bindings/pps/pps-gpio.txt
index 3683874832ae..9012a2a02e14 100644
--- a/Documentation/devicetree/bindings/pps/pps-gpio.txt
+++ b/Documentation/devicetree/bindings/pps/pps-gpio.txt
@@ -7,6 +7,10 @@ Required properties:
- compatible: should be "pps-gpio"
- gpios: one PPS GPIO in the format described by ../gpio/gpio.txt
+Additional required properties for the PPS ECHO functionality:
+- echo-gpios: one PPS ECHO GPIO in the format described by ../gpio/gpio.txt
+- echo-active-ms: duration in ms of the active portion of the echo pulse
+
Optional properties:
- assert-falling-edge: when present, assert is indicated by a falling edge
(instead of by a rising edge)
@@ -19,5 +23,8 @@ Example:
gpios = <&gpio1 26 GPIO_ACTIVE_HIGH>;
assert-falling-edge;
+ echo-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+ echo-active-ms = <100>;
+
compatible = "pps-gpio";
};
diff --git a/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt
new file mode 100644
index 000000000000..3ba958d764ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/imx-tpm-pwm.txt
@@ -0,0 +1,22 @@
+Freescale i.MX TPM PWM controller
+
+Required properties:
+- compatible : Should be "fsl,imx7ulp-pwm".
+- reg: Physical base address and length of the controller's registers.
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of the cells format.
+- clocks : The clock provided by the SoC to drive the PWM.
+- interrupts: The interrupt for the PWM controller.
+
+Note: The TPM counter and period counter are shared between multiple channels, so all channels
+should use same period setting.
+
+Example:
+
+tpm4: pwm@40250000 {
+ compatible = "fsl,imx7ulp-pwm";
+ reg = <0x40250000 0x1000>;
+ assigned-clocks = <&pcc2 IMX7ULP_CLK_LPTPM4>;
+ assigned-clock-parents = <&scg1 IMX7ULP_CLK_SOSC_BUS_CLK>;
+ clocks = <&pcc2 IMX7ULP_CLK_LPTPM4>;
+ #pwm-cells = <3>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-meson.txt b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
index 1fa3f7182133..891632354065 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-meson.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
@@ -7,6 +7,9 @@ Required properties:
or "amlogic,meson-gxbb-ao-pwm"
or "amlogic,meson-axg-ee-pwm"
or "amlogic,meson-axg-ao-pwm"
+ or "amlogic,meson-g12a-ee-pwm"
+ or "amlogic,meson-g12a-ao-pwm-ab"
+ or "amlogic,meson-g12a-ao-pwm-cd"
- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
the cells format.
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
index 944fe356bb45..31c4577157dd 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
@@ -4,6 +4,7 @@ Required properties:
- compatible: Must be "ti,<soc>-ehrpwm".
for am33xx - compatible = "ti,am3352-ehrpwm", "ti,am33xx-ehrpwm";
for am4372 - compatible = "ti,am4372-ehrpwm", "ti-am3352-ehrpwm", "ti,am33xx-ehrpwm";
+ for am654 - compatible = "ti,am654-ehrpwm", "ti-am3352-ehrpwm";
for da850 - compatible = "ti,da850-ehrpwm", "ti-am3352-ehrpwm", "ti,am33xx-ehrpwm";
for dra746 - compatible = "ti,dra746-ehrpwm", "ti-am3352-ehrpwm";
- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
diff --git a/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt b/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt
index 2bf3344b2a02..2df4bddeb688 100644
--- a/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt
+++ b/Documentation/devicetree/bindings/reset/hisilicon,hi3660-reset.txt
@@ -5,11 +5,12 @@ Please also refer to reset.txt in this directory for common reset
controller binding usage.
The reset controller registers are part of the system-ctl block on
-hi3660 SoC.
+hi3660 and hi3670 SoCs.
Required properties:
-- compatible: should be
- "hisilicon,hi3660-reset"
+- compatible: should be one of the following:
+ "hisilicon,hi3660-reset" for HI3660
+ "hisilicon,hi3670-reset", "hisilicon,hi3660-reset" for HI3670
- hisi,rst-syscon: phandle of the reset's syscon.
- #reset-cells : Specifies the number of cells needed to encode a
reset source. The type shall be a <u32> and the value shall be 2.
diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt
new file mode 100644
index 000000000000..73d8f19c3bd9
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt
@@ -0,0 +1,51 @@
+SiFive L2 Cache Controller
+--------------------------
+The SiFive Level 2 Cache Controller is used to provide access to fast copies
+of memory for masters in a Core Complex. The Level 2 Cache Controller also
+acts as directory-based coherency manager.
+All the properties in ePAPR/DeviceTree specification applies for this platform
+
+Required Properties:
+--------------------
+- compatible: Should be "sifive,fu540-c000-ccache" and "cache"
+
+- cache-block-size: Specifies the block size in bytes of the cache.
+ Should be 64
+
+- cache-level: Should be set to 2 for a level 2 cache
+
+- cache-sets: Specifies the number of associativity sets of the cache.
+ Should be 1024
+
+- cache-size: Specifies the size in bytes of the cache. Should be 2097152
+
+- cache-unified: Specifies the cache is a unified cache
+
+- interrupts: Must contain 3 entries (DirError, DataError and DataFail signals)
+
+- reg: Physical base address and size of L2 cache controller registers map
+
+Optional Properties:
+--------------------
+- next-level-cache: phandle to the next level cache if present.
+
+- memory-region: reference to the reserved-memory for the L2 Loosely Integrated
+ Memory region. The reserved memory node should be defined as per the bindings
+ in reserved-memory.txt
+
+
+Example:
+
+ cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic0>;
+ interrupts = <1 2 3>;
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ next-level-cache = <&L25 &L40 &L36>;
+ memory-region = <&l2_lim>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt b/Documentation/devicetree/bindings/serial/mtk-uart.txt
index bcfb13194f16..c6b5262eb352 100644
--- a/Documentation/devicetree/bindings/serial/mtk-uart.txt
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -1,4 +1,4 @@
-* Mediatek Universal Asynchronous Receiver/Transmitter (UART)
+* MediaTek Universal Asynchronous Receiver/Transmitter (UART)
Required properties:
- compatible should contain:
@@ -13,10 +13,12 @@ Required properties:
* "mediatek,mt6797-uart" for MT6797 compatible UARTS
* "mediatek,mt7622-uart" for MT7622 compatible UARTS
* "mediatek,mt7623-uart" for MT7623 compatible UARTS
+ * "mediatek,mt7629-uart" for MT7629 compatible UARTS
* "mediatek,mt8127-uart" for MT8127 compatible UARTS
* "mediatek,mt8135-uart" for MT8135 compatible UARTS
* "mediatek,mt8173-uart" for MT8173 compatible UARTS
* "mediatek,mt8183-uart", "mediatek,mt6577-uart" for MT8183 compatible UARTS
+ * "mediatek,mt8516-uart" for MT8516 compatible UARTS
* "mediatek,mt6577-uart" for MT6577 and all of the above
- reg: The base address of the UART register bank.
diff --git a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
index 5a2ef1726e2a..7a32404c6114 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
+++ b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
@@ -25,6 +25,7 @@ Required properties in pwrap device node.
"mediatek,mt8135-pwrap" for MT8135 SoCs
"mediatek,mt8173-pwrap" for MT8173 SoCs
"mediatek,mt8183-pwrap" for MT8183 SoCs
+ "mediatek,mt8516-pwrap" for MT8516 SoCs
- interrupts: IRQ for pwrap in SOC
- reg-names: Must include the following entries:
"pwrap": Main registers base
diff --git a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
index d6fe16f094af..876693a7ada5 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
+++ b/Documentation/devicetree/bindings/soc/mediatek/scpsys.txt
@@ -23,6 +23,7 @@ Required properties:
- "mediatek,mt7622-scpsys"
- "mediatek,mt7623-scpsys", "mediatek,mt2701-scpsys": For MT7623 SoC
- "mediatek,mt7623a-scpsys": For MT7623A SoC
+ - "mediatek,mt7629-scpsys", "mediatek,mt7622-scpsys": For MT7629 SoC
- "mediatek,mt8173-scpsys"
- #power-domain-cells: Must be 1
- reg: Address range of the SCPSYS unit
@@ -33,8 +34,8 @@ Required properties:
Required clocks for MT2701 or MT7623: "mm", "mfg", "ethif"
Required clocks for MT2712: "mm", "mfg", "venc", "jpgdec", "audio", "vdec"
Required clocks for MT6797: "mm", "mfg", "vdec"
- Required clocks for MT7622: "hif_sel"
- Required clocks for MT7622A: "ethif"
+ Required clocks for MT7622 or MT7629: "hif_sel"
+ Required clocks for MT7623A: "ethif"
Required clocks for MT8173: "mm", "mfg", "venc", "venc_lt"
Optional properties:
diff --git a/Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt b/Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt
new file mode 100644
index 000000000000..703979dbd577
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/amazon,al-thermal.txt
@@ -0,0 +1,33 @@
+Amazon's Annapurna Labs Thermal Sensor
+
+Simple thermal device that allows temperature reading by a single MMIO
+transaction.
+
+Required properties:
+- compatible: "amazon,al-thermal".
+- reg: The physical base address and length of the sensor's registers.
+- #thermal-sensor-cells: Must be 1. See ./thermal.txt for a description.
+
+Example:
+ thermal: thermal {
+ compatible = "amazon,al-thermal";
+ reg = <0x0 0x05002860 0x0 0x1>;
+ #thermal-sensor-cells = <0x1>;
+ };
+
+ thermal-zones {
+ thermal-z0 {
+ polling-delay-passive = <250>;
+ polling-delay = <1000>;
+ thermal-sensors = <&thermal 0>;
+ trips {
+ critical {
+ temperature = <105000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
index b6c0ae53d4dc..f02f38527a6b 100644
--- a/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
+++ b/Documentation/devicetree/bindings/thermal/nvidia,tegra124-soctherm.txt
@@ -52,13 +52,47 @@ Required properties :
Must set as following values:
TEGRA_SOCTHERM_THROT_LEVEL_LOW, TEGRA_SOCTHERM_THROT_LEVEL_MED
TEGRA_SOCTHERM_THROT_LEVEL_HIGH, TEGRA_SOCTHERM_THROT_LEVEL_NONE
+ - nvidia,gpu-throt-level: This property is for Tegra124 and Tegra210.
+ It is the level of pulse skippers, which used to throttle clock
+ frequencies. It indicates gpu clock throttling depth and can be
+ programmed to any of the following values which represent a throttling
+ percentage:
+ TEGRA_SOCTHERM_THROT_LEVEL_NONE (0%)
+ TEGRA_SOCTHERM_THROT_LEVEL_LOW (50%),
+ TEGRA_SOCTHERM_THROT_LEVEL_MED (75%),
+ TEGRA_SOCTHERM_THROT_LEVEL_HIGH (85%).
- #cooling-cells: Should be 1. This cooling device only support on/off state.
See ./thermal.txt for a description of this property.
+ Optional properties: The following properties are T210 specific and
+ valid only for OCx throttle events.
+ - nvidia,count-threshold: Specifies the number of OC events that are
+ required for triggering an interrupt. Interrupts are not triggered if
+ the property is missing. A value of 0 will interrupt on every OC alarm.
+ - nvidia,polarity-active-low: Configures the polarity of the OC alaram
+ signal. If present, this means assert low, otherwise assert high.
+ - nvidia,alarm-filter: Number of clocks to filter event. When the filter
+ expires (which means the OC event has not occurred for a long time),
+ the counter is cleared and filter is rearmed. Default value is 0.
+ - nvidia,throttle-period-us: Specifies the number of uSec for which
+ throttling is engaged after the OC event is deasserted. Default value
+ is 0.
+
+Optional properties:
+- nvidia,thermtrips : When present, this property specifies the temperature at
+ which the soctherm hardware will assert the thermal trigger signal to the
+ Power Management IC, which can be configured to reset or shutdown the device.
+ It is an array of pairs where each pair represents a tsensor id followed by a
+ temperature in milli Celcius. In the absence of this property the critical
+ trip point will be used for thermtrip temperature.
+
Note:
-- the "critical" type trip points will be set to SOC_THERM hardware as the
-shut down temperature. Once the temperature of this thermal zone is higher
-than it, the system will be shutdown or reset by hardware.
+- the "critical" type trip points will be used to set the temperature at which
+the SOC_THERM hardware will assert a thermal trigger if the "nvidia,thermtrips"
+property is missing. When the thermtrips property is present, the breach of a
+critical trip point is reported back to the thermal framework to implement
+software shutdown.
+
- the "hot" type trip points will be set to SOC_THERM hardware as the throttle
temperature. Once the the temperature of this thermal zone is higher
than it, it will trigger the HW throttle event.
@@ -79,25 +113,32 @@ Example :
#thermal-sensor-cells = <1>;
+ nvidia,thermtrips = <TEGRA124_SOCTHERM_SENSOR_CPU 102500
+ TEGRA124_SOCTHERM_SENSOR_GPU 103000>;
+
throttle-cfgs {
/*
* When the "heavy" cooling device triggered,
- * the HW will skip cpu clock's pulse in 85% depth
+ * the HW will skip cpu clock's pulse in 85% depth,
+ * skip gpu clock's pulse in 85% level
*/
throttle_heavy: heavy {
nvidia,priority = <100>;
nvidia,cpu-throt-percent = <85>;
+ nvidia,gpu-throt-level = <TEGRA_SOCTHERM_THROT_LEVEL_HIGH>;
#cooling-cells = <1>;
};
/*
* When the "light" cooling device triggered,
- * the HW will skip cpu clock's pulse in 50% depth
+ * the HW will skip cpu clock's pulse in 50% depth,
+ * skip gpu clock's pulse in 50% level
*/
throttle_light: light {
nvidia,priority = <80>;
nvidia,cpu-throt-percent = <50>;
+ nvidia,gpu-throt-level = <TEGRA_SOCTHERM_THROT_LEVEL_LOW>;
#cooling-cells = <1>;
};
@@ -107,6 +148,17 @@ Example :
* arbiter will select the highest priority as the final throttle
* settings to skip cpu pulse.
*/
+
+ throttle_oc1: oc1 {
+ nvidia,priority = <50>;
+ nvidia,polarity-active-low;
+ nvidia,count-threshold = <100>;
+ nvidia,alarm-filter = <5100000>;
+ nvidia,throttle-period-us = <0>;
+ nvidia,cpu-throt-percent = <75>;
+ nvidia,gpu-throt-level =
+ <TEGRA_SOCTHERM_THROT_LEVEL_MED>;
+ };
};
};
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.txt b/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
index 1d9e8cf61018..673cc1831ee9 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.txt
@@ -6,11 +6,14 @@ Required properties:
- "qcom,msm8916-tsens" (MSM8916)
- "qcom,msm8974-tsens" (MSM8974)
- "qcom,msm8996-tsens" (MSM8996)
+ - "qcom,qcs404-tsens", "qcom,tsens-v1" (QCS404)
- "qcom,msm8998-tsens", "qcom,tsens-v2" (MSM8998)
- "qcom,sdm845-tsens", "qcom,tsens-v2" (SDM845)
The generic "qcom,tsens-v2" property must be used as a fallback for any SoC
with version 2 of the TSENS IP. MSM8996 is the only exception because the
generic property did not exist when support was added.
+ Similarly, the generic "qcom,tsens-v1" property must be used as a fallback for
+ any SoC with version 1 of the TSENS IP.
- reg: Address range of the thermal registers.
New platforms containing v2.x.y of the TSENS IP must specify the SROT and TM
@@ -39,3 +42,14 @@ tsens0: thermal-sensor@c263000 {
#qcom,sensors = <13>;
#thermal-sensor-cells = <1>;
};
+
+Example 3 (for any platform containing v1 of the TSENS IP):
+tsens: thermal-sensor@4a9000 {
+ compatible = "qcom,qcs404-tsens", "qcom,tsens-v1";
+ reg = <0x004a9000 0x1000>, /* TM */
+ <0x004a8000 0x1000>; /* SROT */
+ nvmem-cells = <&tsens_caldata>;
+ nvmem-cell-names = "calib";
+ #qcom,sensors = <10>;
+ #thermal-sensor-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
index 43d744e5305e..c6aac9bcacf1 100644
--- a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -2,6 +2,7 @@
Required properties:
- compatible : should be "rockchip,<name>-tsadc"
+ "rockchip,px30-tsadc": found on PX30 SoCs
"rockchip,rv1108-tsadc": found on RV1108 SoCs
"rockchip,rk3228-tsadc": found on RK3228 SoCs
"rockchip,rk3288-tsadc": found on RK3288 SoCs
diff --git a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt b/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt
index d72355502b78..691a09db2fef 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt
@@ -8,16 +8,22 @@ temperature using voltage-temperature lookup table.
Required properties:
===================
- compatible: Must be "generic-adc-thermal".
+- #thermal-sensor-cells: Should be 1. See ./thermal.txt for a description
+ of this property.
+Optional properties:
+===================
- temperature-lookup-table: Two dimensional array of Integer; lookup table
to map the relation between ADC value and
temperature. When ADC is read, the value is
looked up on the table to get the equivalent
temperature.
+
The first value of the each row of array is the
temperature in milliCelsius and second value of
the each row of array is the ADC read value.
-- #thermal-sensor-cells: Should be 1. See ./thermal.txt for a description
- of this property.
+
+ If not specified, driver assumes the ADC channel
+ gives milliCelsius directly.
Example :
#include <dt-bindings/thermal/thermal.h>
diff --git a/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml b/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
new file mode 100644
index 000000000000..a36a0746c056
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/timer/intel-ixp4xx-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx XScale Networking Processors Timers
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: This timer is found in the Intel IXP4xx processors.
+
+properties:
+ compatible:
+ items:
+ - const: intel,ixp4xx-timer
+
+ reg:
+ description: Should contain registers location and length
+
+ interrupts:
+ minItems: 1
+ maxItems: 2
+ items:
+ - description: Timer 1 interrupt
+ - description: Timer 2 interrupt
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ timer@c8005000 {
+ compatible = "intel,ixp4xx-timer";
+ reg = <0xc8005000 0x100>;
+ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
index ff7c567a7972..74c3eadad844 100644
--- a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
+++ b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
@@ -17,6 +17,7 @@ Required properties:
* "mediatek,mt8127-timer" for MT8127 compatible timers (GPT)
* "mediatek,mt8135-timer" for MT8135 compatible timers (GPT)
* "mediatek,mt8173-timer" for MT8173 compatible timers (GPT)
+ * "mediatek,mt8516-timer" for MT8516 compatible timers (GPT)
* "mediatek,mt6577-timer" for MT6577 and all above compatible timers (GPT)
For those SoCs that use SYST
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
index 56bccde9953a..a74720486ee2 100644
--- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -11,6 +11,7 @@ Required properties:
the appropriate jedec string:
"qcom,msm8994-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
"qcom,msm8996-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
+ "qcom,msm8998-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
"qcom,sdm845-ufshc", "qcom,ufshc", "jedec,ufs-2.0"
- interrupts : <interrupt mapping for UFS host controller IRQ>
- reg : <registers mapping>
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
deleted file mode 100644
index 9ed399977297..000000000000
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ /dev/null
@@ -1,468 +0,0 @@
-Device tree binding vendor prefix registry. Keep list in alphabetical order.
-
-This isn't an exhaustive list, but you should add new prefixes to it before
-using them to avoid name-space collisions.
-
-abilis Abilis Systems
-abracon Abracon Corporation
-actions Actions Semiconductor Co., Ltd.
-active-semi Active-Semi International Inc
-ad Avionic Design GmbH
-adafruit Adafruit Industries, LLC
-adapteva Adapteva, Inc.
-adaptrum Adaptrum, Inc.
-adh AD Holdings Plc.
-adi Analog Devices, Inc.
-advantech Advantech Corporation
-aeroflexgaisler Aeroflex Gaisler AB
-al Annapurna Labs
-allo Allo.com
-allwinner Allwinner Technology Co., Ltd.
-alphascale AlphaScale Integrated Circuits Systems, Inc.
-altr Altera Corp.
-amarula Amarula Solutions
-amazon Amazon.com, Inc.
-amcc Applied Micro Circuits Corporation (APM, formally AMCC)
-amd Advanced Micro Devices (AMD), Inc.
-amediatech Shenzhen Amediatech Technology Co., Ltd
-amlogic Amlogic, Inc.
-ampire Ampire Co., Ltd.
-ams AMS AG
-amstaos AMS-Taos Inc.
-analogix Analogix Semiconductor, Inc.
-andestech Andes Technology Corporation
-apm Applied Micro Circuits Corporation (APM)
-aptina Aptina Imaging
-arasan Arasan Chip Systems
-archermind ArcherMind Technology (Nanjing) Co., Ltd.
-arctic Arctic Sand
-arcx arcx Inc. / Archronix Inc.
-aries Aries Embedded GmbH
-arm ARM Ltd.
-armadeus ARMadeus Systems SARL
-arrow Arrow Electronics
-artesyn Artesyn Embedded Technologies Inc.
-asahi-kasei Asahi Kasei Corp.
-aspeed ASPEED Technology Inc.
-asus AsusTek Computer Inc.
-atlas Atlas Scientific LLC
-atmel Atmel Corporation
-auo AU Optronics Corporation
-auvidea Auvidea GmbH
-avago Avago Technologies
-avia avia semiconductor
-avic Shanghai AVIC Optoelectronics Co., Ltd.
-avnet Avnet, Inc.
-axentia Axentia Technologies AB
-axis Axis Communications AB
-bananapi BIPAI KEJI LIMITED
-bhf Beckhoff Automation GmbH & Co. KG
-bitmain Bitmain Technologies
-boe BOE Technology Group Co., Ltd.
-bosch Bosch Sensortec GmbH
-boundary Boundary Devices Inc.
-brcm Broadcom Corporation
-buffalo Buffalo, Inc.
-bticino Bticino International
-calxeda Calxeda
-capella Capella Microsystems, Inc
-cascoda Cascoda, Ltd.
-catalyst Catalyst Semiconductor, Inc.
-cavium Cavium, Inc.
-cdns Cadence Design Systems Inc.
-cdtech CDTech(H.K.) Electronics Limited
-ceva Ceva, Inc.
-chipidea Chipidea, Inc
-chipone ChipOne
-chipspark ChipSPARK
-chrp Common Hardware Reference Platform
-chunghwa Chunghwa Picture Tubes Ltd.
-ciaa Computadora Industrial Abierta Argentina
-cirrus Cirrus Logic, Inc.
-cloudengines Cloud Engines, Inc.
-cnm Chips&Media, Inc.
-cnxt Conexant Systems, Inc.
-compulab CompuLab Ltd.
-cortina Cortina Systems, Inc.
-cosmic Cosmic Circuits
-crane Crane Connectivity Solutions
-creative Creative Technology Ltd
-crystalfontz Crystalfontz America, Inc.
-csky Hangzhou C-SKY Microsystems Co., Ltd
-cubietech Cubietech, Ltd.
-cypress Cypress Semiconductor Corporation
-cznic CZ.NIC, z.s.p.o.
-dallas Maxim Integrated Products (formerly Dallas Semiconductor)
-dataimage DataImage, Inc.
-davicom DAVICOM Semiconductor, Inc.
-delta Delta Electronics, Inc.
-denx Denx Software Engineering
-devantech Devantech, Ltd.
-dh DH electronics GmbH
-digi Digi International Inc.
-digilent Diglent, Inc.
-dioo Dioo Microcircuit Co., Ltd
-dlc DLC Display Co., Ltd.
-dlg Dialog Semiconductor
-dlink D-Link Corporation
-dmo Data Modul AG
-domintech Domintech Co., Ltd.
-dongwoon Dongwoon Anatech
-dptechnics DPTechnics
-dragino Dragino Technology Co., Limited
-ea Embedded Artists AB
-ebs-systart EBS-SYSTART GmbH
-ebv EBV Elektronik
-eckelmann Eckelmann AG
-edt Emerging Display Technologies
-eeti eGalax_eMPIA Technology Inc
-elan Elan Microelectronic Corp.
-elgin Elgin S/A.
-embest Shenzhen Embest Technology Co., Ltd.
-emlid Emlid, Ltd.
-emmicro EM Microelectronic
-emtrion emtrion GmbH
-endless Endless Mobile, Inc.
-energymicro Silicon Laboratories (formerly Energy Micro AS)
-engicam Engicam S.r.l.
-epcos EPCOS AG
-epfl Ecole Polytechnique Fédérale de Lausanne
-epson Seiko Epson Corp.
-est ESTeem Wireless Modems
-ettus NI Ettus Research
-eukrea Eukréa Electromatique
-everest Everest Semiconductor Co. Ltd.
-everspin Everspin Technologies, Inc.
-exar Exar Corporation
-excito Excito
-ezchip EZchip Semiconductor
-facebook Facebook
-fairphone Fairphone B.V.
-faraday Faraday Technology Corporation
-fastrax Fastrax Oy
-fcs Fairchild Semiconductor
-feiyang Shenzhen Fly Young Technology Co.,LTD.
-firefly Firefly
-focaltech FocalTech Systems Co.,Ltd
-friendlyarm Guangzhou FriendlyARM Computer Tech Co., Ltd
-fsl Freescale Semiconductor
-fujitsu Fujitsu Ltd.
-gateworks Gateworks Corporation
-gcw Game Consoles Worldwide
-ge General Electric Company
-geekbuying GeekBuying
-gef GE Fanuc Intelligent Platforms Embedded Systems, Inc.
-GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc.
-geniatech Geniatech, Inc.
-giantec Giantec Semiconductor, Inc.
-giantplus Giantplus Technology Co., Ltd.
-globalscale Globalscale Technologies, Inc.
-globaltop GlobalTop Technology, Inc.
-gmt Global Mixed-mode Technology, Inc.
-goodix Shenzhen Huiding Technology Co., Ltd.
-google Google, Inc.
-grinn Grinn
-grmn Garmin Limited
-gumstix Gumstix, Inc.
-gw Gateworks Corporation
-hannstar HannStar Display Corporation
-haoyu Haoyu Microelectronic Co. Ltd.
-hardkernel Hardkernel Co., Ltd
-hideep HiDeep Inc.
-himax Himax Technologies, Inc.
-hisilicon Hisilicon Limited.
-hit Hitachi Ltd.
-hitex Hitex Development Tools
-holt Holt Integrated Circuits, Inc.
-honeywell Honeywell
-hp Hewlett Packard
-holtek Holtek Semiconductor, Inc.
-hwacom HwaCom Systems Inc.
-i2se I2SE GmbH
-ibm International Business Machines (IBM)
-icplus IC Plus Corp.
-idt Integrated Device Technologies, Inc.
-ifi Ingenieurburo Fur Ic-Technologie (I/F/I)
-ilitek ILI Technology Corporation (ILITEK)
-img Imagination Technologies Ltd.
-infineon Infineon Technologies
-inforce Inforce Computing
-ingenic Ingenic Semiconductor
-innolux Innolux Corporation
-inside-secure INSIDE Secure
-intel Intel Corporation
-intercontrol Inter Control Group
-invensense InvenSense Inc.
-inversepath Inverse Path
-iom Iomega Corporation
-isee ISEE 2007 S.L.
-isil Intersil
-issi Integrated Silicon Solutions Inc.
-itead ITEAD Intelligent Systems Co.Ltd
-iwave iWave Systems Technologies Pvt. Ltd.
-jdi Japan Display Inc.
-jedec JEDEC Solid State Technology Association
-jianda Jiandangjing Technology Co., Ltd.
-karo Ka-Ro electronics GmbH
-keithkoep Keith & Koep GmbH
-keymile Keymile GmbH
-khadas Khadas
-kiebackpeter Kieback & Peter GmbH
-kinetic Kinetic Technologies
-kingdisplay King & Display Technology Co., Ltd.
-kingnovel Kingnovel Technology Co., Ltd.
-kionix Kionix, Inc.
-koe Kaohsiung Opto-Electronics Inc.
-kosagi Sutajio Ko-Usagi PTE Ltd.
-kyo Kyocera Corporation
-lacie LaCie
-laird Laird PLC
-lantiq Lantiq Semiconductor
-lattice Lattice Semiconductor
-lego LEGO Systems A/S
-lemaker Shenzhen LeMaker Technology Co., Ltd.
-lenovo Lenovo Group Ltd.
-lg LG Corporation
-libretech Shenzhen Libre Technology Co., Ltd
-licheepi Lichee Pi
-linaro Linaro Limited
-linksys Belkin International, Inc. (Linksys)
-linux Linux-specific binding
-linx Linx Technologies
-lltc Linear Technology Corporation
-logicpd Logic PD, Inc.
-lsi LSI Corp. (LSI Logic)
-lwn Liebherr-Werk Nenzing GmbH
-macnica Macnica Americas
-marvell Marvell Technology Group Ltd.
-maxbotix MaxBotix Inc.
-maxim Maxim Integrated Products
-mbvl Mobiveil Inc.
-mcube mCube
-meas Measurement Specialties
-mediatek MediaTek Inc.
-megachips MegaChips
-mele Shenzhen MeLE Digital Technology Ltd.
-melexis Melexis N.V.
-melfas MELFAS Inc.
-mellanox Mellanox Technologies
-memsic MEMSIC Inc.
-merrii Merrii Technology Co., Ltd.
-micrel Micrel Inc.
-microchip Microchip Technology Inc.
-microcrystal Micro Crystal AG
-micron Micron Technology Inc.
-mikroe MikroElektronika d.o.o.
-minix MINIX Technology Ltd.
-miramems MiraMEMS Sensing Technology Co., Ltd.
-mitsubishi Mitsubishi Electric Corporation
-mosaixtech Mosaix Technologies, Inc.
-motorola Motorola, Inc.
-moxa Moxa Inc.
-mpl MPL AG
-mqmaker mqmaker Inc.
-mscc Microsemi Corporation
-msi Micro-Star International Co. Ltd.
-mti Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
-multi-inno Multi-Inno Technology Co.,Ltd
-mundoreader Mundo Reader S.L.
-murata Murata Manufacturing Co., Ltd.
-mxicy Macronix International Co., Ltd.
-myir MYIR Tech Limited
-national National Semiconductor
-nec NEC LCD Technologies, Ltd.
-neonode Neonode Inc.
-netgear NETGEAR
-netlogic Broadcom Corporation (formerly NetLogic Microsystems)
-netron-dy Netron DY
-netxeon Shenzhen Netxeon Technology CO., LTD
-nexbox Nexbox
-nextthing Next Thing Co.
-newhaven Newhaven Display International
-ni National Instruments
-nintendo Nintendo
-nlt NLT Technologies, Ltd.
-nokia Nokia
-nordic Nordic Semiconductor
-novtech NovTech, Inc.
-nutsboard NutsBoard
-nuvoton Nuvoton Technology Corporation
-nvd New Vision Display
-nvidia NVIDIA
-nxp NXP Semiconductors
-okaya Okaya Electric America, Inc.
-oki Oki Electric Industry Co., Ltd.
-olimex OLIMEX Ltd.
-olpc One Laptop Per Child
-onion Onion Corporation
-onnn ON Semiconductor Corp.
-ontat On Tat Industrial Company
-opalkelly Opal Kelly Incorporated
-opencores OpenCores.org
-openrisc OpenRISC.io
-option Option NV
-oranth Shenzhen Oranth Technology Co., Ltd.
-ORCL Oracle Corporation
-orisetech Orise Technology
-ortustech Ortus Technology Co., Ltd.
-osddisplays OSD Displays
-ovti OmniVision Technologies
-oxsemi Oxford Semiconductor, Ltd.
-panasonic Panasonic Corporation
-parade Parade Technologies Inc.
-pda Precision Design Associates, Inc.
-pericom Pericom Technology Inc.
-pervasive Pervasive Displays, Inc.
-phicomm PHICOMM Co., Ltd.
-phytec PHYTEC Messtechnik GmbH
-picochip Picochip Ltd
-pine64 Pine64
-pixcir PIXCIR MICROELECTRONICS Co., Ltd
-plantower Plantower Co., Ltd
-plathome Plat'Home Co., Ltd.
-plda PLDA
-plx Broadcom Corporation (formerly PLX Technology)
-pni PNI Sensor Corporation
-portwell Portwell Inc.
-poslab Poslab Technology Co., Ltd.
-powervr PowerVR (deprecated, use img)
-probox2 PROBOX2 (by W2COMP Co., Ltd.)
-pulsedlight PulsedLight, Inc
-qca Qualcomm Atheros, Inc.
-qcom Qualcomm Technologies, Inc
-qemu QEMU, a generic and open source machine emulator and virtualizer
-qi Qi Hardware
-qiaodian QiaoDian XianShi Corporation
-qnap QNAP Systems, Inc.
-radxa Radxa
-raidsonic RaidSonic Technology GmbH
-ralink Mediatek/Ralink Technology Corp.
-ramtron Ramtron International
-raspberrypi Raspberry Pi Foundation
-raydium Raydium Semiconductor Corp.
-rda Unisoc Communications, Inc.
-realtek Realtek Semiconductor Corp.
-renesas Renesas Electronics Corporation
-richtek Richtek Technology Corporation
-ricoh Ricoh Co. Ltd.
-rikomagic Rikomagic Tech Corp. Ltd
-riscv RISC-V Foundation
-rockchip Fuzhou Rockchip Electronics Co., Ltd
-rocktech ROCKTECH DISPLAYS LIMITED
-rohm ROHM Semiconductor Co., Ltd
-ronbo Ronbo Electronics
-roofull Shenzhen Roofull Technology Co, Ltd
-samsung Samsung Semiconductor
-samtec Samtec/Softing company
-sancloud Sancloud Ltd
-sandisk Sandisk Corporation
-sbs Smart Battery System
-schindler Schindler
-seagate Seagate Technology PLC
-semtech Semtech Corporation
-sensirion Sensirion AG
-sff Small Form Factor Committee
-sgd Solomon Goldentek Display Corporation
-sgx SGX Sensortech
-sharp Sharp Corporation
-shimafuji Shimafuji Electric, Inc.
-si-en Si-En Technology Ltd.
-sifive SiFive, Inc.
-sigma Sigma Designs, Inc.
-sii Seiko Instruments, Inc.
-sil Silicon Image
-silabs Silicon Laboratories
-silead Silead Inc.
-silergy Silergy Corp.
-siliconmitus Silicon Mitus, Inc.
-simtek
-sirf SiRF Technology, Inc.
-sis Silicon Integrated Systems Corp.
-sitronix Sitronix Technology Corporation
-skyworks Skyworks Solutions, Inc.
-smsc Standard Microsystems Corporation
-snps Synopsys, Inc.
-socionext Socionext Inc.
-solidrun SolidRun
-solomon Solomon Systech Limited
-sony Sony Corporation
-spansion Spansion Inc.
-sprd Spreadtrum Communications Inc.
-sst Silicon Storage Technology, Inc.
-st STMicroelectronics
-starry Starry Electronic Technology (ShenZhen) Co., LTD
-startek Startek
-ste ST-Ericsson
-stericsson ST-Ericsson
-summit Summit microelectronics
-sunchip Shenzhen Sunchip Technology Co., Ltd
-SUNW Sun Microsystems, Inc
-swir Sierra Wireless
-syna Synaptics Inc.
-synology Synology, Inc.
-tbs TBS Technologies
-tbs-biometrics Touchless Biometric Systems AG
-tcg Trusted Computing Group
-tcl Toby Churchill Ltd.
-technexion TechNexion
-technologic Technologic Systems
-tempo Tempo Semiconductor
-techstar Shenzhen Techstar Electronics Co., Ltd.
-terasic Terasic Inc.
-thine THine Electronics, Inc.
-ti Texas Instruments
-tianma Tianma Micro-electronics Co., Ltd.
-tlm Trusted Logic Mobility
-tmt Tecon Microprocessor Technologies, LLC.
-topeet Topeet
-toradex Toradex AG
-toshiba Toshiba Corporation
-toumaz Toumaz
-tpk TPK U.S.A. LLC
-tplink TP-LINK Technologies Co., Ltd.
-tpo TPO
-tronfy Tronfy
-tronsmart Tronsmart
-truly Truly Semiconductors Limited
-tsd Theobroma Systems Design und Consulting GmbH
-tyan Tyan Computer Corporation
-u-blox u-blox
-ucrobotics uCRobotics
-ubnt Ubiquiti Networks
-udoo Udoo
-uniwest United Western Technologies Corp (UniWest)
-upisemi uPI Semiconductor Corp.
-urt United Radiant Technology Corporation
-usi Universal Scientific Industrial Co., Ltd.
-v3 V3 Semiconductor
-vamrs Vamrs Ltd.
-variscite Variscite Ltd.
-via VIA Technologies, Inc.
-virtio Virtual I/O Device Specification, developed by the OASIS consortium
-vishay Vishay Intertechnology, Inc
-vitesse Vitesse Semiconductor Corporation
-vivante Vivante Corporation
-vocore VoCore Studio
-voipac Voipac Technologies s.r.o.
-vot Vision Optical Technology Co., Ltd.
-wd Western Digital Corp.
-wetek WeTek Electronics, limited.
-wexler Wexler
-whwave Shenzhen whwave Electronics, Inc.
-wi2wi Wi2Wi, Inc.
-winbond Winbond Electronics corp.
-winstar Winstar Display Corp.
-wlf Wolfson Microelectronics
-wm Wondermedia Technologies, Inc.
-x-powers X-Powers
-xes Extreme Engineering Solutions (X-ES)
-xillybus Xillybus Ltd.
-xlnx Xilinx
-xunlong Shenzhen Xunlong Software CO.,Limited
-ysoft Y Soft Corporation a.s.
-zarlink Zarlink Semiconductor
-zeitec ZEITEC Semiconductor Co., LTD.
-zidoo Shenzhen Zidoo Technology Co., Ltd.
-zii Zodiac Inflight Innovations
-zte ZTE Corp.
-zyxel ZyXEL Communications Corp.
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
new file mode 100644
index 000000000000..33a65a45e319
--- /dev/null
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -0,0 +1,977 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/vendor-prefixes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Devicetree Vendor Prefix Registry
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+select: true
+
+properties: {}
+
+patternProperties:
+ # Prefixes which are not vendors, but followed the pattern
+ # DO NOT ADD NEW PROPERTIES TO THIS LIST
+ "^(at25|devbus|dmacap|dsa|exynos|gpio-fan|gpio|gpmc|hdmi|i2c-gpio),.*": true
+ "^(keypad|m25p|max8952|max8997|max8998|mpmc),.*": true
+ "^(pinctrl-single|#pinctrl-single|PowerPC),.*": true
+ "^(pl022|pxa-mmc|rcar_sound|rotary-encoder|s5m8767|sdhci),.*": true
+ "^(simple-audio-card|simple-graph-card|st-plgpio|st-spics|ts),.*": true
+
+ # Keep list in alphabetical order.
+ "^abilis,.*":
+ description: Abilis Systems
+ "^abracon,.*":
+ description: Abracon Corporation
+ "^actions,.*":
+ description: Actions Semiconductor Co., Ltd.
+ "^active-semi,.*":
+ description: Active-Semi International Inc
+ "^ad,.*":
+ description: Avionic Design GmbH
+ "^adafruit,.*":
+ description: Adafruit Industries, LLC
+ "^adapteva,.*":
+ description: Adapteva, Inc.
+ "^adaptrum,.*":
+ description: Adaptrum, Inc.
+ "^adh,.*":
+ description: AD Holdings Plc.
+ "^adi,.*":
+ description: Analog Devices, Inc.
+ "^advantech,.*":
+ description: Advantech Corporation
+ "^aeroflexgaisler,.*":
+ description: Aeroflex Gaisler AB
+ "^al,.*":
+ description: Annapurna Labs
+ "^allo,.*":
+ description: Allo.com
+ "^allwinner,.*":
+ description: Allwinner Technology Co., Ltd.
+ "^alphascale,.*":
+ description: AlphaScale Integrated Circuits Systems, Inc.
+ "^altr,.*":
+ description: Altera Corp.
+ "^amarula,.*":
+ description: Amarula Solutions
+ "^amazon,.*":
+ description: Amazon.com, Inc.
+ "^amcc,.*":
+ description: Applied Micro Circuits Corporation (APM, formally AMCC)
+ "^amd,.*":
+ description: Advanced Micro Devices (AMD), Inc.
+ "^amediatech,.*":
+ description: Shenzhen Amediatech Technology Co., Ltd
+ "^amlogic,.*":
+ description: Amlogic, Inc.
+ "^ampire,.*":
+ description: Ampire Co., Ltd.
+ "^ams,.*":
+ description: AMS AG
+ "^amstaos,.*":
+ description: AMS-Taos Inc.
+ "^analogix,.*":
+ description: Analogix Semiconductor, Inc.
+ "^andestech,.*":
+ description: Andes Technology Corporation
+ "^apm,.*":
+ description: Applied Micro Circuits Corporation (APM)
+ "^aptina,.*":
+ description: Aptina Imaging
+ "^arasan,.*":
+ description: Arasan Chip Systems
+ "^archermind,.*":
+ description: ArcherMind Technology (Nanjing) Co., Ltd.
+ "^arctic,.*":
+ description: Arctic Sand
+ "^arcx,.*":
+ description: arcx Inc. / Archronix Inc.
+ "^aries,.*":
+ description: Aries Embedded GmbH
+ "^arm,.*":
+ description: ARM Ltd.
+ "^armadeus,.*":
+ description: ARMadeus Systems SARL
+ "^arrow,.*":
+ description: Arrow Electronics
+ "^artesyn,.*":
+ description: Artesyn Embedded Technologies Inc.
+ "^asahi-kasei,.*":
+ description: Asahi Kasei Corp.
+ "^aspeed,.*":
+ description: ASPEED Technology Inc.
+ "^asus,.*":
+ description: AsusTek Computer Inc.
+ "^atlas,.*":
+ description: Atlas Scientific LLC
+ "^atmel,.*":
+ description: Atmel Corporation
+ "^auo,.*":
+ description: AU Optronics Corporation
+ "^auvidea,.*":
+ description: Auvidea GmbH
+ "^avago,.*":
+ description: Avago Technologies
+ "^avia,.*":
+ description: avia semiconductor
+ "^avic,.*":
+ description: Shanghai AVIC Optoelectronics Co., Ltd.
+ "^avnet,.*":
+ description: Avnet, Inc.
+ "^axentia,.*":
+ description: Axentia Technologies AB
+ "^axis,.*":
+ description: Axis Communications AB
+ "^azoteq,.*":
+ description: Azoteq (Pty) Ltd
+ "^azw,.*":
+ description: Shenzhen AZW Technology Co., Ltd.
+ "^bananapi,.*":
+ description: BIPAI KEJI LIMITED
+ "^bhf,.*":
+ description: Beckhoff Automation GmbH & Co. KG
+ "^bitmain,.*":
+ description: Bitmain Technologies
+ "^boe,.*":
+ description: BOE Technology Group Co., Ltd.
+ "^bosch,.*":
+ description: Bosch Sensortec GmbH
+ "^boundary,.*":
+ description: Boundary Devices Inc.
+ "^brcm,.*":
+ description: Broadcom Corporation
+ "^buffalo,.*":
+ description: Buffalo, Inc.
+ "^bticino,.*":
+ description: Bticino International
+ "^calxeda,.*":
+ description: Calxeda
+ "^capella,.*":
+ description: Capella Microsystems, Inc
+ "^cascoda,.*":
+ description: Cascoda, Ltd.
+ "^catalyst,.*":
+ description: Catalyst Semiconductor, Inc.
+ "^cavium,.*":
+ description: Cavium, Inc.
+ "^cdns,.*":
+ description: Cadence Design Systems Inc.
+ "^cdtech,.*":
+ description: CDTech(H.K.) Electronics Limited
+ "^ceva,.*":
+ description: Ceva, Inc.
+ "^chipidea,.*":
+ description: Chipidea, Inc
+ "^chipone,.*":
+ description: ChipOne
+ "^chipspark,.*":
+ description: ChipSPARK
+ "^chrp,.*":
+ description: Common Hardware Reference Platform
+ "^chunghwa,.*":
+ description: Chunghwa Picture Tubes Ltd.
+ "^ciaa,.*":
+ description: Computadora Industrial Abierta Argentina
+ "^cirrus,.*":
+ description: Cirrus Logic, Inc.
+ "^cloudengines,.*":
+ description: Cloud Engines, Inc.
+ "^cnm,.*":
+ description: Chips&Media, Inc.
+ "^cnxt,.*":
+ description: Conexant Systems, Inc.
+ "^compulab,.*":
+ description: CompuLab Ltd.
+ "^cortina,.*":
+ description: Cortina Systems, Inc.
+ "^cosmic,.*":
+ description: Cosmic Circuits
+ "^crane,.*":
+ description: Crane Connectivity Solutions
+ "^creative,.*":
+ description: Creative Technology Ltd
+ "^crystalfontz,.*":
+ description: Crystalfontz America, Inc.
+ "^csky,.*":
+ description: Hangzhou C-SKY Microsystems Co., Ltd
+ "^cubietech,.*":
+ description: Cubietech, Ltd.
+ "^cypress,.*":
+ description: Cypress Semiconductor Corporation
+ "^cznic,.*":
+ description: CZ.NIC, z.s.p.o.
+ "^dallas,.*":
+ description: Maxim Integrated Products (formerly Dallas Semiconductor)
+ "^dataimage,.*":
+ description: DataImage, Inc.
+ "^davicom,.*":
+ description: DAVICOM Semiconductor, Inc.
+ "^delta,.*":
+ description: Delta Electronics, Inc.
+ "^denx,.*":
+ description: Denx Software Engineering
+ "^devantech,.*":
+ description: Devantech, Ltd.
+ "^dh,.*":
+ description: DH electronics GmbH
+ "^digi,.*":
+ description: Digi International Inc.
+ "^digilent,.*":
+ description: Diglent, Inc.
+ "^dioo,.*":
+ description: Dioo Microcircuit Co., Ltd
+ "^dlc,.*":
+ description: DLC Display Co., Ltd.
+ "^dlg,.*":
+ description: Dialog Semiconductor
+ "^dlink,.*":
+ description: D-Link Corporation
+ "^dmo,.*":
+ description: Data Modul AG
+ "^domintech,.*":
+ description: Domintech Co., Ltd.
+ "^dongwoon,.*":
+ description: Dongwoon Anatech
+ "^dptechnics,.*":
+ description: DPTechnics
+ "^dragino,.*":
+ description: Dragino Technology Co., Limited
+ "^ea,.*":
+ description: Embedded Artists AB
+ "^ebs-systart,.*":
+ description: EBS-SYSTART GmbH
+ "^ebv,.*":
+ description: EBV Elektronik
+ "^eckelmann,.*":
+ description: Eckelmann AG
+ "^edt,.*":
+ description: Emerging Display Technologies
+ "^eeti,.*":
+ description: eGalax_eMPIA Technology Inc
+ "^elan,.*":
+ description: Elan Microelectronic Corp.
+ "^elgin,.*":
+ description: Elgin S/A.
+ "^embest,.*":
+ description: Shenzhen Embest Technology Co., Ltd.
+ "^emlid,.*":
+ description: Emlid, Ltd.
+ "^emmicro,.*":
+ description: EM Microelectronic
+ "^emtrion,.*":
+ description: emtrion GmbH
+ "^endless,.*":
+ description: Endless Mobile, Inc.
+ "^energymicro,.*":
+ description: Silicon Laboratories (formerly Energy Micro AS)
+ "^engicam,.*":
+ description: Engicam S.r.l.
+ "^epcos,.*":
+ description: EPCOS AG
+ "^epfl,.*":
+ description: Ecole Polytechnique Fédérale de Lausanne
+ "^epson,.*":
+ description: Seiko Epson Corp.
+ "^est,.*":
+ description: ESTeem Wireless Modems
+ "^ettus,.*":
+ description: NI Ettus Research
+ "^eukrea,.*":
+ description: Eukréa Electromatique
+ "^everest,.*":
+ description: Everest Semiconductor Co. Ltd.
+ "^everspin,.*":
+ description: Everspin Technologies, Inc.
+ "^exar,.*":
+ description: Exar Corporation
+ "^excito,.*":
+ description: Excito
+ "^ezchip,.*":
+ description: EZchip Semiconductor
+ "^facebook,.*":
+ description: Facebook
+ "^fairphone,.*":
+ description: Fairphone B.V.
+ "^faraday,.*":
+ description: Faraday Technology Corporation
+ "^fastrax,.*":
+ description: Fastrax Oy
+ "^fcs,.*":
+ description: Fairchild Semiconductor
+ "^feiyang,.*":
+ description: Shenzhen Fly Young Technology Co.,LTD.
+ "^firefly,.*":
+ description: Firefly
+ "^focaltech,.*":
+ description: FocalTech Systems Co.,Ltd
+ "^friendlyarm,.*":
+ description: Guangzhou FriendlyARM Computer Tech Co., Ltd
+ "^fsl,.*":
+ description: Freescale Semiconductor
+ "^fujitsu,.*":
+ description: Fujitsu Ltd.
+ "^gateworks,.*":
+ description: Gateworks Corporation
+ "^gcw,.*":
+ description: Game Consoles Worldwide
+ "^ge,.*":
+ description: General Electric Company
+ "^geekbuying,.*":
+ description: GeekBuying
+ "^gef,.*":
+ description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+ "^GEFanuc,.*":
+ description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+ "^geniatech,.*":
+ description: Geniatech, Inc.
+ "^giantec,.*":
+ description: Giantec Semiconductor, Inc.
+ "^giantplus,.*":
+ description: Giantplus Technology Co., Ltd.
+ "^globalscale,.*":
+ description: Globalscale Technologies, Inc.
+ "^globaltop,.*":
+ description: GlobalTop Technology, Inc.
+ "^gmt,.*":
+ description: Global Mixed-mode Technology, Inc.
+ "^goodix,.*":
+ description: Shenzhen Huiding Technology Co., Ltd.
+ "^google,.*":
+ description: Google, Inc.
+ "^grinn,.*":
+ description: Grinn
+ "^grmn,.*":
+ description: Garmin Limited
+ "^gumstix,.*":
+ description: Gumstix, Inc.
+ "^gw,.*":
+ description: Gateworks Corporation
+ "^hannstar,.*":
+ description: HannStar Display Corporation
+ "^haoyu,.*":
+ description: Haoyu Microelectronic Co. Ltd.
+ "^hardkernel,.*":
+ description: Hardkernel Co., Ltd
+ "^hideep,.*":
+ description: HiDeep Inc.
+ "^himax,.*":
+ description: Himax Technologies, Inc.
+ "^hisilicon,.*":
+ description: Hisilicon Limited.
+ "^hit,.*":
+ description: Hitachi Ltd.
+ "^hitex,.*":
+ description: Hitex Development Tools
+ "^holt,.*":
+ description: Holt Integrated Circuits, Inc.
+ "^honeywell,.*":
+ description: Honeywell
+ "^hp,.*":
+ description: Hewlett Packard
+ "^holtek,.*":
+ description: Holtek Semiconductor, Inc.
+ "^hwacom,.*":
+ description: HwaCom Systems Inc.
+ "^i2se,.*":
+ description: I2SE GmbH
+ "^ibm,.*":
+ description: International Business Machines (IBM)
+ "^icplus,.*":
+ description: IC Plus Corp.
+ "^idt,.*":
+ description: Integrated Device Technologies, Inc.
+ "^ifi,.*":
+ description: Ingenieurburo Fur Ic-Technologie (I/F/I)
+ "^ilitek,.*":
+ description: ILI Technology Corporation (ILITEK)
+ "^img,.*":
+ description: Imagination Technologies Ltd.
+ "^infineon,.*":
+ description: Infineon Technologies
+ "^inforce,.*":
+ description: Inforce Computing
+ "^ingenic,.*":
+ description: Ingenic Semiconductor
+ "^innolux,.*":
+ description: Innolux Corporation
+ "^inside-secure,.*":
+ description: INSIDE Secure
+ "^intel,.*":
+ description: Intel Corporation
+ "^intercontrol,.*":
+ description: Inter Control Group
+ "^invensense,.*":
+ description: InvenSense Inc.
+ "^inversepath,.*":
+ description: Inverse Path
+ "^iom,.*":
+ description: Iomega Corporation
+ "^isee,.*":
+ description: ISEE 2007 S.L.
+ "^isil,.*":
+ description: Intersil
+ "^issi,.*":
+ description: Integrated Silicon Solutions Inc.
+ "^itead,.*":
+ description: ITEAD Intelligent Systems Co.Ltd
+ "^iwave,.*":
+ description: iWave Systems Technologies Pvt. Ltd.
+ "^jdi,.*":
+ description: Japan Display Inc.
+ "^jedec,.*":
+ description: JEDEC Solid State Technology Association
+ "^jianda,.*":
+ description: Jiandangjing Technology Co., Ltd.
+ "^karo,.*":
+ description: Ka-Ro electronics GmbH
+ "^keithkoep,.*":
+ description: Keith & Koep GmbH
+ "^keymile,.*":
+ description: Keymile GmbH
+ "^khadas,.*":
+ description: Khadas
+ "^kiebackpeter,.*":
+ description: Kieback & Peter GmbH
+ "^kinetic,.*":
+ description: Kinetic Technologies
+ "^kingdisplay,.*":
+ description: King & Display Technology Co., Ltd.
+ "^kingnovel,.*":
+ description: Kingnovel Technology Co., Ltd.
+ "^kionix,.*":
+ description: Kionix, Inc.
+ "^kobo,.*":
+ description: Rakuten Kobo Inc.
+ "^koe,.*":
+ description: Kaohsiung Opto-Electronics Inc.
+ "^kosagi,.*":
+ description: Sutajio Ko-Usagi PTE Ltd.
+ "^kyo,.*":
+ description: Kyocera Corporation
+ "^lacie,.*":
+ description: LaCie
+ "^laird,.*":
+ description: Laird PLC
+ "^lantiq,.*":
+ description: Lantiq Semiconductor
+ "^lattice,.*":
+ description: Lattice Semiconductor
+ "^lego,.*":
+ description: LEGO Systems A/S
+ "^lemaker,.*":
+ description: Shenzhen LeMaker Technology Co., Ltd.
+ "^lenovo,.*":
+ description: Lenovo Group Ltd.
+ "^lg,.*":
+ description: LG Corporation
+ "^libretech,.*":
+ description: Shenzhen Libre Technology Co., Ltd
+ "^licheepi,.*":
+ description: Lichee Pi
+ "^linaro,.*":
+ description: Linaro Limited
+ "^linksys,.*":
+ description: Belkin International, Inc. (Linksys)
+ "^linux,.*":
+ description: Linux-specific binding
+ "^linx,.*":
+ description: Linx Technologies
+ "^lltc,.*":
+ description: Linear Technology Corporation
+ "^logicpd,.*":
+ description: Logic PD, Inc.
+ "^lsi,.*":
+ description: LSI Corp. (LSI Logic)
+ "^lwn,.*":
+ description: Liebherr-Werk Nenzing GmbH
+ "^macnica,.*":
+ description: Macnica Americas
+ "^marvell,.*":
+ description: Marvell Technology Group Ltd.
+ "^maxbotix,.*":
+ description: MaxBotix Inc.
+ "^maxim,.*":
+ description: Maxim Integrated Products
+ "^mbvl,.*":
+ description: Mobiveil Inc.
+ "^mcube,.*":
+ description: mCube
+ "^meas,.*":
+ description: Measurement Specialties
+ "^mediatek,.*":
+ description: MediaTek Inc.
+ "^megachips,.*":
+ description: MegaChips
+ "^mele,.*":
+ description: Shenzhen MeLE Digital Technology Ltd.
+ "^melexis,.*":
+ description: Melexis N.V.
+ "^melfas,.*":
+ description: MELFAS Inc.
+ "^mellanox,.*":
+ description: Mellanox Technologies
+ "^memsic,.*":
+ description: MEMSIC Inc.
+ "^menlo,.*":
+ description: Menlo Systems GmbH
+ "^merrii,.*":
+ description: Merrii Technology Co., Ltd.
+ "^micrel,.*":
+ description: Micrel Inc.
+ "^microchip,.*":
+ description: Microchip Technology Inc.
+ "^microcrystal,.*":
+ description: Micro Crystal AG
+ "^micron,.*":
+ description: Micron Technology Inc.
+ "^mikroe,.*":
+ description: MikroElektronika d.o.o.
+ "^minix,.*":
+ description: MINIX Technology Ltd.
+ "^miramems,.*":
+ description: MiraMEMS Sensing Technology Co., Ltd.
+ "^mitsubishi,.*":
+ description: Mitsubishi Electric Corporation
+ "^mosaixtech,.*":
+ description: Mosaix Technologies, Inc.
+ "^motorola,.*":
+ description: Motorola, Inc.
+ "^moxa,.*":
+ description: Moxa Inc.
+ "^mpl,.*":
+ description: MPL AG
+ "^mqmaker,.*":
+ description: mqmaker Inc.
+ "^mscc,.*":
+ description: Microsemi Corporation
+ "^msi,.*":
+ description: Micro-Star International Co. Ltd.
+ "^mti,.*":
+ description: Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
+ "^multi-inno,.*":
+ description: Multi-Inno Technology Co.,Ltd
+ "^mundoreader,.*":
+ description: Mundo Reader S.L.
+ "^murata,.*":
+ description: Murata Manufacturing Co., Ltd.
+ "^mxicy,.*":
+ description: Macronix International Co., Ltd.
+ "^myir,.*":
+ description: MYIR Tech Limited
+ "^national,.*":
+ description: National Semiconductor
+ "^nec,.*":
+ description: NEC LCD Technologies, Ltd.
+ "^neonode,.*":
+ description: Neonode Inc.
+ "^netgear,.*":
+ description: NETGEAR
+ "^netlogic,.*":
+ description: Broadcom Corporation (formerly NetLogic Microsystems)
+ "^netron-dy,.*":
+ description: Netron DY
+ "^netxeon,.*":
+ description: Shenzhen Netxeon Technology CO., LTD
+ "^nexbox,.*":
+ description: Nexbox
+ "^nextthing,.*":
+ description: Next Thing Co.
+ "^newhaven,.*":
+ description: Newhaven Display International
+ "^ni,.*":
+ description: National Instruments
+ "^nintendo,.*":
+ description: Nintendo
+ "^nlt,.*":
+ description: NLT Technologies, Ltd.
+ "^nokia,.*":
+ description: Nokia
+ "^nordic,.*":
+ description: Nordic Semiconductor
+ "^novtech,.*":
+ description: NovTech, Inc.
+ "^nutsboard,.*":
+ description: NutsBoard
+ "^nuvoton,.*":
+ description: Nuvoton Technology Corporation
+ "^nvd,.*":
+ description: New Vision Display
+ "^nvidia,.*":
+ description: NVIDIA
+ "^nxp,.*":
+ description: NXP Semiconductors
+ "^oceanic,.*":
+ description: Oceanic Systems (UK) Ltd.
+ "^okaya,.*":
+ description: Okaya Electric America, Inc.
+ "^oki,.*":
+ description: Oki Electric Industry Co., Ltd.
+ "^olimex,.*":
+ description: OLIMEX Ltd.
+ "^olpc,.*":
+ description: One Laptop Per Child
+ "^onion,.*":
+ description: Onion Corporation
+ "^onnn,.*":
+ description: ON Semiconductor Corp.
+ "^ontat,.*":
+ description: On Tat Industrial Company
+ "^opalkelly,.*":
+ description: Opal Kelly Incorporated
+ "^opencores,.*":
+ description: OpenCores.org
+ "^openrisc,.*":
+ description: OpenRISC.io
+ "^option,.*":
+ description: Option NV
+ "^oranth,.*":
+ description: Shenzhen Oranth Technology Co., Ltd.
+ "^ORCL,.*":
+ description: Oracle Corporation
+ "^orisetech,.*":
+ description: Orise Technology
+ "^ortustech,.*":
+ description: Ortus Technology Co., Ltd.
+ "^osddisplays,.*":
+ description: OSD Displays
+ "^ovti,.*":
+ description: OmniVision Technologies
+ "^oxsemi,.*":
+ description: Oxford Semiconductor, Ltd.
+ "^panasonic,.*":
+ description: Panasonic Corporation
+ "^parade,.*":
+ description: Parade Technologies Inc.
+ "^pda,.*":
+ description: Precision Design Associates, Inc.
+ "^pericom,.*":
+ description: Pericom Technology Inc.
+ "^pervasive,.*":
+ description: Pervasive Displays, Inc.
+ "^phicomm,.*":
+ description: PHICOMM Co., Ltd.
+ "^phytec,.*":
+ description: PHYTEC Messtechnik GmbH
+ "^picochip,.*":
+ description: Picochip Ltd
+ "^pine64,.*":
+ description: Pine64
+ "^pixcir,.*":
+ description: PIXCIR MICROELECTRONICS Co., Ltd
+ "^plantower,.*":
+ description: Plantower Co., Ltd
+ "^plathome,.*":
+ description: Plat'Home Co., Ltd.
+ "^plda,.*":
+ description: PLDA
+ "^plx,.*":
+ description: Broadcom Corporation (formerly PLX Technology)
+ "^pni,.*":
+ description: PNI Sensor Corporation
+ "^portwell,.*":
+ description: Portwell Inc.
+ "^poslab,.*":
+ description: Poslab Technology Co., Ltd.
+ "^powervr,.*":
+ description: PowerVR (deprecated, use img)
+ "^probox2,.*":
+ description: PROBOX2 (by W2COMP Co., Ltd.)
+ "^pulsedlight,.*":
+ description: PulsedLight, Inc
+ "^qca,.*":
+ description: Qualcomm Atheros, Inc.
+ "^qcom,.*":
+ description: Qualcomm Technologies, Inc
+ "^qemu,.*":
+ description: QEMU, a generic and open source machine emulator and virtualizer
+ "^qi,.*":
+ description: Qi Hardware
+ "^qiaodian,.*":
+ description: QiaoDian XianShi Corporation
+ "^qnap,.*":
+ description: QNAP Systems, Inc.
+ "^radxa,.*":
+ description: Radxa
+ "^raidsonic,.*":
+ description: RaidSonic Technology GmbH
+ "^ralink,.*":
+ description: Mediatek/Ralink Technology Corp.
+ "^ramtron,.*":
+ description: Ramtron International
+ "^raspberrypi,.*":
+ description: Raspberry Pi Foundation
+ "^raydium,.*":
+ description: Raydium Semiconductor Corp.
+ "^rda,.*":
+ description: Unisoc Communications, Inc.
+ "^realtek,.*":
+ description: Realtek Semiconductor Corp.
+ "^renesas,.*":
+ description: Renesas Electronics Corporation
+ "^richtek,.*":
+ description: Richtek Technology Corporation
+ "^ricoh,.*":
+ description: Ricoh Co. Ltd.
+ "^rikomagic,.*":
+ description: Rikomagic Tech Corp. Ltd
+ "^riscv,.*":
+ description: RISC-V Foundation
+ "^rockchip,.*":
+ description: Fuzhou Rockchip Electronics Co., Ltd
+ "^rocktech,.*":
+ description: ROCKTECH DISPLAYS LIMITED
+ "^rohm,.*":
+ description: ROHM Semiconductor Co., Ltd
+ "^ronbo,.*":
+ description: Ronbo Electronics
+ "^roofull,.*":
+ description: Shenzhen Roofull Technology Co, Ltd
+ "^samsung,.*":
+ description: Samsung Semiconductor
+ "^samtec,.*":
+ description: Samtec/Softing company
+ "^sancloud,.*":
+ description: Sancloud Ltd
+ "^sandisk,.*":
+ description: Sandisk Corporation
+ "^sbs,.*":
+ description: Smart Battery System
+ "^schindler,.*":
+ description: Schindler
+ "^seagate,.*":
+ description: Seagate Technology PLC
+ "^seirobotics,.*":
+ description: Shenzhen SEI Robotics Co., Ltd
+ "^semtech,.*":
+ description: Semtech Corporation
+ "^sensirion,.*":
+ description: Sensirion AG
+ "^sff,.*":
+ description: Small Form Factor Committee
+ "^sgd,.*":
+ description: Solomon Goldentek Display Corporation
+ "^sgx,.*":
+ description: SGX Sensortech
+ "^sharp,.*":
+ description: Sharp Corporation
+ "^shimafuji,.*":
+ description: Shimafuji Electric, Inc.
+ "^si-en,.*":
+ description: Si-En Technology Ltd.
+ "^si-linux,.*":
+ description: Silicon Linux Corporation
+ "^sifive,.*":
+ description: SiFive, Inc.
+ "^sigma,.*":
+ description: Sigma Designs, Inc.
+ "^sii,.*":
+ description: Seiko Instruments, Inc.
+ "^sil,.*":
+ description: Silicon Image
+ "^silabs,.*":
+ description: Silicon Laboratories
+ "^silead,.*":
+ description: Silead Inc.
+ "^silergy,.*":
+ description: Silergy Corp.
+ "^siliconmitus,.*":
+ description: Silicon Mitus, Inc.
+ "^simte,.*":
+ description: k
+ "^sirf,.*":
+ description: SiRF Technology, Inc.
+ "^sis,.*":
+ description: Silicon Integrated Systems Corp.
+ "^sitronix,.*":
+ description: Sitronix Technology Corporation
+ "^skyworks,.*":
+ description: Skyworks Solutions, Inc.
+ "^smsc,.*":
+ description: Standard Microsystems Corporation
+ "^snps,.*":
+ description: Synopsys, Inc.
+ "^socionext,.*":
+ description: Socionext Inc.
+ "^solidrun,.*":
+ description: SolidRun
+ "^solomon,.*":
+ description: Solomon Systech Limited
+ "^sony,.*":
+ description: Sony Corporation
+ "^spansion,.*":
+ description: Spansion Inc.
+ "^sprd,.*":
+ description: Spreadtrum Communications Inc.
+ "^sst,.*":
+ description: Silicon Storage Technology, Inc.
+ "^st,.*":
+ description: STMicroelectronics
+ "^starry,.*":
+ description: Starry Electronic Technology (ShenZhen) Co., LTD
+ "^startek,.*":
+ description: Startek
+ "^ste,.*":
+ description: ST-Ericsson
+ "^stericsson,.*":
+ description: ST-Ericsson
+ "^summit,.*":
+ description: Summit microelectronics
+ "^sunchip,.*":
+ description: Shenzhen Sunchip Technology Co., Ltd
+ "^SUNW,.*":
+ description: Sun Microsystems, Inc
+ "^swir,.*":
+ description: Sierra Wireless
+ "^syna,.*":
+ description: Synaptics Inc.
+ "^synology,.*":
+ description: Synology, Inc.
+ "^tbs,.*":
+ description: TBS Technologies
+ "^tbs-biometrics,.*":
+ description: Touchless Biometric Systems AG
+ "^tcg,.*":
+ description: Trusted Computing Group
+ "^tcl,.*":
+ description: Toby Churchill Ltd.
+ "^technexion,.*":
+ description: TechNexion
+ "^technologic,.*":
+ description: Technologic Systems
+ "^tempo,.*":
+ description: Tempo Semiconductor
+ "^techstar,.*":
+ description: Shenzhen Techstar Electronics Co., Ltd.
+ "^terasic,.*":
+ description: Terasic Inc.
+ "^thine,.*":
+ description: THine Electronics, Inc.
+ "^ti,.*":
+ description: Texas Instruments
+ "^tianma,.*":
+ description: Tianma Micro-electronics Co., Ltd.
+ "^tlm,.*":
+ description: Trusted Logic Mobility
+ "^tmt,.*":
+ description: Tecon Microprocessor Technologies, LLC.
+ "^topeet,.*":
+ description: Topeet
+ "^toradex,.*":
+ description: Toradex AG
+ "^toshiba,.*":
+ description: Toshiba Corporation
+ "^toumaz,.*":
+ description: Toumaz
+ "^tpk,.*":
+ description: TPK U.S.A. LLC
+ "^tplink,.*":
+ description: TP-LINK Technologies Co., Ltd.
+ "^tpo,.*":
+ description: TPO
+ "^tq,.*":
+ description: TQ Systems GmbH
+ "^tronfy,.*":
+ description: Tronfy
+ "^tronsmart,.*":
+ description: Tronsmart
+ "^truly,.*":
+ description: Truly Semiconductors Limited
+ "^tsd,.*":
+ description: Theobroma Systems Design und Consulting GmbH
+ "^tyan,.*":
+ description: Tyan Computer Corporation
+ "^u-blox,.*":
+ description: u-blox
+ "^ucrobotics,.*":
+ description: uCRobotics
+ "^ubnt,.*":
+ description: Ubiquiti Networks
+ "^udoo,.*":
+ description: Udoo
+ "^uniwest,.*":
+ description: United Western Technologies Corp (UniWest)
+ "^upisemi,.*":
+ description: uPI Semiconductor Corp.
+ "^urt,.*":
+ description: United Radiant Technology Corporation
+ "^usi,.*":
+ description: Universal Scientific Industrial Co., Ltd.
+ "^v3,.*":
+ description: V3 Semiconductor
+ "^vamrs,.*":
+ description: Vamrs Ltd.
+ "^variscite,.*":
+ description: Variscite Ltd.
+ "^via,.*":
+ description: VIA Technologies, Inc.
+ "^virtio,.*":
+ description: Virtual I/O Device Specification, developed by the OASIS consortium
+ "^vishay,.*":
+ description: Vishay Intertechnology, Inc
+ "^vitesse,.*":
+ description: Vitesse Semiconductor Corporation
+ "^vivante,.*":
+ description: Vivante Corporation
+ "^vocore,.*":
+ description: VoCore Studio
+ "^voipac,.*":
+ description: Voipac Technologies s.r.o.
+ "^vot,.*":
+ description: Vision Optical Technology Co., Ltd.
+ "^wd,.*":
+ description: Western Digital Corp.
+ "^wetek,.*":
+ description: WeTek Electronics, limited.
+ "^wexler,.*":
+ description: Wexler
+ "^whwave,.*":
+ description: Shenzhen whwave Electronics, Inc.
+ "^wi2wi,.*":
+ description: Wi2Wi, Inc.
+ "^winbond,.*":
+ description: Winbond Electronics corp.
+ "^winstar,.*":
+ description: Winstar Display Corp.
+ "^wlf,.*":
+ description: Wolfson Microelectronics
+ "^wm,.*":
+ description: Wondermedia Technologies, Inc.
+ "^x-powers,.*":
+ description: X-Powers
+ "^xes,.*":
+ description: Extreme Engineering Solutions (X-ES)
+ "^xillybus,.*":
+ description: Xillybus Ltd.
+ "^xlnx,.*":
+ description: Xilinx
+ "^xunlong,.*":
+ description: Shenzhen Xunlong Software CO.,Limited
+ "^ysoft,.*":
+ description: Y Soft Corporation a.s.
+ "^zarlink,.*":
+ description: Zarlink Semiconductor
+ "^zeitec,.*":
+ description: ZEITEC Semiconductor Co., LTD.
+ "^zidoo,.*":
+ description: Shenzhen Zidoo Technology Co., Ltd.
+ "^zii,.*":
+ description: Zodiac Inflight Innovations
+ "^zte,.*":
+ description: ZTE Corp.
+ "^zyxel,.*":
+ description: ZyXEL Communications Corp.
+
+ # Normal property name match without a comma
+ # These should catch all node/property names without a prefix
+ "^[a-zA-Z0-9#][a-zA-Z0-9+\\-._@]{0,63}$": true
+ "^[a-zA-Z0-9+\\-._]*@[0-9a-zA-Z,]*$": true
+ "^#.*": true
+
+additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt
new file mode 100644
index 000000000000..02b87e92ae68
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-sc-wdt.txt
@@ -0,0 +1,24 @@
+* Freescale i.MX System Controller Watchdog
+
+i.MX system controller watchdog is for i.MX SoCs with system controller inside,
+the watchdog is managed by system controller, users can ONLY communicate with
+system controller from secure mode for watchdog operations, so Linux i.MX system
+controller watchdog driver will call ARM SMC API and trap into ARM-Trusted-Firmware
+for watchdog operations, ARM-Trusted-Firmware is running at secure EL3 mode and
+it will request system controller to execute the watchdog operation passed from
+Linux kernel.
+
+Required properties:
+- compatible: Should be :
+ "fsl,imx8qxp-sc-wdt"
+ followed by "fsl,imx-sc-wdt";
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds.
+
+Examples:
+
+watchdog {
+ compatible = "fsl,imx8qxp-sc-wdt", "fsl,imx-sc-wdt";
+ timeout-sec = <60>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
index 8682d6a93e5b..fd380eb28df5 100644
--- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
@@ -9,6 +9,7 @@ Required properties:
"mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622
"mediatek,mt7623-wdt", "mediatek,mt6589-wdt": for MT7623
"mediatek,mt7629-wdt", "mediatek,mt6589-wdt": for MT7629
+ "mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516
- reg : Specifies base physical address and size of the registers.
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 3043167fc557..1ce7fcd0f989 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -1,10 +1,8 @@
-================================
-GPIO Descriptor Driver Interface
-================================
+=====================
+GPIO Driver Interface
+=====================
-This document serves as a guide for GPIO chip drivers writers. Note that it
-describes the new descriptor-based interface. For a description of the
-deprecated integer-based GPIO interface please refer to gpio-legacy.txt.
+This document serves as a guide for writers of GPIO chip drivers.
Each GPIO controller driver needs to include the following header, which defines
the structures used to define a GPIO driver:
@@ -15,32 +13,49 @@ the structures used to define a GPIO driver:
Internal Representation of GPIOs
================================
-Inside a GPIO driver, individual GPIOs are identified by their hardware number,
-which is a unique number between 0 and n, n being the number of GPIOs managed by
-the chip. This number is purely internal: the hardware number of a particular
-GPIO descriptor is never made visible outside of the driver.
-
-On top of this internal number, each GPIO also need to have a global number in
-the integer GPIO namespace so that it can be used with the legacy GPIO
+A GPIO chip handles one or more GPIO lines. To be considered a GPIO chip, the
+lines must conform to the definition: General Purpose Input/Output. If the
+line is not general purpose, it is not GPIO and should not be handled by a
+GPIO chip. The use case is the indicative: certain lines in a system may be
+called GPIO but serve a very particular purpose thus not meeting the criteria
+of a general purpose I/O. On the other hand a LED driver line may be used as a
+GPIO and should therefore still be handled by a GPIO chip driver.
+
+Inside a GPIO driver, individual GPIO lines are identified by their hardware
+number, sometime also referred to as ``offset``, which is a unique number
+between 0 and n-1, n being the number of GPIOs managed by the chip.
+
+The hardware GPIO number should be something intuitive to the hardware, for
+example if a system uses a memory-mapped set of I/O-registers where 32 GPIO
+lines are handled by one bit per line in a 32-bit register, it makes sense to
+use hardware offsets 0..31 for these, corresponding to bits 0..31 in the
+register.
+
+This number is purely internal: the hardware number of a particular GPIO
+line is never made visible outside of the driver.
+
+On top of this internal number, each GPIO line also needs to have a global
+number in the integer GPIO namespace so that it can be used with the legacy GPIO
interface. Each chip must thus have a "base" number (which can be automatically
-assigned), and for each GPIO the global number will be (base + hardware number).
-Although the integer representation is considered deprecated, it still has many
-users and thus needs to be maintained.
+assigned), and for each GPIO line the global number will be (base + hardware
+number). Although the integer representation is considered deprecated, it still
+has many users and thus needs to be maintained.
-So for example one platform could use numbers 32-159 for GPIOs, with a
+So for example one platform could use global numbers 32-159 for GPIOs, with a
controller defining 128 GPIOs at a "base" of 32 ; while another platform uses
-numbers 0..63 with one set of GPIO controllers, 64-79 with another type of GPIO
-controller, and on one particular board 80-95 with an FPGA. The numbers need not
-be contiguous; either of those platforms could also use numbers 2000-2063 to
-identify GPIOs in a bank of I2C GPIO expanders.
+global numbers 0..63 with one set of GPIO controllers, 64-79 with another type
+of GPIO controller, and on one particular board 80-95 with an FPGA. The legacy
+numbers need not be contiguous; either of those platforms could also use numbers
+2000-2063 to identify GPIO lines in a bank of I2C GPIO expanders.
Controller Drivers: gpio_chip
=============================
In the gpiolib framework each GPIO controller is packaged as a "struct
-gpio_chip" (see linux/gpio/driver.h for its complete definition) with members
-common to each controller of that type:
+gpio_chip" (see <linux/gpio/driver.h> for its complete definition) with members
+common to each controller of that type, these should be assigned by the
+driver code:
- methods to establish GPIO line direction
- methods used to access GPIO line values
@@ -48,12 +63,12 @@ common to each controller of that type:
- method to return the IRQ number associated to a given GPIO line
- flag saying whether calls to its methods may sleep
- optional line names array to identify lines
- - optional debugfs dump method (showing extra state like pullup config)
+ - optional debugfs dump method (showing extra state information)
- optional base number (will be automatically assigned if omitted)
- optional label for diagnostics and GPIO chip mapping using platform data
The code implementing a gpio_chip should support multiple instances of the
-controller, possibly using the driver model. That code will configure each
+controller, preferably using the driver model. That code will configure each
gpio_chip and issue ``gpiochip_add[_data]()`` or ``devm_gpiochip_add_data()``.
Removing a GPIO controller should be rare; use ``[devm_]gpiochip_remove()``
when it is unavoidable.
@@ -62,24 +77,28 @@ Often a gpio_chip is part of an instance-specific structure with states not
exposed by the GPIO interfaces, such as addressing, power management, and more.
Chips such as audio codecs will have complex non-GPIO states.
-Any debugfs dump method should normally ignore signals which haven't been
-requested as GPIOs. They can use gpiochip_is_requested(), which returns either
-NULL or the label associated with that GPIO when it was requested.
+Any debugfs dump method should normally ignore lines which haven't been
+requested. They can use gpiochip_is_requested(), which returns either
+NULL or the label associated with that GPIO line when it was requested.
-RT_FULL: the GPIO driver should not use spinlock_t or any sleepable APIs
-(like PM runtime) in its gpio_chip implementation (.get/.set and direction
-control callbacks) if it is expected to call GPIO APIs from atomic context
-on -RT (inside hard IRQ handlers and similar contexts). Normally this should
-not be required.
+Realtime considerations: the GPIO driver should not use spinlock_t or any
+sleepable APIs (like PM runtime) in its gpio_chip implementation (.get/.set
+and direction control callbacks) if it is expected to call GPIO APIs from
+atomic context on realtime kernels (inside hard IRQ handlers and similar
+contexts). Normally this should not be required.
GPIO electrical configuration
-----------------------------
-GPIOs can be configured for several electrical modes of operation by using the
-.set_config() callback. Currently this API supports setting debouncing and
-single-ended modes (open drain/open source). These settings are described
-below.
+GPIO lines can be configured for several electrical modes of operation by using
+the .set_config() callback. Currently this API supports setting:
+
+- Debouncing
+- Single-ended modes (open drain/open source)
+- Pull up and pull down resistor enablement
+
+These settings are described below.
The .set_config() callback uses the same enumerators and configuration
semantics as the generic pin control drivers. This is not a coincidence: it is
@@ -94,8 +113,8 @@ description needs to provide "GPIO ranges" mapping the GPIO line offsets to pin
numbers on the pin controller so they can properly cross-reference each other.
-GPIOs with debounce support
----------------------------
+GPIO lines with debounce support
+--------------------------------
Debouncing is a configuration set to a pin indicating that it is connected to
a mechanical switch or button, or similar that may bounce. Bouncing means the
@@ -111,8 +130,8 @@ a certain number of milliseconds for debouncing, or just "on/off" if that time
is not configurable.
-GPIOs with open drain/source support
-------------------------------------
+GPIO lines with open drain/source support
+-----------------------------------------
Open drain (CMOS) or open collector (TTL) means the line is not actively driven
high: instead you provide the drain/collector as output, so when the transistor
@@ -132,13 +151,13 @@ This configuration is normally used as a way to achieve one of two things:
- Level-shifting: to reach a logical level higher than that of the silicon
where the output resides.
-- inverse wire-OR on an I/O line, for example a GPIO line, making it possible
+- Inverse wire-OR on an I/O line, for example a GPIO line, making it possible
for any driving stage on the line to drive it low even if any other output
to the same line is simultaneously driving it high. A special case of this
is driving the SCL and SDA lines of an I2C bus, which is by definition a
wire-OR bus.
-Both usecases require that the line be equipped with a pull-up resistor. This
+Both use cases require that the line be equipped with a pull-up resistor. This
resistor will make the line tend to high level unless one of the transistors on
the rail actively pulls it down.
@@ -208,27 +227,91 @@ For open source configuration the same principle is used, just that instead
of actively driving the line low, it is set to input.
+GPIO lines with pull up/down resistor support
+---------------------------------------------
+
+A GPIO line can support pull-up/down using the .set_config() callback. This
+means that a pull up or pull-down resistor is available on the output of the
+GPIO line, and this resistor is software controlled.
+
+In discrete designs, a pull-up or pull-down resistor is simply soldered on
+the circuit board. This is not something we deal or model in software. The
+most you will think about these lines is that they will very likely be
+configured as open drain or open source (see the section above).
+
+The .set_config() callback can only turn pull up or down on and off, and will
+no have any semantic knowledge about the resistance used. It will only say
+switch a bit in a register enabling or disabling pull-up or pull-down.
+
+If the GPIO line supports shunting in different resistance values for the
+pull-up or pull-down resistor, the GPIO chip callback .set_config() will not
+suffice. For these complex use cases, a combined GPIO chip and pin controller
+need to be implemented, as the pin config interface of a pin controller
+supports more versatile control over electrical properties and can handle
+different pull-up or pull-down resistance values.
+
+
GPIO drivers providing IRQs
----------------------------
+===========================
+
It is custom that GPIO drivers (GPIO chips) are also providing interrupts,
most often cascaded off a parent interrupt controller, and in some special
cases the GPIO logic is melded with a SoC's primary interrupt controller.
-The IRQ portions of the GPIO block are implemented using an irqchip, using
+The IRQ portions of the GPIO block are implemented using an irq_chip, using
the header <linux/irq.h>. So basically such a driver is utilizing two sub-
systems simultaneously: gpio and irq.
-RT_FULL: a realtime compliant GPIO driver should not use spinlock_t or any
-sleepable APIs (like PM runtime) as part of its irq_chip implementation.
+It is legal for any IRQ consumer to request an IRQ from any irqchip even if it
+is a combined GPIO+IRQ driver. The basic premise is that gpio_chip and
+irq_chip are orthogonal, and offering their services independent of each
+other.
-* spinlock_t should be replaced with raw_spinlock_t [1].
-* If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
+gpiod_to_irq() is just a convenience function to figure out the IRQ for a
+certain GPIO line and should not be relied upon to have been called before
+the IRQ is used.
+
+Always prepare the hardware and make it ready for action in respective
+callbacks from the GPIO and irq_chip APIs. Do not rely on gpiod_to_irq() having
+been called first.
+
+We can divide GPIO irqchips in two broad categories:
+
+- CASCADED INTERRUPT CHIPS: this means that the GPIO chip has one common
+ interrupt output line, which is triggered by any enabled GPIO line on that
+ chip. The interrupt output line will then be routed to an parent interrupt
+ controller one level up, in the most simple case the systems primary
+ interrupt controller. This is modeled by an irqchip that will inspect bits
+ inside the GPIO controller to figure out which line fired it. The irqchip
+ part of the driver needs to inspect registers to figure this out and it
+ will likely also need to acknowledge that it is handling the interrupt
+ by clearing some bit (sometime implicitly, by just reading a status
+ register) and it will often need to set up the configuration such as
+ edge sensitivity (rising or falling edge, or high/low level interrupt for
+ example).
+
+- HIERARCHICAL INTERRUPT CHIPS: this means that each GPIO line has a dedicated
+ irq line to a parent interrupt controller one level up. There is no need
+ to inquire the GPIO hardware to figure out which line has figured, but it
+ may still be necessary to acknowledge the interrupt and set up the
+ configuration such as edge sensitivity.
+
+Realtime considerations: a realtime compliant GPIO driver should not use
+spinlock_t or any sleepable APIs (like PM runtime) as part of its irqchip
+implementation.
+
+- spinlock_t should be replaced with raw_spinlock_t [1].
+- If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
and .irq_bus_unlock() callbacks, as these are the only slowpath callbacks
on an irqchip. Create the callbacks if needed [2].
-GPIO irqchips usually fall in one of two categories:
-* CHAINED GPIO irqchips: these are usually the type that is embedded on
+Cascaded GPIO irqchips
+----------------------
+
+Cascaded GPIO irqchips usually fall in one of three categories:
+
+- CHAINED CASCADED GPIO IRQCHIPS: these are usually the type that is embedded on
an SoC. This means that there is a fast IRQ flow handler for the GPIOs that
gets called in a chain from the parent IRQ handler, most typically the
system interrupt controller. This means that the GPIO irqchip handler will
@@ -245,16 +328,19 @@ GPIO irqchips usually fall in one of two categories:
struct gpio_chip, as everything happens directly in the callbacks: no
slow bus traffic like I2C can be used.
- RT_FULL: Note, chained IRQ handlers will not be forced threaded on -RT.
- As result, spinlock_t or any sleepable APIs (like PM runtime) can't be used
- in chained IRQ handler.
- If required (and if it can't be converted to the nested threaded GPIO irqchip)
- a chained IRQ handler can be converted to generic irq handler and this way
- it will be a threaded IRQ handler on -RT and a hard IRQ handler on non-RT
- (for example, see [3]).
- Know W/A: The generic_handle_irq() is expected to be called with IRQ disabled,
+ Realtime considerations: Note that chained IRQ handlers will not be forced
+ threaded on -RT. As a result, spinlock_t or any sleepable APIs (like PM
+ runtime) can't be used in a chained IRQ handler.
+
+ If required (and if it can't be converted to the nested threaded GPIO irqchip,
+ see below) a chained IRQ handler can be converted to generic irq handler and
+ this way it will become a threaded IRQ handler on -RT and a hard IRQ handler
+ on non-RT (for example, see [3]).
+
+ The generic_handle_irq() is expected to be called with IRQ disabled,
so the IRQ core will complain if it is called from an IRQ handler which is
- forced to a thread. The "fake?" raw lock can be used to W/A this problem::
+ forced to a thread. The "fake?" raw lock can be used to work around this
+ problem::
raw_spinlock_t wa_lock;
static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank)
@@ -263,7 +349,7 @@ GPIO irqchips usually fall in one of two categories:
generic_handle_irq(irq_find_mapping(bank->chip.irq.domain, bit));
raw_spin_unlock_irqrestore(&bank->wa_lock, wa_lock_flags);
-* GENERIC CHAINED GPIO irqchips: these are the same as "CHAINED GPIO irqchips",
+- GENERIC CHAINED GPIO IRQCHIPS: these are the same as "CHAINED GPIO irqchips",
but chained IRQ handlers are not used. Instead GPIO IRQs dispatching is
performed by generic IRQ handler which is configured using request_irq().
The GPIO irqchip will then end up calling something like this sequence in
@@ -273,16 +359,19 @@ GPIO irqchips usually fall in one of two categories:
for each detected GPIO IRQ
generic_handle_irq(...);
- RT_FULL: Such kind of handlers will be forced threaded on -RT, as result IRQ
- core will complain that generic_handle_irq() is called with IRQ enabled and
- the same W/A as for "CHAINED GPIO irqchips" can be applied.
+ Realtime considerations: this kind of handlers will be forced threaded on -RT,
+ and as result the IRQ core will complain that generic_handle_irq() is called
+ with IRQ enabled and the same work around as for "CHAINED GPIO irqchips" can
+ be applied.
+
+- NESTED THREADED GPIO IRQCHIPS: these are off-chip GPIO expanders and any
+ other GPIO irqchip residing on the other side of a sleeping bus such as I2C
+ or SPI.
-* NESTED THREADED GPIO irqchips: these are off-chip GPIO expanders and any
- other GPIO irqchip residing on the other side of a sleeping bus. Of course
- such drivers that need slow bus traffic to read out IRQ status and similar,
- traffic which may in turn incur other IRQs to happen, cannot be handled
- in a quick IRQ handler with IRQs disabled. Instead they need to spawn a
- thread and then mask the parent IRQ line until the interrupt is handled
+ Of course such drivers that need slow bus traffic to read out IRQ status and
+ similar, traffic which may in turn incur other IRQs to happen, cannot be
+ handled in a quick IRQ handler with IRQs disabled. Instead they need to spawn
+ a thread and then mask the parent IRQ line until the interrupt is handled
by the driver. The hallmark of this driver is to call something like
this in its interrupt handler::
@@ -294,36 +383,46 @@ GPIO irqchips usually fall in one of two categories:
flag on struct gpio_chip to true, indicating that this chip may sleep
when accessing the GPIOs.
+ These kinds of irqchips are inherently realtime tolerant as they are
+ already set up to handle sleeping contexts.
+
+
+Infrastructure helpers for GPIO irqchips
+----------------------------------------
+
To help out in handling the set-up and management of GPIO irqchips and the
associated irqdomain and resource allocation callbacks, the gpiolib has
some helpers that can be enabled by selecting the GPIOLIB_IRQCHIP Kconfig
symbol:
-* gpiochip_irqchip_add(): adds a chained irqchip to a gpiochip. It will pass
- the struct gpio_chip* for the chip to all IRQ callbacks, so the callbacks
- need to embed the gpio_chip in its state container and obtain a pointer
- to the container using container_of().
+- gpiochip_irqchip_add(): adds a chained cascaded irqchip to a gpiochip. It
+ will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the
+ callbacks need to embed the gpio_chip in its state container and obtain a
+ pointer to the container using container_of().
(See Documentation/driver-model/design-patterns.txt)
-* gpiochip_irqchip_add_nested(): adds a nested irqchip to a gpiochip.
+- gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
+ as discussed above regarding different types of cascaded irqchips. The
+ cascaded irq has to be handled by a threaded interrupt handler.
Apart from that it works exactly like the chained irqchip.
-* gpiochip_set_chained_irqchip(): sets up a chained irq handler for a
+- gpiochip_set_chained_irqchip(): sets up a chained cascaded irq handler for a
gpio_chip from a parent IRQ and passes the struct gpio_chip* as handler
- data. (Notice handler data, since the irqchip data is likely used by the
- parent irqchip!).
+ data. Notice that we pass is as the handler data, since the irqchip data is
+ likely used by the parent irqchip.
-* gpiochip_set_nested_irqchip(): sets up a nested irq handler for a
+- gpiochip_set_nested_irqchip(): sets up a nested cascaded irq handler for a
gpio_chip from a parent IRQ. As the parent IRQ has usually been
explicitly requested by the driver, this does very little more than
mark all the child IRQs as having the other IRQ as parent.
-If there is a need to exclude certain GPIOs from the IRQ domain, you can
-set .irq.need_valid_mask of the gpiochip before gpiochip_add_data() is
-called. This allocates an .irq.valid_mask with as many bits set as there
-are GPIOs in the chip. Drivers can exclude GPIOs by clearing bits from this
-mask. The mask must be filled in before gpiochip_irqchip_add() or
-gpiochip_irqchip_add_nested() is called.
+If there is a need to exclude certain GPIO lines from the IRQ domain handled by
+these helpers, we can set .irq.need_valid_mask of the gpiochip before
+[devm_]gpiochip_add_data() is called. This allocates an .irq.valid_mask with as
+many bits set as there are GPIO lines in the chip, each bit representing line
+0..n-1. Drivers can exclude GPIO lines by clearing bits from this mask. The mask
+must be filled in before gpiochip_irqchip_add() or gpiochip_irqchip_add_nested()
+is called.
To use the helpers please keep the following in mind:
@@ -333,33 +432,24 @@ To use the helpers please keep the following in mind:
- Nominally set all handlers to handle_bad_irq() in the setup call and pass
handle_bad_irq() as flow handler parameter in gpiochip_irqchip_add() if it is
- expected for GPIO driver that irqchip .set_type() callback have to be called
- before using/enabling GPIO IRQ. Then set the handler to handle_level_irq()
- and/or handle_edge_irq() in the irqchip .set_type() callback depending on
- what your controller supports.
+ expected for GPIO driver that irqchip .set_type() callback will be called
+ before using/enabling each GPIO IRQ. Then set the handler to
+ handle_level_irq() and/or handle_edge_irq() in the irqchip .set_type()
+ callback depending on what your controller supports and what is requested
+ by the consumer.
-It is legal for any IRQ consumer to request an IRQ from any irqchip no matter
-if that is a combined GPIO+IRQ driver. The basic premise is that gpio_chip and
-irq_chip are orthogonal, and offering their services independent of each
-other.
-
-gpiod_to_irq() is just a convenience function to figure out the IRQ for a
-certain GPIO line and should not be relied upon to have been called before
-the IRQ is used.
-So always prepare the hardware and make it ready for action in respective
-callbacks from the GPIO and irqchip APIs. Do not rely on gpiod_to_irq() having
-been called first.
+Locking IRQ usage
+-----------------
-This orthogonality leads to ambiguities that we need to solve: if there is
-competition inside the subsystem which side is using the resource (a certain
-GPIO line and register for example) it needs to deny certain operations and
-keep track of usage inside of the gpiolib subsystem. This is why the API
-below exists.
+Since GPIO and irq_chip are orthogonal, we can get conflicts between different
+use cases. For example a GPIO line used for IRQs should be an input line,
+it does not make sense to fire interrupts on an output GPIO.
+If there is competition inside the subsystem which side is using the
+resource (a certain GPIO line and register for example) it needs to deny
+certain operations and keep track of usage inside of the gpiolib subsystem.
-Locking IRQ usage
------------------
Input GPIOs can be used as IRQ signals. When this happens, a driver is requested
to mark the GPIO as being used as an IRQ::
@@ -380,9 +470,15 @@ assigned.
Disabling and enabling IRQs
---------------------------
+
+In some (fringe) use cases, a driver may be using a GPIO line as input for IRQs,
+but occasionally switch that line over to drive output and then back to being
+an input with interrupts again. This happens on things like CEC (Consumer
+Electronics Control).
+
When a GPIO is used as an IRQ signal, then gpiolib also needs to know if
the IRQ is enabled or disabled. In order to inform gpiolib about this,
-a driver should call::
+the irqchip driver should call::
void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset)
@@ -398,40 +494,45 @@ irqchip.
When using the gpiolib irqchip helpers, these callbacks are automatically
assigned.
+
Real-Time compliance for GPIO IRQ chips
---------------------------------------
-Any provider of irqchips needs to be carefully tailored to support Real Time
+Any provider of irqchips needs to be carefully tailored to support Real-Time
preemption. It is desirable that all irqchips in the GPIO subsystem keep this
in mind and do the proper testing to assure they are real time-enabled.
-So, pay attention on above " RT_FULL:" notes, please.
-The following is a checklist to follow when preparing a driver for real
-time-compliance:
-- ensure spinlock_t is not used as part irq_chip implementation;
-- ensure that sleepable APIs are not used as part irq_chip implementation.
+So, pay attention on above realtime considerations in the documentation.
+
+The following is a checklist to follow when preparing a driver for real-time
+compliance:
+
+- ensure spinlock_t is not used as part irq_chip implementation
+- ensure that sleepable APIs are not used as part irq_chip implementation
If sleepable APIs have to be used, these can be done from the .irq_bus_lock()
- and .irq_bus_unlock() callbacks;
+ and .irq_bus_unlock() callbacks
- Chained GPIO irqchips: ensure spinlock_t or any sleepable APIs are not used
- from chained IRQ handler;
+ from the chained IRQ handler
- Generic chained GPIO irqchips: take care about generic_handle_irq() calls and
- apply corresponding W/A;
-- Chained GPIO irqchips: get rid of chained IRQ handler and use generic irq
- handler if possible :)
-- regmap_mmio: Sry, but you are in trouble :( if MMIO regmap is used as for
- GPIO IRQ chip implementation;
-- Test your driver with the appropriate in-kernel real time test cases for both
- level and edge IRQs.
+ apply corresponding work-around
+- Chained GPIO irqchips: get rid of the chained IRQ handler and use generic irq
+ handler if possible
+- regmap_mmio: it is possible to disable internal locking in regmap by setting
+ .disable_locking and handling the locking in the GPIO driver
+- Test your driver with the appropriate in-kernel real-time test cases for both
+ level and edge IRQs
+
+* [1] http://www.spinics.net/lists/linux-omap/msg120425.html
+* [2] https://lkml.org/lkml/2015/9/25/494
+* [3] https://lkml.org/lkml/2015/9/25/495
Requesting self-owned GPIO pins
--------------------------------
+===============================
Sometimes it is useful to allow a GPIO chip driver to request its own GPIO
-descriptors through the gpiolib API. Using gpio_request() for this purpose
-does not help since it pins the module to the kernel forever (it calls
-try_module_get()). A GPIO driver can use the following functions instead
-to request and free descriptors without being pinned to the kernel forever::
+descriptors through the gpiolib API. A GPIO driver can use the following
+functions to request and free descriptors::
struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc,
u16 hwnum,
@@ -446,7 +547,3 @@ gpiochip_free_own_desc().
These functions must be used with care since they do not affect module use
count. Do not use the functions to request gpio descriptors not owned by the
calling driver.
-
-* [1] http://www.spinics.net/lists/linux-omap/msg120425.html
-* [2] https://lkml.org/lkml/2015/9/25/494
-* [3] https://lkml.org/lkml/2015/9/25/495
diff --git a/Documentation/filesystems/autofs-mount-control.txt b/Documentation/filesystems/autofs-mount-control.txt
index 45edad6933cc..acc02fc57993 100644
--- a/Documentation/filesystems/autofs-mount-control.txt
+++ b/Documentation/filesystems/autofs-mount-control.txt
@@ -354,8 +354,10 @@ this ioctl is called until no further expire candidates are found.
The call requires an initialized struct autofs_dev_ioctl with the
ioctlfd field set to the descriptor obtained from the open call. In
-addition an immediate expire, independent of the mount timeout, can be
-requested by setting the how field of struct args_expire to 1. If no
+addition an immediate expire that's independent of the mount timeout,
+and a forced expire that's independent of whether the mount is busy,
+can be requested by setting the how field of struct args_expire to
+AUTOFS_EXP_IMMEDIATE or AUTOFS_EXP_FORCED, respectively . If no
expire candidates can be found the ioctl returns -1 with errno set to
EAGAIN.
diff --git a/Documentation/filesystems/autofs.txt b/Documentation/filesystems/autofs.txt
index 373ad25852d3..3af38c7fd26d 100644
--- a/Documentation/filesystems/autofs.txt
+++ b/Documentation/filesystems/autofs.txt
@@ -116,7 +116,7 @@ that purpose there is another flag.
**DCACHE_MANAGE_TRANSIT**
If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
-related behaviors are invoked, both using the `d_op->d_manage()`
+related behaviours are invoked, both using the `d_op->d_manage()`
dentry operation.
Firstly, before checking to see if any filesystem is mounted on the
@@ -193,8 +193,8 @@ VFS remain in RCU-walk mode, but can only tell it to get out of
RCU-walk mode by returning `-ECHILD`.
So `d_manage()`, when called with `rcu_walk` set, should either return
--ECHILD if there is any reason to believe it is unsafe to end the
-mounted filesystem, and otherwise should return 0.
+-ECHILD if there is any reason to believe it is unsafe to enter the
+mounted filesystem, otherwise it should return 0.
autofs will return `-ECHILD` if an expiry of the filesystem has been
initiated or is being considered, otherwise it returns 0.
@@ -210,7 +210,7 @@ mounts that were created by `d_automount()` returning a filesystem to be
mounted. As autofs doesn't return such a filesystem but leaves the
mounting to the automount daemon, it must involve the automount daemon
in unmounting as well. This also means that autofs has more control
-of expiry.
+over expiry.
The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
the `umount` system call. Unmounting with MNT_EXPIRE will fail unless
@@ -225,7 +225,7 @@ unmount any filesystems mounted on the autofs filesystem or remove any
symbolic links or empty directories any time it likes. If the unmount
or removal is successful the filesystem will be returned to the state
it was before the mount or creation, so that any access of the name
-will trigger normal auto-mount processing. In particlar, `rmdir` and
+will trigger normal auto-mount processing. In particular, `rmdir` and
`unlink` do not leave negative entries in the dcache as a normal
filesystem would, so an attempt to access a recently-removed object is
passed to autofs for handling.
@@ -240,11 +240,18 @@ Normally the daemon only wants to remove entries which haven't been
used for a while. For this purpose autofs maintains a "`last_used`"
time stamp on each directory or symlink. For symlinks it genuinely
does record the last time the symlink was "used" or followed to find
-out where it points to. For directories the field is a slight
-misnomer. It actually records the last time that autofs checked if
-the directory or one of its descendents was busy and found that it
-was. This is just as useful and doesn't require updating the field so
-often.
+out where it points to. For directories the field is used slightly
+differently. The field is updated at mount time and during expire
+checks if it is found to be in use (ie. open file descriptor or
+process working directory) and during path walks. The update done
+during path walks prevents frequent expire and immediate mount of
+frequently accessed automounts. But in the case where a GUI continually
+access or an application frequently scans an autofs directory tree
+there can be an accumulation of mounts that aren't actually being
+used. To cater for this case the "`strictexpire`" autofs mount option
+can be used to avoid the "`last_used`" update on path walk thereby
+preventing this apparent inability to expire mounts that aren't
+really in use.
The daemon is able to ask autofs if anything is due to be expired,
using an `ioctl` as discussed later. For a *direct* mount, autofs
@@ -255,8 +262,12 @@ up.
There is an option with indirect mounts to consider each of the leaves
that has been mounted on instead of considering the top-level names.
-This is intended for compatability with version 4 of autofs and should
-be considered as deprecated.
+This was originally intended for compatibility with version 4 of autofs
+and should be considered as deprecated for Sun Format automount maps.
+However, it may be used again for amd format mount maps (which are
+generally indirect maps) because the amd automounter allows for the
+setting of an expire timeout for individual mounts. But there are
+some difficulties in making the needed changes for this.
When autofs considers a directory it checks the `last_used` time and
compares it with the "timeout" value set when the filesystem was
@@ -273,7 +284,7 @@ mounts. If it finds something in the root directory to expire it will
return the name of that thing. Once a name has been returned the
automount daemon needs to unmount any filesystems mounted below the
name normally. As described above, this is unsafe for non-toplevel
-mounts in a version-5 autofs. For this reason the current `automountd`
+mounts in a version-5 autofs. For this reason the current `automount(8)`
does not use this ioctl.
The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
@@ -345,7 +356,7 @@ The `wait_queue_token` is a unique number which can identify a
particular request to be acknowledged. When a message is sent over
the pipe the affected dentry is marked as either "active" or
"expiring" and other accesses to it block until the message is
-acknowledged using one of the ioctls below and the relevant
+acknowledged using one of the ioctls below with the relevant
`wait_queue_token`.
Communicating with autofs: root directory ioctls
@@ -367,15 +378,14 @@ The available ioctl commands are:
This mode is also entered if a write to the pipe fails.
- **AUTOFS_IOC_PROTOVER**: This returns the protocol version in use.
- **AUTOFS_IOC_PROTOSUBVER**: Returns the protocol sub-version which
- is really a version number for the implementation. It is
- currently 2.
+ is really a version number for the implementation.
- **AUTOFS_IOC_SETTIMEOUT**: This passes a pointer to an unsigned
long. The value is used to set the timeout for expiry, and
the current timeout value is stored back through the pointer.
- **AUTOFS_IOC_ASKUMOUNT**: Returns, in the pointed-to `int`, 1 if
the filesystem could be unmounted. This is only a hint as
the situation could change at any instant. This call can be
- use to avoid a more expensive full unmount attempt.
+ used to avoid a more expensive full unmount attempt.
- **AUTOFS_IOC_EXPIRE**: as described above, this asks if there is
anything suitable to expire. A pointer to a packet:
@@ -400,6 +410,11 @@ The available ioctl commands are:
**AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
and objects are expired if the are not in use.
+ **AUTOFS_EXP_FORCED** causes the in use status to be ignored
+ and objects are expired ieven if they are in use. This assumes
+ that the daemon has requested this because it is capable of
+ performing the umount.
+
**AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
name to expire. This is only safe when *maxproto* is 4.
@@ -415,7 +430,7 @@ which can be used to communicate directly with the autofs filesystem.
It requires CAP_SYS_ADMIN for access.
The `ioctl`s that can be used on this device are described in a separate
-document `autofs-mount-control.txt`, and are summarized briefly here.
+document `autofs-mount-control.txt`, and are summarised briefly here.
Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
struct autofs_dev_ioctl {
@@ -511,6 +526,21 @@ directories.
Catatonic mode can only be left via the
**AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
+The "ignore" mount option
+-------------------------
+
+The "ignore" mount option can be used to provide a generic indicator
+to applications that the mount entry should be ignored when displaying
+mount information.
+
+In other OSes that provide autofs and that provide a mount list to user
+space based on the kernel mount list a no-op mount option ("ignore" is
+the one use on the most common OSes) is allowed so that autofs file
+system users can optionally use it.
+
+This is intended to be used by user space programs to exclude autofs
+mounts from consideration when reading the mounts list.
+
autofs, name spaces, and shared mounts
--------------------------------------
diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
index 1351984e767c..febccbc5689d 100644
--- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
@@ -45,8 +45,8 @@ the ANOD object which is also the final target node of the reference.
Name (_DSD, Package () {
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
Package () {
- Package () { "node@0", NOD0 },
- Package () { "node@1", NOD1 },
+ Package () { "node@0", "NOD0" },
+ Package () { "node@1", "NOD1" },
}
})
Name (NOD0, Package() {
@@ -58,7 +58,7 @@ the ANOD object which is also the final target node of the reference.
Name (NOD1, Package() {
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
Package () {
- Package () { "anothernode", ANOD },
+ Package () { "anothernode", "ANOD" },
}
})
Name (ANOD, Package() {
diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst
index e0baed35b037..1a6ce7afba5e 100644
--- a/Documentation/firmware-guide/acpi/dsd/graph.rst
+++ b/Documentation/firmware-guide/acpi/dsd/graph.rst
@@ -45,7 +45,7 @@ with "port" and must be followed by the "@" character and the number of the
port as its key. The target object it refers to should be called "PRTX", where
"X" is the number of the port. An example of such a package would be::
- Package() { "port@4", PRT4 }
+ Package() { "port@4", "PRT4" }
Further on, endpoints are located under the port nodes. The hierarchical
data extension key of the endpoint nodes must begin with
@@ -54,7 +54,7 @@ endpoint. The object it refers to should be called "EPXY", where "X" is the
number of the port and "Y" is the number of the endpoint. An example of such a
package would be::
- Package() { "endpoint@0", EP40 }
+ Package() { "endpoint@0", "EP40" }
Each port node contains a property extension key "port", the value of which is
the number of the port. Each endpoint is similarly numbered with a property
@@ -82,68 +82,68 @@ A simple example of this is show below::
Scope (\_SB.PCI0.I2C2)
{
- Device (CAM0)
- {
- Name (_DSD, Package () {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "compatible", Package () { "nokia,smia" } },
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "port@0", PRT0 },
- }
- })
- Name (PRT0, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "endpoint@0", EP00 },
- }
- })
- Name (EP00, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
- }
- })
- }
+ Device (CAM0)
+ {
+ Name (_DSD, Package () {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () { "compatible", Package () { "nokia,smia" } },
+ },
+ ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+ Package () {
+ Package () { "port@0", "PRT0" },
+ }
+ })
+ Name (PRT0, Package() {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () { "reg", 0 },
+ },
+ ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+ Package () {
+ Package () { "endpoint@0", "EP00" },
+ }
+ })
+ Name (EP00, Package() {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () { "reg", 0 },
+ Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
+ }
+ })
+ }
}
Scope (\_SB.PCI0)
{
- Device (ISP)
- {
- Name (_DSD, Package () {
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "port@4", PRT4 },
- }
- })
-
- Name (PRT4, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 4 }, /* CSI-2 port number */
- },
- ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
- Package () {
- Package () { "endpoint@0", EP40 },
- }
- })
-
- Name (EP40, Package() {
- ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
- Package () {
- Package () { "reg", 0 },
- Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
- }
- })
- }
+ Device (ISP)
+ {
+ Name (_DSD, Package () {
+ ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+ Package () {
+ Package () { "port@4", "PRT4" },
+ }
+ })
+
+ Name (PRT4, Package() {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () { "reg", 4 }, /* CSI-2 port number */
+ },
+ ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
+ Package () {
+ Package () { "endpoint@0", "EP40" },
+ }
+ })
+
+ Name (EP40, Package() {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () { "reg", 0 },
+ Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
+ }
+ })
+ }
}
Here, the port 0 of the "CAM0" device is connected to the port 4 of
diff --git a/Documentation/gpio/index.rst b/Documentation/gpio/index.rst
new file mode 100644
index 000000000000..09a4a553f434
--- /dev/null
+++ b/Documentation/gpio/index.rst
@@ -0,0 +1,17 @@
+:orphan:
+
+====
+gpio
+====
+
+.. toctree::
+ :maxdepth: 1
+
+ sysfs
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/gpio/sysfs.txt b/Documentation/gpio/sysfs.rst
index 58eeab81f349..ec09ffd983e7 100644
--- a/Documentation/gpio/sysfs.txt
+++ b/Documentation/gpio/sysfs.rst
@@ -1,10 +1,12 @@
GPIO Sysfs Interface for Userspace
==================================
-THIS ABI IS DEPRECATED, THE ABI DOCUMENTATION HAS BEEN MOVED TO
-Documentation/ABI/obsolete/sysfs-gpio AND NEW USERSPACE CONSUMERS
-ARE SUPPOSED TO USE THE CHARACTER DEVICE ABI. THIS OLD SYSFS ABI WILL
-NOT BE DEVELOPED (NO NEW FEATURES), IT WILL JUST BE MAINTAINED.
+.. warning::
+
+ THIS ABI IS DEPRECATED, THE ABI DOCUMENTATION HAS BEEN MOVED TO
+ Documentation/ABI/obsolete/sysfs-gpio AND NEW USERSPACE CONSUMERS
+ ARE SUPPOSED TO USE THE CHARACTER DEVICE ABI. THIS OLD SYSFS ABI WILL
+ NOT BE DEVELOPED (NO NEW FEATURES), IT WILL JUST BE MAINTAINED.
Refer to the examples in tools/gpio/* for an introduction to the new
character device ABI. Also see the userspace header in
@@ -51,13 +53,15 @@ The control interfaces are write-only:
/sys/class/gpio/
- "export" ... Userspace may ask the kernel to export control of
+ "export" ...
+ Userspace may ask the kernel to export control of
a GPIO to userspace by writing its number to this file.
Example: "echo 19 > export" will create a "gpio19" node
for GPIO #19, if that's not requested by kernel code.
- "unexport" ... Reverses the effect of exporting to userspace.
+ "unexport" ...
+ Reverses the effect of exporting to userspace.
Example: "echo 19 > unexport" will remove a "gpio19"
node exported using the "export" file.
@@ -67,7 +71,8 @@ and have the following read/write attributes:
/sys/class/gpio/gpioN/
- "direction" ... reads as either "in" or "out". This value may
+ "direction" ...
+ reads as either "in" or "out". This value may
normally be written. Writing as "out" defaults to
initializing the value as low. To ensure glitch free
operation, values "low" and "high" may be written to
@@ -78,7 +83,8 @@ and have the following read/write attributes:
it was exported by kernel code that didn't explicitly
allow userspace to reconfigure this GPIO's direction.
- "value" ... reads as either 0 (low) or 1 (high). If the GPIO
+ "value" ...
+ reads as either 0 (low) or 1 (high). If the GPIO
is configured as an output, this value may be written;
any nonzero value is treated as high.
@@ -92,14 +98,16 @@ and have the following read/write attributes:
file and read the new value or close the file and re-open it
to read the value.
- "edge" ... reads as either "none", "rising", "falling", or
+ "edge" ...
+ reads as either "none", "rising", "falling", or
"both". Write these strings to select the signal edge(s)
that will make poll(2) on the "value" file return.
This file exists only if the pin can be configured as an
interrupt generating input pin.
- "active_low" ... reads as either 0 (false) or 1 (true). Write
+ "active_low" ...
+ reads as either 0 (false) or 1 (true). Write
any nonzero value to invert the value attribute both
for reading and writing. Existing and subsequent
poll(2) support configuration via the edge attribute
@@ -112,11 +120,14 @@ read-only attributes:
/sys/class/gpio/gpiochipN/
- "base" ... same as N, the first GPIO managed by this chip
+ "base" ...
+ same as N, the first GPIO managed by this chip
- "label" ... provided for diagnostics (not always unique)
+ "label" ...
+ provided for diagnostics (not always unique)
- "ngpio" ... how many GPIOs this manages (N to N + ngpio - 1)
+ "ngpio" ...
+ how many GPIOs this manages (N to N + ngpio - 1)
Board documentation should in most cases cover what GPIOs are used for
what purposes. However, those numbers are not always stable; GPIOs on
@@ -129,7 +140,7 @@ the correct GPIO number to use for a given signal.
Exporting from Kernel code
--------------------------
Kernel code can explicitly manage exports of GPIOs which have already been
-requested using gpio_request():
+requested using gpio_request()::
/* export the GPIO to userspace */
int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
diff --git a/Documentation/index.rst b/Documentation/index.rst
index fec80fee512a..a7566ef62411 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -112,7 +112,9 @@ implementation.
.. toctree::
:maxdepth: 2
+ x86/index
sh/index
+ x86/index
Filesystem Documentation
------------------------
diff --git a/Documentation/media/uapi/v4l/field-order.rst b/Documentation/media/uapi/v4l/field-order.rst
index 3fb473e3b8e2..d640e922a974 100644
--- a/Documentation/media/uapi/v4l/field-order.rst
+++ b/Documentation/media/uapi/v4l/field-order.rst
@@ -75,12 +75,11 @@ enum v4l2_field
* - ``V4L2_FIELD_ANY``
- 0
- - Applications request this field order when any one of the
- ``V4L2_FIELD_NONE``, ``V4L2_FIELD_TOP``, ``V4L2_FIELD_BOTTOM``, or
- ``V4L2_FIELD_INTERLACED`` formats is acceptable. Drivers choose
- depending on hardware capabilities or e. g. the requested image
- size, and return the actual field order. Drivers must never return
- ``V4L2_FIELD_ANY``. If multiple field orders are possible the
+ - Applications request this field order when any field format
+ is acceptable. Drivers choose depending on hardware capabilities or
+ e.g. the requested image size, and return the actual field order.
+ Drivers must never return ``V4L2_FIELD_ANY``.
+ If multiple field orders are possible the
driver must choose one of the possible field orders during
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` or
:ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>`. struct
@@ -88,9 +87,8 @@ enum v4l2_field
``V4L2_FIELD_ANY``.
* - ``V4L2_FIELD_NONE``
- 1
- - Images are in progressive format, not interlaced. The driver may
- also indicate this order when it cannot distinguish between
- ``V4L2_FIELD_TOP`` and ``V4L2_FIELD_BOTTOM``.
+ - Images are in progressive (frame-based) format, not interlaced
+ (field-based).
* - ``V4L2_FIELD_TOP``
- 2
- Images consist of the top (aka odd) field only.
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index cd7303d7fa25..180e07d956a7 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -796,7 +796,9 @@ The kernel interface functions are as follows:
s64 tx_total_len,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
- bool upgrade);
+ bool upgrade,
+ bool intr,
+ unsigned int debug_id);
This allocates the infrastructure to make a new RxRPC call and assigns
call and connection numbers. The call will be made on the UDP port that
@@ -824,6 +826,13 @@ The kernel interface functions are as follows:
the server upgrade the service to a better one. The resultant service ID
is returned by rxrpc_kernel_recv_data().
+ intr should be set to true if the call should be interruptible. If this
+ is not set, this function may not return until a channel has been
+ allocated; if it is set, the function may return -ERESTARTSYS.
+
+ debug_id is the call debugging ID to be used for tracing. This can be
+ obtained by atomically incrementing rxrpc_debug_id.
+
If this function is successful, an opaque reference to the RxRPC call is
returned. The caller now holds a reference on this and it must be
properly ended.
@@ -1056,6 +1065,16 @@ The kernel interface functions are as follows:
This value can be used to determine if the remote client has been
restarted as it shouldn't change otherwise.
+ (*) Set the maxmimum lifespan on a call.
+
+ void rxrpc_kernel_set_max_life(struct socket *sock,
+ struct rxrpc_call *call,
+ unsigned long hard_timeout)
+
+ This sets the maximum lifespan on a call to hard_timeout (which is in
+ jiffies). In the event of the timeout occurring, the call will be
+ aborted and -ETIME or -ETIMEDOUT will be returned.
+
=======================
CONFIGURABLE PARAMETERS
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 3f13d8599337..749322060f10 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm:
- stat_refresh
- numa_stat
- swappiness
+- unprivileged_userfaultfd
- user_reserve_kbytes
- vfs_cache_pressure
- watermark_boost_factor
@@ -818,6 +819,17 @@ The default value is 60.
==============================================================
+unprivileged_userfaultfd
+
+This flag controls whether unprivileged users can use the userfaultfd
+system calls. Set this to 1 to allow unprivileged users to use the
+userfaultfd system calls, or set this to 0 to restrict userfaultfd to only
+privileged users (with SYS_CAP_PTRACE capability).
+
+The default value is 1.
+
+==============================================================
+
- user_reserve_kbytes
When overcommit_memory is set to 2, "never overcommit" mode, reserve
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index c3b9bd2fd512..f60079259669 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -765,6 +765,37 @@ Here is the list of current tracers that may be configured.
tracers from tracing simply echo "nop" into
current_tracer.
+Error conditions
+----------------
+
+ For most ftrace commands, failure modes are obvious and communicated
+ using standard return codes.
+
+ For other more involved commands, extended error information may be
+ available via the tracing/error_log file. For the commands that
+ support it, reading the tracing/error_log file after an error will
+ display more detailed information about what went wrong, if
+ information is available. The tracing/error_log file is a circular
+ error log displaying a small number (currently, 8) of ftrace errors
+ for the last (8) failed commands.
+
+ The extended error information and usage takes the form shown in
+ this example::
+
+ # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
+ echo: write error: Invalid argument
+
+ # cat /sys/kernel/debug/tracing/error_log
+ [ 5348.887237] location: error: Couldn't yyy: zzz
+ Command: xxx
+ ^
+ [ 7517.023364] location: error: Bad rrr: sss
+ Command: ppp qqq
+ ^
+
+ To clear the error log, echo the empty string into it::
+
+ # echo > /sys/kernel/debug/tracing/error_log
Examples of using the tracer
----------------------------
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index f95d94d19c22..fb621a1c2638 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -199,20 +199,8 @@ Extended error information
For some error conditions encountered when invoking a hist trigger
command, extended error information is available via the
- corresponding event's 'hist' file. Reading the hist file after an
- error will display more detailed information about what went wrong,
- if information is available. This extended error information will
- be available until the next hist trigger command for that event.
-
- If available for a given error condition, the extended error
- information and usage takes the following form::
-
- # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
- echo: write error: Invalid argument
-
- # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
- ERROR: Couldn't yyy: zzz
- Last command: xxx
+ tracing/error_log file. See Error Conditions in
+ :file:`Documentation/trace/ftrace.rst` for details.
6.2 'hist' trigger examples
---------------------------
@@ -1915,7 +1903,10 @@ The following commonly-used handler.action pairs are available:
The 'matching.event' specification is simply the fully qualified
event name of the event that matches the target event for the
- onmatch() functionality, in the form 'system.event_name'.
+ onmatch() functionality, in the form 'system.event_name'. Histogram
+ keys of both events are compared to find if events match. In case
+ multiple histogram keys are used, they all must match in the specified
+ order.
Finally, the number and type of variables/fields in the 'param
list' must match the number and types of the fields in the
@@ -1978,9 +1969,9 @@ The following commonly-used handler.action pairs are available:
/sys/kernel/debug/tracing/events/sched/sched_waking/trigger
Then, when the corresponding thread is actually scheduled onto the
- CPU by a sched_switch event, calculate the latency and use that
- along with another variable and an event field to generate a
- wakeup_latency synthetic event::
+ CPU by a sched_switch event (saved_pid matches next_pid), calculate
+ the latency and use that along with another variable and an event field
+ to generate a wakeup_latency synthetic event::
# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 66bfd8396877..995da15b16ca 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -113,7 +113,7 @@ my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)';
my $regex_kswapd_sleep_default = 'nid=([0-9]*)';
my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)';
my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)';
-my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
+my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate_anon=([0-9]*) nr_activate_file=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)';
my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)';
@@ -212,7 +212,8 @@ $regex_lru_shrink_inactive = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_shrink_inactive",
$regex_lru_shrink_inactive_default,
"nid", "nr_scanned", "nr_reclaimed", "nr_dirty", "nr_writeback",
- "nr_congested", "nr_immediate", "nr_activate", "nr_ref_keep",
+ "nr_congested", "nr_immediate", "nr_activate_anon",
+ "nr_activate_file", "nr_ref_keep",
"nr_unmap_fail", "priority", "flags");
$regex_lru_shrink_active = generate_traceevent_regex(
"vmscan/mm_vmscan_lru_shrink_active",
@@ -407,7 +408,7 @@ EVENT_PROCESS:
}
my $nr_reclaimed = $3;
- my $flags = $12;
+ my $flags = $13;
my $file = 0;
if ($flags =~ /RECLAIM_WB_FILE/) {
$file = 1;
diff --git a/Documentation/translations/it_IT/process/license-rules.rst b/Documentation/translations/it_IT/process/license-rules.rst
index 91a8794ffd79..f058e06996dc 100644
--- a/Documentation/translations/it_IT/process/license-rules.rst
+++ b/Documentation/translations/it_IT/process/license-rules.rst
@@ -249,13 +249,13 @@ essere categorizzate in:
|
-2. Licenze non raccomandate:
+2. Licenze deprecate:
Questo tipo di licenze dovrebbero essere usate solo per codice già esistente
o quando si prende codice da altri progetti. Le licenze sono disponibili
nei sorgenti del kernel nella cartella::
- LICENSES/other/
+ LICENSES/deprecated/
I file in questa cartella contengono il testo completo della licenza e i
`Metatag`_. Il nome di questi file è lo stesso usato come identificatore
@@ -263,14 +263,14 @@ essere categorizzate in:
Esempi::
- LICENSES/other/ISC
+ LICENSES/deprecated/ISC
Contiene il testo della licenza Internet System Consortium e i suoi
metatag::
- LICENSES/other/ZLib
+ LICENSES/deprecated/GPL-1.0
- Contiene il testo della licenza ZLIB e i suoi metatag.
+ Contiene il testo della versione 1 della licenza GPL e i suoi metatag.
Metatag:
@@ -294,7 +294,55 @@ essere categorizzate in:
|
-3. _`Eccezioni`:
+3. Solo per doppie licenze
+
+ Queste licenze dovrebbero essere usate solamente per codice licenziato in
+ combinazione con un'altra licenza che solitamente è quella preferita.
+ Queste licenze sono disponibili nei sorgenti del kernel nella cartella::
+
+ LICENSES/dual
+
+ I file in questa cartella contengono il testo completo della rispettiva
+ licenza e i suoi `Metatags`_. I nomi dei file sono identici agli
+ identificatori di licenza SPDX che dovrebbero essere usati nei file
+ sorgenti.
+
+ Esempi::
+
+ LICENSES/dual/MPL-1.1
+
+ Questo file contiene il testo della versione 1.1 della licenza *Mozilla
+ Pulic License* e i metatag necessari::
+
+ LICENSES/dual/Apache-2.0
+
+ Questo file contiene il testo della versione 2.0 della licenza Apache e i
+ metatag necessari.
+
+ Metatag:
+
+ I requisiti per le 'altre' ('*other*') licenze sono identici a quelli per le
+ `Licenze raccomandate`_.
+
+ Esempio del formato del file::
+
+ Valid-License-Identifier: MPL-1.1
+ SPDX-URL: https://spdx.org/licenses/MPL-1.1.html
+ Usage-Guide:
+ Do NOT use. The MPL-1.1 is not GPL2 compatible. It may only be used for
+ dual-licensed files where the other license is GPL2 compatible.
+ If you end up using this it MUST be used together with a GPL2 compatible
+ license using "OR".
+ To use the Mozilla Public License version 1.1 put the following SPDX
+ tag/value pair into a comment according to the placement guidelines in
+ the licensing rules documentation:
+ SPDX-License-Identifier: MPL-1.1
+ License-Text:
+ Full license text
+
+|
+
+4. _`Eccezioni`:
Alcune licenze possono essere corrette con delle eccezioni che forniscono
diritti aggiuntivi. Queste eccezioni sono disponibili nei sorgenti del
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 64b38dfcc243..ba6c42c576dd 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -69,23 +69,6 @@ by and on behalf of the VM's process may not be freed/unaccounted when
the VM is shut down.
-It is important to note that althought VM ioctls may only be issued from
-the process that created the VM, a VM's lifecycle is associated with its
-file descriptor, not its creator (process). In other words, the VM and
-its resources, *including the associated address space*, are not freed
-until the last reference to the VM's file descriptor has been released.
-For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
-not be freed until both the parent (original) process and its child have
-put their references to the VM's file descriptor.
-
-Because a VM's resources are not freed until the last reference to its
-file descriptor is released, creating additional references to a VM via
-via fork(), dup(), etc... without careful consideration is strongly
-discouraged and may have unwanted side effects, e.g. memory allocated
-by and on behalf of the VM's process may not be freed/unaccounted when
-the VM is shut down.
-
-
3. Extensions
-------------
@@ -347,7 +330,7 @@ They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
The bits in the dirty bitmap are cleared before the ioctl returns, unless
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled. For more information,
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information,
see the description of the capability.
4.9 KVM_SET_MEMORY_ALIAS
@@ -1117,9 +1100,8 @@ struct kvm_userspace_memory_region {
This ioctl allows the user to create, modify or delete a guest physical
memory slot. Bits 0-15 of "slot" specify the slot id and this value
should be less than the maximum number of user memory slots supported per
-VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
-if this capability is supported by the architecture. Slots may not
-overlap in guest physical address space.
+VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS.
+Slots may not overlap in guest physical address space.
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
specifies the address space which is being modified. They must be
@@ -1901,6 +1883,12 @@ Architectures: all
Type: vcpu ioctl
Parameters: struct kvm_one_reg (in)
Returns: 0 on success, negative value on failure
+Errors:
+  ENOENT:   no such register
+  EINVAL:   invalid register ID, or no such register
+  EPERM:    (arm64) register access not allowed before vcpu finalization
+(These error codes are indicative only: do not rely on a specific error
+code being returned in a specific situation.)
struct kvm_one_reg {
__u64 id;
@@ -1985,6 +1973,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_TLB3PS | 32
PPC | KVM_REG_PPC_EPTCFG | 32
PPC | KVM_REG_PPC_ICP_STATE | 64
+ PPC | KVM_REG_PPC_VP_STATE | 128
PPC | KVM_REG_PPC_TB_OFFSET | 64
PPC | KVM_REG_PPC_SPMC1 | 32
PPC | KVM_REG_PPC_SPMC2 | 32
@@ -2137,6 +2126,37 @@ contains elements ranging from 32 to 128 bits. The index is a 32bit
value in the kvm_regs structure seen as a 32bit array.
0x60x0 0000 0010 <index into the kvm_regs struct:16>
+Specifically:
+ Encoding Register Bits kvm_regs member
+----------------------------------------------------------------
+ 0x6030 0000 0010 0000 X0 64 regs.regs[0]
+ 0x6030 0000 0010 0002 X1 64 regs.regs[1]
+ ...
+ 0x6030 0000 0010 003c X30 64 regs.regs[30]
+ 0x6030 0000 0010 003e SP 64 regs.sp
+ 0x6030 0000 0010 0040 PC 64 regs.pc
+ 0x6030 0000 0010 0042 PSTATE 64 regs.pstate
+ 0x6030 0000 0010 0044 SP_EL1 64 sp_el1
+ 0x6030 0000 0010 0046 ELR_EL1 64 elr_el1
+ 0x6030 0000 0010 0048 SPSR_EL1 64 spsr[KVM_SPSR_EL1] (alias SPSR_SVC)
+ 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT]
+ 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND]
+ 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ]
+ 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ]
+ 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] (*)
+ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] (*)
+ ...
+ 0x6040 0000 0010 00d0 V31 128 fp_regs.vregs[31] (*)
+ 0x6020 0000 0010 00d4 FPSR 32 fp_regs.fpsr
+ 0x6020 0000 0010 00d5 FPCR 32 fp_regs.fpcr
+
+(*) These encodings are not accepted for SVE-enabled vcpus. See
+ KVM_ARM_VCPU_INIT.
+
+ The equivalent register content can be accessed via bits [127:0] of
+ the corresponding SVE Zn registers instead for vcpus that have SVE
+ enabled (see below).
+
arm64 CCSIDR registers are demultiplexed by CSSELR value:
0x6020 0000 0011 00 <csselr:8>
@@ -2146,6 +2166,64 @@ arm64 system registers have the following id bit patterns:
arm64 firmware pseudo-registers have the following bit pattern:
0x6030 0000 0014 <regno:16>
+arm64 SVE registers have the following bit patterns:
+ 0x6080 0000 0015 00 <n:5> <slice:5> Zn bits[2048*slice + 2047 : 2048*slice]
+ 0x6050 0000 0015 04 <n:4> <slice:5> Pn bits[256*slice + 255 : 256*slice]
+ 0x6050 0000 0015 060 <slice:5> FFR bits[256*slice + 255 : 256*slice]
+ 0x6060 0000 0015 ffff KVM_REG_ARM64_SVE_VLS pseudo-register
+
+Access to register IDs where 2048 * slice >= 128 * max_vq will fail with
+ENOENT. max_vq is the vcpu's maximum supported vector length in 128-bit
+quadwords: see (**) below.
+
+These registers are only accessible on vcpus for which SVE is enabled.
+See KVM_ARM_VCPU_INIT for details.
+
+In addition, except for KVM_REG_ARM64_SVE_VLS, these registers are not
+accessible until the vcpu's SVE configuration has been finalized
+using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE). See KVM_ARM_VCPU_INIT
+and KVM_ARM_VCPU_FINALIZE for more information about this procedure.
+
+KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector
+lengths supported by the vcpu to be discovered and configured by
+userspace. When transferred to or from user memory via KVM_GET_ONE_REG
+or KVM_SET_ONE_REG, the value of this register is of type
+__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as
+follows:
+
+__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
+
+if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
+ ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
+ ((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1))
+ /* Vector length vq * 16 bytes supported */
+else
+ /* Vector length vq * 16 bytes not supported */
+
+(**) The maximum value vq for which the above condition is true is
+max_vq. This is the maximum vector length available to the guest on
+this vcpu, and determines which register slices are visible through
+this ioctl interface.
+
+(See Documentation/arm64/sve.txt for an explanation of the "vq"
+nomenclature.)
+
+KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
+KVM_ARM_VCPU_INIT initialises it to the best set of vector lengths that
+the host supports.
+
+Userspace may subsequently modify it if desired until the vcpu's SVE
+configuration is finalized using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).
+
+Apart from simply removing all vector lengths from the host set that
+exceed some value, support for arbitrarily chosen sets of vector lengths
+is hardware-dependent and may not be available. Attempting to configure
+an invalid set of vector lengths via KVM_SET_ONE_REG will fail with
+EINVAL.
+
+After the vcpu's SVE configuration is finalized, further attempts to
+write this register will fail with EPERM.
+
MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
the register group type:
@@ -2198,6 +2276,12 @@ Architectures: all
Type: vcpu ioctl
Parameters: struct kvm_one_reg (in and out)
Returns: 0 on success, negative value on failure
+Errors include:
+  ENOENT:   no such register
+  EINVAL:   invalid register ID, or no such register
+  EPERM:    (arm64) register access not allowed before vcpu finalization
+(These error codes are indicative only: do not rely on a specific error
+code being returned in a specific situation.)
This ioctl allows to receive the value of a single register implemented
in a vcpu. The register to read is indicated by the "id" field of the
@@ -2690,6 +2774,49 @@ Possible features:
- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
Depends on KVM_CAP_ARM_PMU_V3.
+ - KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication
+ for arm64 only.
+ Depends on KVM_CAP_ARM_PTRAUTH_ADDRESS.
+ If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
+ both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
+ KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
+ requested.
+
+ - KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication
+ for arm64 only.
+ Depends on KVM_CAP_ARM_PTRAUTH_GENERIC.
+ If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
+ both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
+ KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
+ requested.
+
+ - KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only).
+ Depends on KVM_CAP_ARM_SVE.
+ Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+ * After KVM_ARM_VCPU_INIT:
+
+ - KVM_REG_ARM64_SVE_VLS may be read using KVM_GET_ONE_REG: the
+ initial value of this pseudo-register indicates the best set of
+ vector lengths possible for a vcpu on this host.
+
+ * Before KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+ - KVM_RUN and KVM_GET_REG_LIST are not available;
+
+ - KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access
+ the scalable archietctural SVE registers
+ KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or
+ KVM_REG_ARM64_SVE_FFR;
+
+ - KVM_REG_ARM64_SVE_VLS may optionally be written using
+ KVM_SET_ONE_REG, to modify the set of vector lengths available
+ for the vcpu.
+
+ * After KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+ - the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can
+ no longer be written using KVM_SET_ONE_REG.
4.83 KVM_ARM_PREFERRED_TARGET
@@ -3809,7 +3936,7 @@ to I/O ports.
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
-Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
Architectures: x86, arm, arm64, mips
Type: vm ioctl
Parameters: struct kvm_dirty_log (in)
@@ -3842,10 +3969,10 @@ the address space for which you want to return the dirty bitmap.
They must be less than the value that KVM_CHECK_EXTENSION returns for
the KVM_CAP_MULTI_ADDRESS_SPACE capability.
-This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
is enabled; for more information, see the description of the capability.
However, it can always be used as long as KVM_CHECK_EXTENSION confirms
-that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
+that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
4.118 KVM_GET_SUPPORTED_HV_CPUID
@@ -3904,6 +4031,40 @@ number of valid entries in the 'entries' array, which is then filled.
'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
userspace should not expect to get any particular value there.
+4.119 KVM_ARM_VCPU_FINALIZE
+
+Architectures: arm, arm64
+Type: vcpu ioctl
+Parameters: int feature (in)
+Returns: 0 on success, -1 on error
+Errors:
+ EPERM: feature not enabled, needs configuration, or already finalized
+ EINVAL: feature unknown or not present
+
+Recognised values for feature:
+ arm64 KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE)
+
+Finalizes the configuration of the specified vcpu feature.
+
+The vcpu must already have been initialised, enabling the affected feature, by
+means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set in
+features[].
+
+For affected vcpu features, this is a mandatory step that must be performed
+before the vcpu is fully usable.
+
+Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be
+configured by use of ioctls such as KVM_SET_ONE_REG. The exact configuration
+that should be performaned and how to do it are feature-dependent.
+
+Other calls that depend on a particular feature being finalized, such as
+KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with
+-EPERM unless the feature has already been finalized by means of a
+KVM_ARM_VCPU_FINALIZE call.
+
+See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
+using this ioctl.
+
5. The kvm_run structure
------------------------
@@ -4505,6 +4666,15 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
+6.75 KVM_CAP_PPC_IRQ_XIVE
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XIVE device fd
+ args[1] is the XIVE CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XIVE device.
+
7. Capabilities that can be enabled on VMs
------------------------------------------
@@ -4798,7 +4968,7 @@ and injected exceptions.
* For the new DR6 bits, note that bit 16 is set iff the #DB exception
will clear DR6.RTM.
-7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
Architectures: x86, arm, arm64, mips
Parameters: args[0] whether feature should be enabled or not
@@ -4821,6 +4991,11 @@ while userspace can see false reports of dirty pages. Manual reprotection
helps reducing this time, improving guest performance and reducing the
number of dirty log false positives.
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
+it hard or impossible to use it correctly. The availability of
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed.
+Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
8. Other capabilities.
----------------------
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 95ca68d663a4..4ffb82b02468 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -141,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc {
u8 pcc[16]; # valid with Message-Security-Assist-Extension 4
u8 ppno[16]; # valid with Message-Security-Assist-Extension 5
u8 kma[16]; # valid with Message-Security-Assist-Extension 8
- u8 reserved[1808]; # reserved for future instructions
+ u8 kdsa[16]; # valid with Message-Security-Assist-Extension 9
+ u8 reserved[1792]; # reserved for future instructions
};
Parameters: address of a buffer to load the subfunction blocks from.
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
new file mode 100644
index 000000000000..9a24a4525253
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xive.txt
@@ -0,0 +1,197 @@
+POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
+==========================================================
+
+Device types supported:
+ KVM_DEV_TYPE_XIVE POWER9 XIVE Interrupt Controller generation 1
+
+This device acts as a VM interrupt controller. It provides the KVM
+interface to configure the interrupt sources of a VM in the underlying
+POWER9 XIVE interrupt controller.
+
+Only one XIVE instance may be instantiated. A guest XIVE device
+requires a POWER9 host and the guest OS should have support for the
+XIVE native exploitation interrupt mode. If not, it should run using
+the legacy interrupt mode, referred as XICS (POWER7/8).
+
+* Device Mappings
+
+ The KVM device exposes different MMIO ranges of the XIVE HW which
+ are required for interrupt management. These are exposed to the
+ guest in VMAs populated with a custom VM fault handler.
+
+ 1. Thread Interrupt Management Area (TIMA)
+
+ Each thread has an associated Thread Interrupt Management context
+ composed of a set of registers. These registers let the thread
+ handle priority management and interrupt acknowledgment. The most
+ important are :
+
+ - Interrupt Pending Buffer (IPB)
+ - Current Processor Priority (CPPR)
+ - Notification Source Register (NSR)
+
+ They are exposed to software in four different pages each proposing
+ a view with a different privilege. The first page is for the
+ physical thread context and the second for the hypervisor. Only the
+ third (operating system) and the fourth (user level) are exposed the
+ guest.
+
+ 2. Event State Buffer (ESB)
+
+ Each source is associated with an Event State Buffer (ESB) with
+ either a pair of even/odd pair of pages which provides commands to
+ manage the source: to trigger, to EOI, to turn off the source for
+ instance.
+
+ 3. Device pass-through
+
+ When a device is passed-through into the guest, the source
+ interrupts are from a different HW controller (PHB4) and the ESB
+ pages exposed to the guest should accommadate this change.
+
+ The passthru_irq helpers, kvmppc_xive_set_mapped() and
+ kvmppc_xive_clr_mapped() are called when the device HW irqs are
+ mapped into or unmapped from the guest IRQ number space. The KVM
+ device extends these helpers to clear the ESB pages of the guest IRQ
+ number being mapped and then lets the VM fault handler repopulate.
+ The handler will insert the ESB page corresponding to the HW
+ interrupt of the device being passed-through or the initial IPI ESB
+ page if the device has being removed.
+
+ The ESB remapping is fully transparent to the guest and the OS
+ device driver. All handling is done within VFIO and the above
+ helpers in KVM-PPC.
+
+* Groups:
+
+ 1. KVM_DEV_XIVE_GRP_CTRL
+ Provides global controls on the device
+ Attributes:
+ 1.1 KVM_DEV_XIVE_RESET (write only)
+ Resets the interrupt controller configuration for sources and event
+ queues. To be used by kexec and kdump.
+ Errors: none
+
+ 1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
+ Sync all the sources and queues and mark the EQ pages dirty. This
+ to make sure that a consistent memory state is captured when
+ migrating the VM.
+ Errors: none
+
+ 2. KVM_DEV_XIVE_GRP_SOURCE (write only)
+ Initializes a new source in the XIVE device and mask it.
+ Attributes:
+ Interrupt source number (64-bit)
+ The kvm_device_attr.addr points to a __u64 value:
+ bits: | 63 .... 2 | 1 | 0
+ values: | unused | level | type
+ - type: 0:MSI 1:LSI
+ - level: assertion level in case of an LSI.
+ Errors:
+ -E2BIG: Interrupt source number is out of range
+ -ENOMEM: Could not create a new source block
+ -EFAULT: Invalid user pointer for attr->addr.
+ -ENXIO: Could not allocate underlying HW interrupt
+
+ 3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
+ Configures source targeting
+ Attributes:
+ Interrupt source number (64-bit)
+ The kvm_device_attr.addr points to a __u64 value:
+ bits: | 63 .... 33 | 32 | 31 .. 3 | 2 .. 0
+ values: | eisn | mask | server | priority
+ - priority: 0-7 interrupt priority level
+ - server: CPU number chosen to handle the interrupt
+ - mask: mask flag (unused)
+ - eisn: Effective Interrupt Source Number
+ Errors:
+ -ENOENT: Unknown source number
+ -EINVAL: Not initialized source number
+ -EINVAL: Invalid priority
+ -EINVAL: Invalid CPU number.
+ -EFAULT: Invalid user pointer for attr->addr.
+ -ENXIO: CPU event queues not configured or configuration of the
+ underlying HW interrupt failed
+ -EBUSY: No CPU available to serve interrupt
+
+ 4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
+ Configures an event queue of a CPU
+ Attributes:
+ EQ descriptor identifier (64-bit)
+ The EQ descriptor identifier is a tuple (server, priority) :
+ bits: | 63 .... 32 | 31 .. 3 | 2 .. 0
+ values: | unused | server | priority
+ The kvm_device_attr.addr points to :
+ struct kvm_ppc_xive_eq {
+ __u32 flags;
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40];
+ };
+ - flags: queue flags
+ KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
+ forces notification without using the coalescing mechanism
+ provided by the XIVE END ESBs.
+ - qshift: queue size (power of 2)
+ - qaddr: real address of queue
+ - qtoggle: current queue toggle bit
+ - qindex: current queue index
+ - pad: reserved for future use
+ Errors:
+ -ENOENT: Invalid CPU number
+ -EINVAL: Invalid priority
+ -EINVAL: Invalid flags
+ -EINVAL: Invalid queue size
+ -EINVAL: Invalid queue address
+ -EFAULT: Invalid user pointer for attr->addr.
+ -EIO: Configuration of the underlying HW failed
+
+ 5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
+ Synchronize the source to flush event notifications
+ Attributes:
+ Interrupt source number (64-bit)
+ Errors:
+ -ENOENT: Unknown source number
+ -EINVAL: Not initialized source number
+
+* VCPU state
+
+ The XIVE IC maintains VP interrupt state in an internal structure
+ called the NVT. When a VP is not dispatched on a HW processor
+ thread, this structure can be updated by HW if the VP is the target
+ of an event notification.
+
+ It is important for migration to capture the cached IPB from the NVT
+ as it synthesizes the priorities of the pending interrupts. We
+ capture a bit more to report debug information.
+
+ KVM_REG_PPC_VP_STATE (2 * 64bits)
+ bits: | 63 .... 32 | 31 .... 0 |
+ values: | TIMA word0 | TIMA word1 |
+ bits: | 127 .......... 64 |
+ values: | unused |
+
+* Migration:
+
+ Saving the state of a VM using the XIVE native exploitation mode
+ should follow a specific sequence. When the VM is stopped :
+
+ 1. Mask all sources (PQ=01) to stop the flow of events.
+
+ 2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
+ flush any in-flight event notification and to stabilize the EQs. At
+ this stage, the EQ pages are marked dirty to make sure they are
+ transferred in the migration sequence.
+
+ 3. Capture the state of the source targeting, the EQs configuration
+ and the state of thread interrupt context registers.
+
+ Restore is similar :
+
+ 1. Restore the EQ configuration. As targeting depends on it.
+ 2. Restore targeting
+ 3. Restore the thread interrupt contexts
+ 4. Restore the source states
+ 5. Let the vCPU run
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 44205f0b671f..ec1efa32af3c 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -189,20 +189,10 @@ the driver callback returns.
When the device driver wants to populate a range of virtual addresses, it can
use either::
- int hmm_vma_get_pfns(struct vm_area_struct *vma,
- struct hmm_range *range,
- unsigned long start,
- unsigned long end,
- hmm_pfn_t *pfns);
- int hmm_vma_fault(struct vm_area_struct *vma,
- struct hmm_range *range,
- unsigned long start,
- unsigned long end,
- hmm_pfn_t *pfns,
- bool write,
- bool block);
-
-The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+ long hmm_range_snapshot(struct hmm_range *range);
+ long hmm_range_fault(struct hmm_range *range, bool block);
+
+The first one (hmm_range_snapshot()) will only fetch present CPU page table
entries and will not trigger a page fault on missing or non-present entries.
The second one does trigger a page fault on missing or read-only entry if the
write parameter is true. Page faults use the generic mm page fault code path
@@ -220,25 +210,56 @@ respect in order to keep things properly synchronized. The usage pattern is::
{
struct hmm_range range;
...
+
+ range.start = ...;
+ range.end = ...;
+ range.pfns = ...;
+ range.flags = ...;
+ range.values = ...;
+ range.pfn_shift = ...;
+ hmm_range_register(&range);
+
+ /*
+ * Just wait for range to be valid, safe to ignore return value as we
+ * will use the return value of hmm_range_snapshot() below under the
+ * mmap_sem to ascertain the validity of the range.
+ */
+ hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
+
again:
- ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
- if (ret)
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_snapshot(&range);
+ if (ret) {
+ up_read(&mm->mmap_sem);
+ if (ret == -EAGAIN) {
+ /*
+ * No need to check hmm_range_wait_until_valid() return value
+ * on retry we will get proper error with hmm_range_snapshot()
+ */
+ hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
+ goto again;
+ }
+ hmm_mirror_unregister(&range);
return ret;
+ }
take_lock(driver->update);
- if (!hmm_vma_range_done(vma, &range)) {
+ if (!range.valid) {
release_lock(driver->update);
+ up_read(&mm->mmap_sem);
goto again;
}
// Use pfns array content to update device page table
+ hmm_mirror_unregister(&range);
release_lock(driver->update);
+ up_read(&mm->mmap_sem);
return 0;
}
The driver->update lock is the same lock that the driver takes inside its
-update() callback. That lock must be held before hmm_vma_range_done() to avoid
-any race with a concurrent CPU page table update.
+update() callback. That lock must be held before checking the range.valid
+field to avoid any race with a concurrent CPU page table update.
HMM implements all this on top of the mmu_notifier API because we wanted a
simpler API and also to be able to perform optimizations latter on like doing
@@ -255,6 +276,41 @@ report commands as executed is serialized (there is no point in doing this
concurrently).
+Leverage default_flags and pfn_flags_mask
+=========================================
+
+The hmm_range struct has 2 fields default_flags and pfn_flags_mask that allows
+to set fault or snapshot policy for a whole range instead of having to set them
+for each entries in the range.
+
+For instance if the device flags for device entries are:
+ VALID (1 << 63)
+ WRITE (1 << 62)
+
+Now let say that device driver wants to fault with at least read a range then
+it does set:
+ range->default_flags = (1 << 63)
+ range->pfn_flags_mask = 0;
+
+and calls hmm_range_fault() as described above. This will fill fault all page
+in the range with at least read permission.
+
+Now let say driver wants to do the same except for one page in the range for
+which its want to have write. Now driver set:
+ range->default_flags = (1 << 63);
+ range->pfn_flags_mask = (1 << 62);
+ range->pfns[index_of_write] = (1 << 62);
+
+With this HMM will fault in all page with at least read (ie valid) and for the
+address == range->start + (index_of_write << PAGE_SHIFT) it will fault with
+write permission ie if the CPU pte does not have write permission set then HMM
+will call handle_mm_fault().
+
+Note that HMM will populate the pfns array with write permission for any entry
+that have write permission within the CPU pte no matter what are the values set
+in default_flags or pfn_flags_mask.
+
+
Represent and manage device memory from core kernel point of view
=================================================================
diff --git a/Documentation/x86/amd-memory-encryption.txt b/Documentation/x86/amd-memory-encryption.rst
index afc41f544dab..c48d452d0718 100644
--- a/Documentation/x86/amd-memory-encryption.txt
+++ b/Documentation/x86/amd-memory-encryption.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+AMD Memory Encryption
+=====================
+
Secure Memory Encryption (SME) and Secure Encrypted Virtualization (SEV) are
features found on AMD processors.
@@ -34,7 +40,7 @@ is operating in 64-bit or 32-bit PAE mode, in all other modes the SEV hardware
forces the memory encryption bit to 1.
Support for SME and SEV can be determined through the CPUID instruction. The
-CPUID function 0x8000001f reports information related to SME:
+CPUID function 0x8000001f reports information related to SME::
0x8000001f[eax]:
Bit[0] indicates support for SME
@@ -48,14 +54,14 @@ CPUID function 0x8000001f reports information related to SME:
addresses)
If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to
-determine if SME is enabled and/or to enable memory encryption:
+determine if SME is enabled and/or to enable memory encryption::
0xc0010010:
Bit[23] 0 = memory encryption features are disabled
1 = memory encryption features are enabled
If SEV is supported, MSR 0xc0010131 (MSR_AMD64_SEV) can be used to determine if
-SEV is active:
+SEV is active::
0xc0010131:
Bit[0] 0 = memory encryption is not active
@@ -68,6 +74,7 @@ requirements for the system. If this bit is not set upon Linux startup then
Linux itself will not set it and memory encryption will not be possible.
The state of SME in the Linux kernel can be documented as follows:
+
- Supported:
The CPU supports SME (determined through CPUID instruction).
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.rst
index 223e484a1304..08a2f100c0e6 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.rst
@@ -1,5 +1,8 @@
- THE LINUX/x86 BOOT PROTOCOL
- ---------------------------
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Linux/x86 Boot Protocol
+===========================
On the x86 platform, the Linux kernel uses a rather complicated boot
convention. This has evolved partially due to historical aspects, as
@@ -10,88 +13,91 @@ real-mode DOS as a mainstream operating system.
Currently, the following versions of the Linux/x86 boot protocol exist.
-Old kernels: zImage/Image support only. Some very early kernels
+============= ============================================================
+Old kernels zImage/Image support only. Some very early kernels
may not even support a command line.
-Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as
+Protocol 2.00 (Kernel 1.3.73) Added bzImage and initrd support, as
well as a formalized way to communicate between the
boot loader and the kernel. setup.S made relocatable,
although the traditional setup area still assumed
writable.
-Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning.
+Protocol 2.01 (Kernel 1.3.76) Added a heap overrun warning.
-Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol.
+Protocol 2.02 (Kernel 2.4.0-test3-pre3) New command line protocol.
Lower the conventional memory ceiling. No overwrite
of the traditional setup area, thus making booting
safe for systems which use the EBDA from SMM or 32-bit
BIOS entry points. zImage deprecated but still
supported.
-Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible
+Protocol 2.03 (Kernel 2.4.18-pre1) Explicitly makes the highest possible
initrd address available to the bootloader.
-Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes.
+Protocol 2.04 (Kernel 2.6.14) Extend the syssize field to four bytes.
-Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
+Protocol 2.05 (Kernel 2.6.20) Make protected mode kernel relocatable.
Introduce relocatable_kernel and kernel_alignment fields.
-Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
+Protocol 2.06 (Kernel 2.6.22) Added a field that contains the size of
the boot command line.
-Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol.
+Protocol 2.07 (Kernel 2.6.24) Added paravirtualised boot protocol.
Introduced hardware_subarch and hardware_subarch_data
and KEEP_SEGMENTS flag in load_flags.
-Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
+Protocol 2.08 (Kernel 2.6.26) Added crc32 checksum and ELF format
payload. Introduced payload_offset and payload_length
fields to aid in locating the payload.
-Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
+Protocol 2.09 (Kernel 2.6.26) Added a field of 64-bit physical
pointer to single linked list of struct setup_data.
-Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
+Protocol 2.10 (Kernel 2.6.31) Added a protocol for relaxed alignment
beyond the kernel_alignment added, new init_size and
pref_address fields. Added extended boot loader IDs.
-Protocol 2.11: (Kernel 3.6) Added a field for offset of EFI handover
+Protocol 2.11 (Kernel 3.6) Added a field for offset of EFI handover
protocol entry point.
-Protocol 2.12: (Kernel 3.8) Added the xloadflags field and extension fields
+Protocol 2.12 (Kernel 3.8) Added the xloadflags field and extension fields
to struct boot_params for loading bzImage and ramdisk
above 4G in 64bit.
-Protocol 2.13: (Kernel 3.14) Support 32- and 64-bit flags being set in
+Protocol 2.13 (Kernel 3.14) Support 32- and 64-bit flags being set in
xloadflags to support booting a 64-bit kernel from 32-bit
EFI
+============= ============================================================
-**** MEMORY LAYOUT
-The traditional memory map for the kernel loader, used for Image or
-zImage kernels, typically looks like:
-
- | |
-0A0000 +------------------------+
- | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
-09A000 +------------------------+
- | Command line |
- | Stack/heap | For use by the kernel real-mode code.
-098000 +------------------------+
- | Kernel setup | The kernel real-mode code.
-090200 +------------------------+
- | Kernel boot sector | The kernel legacy boot sector.
-090000 +------------------------+
- | Protected-mode kernel | The bulk of the kernel image.
-010000 +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
-001000 +------------------------+
- | Reserved for MBR/BIOS |
-000800 +------------------------+
- | Typically used by MBR |
-000600 +------------------------+
- | BIOS use only |
-000000 +------------------------+
+Memory Layout
+=============
+The traditional memory map for the kernel loader, used for Image or
+zImage kernels, typically looks like::
+
+ | |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+ 09A000 +------------------------+
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
+ 098000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ 090200 +------------------------+
+ | Kernel boot sector | The kernel legacy boot sector.
+ 090000 +------------------------+
+ | Protected-mode kernel | The bulk of the kernel image.
+ 010000 +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
When using bzImage, the protected-mode kernel was relocated to
0x100000 ("high memory"), and the kernel real-mode block (boot sector,
@@ -116,36 +122,36 @@ zImage or old bzImage kernels, which need data written into the
above the 0x9A000 point; too many BIOSes will break above that point.
For a modern bzImage kernel with boot protocol version >= 2.02, a
-memory layout like the following is suggested:
-
- ~ ~
- | Protected-mode kernel |
-100000 +------------------------+
- | I/O memory hole |
-0A0000 +------------------------+
- | Reserved for BIOS | Leave as much as possible unused
- ~ ~
- | Command line | (Can also be below the X+10000 mark)
-X+10000 +------------------------+
- | Stack/heap | For use by the kernel real-mode code.
-X+08000 +------------------------+
- | Kernel setup | The kernel real-mode code.
- | Kernel boot sector | The kernel legacy boot sector.
-X +------------------------+
- | Boot loader | <- Boot sector entry point 0000:7C00
-001000 +------------------------+
- | Reserved for MBR/BIOS |
-000800 +------------------------+
- | Typically used by MBR |
-000600 +------------------------+
- | BIOS use only |
-000000 +------------------------+
-
-... where the address X is as low as the design of the boot loader
-permits.
-
-
-**** THE REAL-MODE KERNEL HEADER
+memory layout like the following is suggested::
+
+ ~ ~
+ | Protected-mode kernel |
+ 100000 +------------------------+
+ | I/O memory hole |
+ 0A0000 +------------------------+
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
+ X+10000 +------------------------+
+ | Stack/heap | For use by the kernel real-mode code.
+ X+08000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
+ X +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+ 001000 +------------------------+
+ | Reserved for MBR/BIOS |
+ 000800 +------------------------+
+ | Typically used by MBR |
+ 000600 +------------------------+
+ | BIOS use only |
+ 000000 +------------------------+
+
+ ... where the address X is as low as the design of the boot loader permits.
+
+
+The Real-Mode Kernel Header
+===========================
In the following text, and anywhere in the kernel boot sequence, "a
sector" refers to 512 bytes. It is independent of the actual sector
@@ -159,61 +165,63 @@ sectors (1K) and then examine the bootup sector size.
The header looks like:
-Offset Proto Name Meaning
-/Size
-
-01F1/1 ALL(1 setup_sects The size of the setup in sectors
-01F2/2 ALL root_flags If set, the root is mounted readonly
-01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras
-01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only
-01FA/2 ALL vid_mode Video mode control
-01FC/2 ALL root_dev Default root device number
-01FE/2 ALL boot_flag 0xAA55 magic number
-0200/2 2.00+ jump Jump instruction
-0202/4 2.00+ header Magic signature "HdrS"
-0206/2 2.00+ version Boot protocol version supported
-0208/4 2.00+ realmode_swtch Boot loader hook (see below)
-020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
-020E/2 2.00+ kernel_version Pointer to kernel version string
-0210/1 2.00+ type_of_loader Boot loader identifier
-0211/1 2.00+ loadflags Boot protocol option flags
-0212/2 2.00+ setup_move_size Move to high memory size (used with hooks)
-0214/4 2.00+ code32_start Boot loader hook (see below)
-0218/4 2.00+ ramdisk_image initrd load address (set by boot loader)
-021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
-0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
-0224/2 2.01+ heap_end_ptr Free memory after setup end
-0226/1 2.02+(3 ext_loader_ver Extended boot loader version
-0227/1 2.02+(3 ext_loader_type Extended boot loader ID
-0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
-022C/4 2.03+ initrd_addr_max Highest legal initrd address
-0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
-0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
-0235/1 2.10+ min_alignment Minimum alignment, as a power of two
-0236/2 2.12+ xloadflags Boot protocol option flags
-0238/4 2.06+ cmdline_size Maximum size of the kernel command line
-023C/4 2.07+ hardware_subarch Hardware subarchitecture
-0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
-0248/4 2.08+ payload_offset Offset of kernel payload
-024C/4 2.08+ payload_length Length of kernel payload
-0250/8 2.09+ setup_data 64-bit physical pointer to linked list
- of struct setup_data
-0258/8 2.10+ pref_address Preferred loading address
-0260/4 2.10+ init_size Linear memory required during initialization
-0264/4 2.11+ handover_offset Offset of handover entry point
-
-(1) For backwards compatibility, if the setup_sects field contains 0, the
- real value is 4.
-
-(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
- field are unusable, which means the size of a bzImage kernel
- cannot be determined.
-
-(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+=========== ======== ===================== ============================================
+Offset/Size Proto Name Meaning
+=========== ======== ===================== ============================================
+01F1/1 ALL(1) setup_sects The size of the setup in sectors
+01F2/2 ALL root_flags If set, the root is mounted readonly
+01F4/4 2.04+(2) syssize The size of the 32-bit code in 16-byte paras
+01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only
+01FA/2 ALL vid_mode Video mode control
+01FC/2 ALL root_dev Default root device number
+01FE/2 ALL boot_flag 0xAA55 magic number
+0200/2 2.00+ jump Jump instruction
+0202/4 2.00+ header Magic signature "HdrS"
+0206/2 2.00+ version Boot protocol version supported
+0208/4 2.00+ realmode_swtch Boot loader hook (see below)
+020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
+020E/2 2.00+ kernel_version Pointer to kernel version string
+0210/1 2.00+ type_of_loader Boot loader identifier
+0211/1 2.00+ loadflags Boot protocol option flags
+0212/2 2.00+ setup_move_size Move to high memory size (used with hooks)
+0214/4 2.00+ code32_start Boot loader hook (see below)
+0218/4 2.00+ ramdisk_image initrd load address (set by boot loader)
+021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
+0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
+0224/2 2.01+ heap_end_ptr Free memory after setup end
+0226/1 2.02+(3) ext_loader_ver Extended boot loader version
+0227/1 2.02+(3) ext_loader_type Extended boot loader ID
+0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
+022C/4 2.03+ initrd_addr_max Highest legal initrd address
+0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
+0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
+0235/1 2.10+ min_alignment Minimum alignment, as a power of two
+0236/2 2.12+ xloadflags Boot protocol option flags
+0238/4 2.06+ cmdline_size Maximum size of the kernel command line
+023C/4 2.07+ hardware_subarch Hardware subarchitecture
+0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
+0248/4 2.08+ payload_offset Offset of kernel payload
+024C/4 2.08+ payload_length Length of kernel payload
+0250/8 2.09+ setup_data 64-bit physical pointer to linked list
+ of struct setup_data
+0258/8 2.10+ pref_address Preferred loading address
+0260/4 2.10+ init_size Linear memory required during initialization
+0264/4 2.11+ handover_offset Offset of handover entry point
+=========== ======== ===================== ============================================
+
+.. note::
+ (1) For backwards compatibility, if the setup_sects field contains 0, the
+ real value is 4.
+
+ (2) For boot protocol prior to 2.04, the upper two bytes of the syssize
+ field are unusable, which means the size of a bzImage kernel
+ cannot be determined.
+
+ (3) Ignored, but safe to set, for boot protocols 2.02-2.09.
If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
the boot protocol version is "old". Loading an old kernel, the
-following parameters should be assumed:
+following parameters should be assumed::
Image type = zImage
initrd not supported
@@ -225,7 +233,8 @@ setting fields in the header, you must make sure only to set fields
supported by the protocol version in use.
-**** DETAILS OF HEADER FIELDS
+Details of Harder Fileds
+========================
For each field, some are information from the kernel to the bootloader
("read"), some are expected to be filled out by the bootloader
@@ -239,106 +248,132 @@ boot loaders can ignore those fields.
The byte order of all fields is littleendian (this is x86, after all.)
+============ ===========
Field name: setup_sects
Type: read
Offset/size: 0x1f1/1
Protocol: ALL
+============ ===========
The size of the setup code in 512-byte sectors. If this field is
0, the real value is 4. The real-mode code consists of the boot
sector (always one 512-byte sector) plus the setup code.
-Field name: root_flags
-Type: modify (optional)
-Offset/size: 0x1f2/2
-Protocol: ALL
+============ =================
+Field name: root_flags
+Type: modify (optional)
+Offset/size: 0x1f2/2
+Protocol: ALL
+============ =================
If this field is nonzero, the root defaults to readonly. The use of
this field is deprecated; use the "ro" or "rw" options on the
command line instead.
+============ ===============================================
Field name: syssize
Type: read
Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL)
Protocol: 2.04+
+============ ===============================================
The size of the protected-mode code in units of 16-byte paragraphs.
For protocol versions older than 2.04 this field is only two bytes
wide, and therefore cannot be trusted for the size of a kernel if
the LOAD_HIGH flag is set.
+============ ===============
Field name: ram_size
Type: kernel internal
Offset/size: 0x1f8/2
Protocol: ALL
+============ ===============
This field is obsolete.
+============ ===================
Field name: vid_mode
Type: modify (obligatory)
Offset/size: 0x1fa/2
+============ ===================
Please see the section on SPECIAL COMMAND LINE OPTIONS.
+============ =================
Field name: root_dev
Type: modify (optional)
Offset/size: 0x1fc/2
Protocol: ALL
+============ =================
The default root device device number. The use of this field is
deprecated, use the "root=" option on the command line instead.
+============ =========
Field name: boot_flag
Type: read
Offset/size: 0x1fe/2
Protocol: ALL
+============ =========
Contains 0xAA55. This is the closest thing old Linux kernels have
to a magic number.
+============ =======
Field name: jump
Type: read
Offset/size: 0x200/2
Protocol: 2.00+
+============ =======
Contains an x86 jump instruction, 0xEB followed by a signed offset
relative to byte 0x202. This can be used to determine the size of
the header.
+============ =======
Field name: header
Type: read
Offset/size: 0x202/4
Protocol: 2.00+
+============ =======
Contains the magic number "HdrS" (0x53726448).
+============ =======
Field name: version
Type: read
Offset/size: 0x206/2
Protocol: 2.00+
+============ =======
Contains the boot protocol version, in (major << 8)+minor format,
e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
10.17.
+============ =================
Field name: realmode_swtch
Type: modify (optional)
Offset/size: 0x208/4
Protocol: 2.00+
+============ =================
Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+============ =============
Field name: start_sys_seg
Type: read
Offset/size: 0x20c/2
Protocol: 2.00+
+============ =============
The load low segment (0x1000). Obsolete.
+============ ==============
Field name: kernel_version
Type: read
Offset/size: 0x20e/2
Protocol: 2.00+
+============ ==============
If set to a nonzero value, contains a pointer to a NUL-terminated
human-readable kernel version number string, less 0x200. This can
@@ -348,17 +383,19 @@ Protocol: 2.00+
For example, if this value is set to 0x1c00, the kernel version
number string can be found at offset 0x1e00 in the kernel file.
This is a valid value if and only if the "setup_sects" field
- contains the value 15 or higher, as:
+ contains the value 15 or higher, as::
0x1c00 < 15*0x200 (= 0x1e00) but
0x1c00 >= 14*0x200 (= 0x1c00)
- 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15.
+ 0x1c00 >> 9 = 14, So the minimum value for setup_secs is 15.
+============ ==================
Field name: type_of_loader
Type: write (obligatory)
Offset/size: 0x210/1
Protocol: 2.00+
+============ ==================
If your boot loader has an assigned id (see table below), enter
0xTV here, where T is an identifier for the boot loader and V is
@@ -369,17 +406,20 @@ Protocol: 2.00+
Similarly, the ext_loader_ver field can be used to provide more than
four bits for the bootloader version.
- For example, for T = 0x15, V = 0x234, write:
+ For example, for T = 0x15, V = 0x234, write::
- type_of_loader <- 0xE4
- ext_loader_type <- 0x05
- ext_loader_ver <- 0x23
+ type_of_loader <- 0xE4
+ ext_loader_type <- 0x05
+ ext_loader_ver <- 0x23
Assigned boot loader ids (hexadecimal):
- 0 LILO (0x00 reserved for pre-2.00 bootloader)
+ == =======================================
+ 0 LILO
+ (0x00 reserved for pre-2.00 bootloader)
1 Loadlin
- 2 bootsect-loader (0x20, all other values reserved)
+ 2 bootsect-loader
+ (0x20, all other values reserved)
3 Syslinux
4 Etherboot/gPXE/iPXE
5 ELILO
@@ -390,55 +430,70 @@ Protocol: 2.00+
B Qemu
C Arcturus Networks uCbootloader
D kexec-tools
- E Extended (see ext_loader_type)
- F Special (0xFF = undefined)
- 10 Reserved
- 11 Minimal Linux Bootloader <http://sebastian-plotz.blogspot.de>
- 12 OVMF UEFI virtualization stack
+ E Extended (see ext_loader_type)
+ F Special (0xFF = undefined)
+ 10 Reserved
+ 11 Minimal Linux Bootloader
+ <http://sebastian-plotz.blogspot.de>
+ 12 OVMF UEFI virtualization stack
+ == =======================================
- Please contact <hpa@zytor.com> if you need a bootloader ID
- value assigned.
+ Please contact <hpa@zytor.com> if you need a bootloader ID value assigned.
+============ ===================
Field name: loadflags
Type: modify (obligatory)
Offset/size: 0x211/1
Protocol: 2.00+
+============ ===================
This field is a bitmask.
Bit 0 (read): LOADED_HIGH
+
- If 0, the protected-mode code is loaded at 0x10000.
- If 1, the protected-mode code is loaded at 0x100000.
Bit 1 (kernel internal): KASLR_FLAG
+
- Used internally by the compressed kernel to communicate
KASLR status to kernel proper.
- If 1, KASLR enabled.
- If 0, KASLR disabled.
+
+ - If 1, KASLR enabled.
+ - If 0, KASLR disabled.
Bit 5 (write): QUIET_FLAG
+
- If 0, print early messages.
- If 1, suppress early messages.
+
This requests to the kernel (decompressor and early
kernel) to not write early messages that require
accessing the display hardware directly.
Bit 6 (write): KEEP_SEGMENTS
+
Protocol: 2.07+
+
- If 0, reload the segment registers in the 32bit entry point.
- If 1, do not reload the segment registers in the 32bit entry point.
+
Assume that %cs %ds %ss %es are all set to flat segments with
a base of 0 (or the equivalent for their environment).
Bit 7 (write): CAN_USE_HEAP
+
Set this bit to 1 to indicate that the value entered in the
heap_end_ptr is valid. If this field is clear, some setup code
functionality will be disabled.
+
+============ ===================
Field name: setup_move_size
Type: modify (obligatory)
Offset/size: 0x212/2
Protocol: 2.00-2.01
+============ ===================
When using protocol 2.00 or 2.01, if the real mode kernel is not
loaded at 0x90000, it gets moved there later in the loading
@@ -447,14 +502,16 @@ Protocol: 2.00-2.01
itself.
The unit is bytes starting with the beginning of the boot sector.
-
+
This field is can be ignored when the protocol is 2.02 or higher, or
if the real-mode code is loaded at 0x90000.
+============ ========================
Field name: code32_start
Type: modify (optional, reloc)
Offset/size: 0x214/4
Protocol: 2.00+
+============ ========================
The address to jump to in protected mode. This defaults to the load
address of the kernel, and can be used by the boot loader to
@@ -462,47 +519,57 @@ Protocol: 2.00+
This field can be modified for two purposes:
- 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+ 1. as a boot loader hook (see Advanced Boot Loader Hooks below.)
- 2. if a bootloader which does not install a hook loads a
- relocatable kernel at a nonstandard address it will have to modify
- this field to point to the load address.
+ 2. if a bootloader which does not install a hook loads a
+ relocatable kernel at a nonstandard address it will have to modify
+ this field to point to the load address.
+============ ==================
Field name: ramdisk_image
Type: write (obligatory)
Offset/size: 0x218/4
Protocol: 2.00+
+============ ==================
The 32-bit linear address of the initial ramdisk or ramfs. Leave at
zero if there is no initial ramdisk/ramfs.
+============ ==================
Field name: ramdisk_size
Type: write (obligatory)
Offset/size: 0x21c/4
Protocol: 2.00+
+============ ==================
Size of the initial ramdisk or ramfs. Leave at zero if there is no
initial ramdisk/ramfs.
+============ ===============
Field name: bootsect_kludge
Type: kernel internal
Offset/size: 0x220/4
Protocol: 2.00+
+============ ===============
This field is obsolete.
+============ ==================
Field name: heap_end_ptr
Type: write (obligatory)
Offset/size: 0x224/2
Protocol: 2.01+
+============ ==================
Set this field to the offset (from the beginning of the real-mode
code) of the end of the setup stack/heap, minus 0x0200.
+============ ================
Field name: ext_loader_ver
Type: write (optional)
Offset/size: 0x226/1
Protocol: 2.02+
+============ ================
This field is used as an extension of the version number in the
type_of_loader field. The total version number is considered to be
@@ -514,10 +581,12 @@ Protocol: 2.02+
Kernels prior to 2.6.31 did not recognize this field, but it is safe
to write for protocol version 2.02 or higher.
+============ =====================================================
Field name: ext_loader_type
Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
Offset/size: 0x227/1
Protocol: 2.02+
+============ =====================================================
This field is used as an extension of the type number in
type_of_loader field. If the type in type_of_loader is 0xE, then
@@ -528,10 +597,12 @@ Protocol: 2.02+
Kernels prior to 2.6.31 did not recognize this field, but it is safe
to write for protocol version 2.02 or higher.
+============ ==================
Field name: cmd_line_ptr
Type: write (obligatory)
Offset/size: 0x228/4
Protocol: 2.02+
+============ ==================
Set this field to the linear address of the kernel command line.
The kernel command line can be located anywhere between the end of
@@ -544,10 +615,12 @@ Protocol: 2.02+
zero, the kernel will assume that your boot loader does not support
the 2.02+ protocol.
+============ ===============
Field name: initrd_addr_max
Type: read
Offset/size: 0x22c/4
Protocol: 2.03+
+============ ===============
The maximum address that may be occupied by the initial
ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this
@@ -556,10 +629,12 @@ Protocol: 2.03+
your ramdisk is exactly 131072 bytes long and this field is
0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
+============ ============================
Field name: kernel_alignment
Type: read/modify (reloc)
Offset/size: 0x230/4
Protocol: 2.05+ (read), 2.10+ (modify)
+============ ============================
Alignment unit required by the kernel (if relocatable_kernel is
true.) A relocatable kernel that is loaded at an alignment
@@ -571,25 +646,29 @@ Protocol: 2.05+ (read), 2.10+ (modify)
loader to modify this field to permit a lesser alignment. See the
min_alignment and pref_address field below.
+============ ==================
Field name: relocatable_kernel
Type: read (reloc)
Offset/size: 0x234/1
Protocol: 2.05+
+============ ==================
If this field is nonzero, the protected-mode part of the kernel can
be loaded at any address that satisfies the kernel_alignment field.
After loading, the boot loader must set the code32_start field to
point to the loaded code, or to a boot loader hook.
+============ =============
Field name: min_alignment
Type: read (reloc)
Offset/size: 0x235/1
Protocol: 2.10+
+============ =============
This field, if nonzero, indicates as a power of two the minimum
alignment required, as opposed to preferred, by the kernel to boot.
If a boot loader makes use of this field, it should update the
- kernel_alignment field with the alignment unit desired; typically:
+ kernel_alignment field with the alignment unit desired; typically::
kernel_alignment = 1 << min_alignment
@@ -597,44 +676,56 @@ Protocol: 2.10+
misaligned kernel. Therefore, a loader should typically try each
power-of-two alignment from kernel_alignment down to this alignment.
-Field name: xloadflags
-Type: read
-Offset/size: 0x236/2
-Protocol: 2.12+
+============ ==========
+Field name: xloadflags
+Type: read
+Offset/size: 0x236/2
+Protocol: 2.12+
+============ ==========
This field is a bitmask.
Bit 0 (read): XLF_KERNEL_64
+
- If 1, this kernel has the legacy 64-bit entry point at 0x200.
Bit 1 (read): XLF_CAN_BE_LOADED_ABOVE_4G
+
- If 1, kernel/boot_params/cmdline/ramdisk can be above 4G.
Bit 2 (read): XLF_EFI_HANDOVER_32
+
- If 1, the kernel supports the 32-bit EFI handoff entry point
given at handover_offset.
Bit 3 (read): XLF_EFI_HANDOVER_64
+
- If 1, the kernel supports the 64-bit EFI handoff entry point
given at handover_offset + 0x200.
Bit 4 (read): XLF_EFI_KEXEC
+
- If 1, the kernel supports kexec EFI boot with EFI runtime support.
+
+============ ============
Field name: cmdline_size
Type: read
Offset/size: 0x238/4
Protocol: 2.06+
+============ ============
The maximum size of the command line without the terminating
zero. This means that the command line can contain at most
cmdline_size characters. With protocol version 2.05 and earlier, the
maximum size was 255.
+============ ====================================
Field name: hardware_subarch
Type: write (optional, defaults to x86/PC)
Offset/size: 0x23c/4
Protocol: 2.07+
+============ ====================================
In a paravirtualized environment the hardware low level architectural
pieces such as interrupt handling, page table handling, and
@@ -643,25 +734,31 @@ Protocol: 2.07+
This field allows the bootloader to inform the kernel we are in one
one of those environments.
+ ========== ==============================
0x00000000 The default x86/PC environment
0x00000001 lguest
0x00000002 Xen
0x00000003 Moorestown MID
0x00000004 CE4100 TV Platform
+ ========== ==============================
+============ =========================
Field name: hardware_subarch_data
Type: write (subarch-dependent)
Offset/size: 0x240/8
Protocol: 2.07+
+============ =========================
A pointer to data that is specific to hardware subarch
This field is currently unused for the default x86/PC environment,
do not modify.
+============ ==============
Field name: payload_offset
Type: read
Offset/size: 0x248/4
Protocol: 2.08+
+============ ==============
If non-zero then this field contains the offset from the beginning
of the protected-mode code to the payload.
@@ -674,29 +771,33 @@ Protocol: 2.08+
02 21). The uncompressed payload is currently always ELF (magic
number 7F 45 4C 46).
+============ ==============
Field name: payload_length
Type: read
Offset/size: 0x24c/4
Protocol: 2.08+
+============ ==============
The length of the payload.
+============ ===============
Field name: setup_data
Type: write (special)
Offset/size: 0x250/8
Protocol: 2.09+
+============ ===============
The 64-bit physical pointer to NULL terminated single linked list of
struct setup_data. This is used to define a more extensible boot
parameters passing mechanism. The definition of struct setup_data is
- as follow:
+ as follow::
- struct setup_data {
- u64 next;
- u32 type;
- u32 len;
- u8 data[0];
- };
+ struct setup_data {
+ u64 next;
+ u32 type;
+ u32 len;
+ u8 data[0];
+ };
Where, the next is a 64-bit physical pointer to the next node of
linked list, the next field of the last node is 0; the type is used
@@ -708,10 +809,12 @@ Protocol: 2.09+
sure to consider the case where the linked list already contains
entries.
+============ ============
Field name: pref_address
Type: read (reloc)
Offset/size: 0x258/8
Protocol: 2.10+
+============ ============
This field, if nonzero, represents a preferred load address for the
kernel. A relocating bootloader should attempt to load at this
@@ -720,9 +823,11 @@ Protocol: 2.10+
A non-relocatable kernel will unconditionally move itself and to run
at this address.
+============ =======
Field name: init_size
Type: read
Offset/size: 0x260/4
+============ =======
This field indicates the amount of linear contiguous memory starting
at the kernel runtime start address that the kernel needs before it
@@ -731,16 +836,18 @@ Offset/size: 0x260/4
be used by a relocating boot loader to help select a safe load
address for the kernel.
- The kernel runtime start address is determined by the following algorithm:
+ The kernel runtime start address is determined by the following algorithm::
- if (relocatable_kernel)
+ if (relocatable_kernel)
runtime_start = align_up(load_address, kernel_alignment)
- else
+ else
runtime_start = pref_address
+============ ===============
Field name: handover_offset
Type: read
Offset/size: 0x264/4
+============ ===============
This field is the offset from the beginning of the kernel image to
the EFI handover protocol entry point. Boot loaders using the EFI
@@ -749,7 +856,8 @@ Offset/size: 0x264/4
See EFI HANDOVER PROTOCOL below for more details.
-**** THE IMAGE CHECKSUM
+The Image Checksum
+==================
From boot protocol version 2.08 onwards the CRC-32 is calculated over
the entire file using the characteristic polynomial 0x04C11DB7 and an
@@ -758,7 +866,8 @@ file; therefore the CRC of the file up to the limit specified in the
syssize field of the header is always 0.
-**** THE KERNEL COMMAND LINE
+The Kernel Command Line
+=======================
The kernel command line has become an important way for the boot
loader to communicate with the kernel. Some of its options are also
@@ -778,19 +887,20 @@ heap and 0xA0000.
If the protocol version is *not* 2.02 or higher, the kernel
command line is entered using the following protocol:
- At offset 0x0020 (word), "cmd_line_magic", enter the magic
- number 0xA33F.
+ - At offset 0x0020 (word), "cmd_line_magic", enter the magic
+ number 0xA33F.
+
+ - At offset 0x0022 (word), "cmd_line_offset", enter the offset
+ of the kernel command line (relative to the start of the
+ real-mode kernel).
- At offset 0x0022 (word), "cmd_line_offset", enter the offset
- of the kernel command line (relative to the start of the
- real-mode kernel).
-
- The kernel command line *must* be within the memory region
- covered by setup_move_size, so you may need to adjust this
- field.
+ - The kernel command line *must* be within the memory region
+ covered by setup_move_size, so you may need to adjust this
+ field.
-**** MEMORY LAYOUT OF THE REAL-MODE CODE
+Memory Layout of The Real-Mode Code
+===================================
The real-mode code requires a stack/heap to be set up, as well as
memory allocated for the kernel command line. This needs to be done
@@ -806,10 +916,11 @@ segment has to be used:
- When loading a zImage kernel ((loadflags & 0x01) == 0).
- When loading a 2.01 or earlier boot protocol kernel.
- -> For the 2.00 and 2.01 boot protocols, the real-mode code
- can be loaded at another address, but it is internally
- relocated to 0x90000. For the "old" protocol, the
- real-mode code must be loaded at 0x90000.
+.. note::
+ For the 2.00 and 2.01 boot protocols, the real-mode code
+ can be loaded at another address, but it is internally
+ relocated to 0x90000. For the "old" protocol, the
+ real-mode code must be loaded at 0x90000.
When loading at 0x90000, avoid using memory above 0x9a000.
@@ -822,24 +933,29 @@ The kernel command line should not be located below the real-mode
code, nor should it be located in high memory.
-**** SAMPLE BOOT CONFIGURATION
+Sample Boot Configuartion
+=========================
As a sample configuration, assume the following layout of the real
-mode segment:
+mode segment.
When loading below 0x90000, use the entire segment:
+ ============= ===================
0x0000-0x7fff Real mode kernel
0x8000-0xdfff Stack and heap
0xe000-0xffff Kernel command line
+ ============= ===================
When loading at 0x90000 OR the protocol version is 2.01 or earlier:
+ ============= ===================
0x0000-0x7fff Real mode kernel
0x8000-0x97ff Stack and heap
0x9800-0x9fff Kernel command line
+ ============= ===================
-Such a boot loader should enter the following fields in the header:
+Such a boot loader should enter the following fields in the header::
unsigned long base_ptr; /* base address for real-mode segment */
@@ -898,7 +1014,8 @@ Such a boot loader should enter the following fields in the header:
}
-**** LOADING THE REST OF THE KERNEL
+Loading The Rest of The Kernel
+==============================
The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
in the kernel file (again, if setup_sects == 0 the real value is 4.)
@@ -906,7 +1023,7 @@ It should be loaded at address 0x10000 for Image/zImage kernels and
0x100000 for bzImage kernels.
The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
-bit (LOAD_HIGH) in the loadflags field is set:
+bit (LOAD_HIGH) in the loadflags field is set::
is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
load_address = is_bzImage ? 0x100000 : 0x10000;
@@ -916,8 +1033,8 @@ the entire 0x10000-0x90000 range of memory. This means it is pretty
much a requirement for these kernels to load the real-mode part at
0x90000. bzImage kernels allow much more flexibility.
-
-**** SPECIAL COMMAND LINE OPTIONS
+Special Command Line Options
+============================
If the command line provided by the boot loader is entered by the
user, the user may expect the following command line options to work.
@@ -966,7 +1083,8 @@ or configuration-specified command line. Otherwise, "init=/bin/sh"
gets confused by the "auto" option.
-**** RUNNING THE KERNEL
+Running the Kernel
+==================
The kernel is started by jumping to the kernel entry point, which is
located at *segment* offset 0x20 from the start of the real mode
@@ -980,7 +1098,7 @@ interrupts should be disabled. Furthermore, to guard against bugs in
the kernel, it is recommended that the boot loader sets fs = gs = ds =
es = ss.
-In our example from above, we would do:
+In our example from above, we would do::
/* Note: in the case of the "old" kernel protocol, base_ptr must
be == 0x90000 at this point; see the previous sample code */
@@ -1003,7 +1121,8 @@ switched off, especially if the loaded kernel has the floppy driver as
a demand-loaded module!
-**** ADVANCED BOOT LOADER HOOKS
+Advanced Boot Loader Hooks
+==========================
If the boot loader runs in a particularly hostile environment (such as
LOADLIN, which runs under DOS) it may be impossible to follow the
@@ -1032,7 +1151,8 @@ IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and
(relocated, if appropriate.)
-**** 32-bit BOOT PROTOCOL
+32-bit Boot Protocol
+====================
For machine with some new BIOS other than legacy BIOS, such as EFI,
LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel
@@ -1045,7 +1165,7 @@ traditionally known as "zero page"). The memory for struct boot_params
should be allocated and initialized to all zero. Then the setup header
from offset 0x01f1 of kernel image on should be loaded into struct
boot_params and examined. The end of setup header can be calculated as
-follow:
+follow::
0x0202 + byte value at offset 0x0201
@@ -1069,7 +1189,8 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
address of the struct boot_params; %ebp, %edi and %ebx must be zero.
-**** 64-bit BOOT PROTOCOL
+64-bit Boot Protocol
+====================
For machine with 64bit cpus and 64bit kernel, we could use 64bit bootloader
and we need a 64-bit boot protocol.
@@ -1080,7 +1201,7 @@ traditionally known as "zero page"). The memory for struct boot_params
could be allocated anywhere (even above 4G) and initialized to all zero.
Then, the setup header at offset 0x01f1 of kernel image on should be
loaded into struct boot_params and examined. The end of setup header
-can be calculated as follows:
+can be calculated as follows::
0x0202 + byte value at offset 0x0201
@@ -1107,7 +1228,8 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base
address of the struct boot_params.
-**** EFI HANDOVER PROTOCOL
+EFI Handover Protocol
+=====================
This protocol allows boot loaders to defer initialisation to the EFI
boot stub. The boot loader is required to load the kernel/initrd(s)
@@ -1115,7 +1237,7 @@ from the boot media and jump to the EFI handover protocol entry point
which is hdr->handover_offset bytes from the beginning of
startup_{32,64}.
-The function prototype for the handover entry point looks like this,
+The function prototype for the handover entry point looks like this::
efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
@@ -1124,11 +1246,11 @@ firmware, 'table' is the EFI system table - these are the first two
arguments of the "handoff state" as described in section 2.3 of the
UEFI specification. 'bp' is the boot loader-allocated boot params.
-The boot loader *must* fill out the following fields in bp,
+The boot loader *must* fill out the following fields in bp::
- o hdr.code32_start
- o hdr.cmd_line_ptr
- o hdr.ramdisk_image (if applicable)
- o hdr.ramdisk_size (if applicable)
+ - hdr.code32_start
+ - hdr.cmd_line_ptr
+ - hdr.ramdisk_image (if applicable)
+ - hdr.ramdisk_size (if applicable)
All other fields should be zero.
diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
new file mode 100644
index 000000000000..33c5c3142e20
--- /dev/null
+++ b/Documentation/x86/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "X86 architecture specific documentation"
+
+tags.add("subproject")
+
+latex_documents = [
+ ('index', 'x86.tex', project,
+ 'The kernel development community', 'manual'),
+]
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.rst
index 46933e06c972..11307378acf0 100644
--- a/Documentation/x86/earlyprintk.txt
+++ b/Documentation/x86/earlyprintk.rst
@@ -1,52 +1,58 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Early Printk
+============
Mini-HOWTO for using the earlyprintk=dbgp boot option with a
USB2 Debug port key and a debug cable, on x86 systems.
You need two computers, the 'USB debug key' special gadget and
-and two USB cables, connected like this:
+and two USB cables, connected like this::
[host/target] <-------> [USB debug key] <-------> [client/console]
-1. There are a number of specific hardware requirements:
-
- a.) Host/target system needs to have USB debug port capability.
-
- You can check this capability by looking at a 'Debug port' bit in
- the lspci -vvv output:
-
- # lspci -vvv
- ...
- 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
- Subsystem: Lenovo ThinkPad T61
- Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
- Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
- Latency: 0
- Interrupt: pin D routed to IRQ 19
- Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
- Capabilities: [50] Power Management version 2
- Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
- Status: D0 PME-Enable- DSel=0 DScale=0 PME+
- Capabilities: [58] Debug port: BAR=1 offset=00a0
+Hardware requirements
+=====================
+
+ a) Host/target system needs to have USB debug port capability.
+
+ You can check this capability by looking at a 'Debug port' bit in
+ the lspci -vvv output::
+
+ # lspci -vvv
+ ...
+ 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
+ Subsystem: Lenovo ThinkPad T61
+ Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
+ Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Latency: 0
+ Interrupt: pin D routed to IRQ 19
+ Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
+ Capabilities: [50] Power Management version 2
+ Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
+ Status: D0 PME-Enable- DSel=0 DScale=0 PME+
+ Capabilities: [58] Debug port: BAR=1 offset=00a0
^^^^^^^^^^^ <==================== [ HERE ]
- Kernel driver in use: ehci_hcd
- Kernel modules: ehci-hcd
- ...
+ Kernel driver in use: ehci_hcd
+ Kernel modules: ehci-hcd
+ ...
-( If your system does not list a debug port capability then you probably
- won't be able to use the USB debug key. )
+ .. note::
+ If your system does not list a debug port capability then you probably
+ won't be able to use the USB debug key.
- b.) You also need a NetChip USB debug cable/key:
+ b) You also need a NetChip USB debug cable/key:
http://www.plxtech.com/products/NET2000/NET20DC/default.asp
This is a small blue plastic connector with two USB connections;
it draws power from its USB connections.
- c.) You need a second client/console system with a high speed USB 2.0
- port.
+ c) You need a second client/console system with a high speed USB 2.0 port.
- d.) The NetChip device must be plugged directly into the physical
- debug port on the "host/target" system. You cannot use a USB hub in
+ d) The NetChip device must be plugged directly into the physical
+ debug port on the "host/target" system. You cannot use a USB hub in
between the physical debug port and the "host/target" system.
The EHCI debug controller is bound to a specific physical USB
@@ -65,29 +71,31 @@ and two USB cables, connected like this:
to the hardware vendor, because there is no reason not to wire
this port into one of the physically accessible ports.
- e.) It is also important to note, that many versions of the NetChip
+ e) It is also important to note, that many versions of the NetChip
device require the "client/console" system to be plugged into the
right hand side of the device (with the product logo facing up and
readable left to right). The reason being is that the 5 volt
power supply is taken from only one side of the device and it
must be the side that does not get rebooted.
-2. Software requirements:
+Software requirements
+=====================
- a.) On the host/target system:
+ a) On the host/target system:
- You need to enable the following kernel config option:
+ You need to enable the following kernel config option::
CONFIG_EARLY_PRINTK_DBGP=y
And you need to add the boot command line: "earlyprintk=dbgp".
- (If you are using Grub, append it to the 'kernel' line in
- /etc/grub.conf. If you are using Grub2 on a BIOS firmware system,
- append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
- using Grub2 on an EFI firmware system, append it to the 'linux'
- or 'linuxefi' line in /boot/grub2/grub.cfg or
- /boot/efi/EFI/<distro>/grub.cfg.)
+ .. note::
+ If you are using Grub, append it to the 'kernel' line in
+ /etc/grub.conf. If you are using Grub2 on a BIOS firmware system,
+ append it to the 'linux' line in /boot/grub2/grub.cfg. If you are
+ using Grub2 on an EFI firmware system, append it to the 'linux'
+ or 'linuxefi' line in /boot/grub2/grub.cfg or
+ /boot/efi/EFI/<distro>/grub.cfg.
On systems with more than one EHCI debug controller you must
specify the correct EHCI debug controller number. The ordering
@@ -96,14 +104,15 @@ and two USB cables, connected like this:
controller. To use the second EHCI debug controller, you would
use the command line: "earlyprintk=dbgp1"
- NOTE: normally earlyprintk console gets turned off once the
- regular console is alive - use "earlyprintk=dbgp,keep" to keep
- this channel open beyond early bootup. This can be useful for
- debugging crashes under Xorg, etc.
+ .. note::
+ normally earlyprintk console gets turned off once the
+ regular console is alive - use "earlyprintk=dbgp,keep" to keep
+ this channel open beyond early bootup. This can be useful for
+ debugging crashes under Xorg, etc.
- b.) On the client/console system:
+ b) On the client/console system:
- You should enable the following kernel config option:
+ You should enable the following kernel config option::
CONFIG_USB_SERIAL_DEBUG=y
@@ -115,27 +124,28 @@ and two USB cables, connected like this:
it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to
see the raw output.
- c.) On Nvidia Southbridge based systems: the kernel will try to probe
+ c) On Nvidia Southbridge based systems: the kernel will try to probe
and find out which port has a debug device connected.
-3. Testing that it works fine:
+Testing
+=======
- You can test the output by using earlyprintk=dbgp,keep and provoking
- kernel messages on the host/target system. You can provoke a harmless
- kernel message by for example doing:
+You can test the output by using earlyprintk=dbgp,keep and provoking
+kernel messages on the host/target system. You can provoke a harmless
+kernel message by for example doing::
echo h > /proc/sysrq-trigger
- On the host/target system you should see this help line in "dmesg" output:
+On the host/target system you should see this help line in "dmesg" output::
SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
- On the client/console system do:
+On the client/console system do::
cat /dev/ttyUSB0
- And you should see the help line above displayed shortly after you've
- provoked it on the host system.
+And you should see the help line above displayed shortly after you've
+provoked it on the host system.
If it does not work then please ask about it on the linux-kernel@vger.kernel.org
mailing list or contact the x86 maintainers.
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.rst
index c1df8eba9dfd..a48b3f6ebbe8 100644
--- a/Documentation/x86/entry_64.txt
+++ b/Documentation/x86/entry_64.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Kernel Entries
+==============
+
This file documents some of the kernel entries in
arch/x86/entry/entry_64.S. A lot of this explanation is adapted from
an email from Ingo Molnar:
@@ -59,7 +65,7 @@ Now, there's a secondary complication: there's a cheap way to test
which mode the CPU is in and an expensive way.
The cheap way is to pick this info off the entry frame on the kernel
-stack, from the CS of the ptregs area of the kernel stack:
+stack, from the CS of the ptregs area of the kernel stack::
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
@@ -67,7 +73,7 @@ stack, from the CS of the ptregs area of the kernel stack:
SWAPGS
The expensive (paranoid) way is to read back the MSR_GS_BASE value
-(which is what SWAPGS modifies):
+(which is what SWAPGS modifies)::
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
@@ -76,7 +82,7 @@ The expensive (paranoid) way is to read back the MSR_GS_BASE value
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx,%ebx
-1: ret
+ 1: ret
If we are at an interrupt or user-trap/gate-alike boundary then we can
use the faster check: the stack will be a reliable indicator of
diff --git a/Documentation/x86/exception-tables.txt b/Documentation/x86/exception-tables.rst
index e396bcd8d830..24596c8210b5 100644
--- a/Documentation/x86/exception-tables.txt
+++ b/Documentation/x86/exception-tables.rst
@@ -1,5 +1,10 @@
- Kernel level exception handling in Linux
- Commentary by Joerg Pommnitz <joerg@raleigh.ibm.com>
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Kernel level exception handling
+===============================
+
+Commentary by Joerg Pommnitz <joerg@raleigh.ibm.com>
When a process runs in kernel mode, it often has to access user
mode memory whose address has been passed by an untrusted program.
@@ -25,9 +30,9 @@ How does this work?
Whenever the kernel tries to access an address that is currently not
accessible, the CPU generates a page fault exception and calls the
-page fault handler
+page fault handler::
-void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+ void do_page_fault(struct pt_regs *regs, unsigned long error_code)
in arch/x86/mm/fault.c. The parameters on the stack are set up by
the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter
@@ -57,73 +62,74 @@ as an example. The definition is somewhat hard to follow, so let's peek at
the code generated by the preprocessor and the compiler. I selected
the get_user call in drivers/char/sysrq.c for a detailed examination.
-The original code in sysrq.c line 587:
+The original code in sysrq.c line 587::
+
get_user(c, buf);
-The preprocessor output (edited to become somewhat readable):
-
-(
- {
- long __gu_err = - 14 , __gu_val = 0;
- const __typeof__(*( ( buf ) )) *__gu_addr = ((buf));
- if (((((0 + current_set[0])->tss.segment) == 0x18 ) ||
- (((sizeof(*(buf))) <= 0xC0000000UL) &&
- ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf)))))))
- do {
- __gu_err = 0;
- switch ((sizeof(*(buf)))) {
- case 1:
- __asm__ __volatile__(
- "1: mov" "b" " %2,%" "b" "1\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %3,%0\n"
- " xor" "b" " %" "b" "1,%" "b" "1\n"
- " jmp 2b\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,3b\n"
- ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *)
- ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ;
- break;
- case 2:
- __asm__ __volatile__(
- "1: mov" "w" " %2,%" "w" "1\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %3,%0\n"
- " xor" "w" " %" "w" "1,%" "w" "1\n"
- " jmp 2b\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,3b\n"
- ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
- ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err ));
- break;
- case 4:
- __asm__ __volatile__(
- "1: mov" "l" " %2,%" "" "1\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %3,%0\n"
- " xor" "l" " %" "" "1,%" "" "1\n"
- " jmp 2b\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n" " .long 1b,3b\n"
- ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
- ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err));
- break;
- default:
- (__gu_val) = __get_user_bad();
- }
- } while (0) ;
- ((c)) = (__typeof__(*((buf))))__gu_val;
- __gu_err;
- }
-);
+The preprocessor output (edited to become somewhat readable)::
+
+ (
+ {
+ long __gu_err = - 14 , __gu_val = 0;
+ const __typeof__(*( ( buf ) )) *__gu_addr = ((buf));
+ if (((((0 + current_set[0])->tss.segment) == 0x18 ) ||
+ (((sizeof(*(buf))) <= 0xC0000000UL) &&
+ ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf)))))))
+ do {
+ __gu_err = 0;
+ switch ((sizeof(*(buf)))) {
+ case 1:
+ __asm__ __volatile__(
+ "1: mov" "b" " %2,%" "b" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "b" " %" "b" "1,%" "b" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ;
+ break;
+ case 2:
+ __asm__ __volatile__(
+ "1: mov" "w" " %2,%" "w" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "w" " %" "w" "1,%" "w" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err ));
+ break;
+ case 4:
+ __asm__ __volatile__(
+ "1: mov" "l" " %2,%" "" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "l" " %" "" "1,%" "" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n" " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err));
+ break;
+ default:
+ (__gu_val) = __get_user_bad();
+ }
+ } while (0) ;
+ ((c)) = (__typeof__(*((buf))))__gu_val;
+ __gu_err;
+ }
+ );
WOW! Black GCC/assembly magic. This is impossible to follow, so let's
-see what code gcc generates:
+see what code gcc generates::
> xorl %edx,%edx
> movl current_set,%eax
@@ -154,7 +160,7 @@ understand. Can we? The actual user access is quite obvious. Thanks
to the unified address space we can just access the address in user
memory. But what does the .section stuff do?????
-To understand this we have to look at the final kernel:
+To understand this we have to look at the final kernel::
> objdump --section-headers vmlinux
>
@@ -181,7 +187,7 @@ To understand this we have to look at the final kernel:
There are obviously 2 non standard ELF sections in the generated object
file. But first we want to find out what happened to our code in the
-final kernel executable:
+final kernel executable::
> objdump --disassemble --section=.text vmlinux
>
@@ -199,7 +205,7 @@ final kernel executable:
The whole user memory access is reduced to 10 x86 machine instructions.
The instructions bracketed in the .section directives are no longer
in the normal execution path. They are located in a different section
-of the executable file:
+of the executable file::
> objdump --disassemble --section=.fixup vmlinux
>
@@ -207,14 +213,15 @@ of the executable file:
> c0199ffa <.fixup+10ba> xorb %dl,%dl
> c0199ffc <.fixup+10bc> jmp c017e7a7 <do_con_write+e3>
-And finally:
+And finally::
+
> objdump --full-contents --section=__ex_table vmlinux
>
> c01aa7c4 93c017c0 e09f19c0 97c017c0 99c017c0 ................
> c01aa7d4 f6c217c0 e99f19c0 a5e717c0 f59f19c0 ................
> c01aa7e4 080a18c0 01a019c0 0a0a18c0 04a019c0 ................
-or in human readable byte order:
+or in human readable byte order::
> c01aa7c4 c017c093 c0199fe0 c017c097 c017c099 ................
> c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
@@ -222,18 +229,22 @@ or in human readable byte order:
this is the interesting part!
> c01aa7e4 c0180a08 c019a001 c0180a0a c019a004 ................
-What happened? The assembly directives
+What happened? The assembly directives::
-.section .fixup,"ax"
-.section __ex_table,"a"
+ .section .fixup,"ax"
+ .section __ex_table,"a"
told the assembler to move the following code to the specified
-sections in the ELF object file. So the instructions
-3: movl $-14,%eax
- xorb %dl,%dl
- jmp 2b
-ended up in the .fixup section of the object file and the addresses
+sections in the ELF object file. So the instructions::
+
+ 3: movl $-14,%eax
+ xorb %dl,%dl
+ jmp 2b
+
+ended up in the .fixup section of the object file and the addresses::
+
.long 1b,3b
+
ended up in the __ex_table section of the object file. 1b and 3b
are local labels. The local label 1b (1b stands for next label 1
backward) is the address of the instruction that might fault, i.e.
@@ -246,35 +257,39 @@ the fault, in our case the actual value is c0199ff5:
the original assembly code: > 3: movl $-14,%eax
and linked in vmlinux : > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
-The assembly code
+The assembly code::
+
> .section __ex_table,"a"
> .align 4
> .long 1b,3b
-becomes the value pair
+becomes the value pair::
+
> c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
^this is ^this is
1b 3b
+
c017e7a5,c0199ff5 in the exception table of the kernel.
So, what actually happens if a fault from kernel mode with no suitable
vma occurs?
-1.) access to invalid address:
- > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
-2.) MMU generates exception
-3.) CPU calls do_page_fault
-4.) do page fault calls search_exception_table (regs->eip == c017e7a5);
-5.) search_exception_table looks up the address c017e7a5 in the
- exception table (i.e. the contents of the ELF section __ex_table)
- and returns the address of the associated fault handle code c0199ff5.
-6.) do_page_fault modifies its own return address to point to the fault
- handle code and returns.
-7.) execution continues in the fault handling code.
-8.) 8a) EAX becomes -EFAULT (== -14)
- 8b) DL becomes zero (the value we "read" from user space)
- 8c) execution continues at local label 2 (address of the
- instruction immediately after the faulting user access).
+#. access to invalid address::
+
+ > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+#. MMU generates exception
+#. CPU calls do_page_fault
+#. do page fault calls search_exception_table (regs->eip == c017e7a5);
+#. search_exception_table looks up the address c017e7a5 in the
+ exception table (i.e. the contents of the ELF section __ex_table)
+ and returns the address of the associated fault handle code c0199ff5.
+#. do_page_fault modifies its own return address to point to the fault
+ handle code and returns.
+#. execution continues in the fault handling code.
+#. a) EAX becomes -EFAULT (== -14)
+ b) DL becomes zero (the value we "read" from user space)
+ c) execution continues at local label 2 (address of the
+ instruction immediately after the faulting user access).
The steps 8a to 8c in a certain way emulate the faulting instruction.
@@ -295,14 +310,15 @@ Things changed when 64-bit support was added to x86 Linux. Rather than
double the size of the exception table by expanding the two entries
from 32-bits to 64 bits, a clever trick was used to store addresses
as relative offsets from the table itself. The assembly code changed
-from:
- .long 1b,3b
-to:
- .long (from) - .
- .long (to) - .
+from::
+
+ .long 1b,3b
+ to:
+ .long (from) - .
+ .long (to) - .
and the C-code that uses these values converts back to absolute addresses
-like this:
+like this::
ex_insn_addr(const struct exception_table_entry *x)
{
@@ -313,15 +329,18 @@ In v4.6 the exception table entry was expanded with a new field "handler".
This is also 32-bits wide and contains a third relative function
pointer which points to one of:
-1) int ex_handler_default(const struct exception_table_entry *fixup)
- This is legacy case that just jumps to the fixup code
-2) int ex_handler_fault(const struct exception_table_entry *fixup)
- This case provides the fault number of the trap that occurred at
- entry->insn. It is used to distinguish page faults from machine
- check.
-3) int ex_handler_ext(const struct exception_table_entry *fixup)
- This case is used for uaccess_err ... we need to set a flag
- in the task structure. Before the handler functions existed this
- case was handled by adding a large offset to the fixup to tag
- it as special.
+1) ``int ex_handler_default(const struct exception_table_entry *fixup)``
+ This is legacy case that just jumps to the fixup code
+
+2) ``int ex_handler_fault(const struct exception_table_entry *fixup)``
+ This case provides the fault number of the trap that occurred at
+ entry->insn. It is used to distinguish page faults from machine
+ check.
+
+3) ``int ex_handler_ext(const struct exception_table_entry *fixup)``
+ This case is used for uaccess_err ... we need to set a flag
+ in the task structure. Before the handler functions existed this
+ case was handled by adding a large offset to the fixup to tag
+ it as special.
+
More functions can easily be added.
diff --git a/Documentation/x86/i386/IO-APIC.txt b/Documentation/x86/i386/IO-APIC.rst
index 15f5baf7e1b6..ce4d8df15e7c 100644
--- a/Documentation/x86/i386/IO-APIC.txt
+++ b/Documentation/x86/i386/IO-APIC.rst
@@ -1,3 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+IO-APIC
+=======
+
+:Author: Ingo Molnar <mingo@kernel.org>
+
Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC',
which is an enhanced interrupt controller. It enables us to route
hardware interrupts to multiple CPUs, or to CPU groups. Without an
@@ -13,9 +21,8 @@ usually worked around by the kernel. If your MP-compliant SMP board does
not boot Linux, then consult the linux-smp mailing list archives first.
If your box boots fine with enabled IO-APIC IRQs, then your
-/proc/interrupts will look like this one:
+/proc/interrupts will look like this one::
- ---------------------------->
hell:~> cat /proc/interrupts
CPU0
0: 1360293 IO-APIC-edge timer
@@ -28,7 +35,6 @@ If your box boots fine with enabled IO-APIC IRQs, then your
NMI: 0
ERR: 0
hell:~>
- <----------------------------
Some interrupts are still listed as 'XT PIC', but this is not a problem;
none of those IRQ sources is performance-critical.
@@ -37,14 +43,14 @@ none of those IRQ sources is performance-critical.
In the unlikely case that your board does not create a working mp-table,
you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This
is non-trivial though and cannot be automated. One sample /etc/lilo.conf
-entry:
+entry::
append="pirq=15,11,10"
The actual numbers depend on your system, on your PCI cards and on their
PCI slot position. Usually PCI slots are 'daisy chained' before they are
connected to the PCI chipset IRQ routing facility (the incoming PIRQ1-4
-lines):
+lines)::
,-. ,-. ,-. ,-. ,-.
PIRQ4 ----| |-. ,-| |-. ,-| |-. ,-| |--------| |
@@ -56,7 +62,7 @@ lines):
PIRQ1 ----| |- `----| |- `----| |- `----| |--------| |
`-' `-' `-' `-' `-'
-Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD:
+Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD::
,-.
INTD--| |
@@ -78,19 +84,19 @@ to have non shared interrupts). Slot5 should be used for videocards, they
do not use interrupts normally, thus they are not daisy chained either.
so if you have your SCSI card (IRQ11) in Slot1, Tulip card (IRQ9) in
-Slot2, then you'll have to specify this pirq= line:
+Slot2, then you'll have to specify this pirq= line::
append="pirq=11,9"
the following script tries to figure out such a default pirq= line from
-your PCI configuration:
+your PCI configuration::
echo -n pirq=; echo `scanpci | grep T_L | cut -c56-` | sed 's/ /,/g'
note that this script won't work if you have skipped a few slots or if your
board does not do default daisy-chaining. (or the IO-APIC has the PIRQ pins
connected in some strange way). E.g. if in the above case you have your SCSI
-card (IRQ11) in Slot3, and have Slot1 empty:
+card (IRQ11) in Slot3, and have Slot1 empty::
append="pirq=0,9,11"
@@ -105,7 +111,7 @@ won't function properly (e.g. if it's inserted as a module).
If you have 2 PCI buses, then you can use up to 8 pirq values, although such
boards tend to have a good configuration.
-Be prepared that it might happen that you need some strange pirq line:
+Be prepared that it might happen that you need some strange pirq line::
append="pirq=0,0,0,0,0,0,9,11"
@@ -115,5 +121,3 @@ Good luck and mail to linux-smp@vger.kernel.org or
linux-kernel@vger.kernel.org if you have any problems that are not covered
by this document.
--- mingo
-
diff --git a/Documentation/x86/i386/index.rst b/Documentation/x86/i386/index.rst
new file mode 100644
index 000000000000..8747cf5bbd49
--- /dev/null
+++ b/Documentation/x86/i386/index.rst
@@ -0,0 +1,10 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+i386 Support
+============
+
+.. toctree::
+ :maxdepth: 2
+
+ IO-APIC
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
new file mode 100644
index 000000000000..ae36fc5fc649
--- /dev/null
+++ b/Documentation/x86/index.rst
@@ -0,0 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+x86-specific Documentation
+==========================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ boot
+ topology
+ exception-tables
+ kernel-stacks
+ entry_64
+ earlyprintk
+ orc-unwinder
+ zero-page
+ tlb
+ mtrr
+ pat
+ protection-keys
+ intel_mpx
+ amd-memory-encryption
+ pti
+ mds
+ microcode
+ resctrl_ui
+ usb-legacy-support
+ i386/index
+ x86_64/index
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.rst
index 85d0549ad846..387a640941a6 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.rst
@@ -1,5 +1,11 @@
-1. Intel(R) MPX Overview
-========================
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Intel(R) Memory Protection Extensions (MPX)
+===========================================
+
+Intel(R) MPX Overview
+=====================
Intel(R) Memory Protection Extensions (Intel(R) MPX) is a new capability
introduced into Intel Architecture. Intel MPX provides hardware features
@@ -7,7 +13,7 @@ that can be used in conjunction with compiler changes to check memory
references, for those references whose compile-time normal intentions are
usurped at runtime due to buffer overflow or underflow.
-You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
+You can tell if your CPU supports MPX by looking in /proc/cpuinfo::
cat /proc/cpuinfo | grep ' mpx '
@@ -21,8 +27,8 @@ can be downloaded from
http://software.intel.com/en-us/articles/intel-software-development-emulator
-2. How to get the advantage of MPX
-==================================
+How to get the advantage of MPX
+===============================
For MPX to work, changes are required in the kernel, binutils and compiler.
No source changes are required for applications, just a recompile.
@@ -84,14 +90,15 @@ Kernel MPX Code:
is unmapped.
-3. How does MPX kernel code work
-================================
+How does MPX kernel code work
+=============================
Handling #BR faults caused by MPX
---------------------------------
When MPX is enabled, there are 2 new situations that can generate
#BR faults.
+
* new bounds tables (BT) need to be allocated to save bounds.
* bounds violation caused by MPX instructions.
@@ -124,37 +131,37 @@ the kernel. It can theoretically be done completely from userspace. Here
are a few ways this could be done. We don't think any of them are practical
in the real-world, but here they are.
-Q: Can virtual space simply be reserved for the bounds tables so that we
- never have to allocate them?
-A: MPX-enabled application will possibly create a lot of bounds tables in
- process address space to save bounds information. These tables can take
- up huge swaths of memory (as much as 80% of the memory on the system)
- even if we clean them up aggressively. In the worst-case scenario, the
- tables can be 4x the size of the data structure being tracked. IOW, a
- 1-page structure can require 4 bounds-table pages. An X-GB virtual
- area needs 4*X GB of virtual space, plus 2GB for the bounds directory.
- If we were to preallocate them for the 128TB of user virtual address
- space, we would need to reserve 512TB+2GB, which is larger than the
- entire virtual address space today. This means they can not be reserved
- ahead of time. Also, a single process's pre-populated bounds directory
- consumes 2GB of virtual *AND* physical memory. IOW, it's completely
- infeasible to prepopulate bounds directories.
-
-Q: Can we preallocate bounds table space at the same time memory is
- allocated which might contain pointers that might eventually need
- bounds tables?
-A: This would work if we could hook the site of each and every memory
- allocation syscall. This can be done for small, constrained applications.
- But, it isn't practical at a larger scale since a given app has no
- way of controlling how all the parts of the app might allocate memory
- (think libraries). The kernel is really the only place to intercept
- these calls.
-
-Q: Could a bounds fault be handed to userspace and the tables allocated
- there in a signal handler instead of in the kernel?
-A: mmap() is not on the list of safe async handler functions and even
- if mmap() would work it still requires locking or nasty tricks to
- keep track of the allocation state there.
+:Q: Can virtual space simply be reserved for the bounds tables so that we
+ never have to allocate them?
+:A: MPX-enabled application will possibly create a lot of bounds tables in
+ process address space to save bounds information. These tables can take
+ up huge swaths of memory (as much as 80% of the memory on the system)
+ even if we clean them up aggressively. In the worst-case scenario, the
+ tables can be 4x the size of the data structure being tracked. IOW, a
+ 1-page structure can require 4 bounds-table pages. An X-GB virtual
+ area needs 4*X GB of virtual space, plus 2GB for the bounds directory.
+ If we were to preallocate them for the 128TB of user virtual address
+ space, we would need to reserve 512TB+2GB, which is larger than the
+ entire virtual address space today. This means they can not be reserved
+ ahead of time. Also, a single process's pre-populated bounds directory
+ consumes 2GB of virtual *AND* physical memory. IOW, it's completely
+ infeasible to prepopulate bounds directories.
+
+:Q: Can we preallocate bounds table space at the same time memory is
+ allocated which might contain pointers that might eventually need
+ bounds tables?
+:A: This would work if we could hook the site of each and every memory
+ allocation syscall. This can be done for small, constrained applications.
+ But, it isn't practical at a larger scale since a given app has no
+ way of controlling how all the parts of the app might allocate memory
+ (think libraries). The kernel is really the only place to intercept
+ these calls.
+
+:Q: Could a bounds fault be handed to userspace and the tables allocated
+ there in a signal handler instead of in the kernel?
+:A: mmap() is not on the list of safe async handler functions and even
+ if mmap() would work it still requires locking or nasty tricks to
+ keep track of the allocation state there.
Having ruled out all of the userspace-only approaches for managing
bounds tables that we could think of, we create them on demand in
@@ -167,20 +174,20 @@ If a #BR is generated due to a bounds violation caused by MPX.
We need to decode MPX instructions to get violation address and
set this address into extended struct siginfo.
-The _sigfault field of struct siginfo is extended as follow:
-
-87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-88 struct {
-89 void __user *_addr; /* faulting insn/memory ref. */
-90 #ifdef __ARCH_SI_TRAPNO
-91 int _trapno; /* TRAP # which caused the signal */
-92 #endif
-93 short _addr_lsb; /* LSB of the reported address */
-94 struct {
-95 void __user *_lower;
-96 void __user *_upper;
-97 } _addr_bnd;
-98 } _sigfault;
+The _sigfault field of struct siginfo is extended as follow::
+
+ 87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ 88 struct {
+ 89 void __user *_addr; /* faulting insn/memory ref. */
+ 90 #ifdef __ARCH_SI_TRAPNO
+ 91 int _trapno; /* TRAP # which caused the signal */
+ 92 #endif
+ 93 short _addr_lsb; /* LSB of the reported address */
+ 94 struct {
+ 95 void __user *_lower;
+ 96 void __user *_upper;
+ 97 } _addr_bnd;
+ 98 } _sigfault;
The '_addr' field refers to violation address, and new '_addr_and'
field refers to the upper/lower bounds when a #BR is caused.
@@ -209,9 +216,10 @@ Adding new prctl commands
Two new prctl commands are added to enable and disable MPX bounds tables
management in kernel.
+::
-155 #define PR_MPX_ENABLE_MANAGEMENT 43
-156 #define PR_MPX_DISABLE_MANAGEMENT 44
+ 155 #define PR_MPX_ENABLE_MANAGEMENT 43
+ 156 #define PR_MPX_DISABLE_MANAGEMENT 44
Runtime library in userspace is responsible for allocation of bounds
directory. So kernel have to use XSAVE instruction to get the base
@@ -223,8 +231,8 @@ into struct mm_struct to be used in future during PR_MPX_ENABLE_MANAGEMENT
command execution.
-4. Special rules
-================
+Special rules
+=============
1) If userspace is requesting help from the kernel to do the management
of bounds tables, it may not create or modify entries in the bounds directory.
diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks.rst
index d1bfb0b95ee0..6b0bcf027ff1 100644
--- a/Documentation/x86/kernel-stacks
+++ b/Documentation/x86/kernel-stacks.rst
@@ -1,5 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Kernel Stacks
+=============
+
Kernel stacks on x86-64 bit
----------------------------
+===========================
Most of the text from Keith Owens, hacked by AK
@@ -57,7 +63,7 @@ IST events with the same code to be nested. However in most cases, the
stack size allocated to an IST assumes no nesting for the same code.
If that assumption is ever broken then the stacks will become corrupt.
-The currently assigned IST stacks are :-
+The currently assigned IST stacks are:
* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
@@ -103,7 +109,7 @@ For more details see the Intel IA32 or AMD AMD64 architecture manuals.
Printing backtraces on x86
---------------------------
+==========================
The question about the '?' preceding function names in an x86 stacktrace
keeps popping up, here's an indepth explanation. It helps if the reader
@@ -113,7 +119,7 @@ arch/x86/kernel/dumpstack.c.
Adapted from Ingo's mail, Message-ID: <20150521101614.GA10889@gmail.com>:
We always scan the full kernel stack for return addresses stored on
-the kernel stack(s) [*], from stack top to stack bottom, and print out
+the kernel stack(s) [1]_, from stack top to stack bottom, and print out
anything that 'looks like' a kernel text address.
If it fits into the frame pointer chain, we print it without a question
@@ -141,6 +147,6 @@ that look like kernel text addresses, so if debug information is wrong,
we still print out the real call chain as well - just with more question
marks than ideal.
-[*] For things like IRQ and IST stacks, we also scan those stacks, in
- the right order, and try to cross from one stack into another
- reconstructing the call chain. This works most of the time.
+.. [1] For things like IRQ and IST stacks, we also scan those stacks, in
+ the right order, and try to cross from one stack into another
+ reconstructing the call chain. This works most of the time.
diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
new file mode 100644
index 000000000000..5d4330be200f
--- /dev/null
+++ b/Documentation/x86/mds.rst
@@ -0,0 +1,193 @@
+Microarchitectural Data Sampling (MDS) mitigation
+=================================================
+
+.. _mds:
+
+Overview
+--------
+
+Microarchitectural Data Sampling (MDS) is a family of side channel attacks
+on internal buffers in Intel CPUs. The variants are:
+
+ - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
+ - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
+ - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
+ - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
+
+MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
+dependent load (store-to-load forwarding) as an optimization. The forward
+can also happen to a faulting or assisting load operation for a different
+memory address, which can be exploited under certain conditions. Store
+buffers are partitioned between Hyper-Threads so cross thread forwarding is
+not possible. But if a thread enters or exits a sleep state the store
+buffer is repartitioned which can expose data from one thread to the other.
+
+MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
+L1 miss situations and to hold data which is returned or sent in response
+to a memory or I/O operation. Fill buffers can forward data to a load
+operation and also write data to the cache. When the fill buffer is
+deallocated it can retain the stale data of the preceding operations which
+can then be forwarded to a faulting or assisting load operation, which can
+be exploited under certain conditions. Fill buffers are shared between
+Hyper-Threads so cross thread leakage is possible.
+
+MLPDS leaks Load Port Data. Load ports are used to perform load operations
+from memory or I/O. The received data is then forwarded to the register
+file or a subsequent operation. In some implementations the Load Port can
+contain stale data from a previous operation which can be forwarded to
+faulting or assisting loads under certain conditions, which again can be
+exploited eventually. Load ports are shared between Hyper-Threads so cross
+thread leakage is possible.
+
+MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
+memory that takes a fault or assist can leave data in a microarchitectural
+structure that may later be observed using one of the same methods used by
+MSBDS, MFBDS or MLPDS.
+
+Exposure assumptions
+--------------------
+
+It is assumed that attack code resides in user space or in a guest with one
+exception. The rationale behind this assumption is that the code construct
+needed for exploiting MDS requires:
+
+ - to control the load to trigger a fault or assist
+
+ - to have a disclosure gadget which exposes the speculatively accessed
+ data for consumption through a side channel.
+
+ - to control the pointer through which the disclosure gadget exposes the
+ data
+
+The existence of such a construct in the kernel cannot be excluded with
+100% certainty, but the complexity involved makes it extremly unlikely.
+
+There is one exception, which is untrusted BPF. The functionality of
+untrusted BPF is limited, but it needs to be thoroughly investigated
+whether it can be used to create such a construct.
+
+
+Mitigation strategy
+-------------------
+
+All variants have the same mitigation strategy at least for the single CPU
+thread case (SMT off): Force the CPU to clear the affected buffers.
+
+This is achieved by using the otherwise unused and obsolete VERW
+instruction in combination with a microcode update. The microcode clears
+the affected CPU buffers when the VERW instruction is executed.
+
+For virtualization there are two ways to achieve CPU buffer
+clearing. Either the modified VERW instruction or via the L1D Flush
+command. The latter is issued when L1TF mitigation is enabled so the extra
+VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
+be issued.
+
+If the VERW instruction with the supplied segment selector argument is
+executed on a CPU without the microcode update there is no side effect
+other than a small number of pointlessly wasted CPU cycles.
+
+This does not protect against cross Hyper-Thread attacks except for MSBDS
+which is only exploitable cross Hyper-thread when one of the Hyper-Threads
+enters a C-state.
+
+The kernel provides a function to invoke the buffer clearing:
+
+ mds_clear_cpu_buffers()
+
+The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
+(idle) transitions.
+
+As a special quirk to address virtualization scenarios where the host has
+the microcode updated, but the hypervisor does not (yet) expose the
+MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
+hope that it might actually clear the buffers. The state is reflected
+accordingly.
+
+According to current knowledge additional mitigations inside the kernel
+itself are not required because the necessary gadgets to expose the leaked
+data cannot be controlled in a way which allows exploitation from malicious
+user space or VM guests.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ======= ============================================================
+ off Mitigation is disabled. Either the CPU is not affected or
+ mds=off is supplied on the kernel command line
+
+ full Mitigation is enabled. CPU is affected and MD_CLEAR is
+ advertised in CPUID.
+
+ vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not
+ advertised in CPUID. That is mainly for virtualization
+ scenarios where the host has the updated microcode but the
+ hypervisor does not expose MD_CLEAR in CPUID. It's a best
+ effort approach without guarantee.
+ ======= ============================================================
+
+If the CPU is affected and mds=off is not supplied on the kernel command
+line then the kernel selects the appropriate mitigation mode depending on
+the availability of the MD_CLEAR CPUID bit.
+
+Mitigation points
+-----------------
+
+1. Return to user space
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ When transitioning from kernel to user space the CPU buffers are flushed
+ on affected CPUs when the mitigation is not disabled on the kernel
+ command line. The migitation is enabled through the static key
+ mds_user_clear.
+
+ The mitigation is invoked in prepare_exit_to_usermode() which covers
+ all but one of the kernel to user space transitions. The exception
+ is when we return from a Non Maskable Interrupt (NMI), which is
+ handled directly in do_nmi().
+
+ (The reason that NMI is special is that prepare_exit_to_usermode() can
+ enable IRQs. In NMI context, NMIs are blocked, and we don't want to
+ enable IRQs with NMIs blocked.)
+
+
+2. C-State transition
+^^^^^^^^^^^^^^^^^^^^^
+
+ When a CPU goes idle and enters a C-State the CPU buffers need to be
+ cleared on affected CPUs when SMT is active. This addresses the
+ repartitioning of the store buffer when one of the Hyper-Threads enters
+ a C-State.
+
+ When SMT is inactive, i.e. either the CPU does not support it or all
+ sibling threads are offline CPU buffer clearing is not required.
+
+ The idle clearing is enabled on CPUs which are only affected by MSBDS
+ and not by any other MDS variant. The other MDS variants cannot be
+ protected against cross Hyper-Thread attacks because the Fill Buffer and
+ the Load Ports are shared. So on CPUs affected by other variants, the
+ idle clearing would be a window dressing exercise and is therefore not
+ activated.
+
+ The invocation is controlled by the static key mds_idle_clear which is
+ switched depending on the chosen mitigation mode and the SMT state of
+ the system.
+
+ The buffer clear is only invoked before entering the C-State to prevent
+ that stale data from the idling CPU from spilling to the Hyper-Thread
+ sibling after the store buffer got repartitioned and all entries are
+ available to the non idle sibling.
+
+ When coming out of idle the store buffer is partitioned again so each
+ sibling has half of it available. The back from idle CPU could be then
+ speculatively exposed to contents of the sibling. The buffers are
+ flushed either on exit to user space or on VMENTER so malicious code
+ in user space or the guest cannot speculatively access them.
+
+ The mitigation is hooked into all variants of halt()/mwait(), but does
+ not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
+ has been superseded by the intel_idle driver around 2010 and is
+ preferred on all affected CPUs which are expected to gain the MD_CLEAR
+ functionality in microcode. Aside of that the IO-Port mechanism is a
+ legacy interface which is only used on older systems which are either
+ not affected or do not receive microcode updates anymore.
diff --git a/Documentation/x86/microcode.txt b/Documentation/x86/microcode.rst
index 79fdb4a8148a..a320d37982ed 100644
--- a/Documentation/x86/microcode.txt
+++ b/Documentation/x86/microcode.rst
@@ -1,7 +1,11 @@
- The Linux Microcode Loader
+.. SPDX-License-Identifier: GPL-2.0
-Authors: Fenghua Yu <fenghua.yu@intel.com>
- Borislav Petkov <bp@suse.de>
+==========================
+The Linux Microcode Loader
+==========================
+
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+ - Borislav Petkov <bp@suse.de>
The kernel has a x86 microcode loading facility which is supposed to
provide microcode loading methods in the OS. Potential use cases are
@@ -10,8 +14,8 @@ and updating the microcode on long-running systems without rebooting.
The loader supports three loading methods:
-1. Early load microcode
-=======================
+Early load microcode
+====================
The kernel can update microcode very early during boot. Loading
microcode early can fix CPU issues before they are observed during
@@ -26,8 +30,10 @@ loader parses the combined initrd image during boot.
The microcode files in cpio name space are:
-on Intel: kernel/x86/microcode/GenuineIntel.bin
-on AMD : kernel/x86/microcode/AuthenticAMD.bin
+on Intel:
+ kernel/x86/microcode/GenuineIntel.bin
+on AMD :
+ kernel/x86/microcode/AuthenticAMD.bin
During BSP (BootStrapping Processor) boot (pre-SMP), the kernel
scans the microcode file in the initrd. If microcode matching the
@@ -42,8 +48,8 @@ Here's a crude example how to prepare an initrd with microcode (this is
normally done automatically by the distribution, when recreating the
initrd, so you don't really have to do it yourself. It is documented
here for future reference only).
+::
----
#!/bin/bash
if [ -z "$1" ]; then
@@ -76,15 +82,15 @@ here for future reference only).
cat ucode.cpio $INITRD.orig > $INITRD
rm -rf $TMPDIR
----
+
The system needs to have the microcode packages installed into
/lib/firmware or you need to fixup the paths above if yours are
somewhere else and/or you've downloaded them directly from the processor
vendor's site.
-2. Late loading
-===============
+Late loading
+============
There are two legacy user space interfaces to load microcode, either through
/dev/cpu/microcode or through /sys/devices/system/cpu/microcode/reload file
@@ -94,9 +100,9 @@ The /dev/cpu/microcode method is deprecated because it needs a special
userspace tool for that.
The easier method is simply installing the microcode packages your distro
-supplies and running:
+supplies and running::
-# echo 1 > /sys/devices/system/cpu/microcode/reload
+ # echo 1 > /sys/devices/system/cpu/microcode/reload
as root.
@@ -104,29 +110,29 @@ The loading mechanism looks for microcode blobs in
/lib/firmware/{intel-ucode,amd-ucode}. The default distro installation
packages already put them there.
-3. Builtin microcode
-====================
+Builtin microcode
+=================
The loader supports also loading of a builtin microcode supplied through
the regular builtin firmware method CONFIG_EXTRA_FIRMWARE. Only 64-bit is
currently supported.
-Here's an example:
+Here's an example::
-CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
-CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
+ CONFIG_EXTRA_FIRMWARE="intel-ucode/06-3a-09 amd-ucode/microcode_amd_fam15h.bin"
+ CONFIG_EXTRA_FIRMWARE_DIR="/lib/firmware"
-This basically means, you have the following tree structure locally:
+This basically means, you have the following tree structure locally::
-/lib/firmware/
-|-- amd-ucode
-...
-| |-- microcode_amd_fam15h.bin
-...
-|-- intel-ucode
-...
-| |-- 06-3a-09
-...
+ /lib/firmware/
+ |-- amd-ucode
+ ...
+ | |-- microcode_amd_fam15h.bin
+ ...
+ |-- intel-ucode
+ ...
+ | |-- 06-3a-09
+ ...
so that the build system can find those files and integrate them into
the final kernel image. The early loader finds them and applies them.
diff --git a/Documentation/x86/mtrr.rst b/Documentation/x86/mtrr.rst
new file mode 100644
index 000000000000..c5b695d75349
--- /dev/null
+++ b/Documentation/x86/mtrr.rst
@@ -0,0 +1,354 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+MTRR (Memory Type Range Register) control
+=========================================
+
+:Authors: - Richard Gooch <rgooch@atnf.csiro.au> - 3 Jun 1999
+ - Luis R. Rodriguez <mcgrof@do-not-panic.com> - April 9, 2015
+
+
+Phasing out MTRR use
+====================
+
+MTRR use is replaced on modern x86 hardware with PAT. Direct MTRR use by
+drivers on Linux is now completely phased out, device drivers should use
+arch_phys_wc_add() in combination with ioremap_wc() to make MTRR effective on
+non-PAT systems while a no-op but equally effective on PAT enabled systems.
+
+Even if Linux does not use MTRRs directly, some x86 platform firmware may still
+set up MTRRs early before booting the OS. They do this as some platform
+firmware may still have implemented access to MTRRs which would be controlled
+and handled by the platform firmware directly. An example of platform use of
+MTRRs is through the use of SMI handlers, one case could be for fan control,
+the platform code would need uncachable access to some of its fan control
+registers. Such platform access does not need any Operating System MTRR code in
+place other than mtrr_type_lookup() to ensure any OS specific mapping requests
+are aligned with platform MTRR setup. If MTRRs are only set up by the platform
+firmware code though and the OS does not make any specific MTRR mapping
+requests mtrr_type_lookup() should always return MTRR_TYPE_INVALID.
+
+For details refer to :doc:`pat`.
+
+.. tip::
+ On Intel P6 family processors (Pentium Pro, Pentium II and later)
+ the Memory Type Range Registers (MTRRs) may be used to control
+ processor access to memory ranges. This is most useful when you have
+ a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+ allows bus write transfers to be combined into a larger transfer
+ before bursting over the PCI/AGP bus. This can increase performance
+ of image write operations 2.5 times or more.
+
+ The Cyrix 6x86, 6x86MX and M II processors have Address Range
+ Registers (ARRs) which provide a similar functionality to MTRRs. For
+ these, the ARRs are used to emulate the MTRRs.
+
+ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+ MTRRs. These are supported. The AMD Athlon family provide 8 Intel
+ style MTRRs.
+
+ The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
+ are supported.
+
+ The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs.
+
+ The CONFIG_MTRR option creates a /proc/mtrr file which may be used
+ to manipulate your MTRRs. Typically the X server should use
+ this. This should have a reasonably generic interface so that
+ similar control registers on other processors can be easily
+ supported.
+
+There are two interfaces to /proc/mtrr: one is an ASCII interface
+which allows you to read and write. The other is an ioctl()
+interface. The ASCII interface is meant for administration. The
+ioctl() interface is meant for C programs (i.e. the X server). The
+interfaces are described below, with sample commands and C code.
+
+
+Reading MTRRs from the shell
+============================
+::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+
+Creating MTRRs from the C-shell::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr
+
+or if you use bash::
+
+ # echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr
+
+And the result thereof::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+ reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+ reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1
+
+This is for video RAM at base address 0xf8000000 and size 4 megabytes. To
+find out your base address, you need to look at the output of your X
+server, which tells you where the linear framebuffer address is. A
+typical line that you may get is::
+
+ (--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000
+
+Note that you should only use the value from the X server, as it may
+move the framebuffer base address, so the only value you can trust is
+that reported by the X server.
+
+To find out the size of your framebuffer (what, you don't actually
+know?), the following line will tell you::
+
+ (--) S3: videoram: 4096k
+
+That's 4 megabytes, which is 0x400000 bytes (in hexadecimal).
+A patch is being written for XFree86 which will make this automatic:
+in other words the X server will manipulate /proc/mtrr using the
+ioctl() interface, so users won't have to do anything. If you use a
+commercial X server, lobby your vendor to add support for MTRRs.
+
+
+Creating overlapping MTRRs
+==========================
+::
+
+ %echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr
+ %echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr
+
+And the results::
+
+ % cat /proc/mtrr
+ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
+ reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
+ reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
+
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
+excluded from the beginning of the region because it is used for
+registers.
+
+NOTE: You can only create type=uncachable region, if the first
+region that you created is type=write-combining.
+
+
+Removing MTRRs from the C-shel
+==============================
+::
+
+ % echo "disable=2" >! /proc/mtrr
+
+or using bash::
+
+ % echo "disable=2" >| /proc/mtrr
+
+
+Reading MTRRs from a C program using ioctl()'s
+==============================================
+::
+
+ /* mtrr-show.c
+
+ Source file for mtrr-show (example program to show MTRRs using ioctl()'s)
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This program will use an ioctl() on /proc/mtrr to show the current MTRR
+ settings. This is an alternative to reading /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main ()
+ {
+ int fd;
+ struct mtrr_gentry gentry;
+
+ if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (1);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (2);
+ }
+ for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0;
+ ++gentry.regnum)
+ {
+ if (gentry.size < 1)
+ {
+ fprintf (stderr, "Register: %u disabled\n", gentry.regnum);
+ continue;
+ }
+ fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n",
+ gentry.regnum, gentry.base, gentry.size,
+ mtrr_strings[gentry.type]);
+ }
+ if (errno == EINVAL) exit (0);
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (3);
+ } /* End Function main */
+
+
+Creating MTRRs from a C programme using ioctl()'s
+=================================================
+::
+
+ /* mtrr-add.c
+
+ Source file for mtrr-add (example programme to add an MTRRs using ioctl())
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+ */
+
+ /*
+ This programme will use an ioctl() on /proc/mtrr to add an entry. The first
+ available mtrr is used. This is an alternative to writing /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+ */
+ #include <stdio.h>
+ #include <string.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <errno.h>
+ #include <asm/mtrr.h>
+
+ #define TRUE 1
+ #define FALSE 0
+ #define ERRSTRING strerror (errno)
+
+ static char *mtrr_strings[MTRR_NUM_TYPES] =
+ {
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+ };
+
+ int main (int argc, char **argv)
+ {
+ int fd;
+ struct mtrr_sentry sentry;
+
+ if (argc != 4)
+ {
+ fprintf (stderr, "Usage:\tmtrr-add base size type\n");
+ exit (1);
+ }
+ sentry.base = strtoul (argv[1], NULL, 0);
+ sentry.size = strtoul (argv[2], NULL, 0);
+ for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type)
+ {
+ if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break;
+ }
+ if (sentry.type >= MTRR_NUM_TYPES)
+ {
+ fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]);
+ exit (2);
+ }
+ if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (3);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (4);
+ }
+ if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1)
+ {
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (5);
+ }
+ fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n");
+ sleep (5);
+ close (fd);
+ fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n",
+ stderr);
+ } /* End Function main */
diff --git a/Documentation/x86/mtrr.txt b/Documentation/x86/mtrr.txt
deleted file mode 100644
index dc3e703913ac..000000000000
--- a/Documentation/x86/mtrr.txt
+++ /dev/null
@@ -1,329 +0,0 @@
-MTRR (Memory Type Range Register) control
-
-Richard Gooch <rgooch@atnf.csiro.au> - 3 Jun 1999
-Luis R. Rodriguez <mcgrof@do-not-panic.com> - April 9, 2015
-
-===============================================================================
-Phasing out MTRR use
-
-MTRR use is replaced on modern x86 hardware with PAT. Direct MTRR use by
-drivers on Linux is now completely phased out, device drivers should use
-arch_phys_wc_add() in combination with ioremap_wc() to make MTRR effective on
-non-PAT systems while a no-op but equally effective on PAT enabled systems.
-
-Even if Linux does not use MTRRs directly, some x86 platform firmware may still
-set up MTRRs early before booting the OS. They do this as some platform
-firmware may still have implemented access to MTRRs which would be controlled
-and handled by the platform firmware directly. An example of platform use of
-MTRRs is through the use of SMI handlers, one case could be for fan control,
-the platform code would need uncachable access to some of its fan control
-registers. Such platform access does not need any Operating System MTRR code in
-place other than mtrr_type_lookup() to ensure any OS specific mapping requests
-are aligned with platform MTRR setup. If MTRRs are only set up by the platform
-firmware code though and the OS does not make any specific MTRR mapping
-requests mtrr_type_lookup() should always return MTRR_TYPE_INVALID.
-
-For details refer to Documentation/x86/pat.txt.
-
-===============================================================================
-
- On Intel P6 family processors (Pentium Pro, Pentium II and later)
- the Memory Type Range Registers (MTRRs) may be used to control
- processor access to memory ranges. This is most useful when you have
- a video (VGA) card on a PCI or AGP bus. Enabling write-combining
- allows bus write transfers to be combined into a larger transfer
- before bursting over the PCI/AGP bus. This can increase performance
- of image write operations 2.5 times or more.
-
- The Cyrix 6x86, 6x86MX and M II processors have Address Range
- Registers (ARRs) which provide a similar functionality to MTRRs. For
- these, the ARRs are used to emulate the MTRRs.
-
- The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
- MTRRs. These are supported. The AMD Athlon family provide 8 Intel
- style MTRRs.
-
- The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
- are supported.
-
- The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs.
-
- The CONFIG_MTRR option creates a /proc/mtrr file which may be used
- to manipulate your MTRRs. Typically the X server should use
- this. This should have a reasonably generic interface so that
- similar control registers on other processors can be easily
- supported.
-
-
-There are two interfaces to /proc/mtrr: one is an ASCII interface
-which allows you to read and write. The other is an ioctl()
-interface. The ASCII interface is meant for administration. The
-ioctl() interface is meant for C programs (i.e. the X server). The
-interfaces are described below, with sample commands and C code.
-
-===============================================================================
-Reading MTRRs from the shell:
-
-% cat /proc/mtrr
-reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
-reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
-===============================================================================
-Creating MTRRs from the C-shell:
-# echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr
-or if you use bash:
-# echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr
-
-And the result thereof:
-% cat /proc/mtrr
-reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
-reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
-reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1
-
-This is for video RAM at base address 0xf8000000 and size 4 megabytes. To
-find out your base address, you need to look at the output of your X
-server, which tells you where the linear framebuffer address is. A
-typical line that you may get is:
-
-(--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000
-
-Note that you should only use the value from the X server, as it may
-move the framebuffer base address, so the only value you can trust is
-that reported by the X server.
-
-To find out the size of your framebuffer (what, you don't actually
-know?), the following line will tell you:
-
-(--) S3: videoram: 4096k
-
-That's 4 megabytes, which is 0x400000 bytes (in hexadecimal).
-A patch is being written for XFree86 which will make this automatic:
-in other words the X server will manipulate /proc/mtrr using the
-ioctl() interface, so users won't have to do anything. If you use a
-commercial X server, lobby your vendor to add support for MTRRs.
-===============================================================================
-Creating overlapping MTRRs:
-
-%echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr
-%echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr
-
-And the results: cat /proc/mtrr
-reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
-reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
-reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
-
-Some cards (especially Voodoo Graphics boards) need this 4 kB area
-excluded from the beginning of the region because it is used for
-registers.
-
-NOTE: You can only create type=uncachable region, if the first
-region that you created is type=write-combining.
-===============================================================================
-Removing MTRRs from the C-shell:
-% echo "disable=2" >! /proc/mtrr
-or using bash:
-% echo "disable=2" >| /proc/mtrr
-===============================================================================
-Reading MTRRs from a C program using ioctl()'s:
-
-/* mtrr-show.c
-
- Source file for mtrr-show (example program to show MTRRs using ioctl()'s)
-
- Copyright (C) 1997-1998 Richard Gooch
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Richard Gooch may be reached by email at rgooch@atnf.csiro.au
- The postal address is:
- Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-*/
-
-/*
- This program will use an ioctl() on /proc/mtrr to show the current MTRR
- settings. This is an alternative to reading /proc/mtrr.
-
-
- Written by Richard Gooch 17-DEC-1997
-
- Last updated by Richard Gooch 2-MAY-1998
-
-
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <asm/mtrr.h>
-
-#define TRUE 1
-#define FALSE 0
-#define ERRSTRING strerror (errno)
-
-static char *mtrr_strings[MTRR_NUM_TYPES] =
-{
- "uncachable", /* 0 */
- "write-combining", /* 1 */
- "?", /* 2 */
- "?", /* 3 */
- "write-through", /* 4 */
- "write-protect", /* 5 */
- "write-back", /* 6 */
-};
-
-int main ()
-{
- int fd;
- struct mtrr_gentry gentry;
-
- if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 )
- {
- if (errno == ENOENT)
- {
- fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
- stderr);
- exit (1);
- }
- fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
- exit (2);
- }
- for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0;
- ++gentry.regnum)
- {
- if (gentry.size < 1)
- {
- fprintf (stderr, "Register: %u disabled\n", gentry.regnum);
- continue;
- }
- fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n",
- gentry.regnum, gentry.base, gentry.size,
- mtrr_strings[gentry.type]);
- }
- if (errno == EINVAL) exit (0);
- fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
- exit (3);
-} /* End Function main */
-===============================================================================
-Creating MTRRs from a C programme using ioctl()'s:
-
-/* mtrr-add.c
-
- Source file for mtrr-add (example programme to add an MTRRs using ioctl())
-
- Copyright (C) 1997-1998 Richard Gooch
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Richard Gooch may be reached by email at rgooch@atnf.csiro.au
- The postal address is:
- Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-*/
-
-/*
- This programme will use an ioctl() on /proc/mtrr to add an entry. The first
- available mtrr is used. This is an alternative to writing /proc/mtrr.
-
-
- Written by Richard Gooch 17-DEC-1997
-
- Last updated by Richard Gooch 2-MAY-1998
-
-
-*/
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <asm/mtrr.h>
-
-#define TRUE 1
-#define FALSE 0
-#define ERRSTRING strerror (errno)
-
-static char *mtrr_strings[MTRR_NUM_TYPES] =
-{
- "uncachable", /* 0 */
- "write-combining", /* 1 */
- "?", /* 2 */
- "?", /* 3 */
- "write-through", /* 4 */
- "write-protect", /* 5 */
- "write-back", /* 6 */
-};
-
-int main (int argc, char **argv)
-{
- int fd;
- struct mtrr_sentry sentry;
-
- if (argc != 4)
- {
- fprintf (stderr, "Usage:\tmtrr-add base size type\n");
- exit (1);
- }
- sentry.base = strtoul (argv[1], NULL, 0);
- sentry.size = strtoul (argv[2], NULL, 0);
- for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type)
- {
- if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break;
- }
- if (sentry.type >= MTRR_NUM_TYPES)
- {
- fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]);
- exit (2);
- }
- if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 )
- {
- if (errno == ENOENT)
- {
- fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
- stderr);
- exit (3);
- }
- fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
- exit (4);
- }
- if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1)
- {
- fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
- exit (5);
- }
- fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n");
- sleep (5);
- close (fd);
- fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n",
- stderr);
-} /* End Function main */
-===============================================================================
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.rst
index cd4b29be29af..d811576c1f3e 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.rst
@@ -1,8 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
ORC unwinder
============
Overview
---------
+========
The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the
@@ -23,12 +26,12 @@ correlate instruction addresses with their stack states at run time.
ORC vs frame pointers
----------------------
+=====================
With frame pointers enabled, GCC adds instrumentation code to every
function in the kernel. The kernel's .text size increases by about
3.2%, resulting in a broad kernel-wide slowdown. Measurements by Mel
-Gorman [1] have shown a slowdown of 5-10% for some workloads.
+Gorman [1]_ have shown a slowdown of 5-10% for some workloads.
In contrast, the ORC unwinder has no effect on text size or runtime
performance, because the debuginfo is out of band. So if you disable
@@ -55,7 +58,7 @@ depending on the kernel config.
ORC vs DWARF
-------------
+============
ORC debuginfo's advantage over DWARF itself is that it's much simpler.
It gets rid of the complex DWARF CFI state machine and also gets rid of
@@ -65,7 +68,7 @@ mission critical oops code.
The simpler debuginfo format also enables the unwinder to be much faster
than DWARF, which is important for perf and lockdep. In a basic
-performance test by Jiri Slaby [2], the ORC unwinder was about 20x
+performance test by Jiri Slaby [2]_, the ORC unwinder was about 20x
faster than an out-of-tree DWARF unwinder. (Note: That measurement was
taken before some performance tweaks were added, which doubled
performance, so the speedup over DWARF may be closer to 40x.)
@@ -85,7 +88,7 @@ still be able to control the format, e.g. no complex state machines.
ORC unwind table generation
----------------------------
+===========================
The ORC data is generated by objtool. With the existing compile-time
stack metadata validation feature, objtool already follows all code
@@ -133,7 +136,7 @@ objtool follows GCC code quite well.
Unwinder implementation details
--------------------------------
+===============================
Objtool generates the ORC data by integrating with the compile-time
stack metadata validation feature, which is described in detail in
@@ -154,7 +157,7 @@ subset of the table needs to be searched.
Etymology
----------
+=========
Orcs, fearsome creatures of medieval folklore, are the Dwarves' natural
enemies. Similarly, the ORC unwinder was created in opposition to the
@@ -162,7 +165,7 @@ complexity and slowness of DWARF.
"Although Orcs rarely consider multiple solutions to a problem, they do
excel at getting things done because they are creatures of action, not
-thought." [3] Similarly, unlike the esoteric DWARF unwinder, the
+thought." [3]_ Similarly, unlike the esoteric DWARF unwinder, the
veracious ORC unwinder wastes no time or siloconic effort decoding
variable-length zero-extended unsigned-integer byte-coded
state-machine-based debug information entries.
@@ -174,6 +177,6 @@ brutal, unyielding efficiency.
ORC stands for Oops Rewind Capability.
-[1] https://lkml.kernel.org/r/20170602104048.jkkzssljsompjdwy@suse.de
-[2] https://lkml.kernel.org/r/d2ca5435-6386-29b8-db87-7f227c2b713a@suse.cz
-[3] http://dustin.wikidot.com/half-orcs-and-orcs
+.. [1] https://lkml.kernel.org/r/20170602104048.jkkzssljsompjdwy@suse.de
+.. [2] https://lkml.kernel.org/r/d2ca5435-6386-29b8-db87-7f227c2b713a@suse.cz
+.. [3] http://dustin.wikidot.com/half-orcs-and-orcs
diff --git a/Documentation/x86/pat.rst b/Documentation/x86/pat.rst
new file mode 100644
index 000000000000..9a298fd97d74
--- /dev/null
+++ b/Documentation/x86/pat.rst
@@ -0,0 +1,242 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+PAT (Page Attribute Table)
+==========================
+
+x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
+page level granularity. PAT is complementary to the MTRR settings which allows
+for setting of memory types over physical address ranges. However, PAT is
+more flexible than MTRR due to its capability to set attributes at page level
+and also due to the fact that there are no hardware limitations on number of
+such attribute settings allowed. Added flexibility comes with guidelines for
+not having memory type aliasing for the same physical memory with multiple
+virtual addresses.
+
+PAT allows for different types of memory attributes. The most commonly used
+ones that will be supported at this time are:
+
+=== ==============
+WB Write-back
+UC Uncached
+WC Write-combined
+WT Write-through
+UC- Uncached Minus
+=== ==============
+
+
+PAT APIs
+========
+
+There are many different APIs in the kernel that allows setting of memory
+attributes at the page level. In order to avoid aliasing, these interfaces
+should be used thoughtfully. Below is a table of interfaces available,
+their intended usage and their memory attribute relationships. Internally,
+these APIs use a reserve_memtype()/free_memtype() interface on the physical
+address range to avoid any aliasing.
+
++------------------------+----------+--------------+------------------+
+| API | RAM | ACPI,... | Reserved/Holes |
++------------------------+----------+--------------+------------------+
+| ioremap | -- | UC- | UC- |
++------------------------+----------+--------------+------------------+
+| ioremap_cache | -- | WB | WB |
++------------------------+----------+--------------+------------------+
+| ioremap_uc | -- | UC | UC |
++------------------------+----------+--------------+------------------+
+| ioremap_nocache | -- | UC- | UC- |
++------------------------+----------+--------------+------------------+
+| ioremap_wc | -- | -- | WC |
++------------------------+----------+--------------+------------------+
+| ioremap_wt | -- | -- | WT |
++------------------------+----------+--------------+------------------+
+| set_memory_uc, | UC- | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| set_memory_wc, | WC | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| set_memory_wt, | WT | -- | -- |
+| set_memory_wb | | | |
++------------------------+----------+--------------+------------------+
+| pci sysfs resource | -- | -- | UC- |
++------------------------+----------+--------------+------------------+
+| pci sysfs resource_wc | -- | -- | WC |
+| is IORESOURCE_PREFETCH | | | |
++------------------------+----------+--------------+------------------+
+| pci proc | -- | -- | UC- |
+| !PCIIOC_WRITE_COMBINE | | | |
++------------------------+----------+--------------+------------------+
+| pci proc | -- | -- | WC |
+| PCIIOC_WRITE_COMBINE | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+| read-write | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | UC- | UC- |
+| mmap SYNC flag | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+| mmap !SYNC flag | | | |
+| and | |(from existing| (from existing |
+| any alias to this area | |alias) | alias) |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | WB | WB |
+| mmap !SYNC flag | | | |
+| no alias to this area | | | |
+| and | | | |
+| MTRR says WB | | | |
++------------------------+----------+--------------+------------------+
+| /dev/mem | -- | -- | UC- |
+| mmap !SYNC flag | | | |
+| no alias to this area | | | |
+| and | | | |
+| MTRR says !WB | | | |
++------------------------+----------+--------------+------------------+
+
+
+Advanced APIs for drivers
+=========================
+
+A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
+vmf_insert_pfn.
+
+Drivers wanting to export some pages to userspace do it by using mmap
+interface and a combination of:
+
+ 1) pgprot_noncached()
+ 2) io_remap_pfn_range() or remap_pfn_range() or vmf_insert_pfn()
+
+With PAT support, a new API pgprot_writecombine is being added. So, drivers can
+continue to use the above sequence, with either pgprot_noncached() or
+pgprot_writecombine() in step 1, followed by step 2.
+
+In addition, step 2 internally tracks the region as UC or WC in memtype
+list in order to ensure no conflicting mapping.
+
+Note that this set of APIs only works with IO (non RAM) regions. If driver
+wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
+as step 0 above and also track the usage of those pages and use set_memory_wb()
+before the page is freed to free pool.
+
+MTRR effects on PAT / non-PAT systems
+=====================================
+
+The following table provides the effects of using write-combining MTRRs when
+using ioremap*() calls on x86 for both non-PAT and PAT systems. Ideally
+mtrr_add() usage will be phased out in favor of arch_phys_wc_add() which will
+be a no-op on PAT enabled systems. The region over which a arch_phys_wc_add()
+is made, should already have been ioremapped with WC attributes or PAT entries,
+this can be done by using ioremap_wc() / set_memory_wc(). Devices which
+combine areas of IO memory desired to remain uncacheable with areas where
+write-combining is desirable should consider use of ioremap_uc() followed by
+set_memory_wc() to white-list effective write-combined areas. Such use is
+nevertheless discouraged as the effective memory type is considered
+implementation defined, yet this strategy can be used as last resort on devices
+with size-constrained regions where otherwise MTRR write-combining would
+otherwise not be effective.
+::
+
+ ==== ======= === ========================= =====================
+ MTRR Non-PAT PAT Linux ioremap value Effective memory type
+ ==== ======= === ========================= =====================
+ PAT Non-PAT | PAT
+ |PCD |
+ ||PWT |
+ ||| |
+ WC 000 WB _PAGE_CACHE_MODE_WB WC | WC
+ WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC
+ WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | UC
+ WC 011 UC _PAGE_CACHE_MODE_UC UC | UC
+ ==== ======= === ========================= =====================
+
+ (*) denotes implementation defined and is discouraged
+
+.. note:: -- in the above table mean "Not suggested usage for the API". Some
+ of the --'s are strictly enforced by the kernel. Some others are not really
+ enforced today, but may be enforced in future.
+
+For ioremap and pci access through /sys or /proc - The actual type returned
+can be more restrictive, in case of any existing aliasing for that address.
+For example: If there is an existing uncached mapping, a new ioremap_wc can
+return uncached mapping in place of write-combine requested.
+
+set_memory_[uc|wc|wt] and set_memory_wb should be used in pairs, where driver
+will first make a region uc, wc or wt and switch it back to wb after use.
+
+Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
+interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
+
+Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
+types.
+
+Drivers should use set_memory_[uc|wc|wt] to set access type for RAM ranges.
+
+
+PAT debugging
+=============
+
+With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by::
+
+ # mount -t debugfs debugfs /sys/kernel/debug
+ # cat /sys/kernel/debug/x86/pat_memtype_list
+ PAT memtype list:
+ uncached-minus @ 0x7fadf000-0x7fae0000
+ uncached-minus @ 0x7fb19000-0x7fb1a000
+ uncached-minus @ 0x7fb1a000-0x7fb1b000
+ uncached-minus @ 0x7fb1b000-0x7fb1c000
+ uncached-minus @ 0x7fb1c000-0x7fb1d000
+ uncached-minus @ 0x7fb1d000-0x7fb1e000
+ uncached-minus @ 0x7fb1e000-0x7fb25000
+ uncached-minus @ 0x7fb25000-0x7fb26000
+ uncached-minus @ 0x7fb26000-0x7fb27000
+ uncached-minus @ 0x7fb27000-0x7fb28000
+ uncached-minus @ 0x7fb28000-0x7fb2e000
+ uncached-minus @ 0x7fb2e000-0x7fb2f000
+ uncached-minus @ 0x7fb2f000-0x7fb30000
+ uncached-minus @ 0x7fb31000-0x7fb32000
+ uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT related debug messages is with
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to dmesg log.
+
+PAT Initialization
+==================
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ ==== ===== ========================== ========= =======
+ MTRR PAT Call Sequence PAT State PAT MSR
+ ==== ===== ========================== ========= =======
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+ ==== ===== ========================== ========= =======
+
+ Legend
+
+ ========= =======================================
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+ ========= =======================================
+
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
deleted file mode 100644
index 481d8d8536ac..000000000000
--- a/Documentation/x86/pat.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-
-PAT (Page Attribute Table)
-
-x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
-page level granularity. PAT is complementary to the MTRR settings which allows
-for setting of memory types over physical address ranges. However, PAT is
-more flexible than MTRR due to its capability to set attributes at page level
-and also due to the fact that there are no hardware limitations on number of
-such attribute settings allowed. Added flexibility comes with guidelines for
-not having memory type aliasing for the same physical memory with multiple
-virtual addresses.
-
-PAT allows for different types of memory attributes. The most commonly used
-ones that will be supported at this time are Write-back, Uncached,
-Write-combined, Write-through and Uncached Minus.
-
-
-PAT APIs
---------
-
-There are many different APIs in the kernel that allows setting of memory
-attributes at the page level. In order to avoid aliasing, these interfaces
-should be used thoughtfully. Below is a table of interfaces available,
-their intended usage and their memory attribute relationships. Internally,
-these APIs use a reserve_memtype()/free_memtype() interface on the physical
-address range to avoid any aliasing.
-
-
--------------------------------------------------------------------
-API | RAM | ACPI,... | Reserved/Holes |
------------------------|----------|------------|------------------|
- | | | |
-ioremap | -- | UC- | UC- |
- | | | |
-ioremap_cache | -- | WB | WB |
- | | | |
-ioremap_uc | -- | UC | UC |
- | | | |
-ioremap_nocache | -- | UC- | UC- |
- | | | |
-ioremap_wc | -- | -- | WC |
- | | | |
-ioremap_wt | -- | -- | WT |
- | | | |
-set_memory_uc | UC- | -- | -- |
- set_memory_wb | | | |
- | | | |
-set_memory_wc | WC | -- | -- |
- set_memory_wb | | | |
- | | | |
-set_memory_wt | WT | -- | -- |
- set_memory_wb | | | |
- | | | |
-pci sysfs resource | -- | -- | UC- |
- | | | |
-pci sysfs resource_wc | -- | -- | WC |
- is IORESOURCE_PREFETCH| | | |
- | | | |
-pci proc | -- | -- | UC- |
- !PCIIOC_WRITE_COMBINE | | | |
- | | | |
-pci proc | -- | -- | WC |
- PCIIOC_WRITE_COMBINE | | | |
- | | | |
-/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
- read-write | | | |
- | | | |
-/dev/mem | -- | UC- | UC- |
- mmap SYNC flag | | | |
- | | | |
-/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
- mmap !SYNC flag | |(from exist-| (from exist- |
- and | | ing alias)| ing alias) |
- any alias to this area| | | |
- | | | |
-/dev/mem | -- | WB | WB |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says WB | | | |
- | | | |
-/dev/mem | -- | -- | UC- |
- mmap !SYNC flag | | | |
- no alias to this area | | | |
- and | | | |
- MTRR says !WB | | | |
- | | | |
--------------------------------------------------------------------
-
-Advanced APIs for drivers
--------------------------
-A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
-vmf_insert_pfn
-
-Drivers wanting to export some pages to userspace do it by using mmap
-interface and a combination of
-1) pgprot_noncached()
-2) io_remap_pfn_range() or remap_pfn_range() or vmf_insert_pfn()
-
-With PAT support, a new API pgprot_writecombine is being added. So, drivers can
-continue to use the above sequence, with either pgprot_noncached() or
-pgprot_writecombine() in step 1, followed by step 2.
-
-In addition, step 2 internally tracks the region as UC or WC in memtype
-list in order to ensure no conflicting mapping.
-
-Note that this set of APIs only works with IO (non RAM) regions. If driver
-wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
-as step 0 above and also track the usage of those pages and use set_memory_wb()
-before the page is freed to free pool.
-
-MTRR effects on PAT / non-PAT systems
--------------------------------------
-
-The following table provides the effects of using write-combining MTRRs when
-using ioremap*() calls on x86 for both non-PAT and PAT systems. Ideally
-mtrr_add() usage will be phased out in favor of arch_phys_wc_add() which will
-be a no-op on PAT enabled systems. The region over which a arch_phys_wc_add()
-is made, should already have been ioremapped with WC attributes or PAT entries,
-this can be done by using ioremap_wc() / set_memory_wc(). Devices which
-combine areas of IO memory desired to remain uncacheable with areas where
-write-combining is desirable should consider use of ioremap_uc() followed by
-set_memory_wc() to white-list effective write-combined areas. Such use is
-nevertheless discouraged as the effective memory type is considered
-implementation defined, yet this strategy can be used as last resort on devices
-with size-constrained regions where otherwise MTRR write-combining would
-otherwise not be effective.
-
-----------------------------------------------------------------------
-MTRR Non-PAT PAT Linux ioremap value Effective memory type
-----------------------------------------------------------------------
- Non-PAT | PAT
- PAT
- |PCD
- ||PWT
- |||
-WC 000 WB _PAGE_CACHE_MODE_WB WC | WC
-WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC
-WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | UC
-WC 011 UC _PAGE_CACHE_MODE_UC UC | UC
-----------------------------------------------------------------------
-
-(*) denotes implementation defined and is discouraged
-
-Notes:
-
--- in the above table mean "Not suggested usage for the API". Some of the --'s
-are strictly enforced by the kernel. Some others are not really enforced
-today, but may be enforced in future.
-
-For ioremap and pci access through /sys or /proc - The actual type returned
-can be more restrictive, in case of any existing aliasing for that address.
-For example: If there is an existing uncached mapping, a new ioremap_wc can
-return uncached mapping in place of write-combine requested.
-
-set_memory_[uc|wc|wt] and set_memory_wb should be used in pairs, where driver
-will first make a region uc, wc or wt and switch it back to wb after use.
-
-Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
-interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
-
-Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
-types.
-
-Drivers should use set_memory_[uc|wc|wt] to set access type for RAM ranges.
-
-
-PAT debugging
--------------
-
-With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by
-
-# mount -t debugfs debugfs /sys/kernel/debug
-# cat /sys/kernel/debug/x86/pat_memtype_list
-PAT memtype list:
-uncached-minus @ 0x7fadf000-0x7fae0000
-uncached-minus @ 0x7fb19000-0x7fb1a000
-uncached-minus @ 0x7fb1a000-0x7fb1b000
-uncached-minus @ 0x7fb1b000-0x7fb1c000
-uncached-minus @ 0x7fb1c000-0x7fb1d000
-uncached-minus @ 0x7fb1d000-0x7fb1e000
-uncached-minus @ 0x7fb1e000-0x7fb25000
-uncached-minus @ 0x7fb25000-0x7fb26000
-uncached-minus @ 0x7fb26000-0x7fb27000
-uncached-minus @ 0x7fb27000-0x7fb28000
-uncached-minus @ 0x7fb28000-0x7fb2e000
-uncached-minus @ 0x7fb2e000-0x7fb2f000
-uncached-minus @ 0x7fb2f000-0x7fb30000
-uncached-minus @ 0x7fb31000-0x7fb32000
-uncached-minus @ 0x80000000-0x90000000
-
-This list shows physical address ranges and various PAT settings used to
-access those physical address ranges.
-
-Another, more verbose way of getting PAT related debug messages is with
-"debugpat" boot parameter. With this parameter, various debug messages are
-printed to dmesg log.
-
-PAT Initialization
-------------------
-
-The following table describes how PAT is initialized under various
-configurations. The PAT MSR must be updated by Linux in order to support WC
-and WT attributes. Otherwise, the PAT MSR has the value programmed in it
-by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
-
- MTRR PAT Call Sequence PAT State PAT MSR
- =========================================================
- E E MTRR -> PAT init Enabled OS
- E D MTRR -> PAT init Disabled -
- D E MTRR -> PAT disable Disabled BIOS
- D D MTRR -> PAT disable Disabled -
- - np/E PAT -> PAT disable Disabled BIOS
- - np/D PAT -> PAT disable Disabled -
- E !P/E MTRR -> PAT init Disabled BIOS
- D !P/E MTRR -> PAT disable Disabled BIOS
- !M !P/E MTRR stub -> PAT disable Disabled BIOS
-
- Legend
- ------------------------------------------------
- E Feature enabled in CPU
- D Feature disabled/unsupported in CPU
- np "nopat" boot option specified
- !P CONFIG_X86_PAT option unset
- !M CONFIG_MTRR option unset
- Enabled PAT state set to enabled
- Disabled PAT state set to disabled
- OS PAT initializes PAT MSR with OS setting
- BIOS PAT keeps PAT MSR with BIOS setting
-
diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.rst
index ecb0d2dadfb7..49d9833af871 100644
--- a/Documentation/x86/protection-keys.txt
+++ b/Documentation/x86/protection-keys.rst
@@ -1,3 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Memory Protection Keys
+======================
+
Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
which is found on Intel's Skylake "Scalable Processor" Server CPUs.
It will be avalable in future non-server parts.
@@ -23,9 +29,10 @@ even though there is theoretically space in the PAE PTEs. These
permissions are enforced on data access only and have no effect on
instruction fetches.
-=========================== Syscalls ===========================
+Syscalls
+========
-There are 3 system calls which directly interact with pkeys:
+There are 3 system calls which directly interact with pkeys::
int pkey_alloc(unsigned long flags, unsigned long init_access_rights)
int pkey_free(int pkey);
@@ -37,6 +44,7 @@ pkey_alloc(). An application calls the WRPKRU instruction
directly in order to change access permissions to memory covered
with a key. In this example WRPKRU is wrapped by a C function
called pkey_set().
+::
int real_prot = PROT_READ|PROT_WRITE;
pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);
@@ -45,43 +53,44 @@ called pkey_set().
... application runs here
Now, if the application needs to update the data at 'ptr', it can
-gain access, do the update, then remove its write access:
+gain access, do the update, then remove its write access::
pkey_set(pkey, 0); // clear PKEY_DISABLE_WRITE
*ptr = foo; // assign something
pkey_set(pkey, PKEY_DISABLE_WRITE); // set PKEY_DISABLE_WRITE again
Now when it frees the memory, it will also free the pkey since it
-is no longer in use:
+is no longer in use::
munmap(ptr, PAGE_SIZE);
pkey_free(pkey);
-(Note: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
- An example implementation can be found in
- tools/testing/selftests/x86/protection_keys.c)
+.. note:: pkey_set() is a wrapper for the RDPKRU and WRPKRU instructions.
+ An example implementation can be found in
+ tools/testing/selftests/x86/protection_keys.c.
-=========================== Behavior ===========================
+Behavior
+========
The kernel attempts to make protection keys consistent with the
-behavior of a plain mprotect(). For instance if you do this:
+behavior of a plain mprotect(). For instance if you do this::
mprotect(ptr, size, PROT_NONE);
something(ptr);
-you can expect the same effects with protection keys when doing this:
+you can expect the same effects with protection keys when doing this::
pkey = pkey_alloc(0, PKEY_DISABLE_WRITE | PKEY_DISABLE_READ);
pkey_mprotect(ptr, size, PROT_READ|PROT_WRITE, pkey);
something(ptr);
That should be true whether something() is a direct access to 'ptr'
-like:
+like::
*ptr = foo;
or when the kernel does the access on the application's behalf like
-with a read():
+with a read()::
read(fd, ptr, 1);
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.rst
index 5cd58439ad2d..4b858a9bad8d 100644
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.rst
@@ -1,9 +1,15 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Page Table Isolation (PTI)
+==========================
+
Overview
========
-Page Table Isolation (pti, previously known as KAISER[1]) is a
+Page Table Isolation (pti, previously known as KAISER [1]_) is a
countermeasure against attacks on the shared user/kernel address
-space such as the "Meltdown" approach[2].
+space such as the "Meltdown" approach [2]_.
To mitigate this class of attacks, we create an independent set of
page tables for use only when running userspace applications. When
@@ -60,6 +66,7 @@ Protection against side-channel attacks is important. But,
this protection comes at a cost:
1. Increased Memory Use
+
a. Each process now needs an order-1 PGD instead of order-0.
(Consumes an additional 4k per process).
b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
@@ -68,6 +75,7 @@ this protection comes at a cost:
is decompressed, but no space in the kernel image itself.
2. Runtime Cost
+
a. CR3 manipulation to switch between the page table copies
must be done at interrupt, syscall, and exception entry
and exit (it can be skipped when the kernel is interrupted,
@@ -142,6 +150,7 @@ ideally doing all of these in parallel:
interrupted, including nested NMIs. Using "-c" boosts the rate of
NMIs, and using two -c with separate counters encourages nested NMIs
and less deterministic behavior.
+ ::
while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
@@ -182,5 +191,5 @@ that are worth noting here.
tended to be TLB invalidation issues. Usually invalidating
the wrong PCID, or otherwise missing an invalidation.
-1. https://gruss.cc/files/kaiser.pdf
-2. https://meltdownattack.com/meltdown.pdf
+.. [1] https://gruss.cc/files/kaiser.pdf
+.. [2] https://meltdownattack.com/meltdown.pdf
diff --git a/Documentation/x86/resctrl_ui.txt b/Documentation/x86/resctrl_ui.rst
index c1f95b59e14d..225cfd4daaee 100644
--- a/Documentation/x86/resctrl_ui.txt
+++ b/Documentation/x86/resctrl_ui.rst
@@ -1,33 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================
User Interface for Resource Control feature
+===========================================
-Intel refers to this feature as Intel Resource Director Technology(Intel(R) RDT).
-AMD refers to this feature as AMD Platform Quality of Service(AMD QoS).
+:Copyright: |copy| 2016 Intel Corporation
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+ - Tony Luck <tony.luck@intel.com>
+ - Vikas Shivappa <vikas.shivappa@intel.com>
-Copyright (C) 2016 Intel Corporation
-Fenghua Yu <fenghua.yu@intel.com>
-Tony Luck <tony.luck@intel.com>
-Vikas Shivappa <vikas.shivappa@intel.com>
+Intel refers to this feature as Intel Resource Director Technology(Intel(R) RDT).
+AMD refers to this feature as AMD Platform Quality of Service(AMD QoS).
This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo
flag bits:
-RDT (Resource Director Technology) Allocation - "rdt_a"
-CAT (Cache Allocation Technology) - "cat_l3", "cat_l2"
-CDP (Code and Data Prioritization ) - "cdp_l3", "cdp_l2"
-CQM (Cache QoS Monitoring) - "cqm_llc", "cqm_occup_llc"
-MBM (Memory Bandwidth Monitoring) - "cqm_mbm_total", "cqm_mbm_local"
-MBA (Memory Bandwidth Allocation) - "mba"
-To use the feature mount the file system:
+============================================= ================================
+RDT (Resource Director Technology) Allocation "rdt_a"
+CAT (Cache Allocation Technology) "cat_l3", "cat_l2"
+CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2"
+CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc"
+MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local"
+MBA (Memory Bandwidth Allocation) "mba"
+============================================= ================================
+
+To use the feature mount the file system::
# mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps]] /sys/fs/resctrl
mount options are:
-"cdp": Enable code/data prioritization in L3 cache allocations.
-"cdpl2": Enable code/data prioritization in L2 cache allocations.
-"mba_MBps": Enable the MBA Software Controller(mba_sc) to specify MBA
- bandwidth in MBps
+"cdp":
+ Enable code/data prioritization in L3 cache allocations.
+"cdpl2":
+ Enable code/data prioritization in L2 cache allocations.
+"mba_MBps":
+ Enable the MBA Software Controller(mba_sc) to specify MBA
+ bandwidth in MBps
L2 and L3 CDP are controlled seperately.
@@ -44,7 +55,7 @@ For more details on the behavior of the interface during monitoring
and allocation, see the "Resource alloc and monitor groups" section.
Info directory
---------------
+==============
The 'info' directory contains information about the enabled
resources. Each resource has its own subdirectory. The subdirectory
@@ -56,77 +67,93 @@ allocation:
Cache resource(L3/L2) subdirectory contains the following files
related to allocation:
-"num_closids": The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
-
-"cbm_mask": The bitmask which is valid for this resource.
- This mask is equivalent to 100%.
-
-"min_cbm_bits": The minimum number of consecutive bits which
- must be set when writing a mask.
-
-"shareable_bits": Bitmask of shareable resource with other executing
- entities (e.g. I/O). User can use this when
- setting up exclusive cache partitions. Note that
- some platforms support devices that have their
- own settings for cache use which can over-ride
- these bits.
-"bit_usage": Annotated capacity bitmasks showing how all
- instances of the resource are used. The legend is:
- "0" - Corresponding region is unused. When the system's
+"num_closids":
+ The number of CLOSIDs which are valid for this
+ resource. The kernel uses the smallest number of
+ CLOSIDs of all enabled resources as limit.
+"cbm_mask":
+ The bitmask which is valid for this resource.
+ This mask is equivalent to 100%.
+"min_cbm_bits":
+ The minimum number of consecutive bits which
+ must be set when writing a mask.
+
+"shareable_bits":
+ Bitmask of shareable resource with other executing
+ entities (e.g. I/O). User can use this when
+ setting up exclusive cache partitions. Note that
+ some platforms support devices that have their
+ own settings for cache use which can over-ride
+ these bits.
+"bit_usage":
+ Annotated capacity bitmasks showing how all
+ instances of the resource are used. The legend is:
+
+ "0":
+ Corresponding region is unused. When the system's
resources have been allocated and a "0" is found
in "bit_usage" it is a sign that resources are
wasted.
- "H" - Corresponding region is used by hardware only
+
+ "H":
+ Corresponding region is used by hardware only
but available for software use. If a resource
has bits set in "shareable_bits" but not all
of these bits appear in the resource groups'
schematas then the bits appearing in
"shareable_bits" but no resource group will
be marked as "H".
- "X" - Corresponding region is available for sharing and
+ "X":
+ Corresponding region is available for sharing and
used by hardware and software. These are the
bits that appear in "shareable_bits" as
well as a resource group's allocation.
- "S" - Corresponding region is used by software
+ "S":
+ Corresponding region is used by software
and available for sharing.
- "E" - Corresponding region is used exclusively by
+ "E":
+ Corresponding region is used exclusively by
one resource group. No sharing allowed.
- "P" - Corresponding region is pseudo-locked. No
+ "P":
+ Corresponding region is pseudo-locked. No
sharing allowed.
Memory bandwitdh(MB) subdirectory contains the following files
with respect to allocation:
-"min_bandwidth": The minimum memory bandwidth percentage which
- user can request.
+"min_bandwidth":
+ The minimum memory bandwidth percentage which
+ user can request.
-"bandwidth_gran": The granularity in which the memory bandwidth
- percentage is allocated. The allocated
- b/w percentage is rounded off to the next
- control step available on the hardware. The
- available bandwidth control steps are:
- min_bandwidth + N * bandwidth_gran.
+"bandwidth_gran":
+ The granularity in which the memory bandwidth
+ percentage is allocated. The allocated
+ b/w percentage is rounded off to the next
+ control step available on the hardware. The
+ available bandwidth control steps are:
+ min_bandwidth + N * bandwidth_gran.
-"delay_linear": Indicates if the delay scale is linear or
- non-linear. This field is purely informational
- only.
+"delay_linear":
+ Indicates if the delay scale is linear or
+ non-linear. This field is purely informational
+ only.
If RDT monitoring is available there will be an "L3_MON" directory
with the following files:
-"num_rmids": The number of RMIDs available. This is the
- upper bound for how many "CTRL_MON" + "MON"
- groups can be created.
+"num_rmids":
+ The number of RMIDs available. This is the
+ upper bound for how many "CTRL_MON" + "MON"
+ groups can be created.
-"mon_features": Lists the monitoring events if
- monitoring is enabled for the resource.
+"mon_features":
+ Lists the monitoring events if
+ monitoring is enabled for the resource.
"max_threshold_occupancy":
- Read/write file provides the largest value (in
- bytes) at which a previously used LLC_occupancy
- counter can be considered for re-use.
+ Read/write file provides the largest value (in
+ bytes) at which a previously used LLC_occupancy
+ counter can be considered for re-use.
Finally, in the top level of the "info" directory there is a file
named "last_cmd_status". This is reset with every "command" issued
@@ -134,6 +161,7 @@ via the file system (making new directories or writing to any of the
control files). If the command was successful, it will read as "ok".
If the command failed, it will provide more information that can be
conveyed in the error returns from file operations. E.g.
+::
# echo L3:0=f7 > schemata
bash: echo: write error: Invalid argument
@@ -141,7 +169,7 @@ conveyed in the error returns from file operations. E.g.
mask f7 has non-consecutive 1-bits
Resource alloc and monitor groups
----------------------------------
+=================================
Resource groups are represented as directories in the resctrl file
system. The default group is the root directory which, immediately
@@ -226,6 +254,7 @@ When monitoring is enabled all MON groups will also contain:
Resource allocation rules
-------------------------
+
When a task is running the following rules define which resources are
available to it:
@@ -252,7 +281,7 @@ Resource monitoring rules
Notes on cache occupancy monitoring and control
------------------------------------------------
+===============================================
When moving a task from one group to another you should remember that
this only affects *new* cache allocations by the task. E.g. you may have
a task in a monitor group showing 3 MB of cache occupancy. If you move
@@ -321,7 +350,7 @@ of the capacity of the cache. You could partition the cache into four
equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
Memory bandwidth Allocation and monitoring
-------------------------------------------
+==========================================
For Memory bandwidth resource, by default the user controls the resource
by indicating the percentage of total memory bandwidth.
@@ -369,7 +398,7 @@ In order to mitigate this and make the interface more user friendly,
resctrl added support for specifying the bandwidth in MBps as well. The
kernel underneath would use a software feedback mechanism or a "Software
Controller(mba_sc)" which reads the actual bandwidth using MBM counters
-and adjust the memowy bandwidth percentages to ensure
+and adjust the memowy bandwidth percentages to ensure::
"actual bandwidth < user specified bandwidth".
@@ -380,14 +409,14 @@ sections.
L3 schemata file details (code and data prioritization disabled)
----------------------------------------------------------------
-With CDP disabled the L3 schemata format is:
+With CDP disabled the L3 schemata format is::
L3:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
L3 schemata file details (CDP enabled via mount option to resctrl)
------------------------------------------------------------------
When CDP is enabled L3 control is split into two separate resources
-so you can specify independent masks for code and data like this:
+so you can specify independent masks for code and data like this::
L3data:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
L3code:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
@@ -395,7 +424,7 @@ so you can specify independent masks for code and data like this:
L2 schemata file details
------------------------
L2 cache does not support code and data prioritization, so the
-schemata format is always:
+schemata format is always::
L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
@@ -403,6 +432,7 @@ Memory bandwidth Allocation (default mode)
------------------------------------------
Memory b/w domain is L3 cache.
+::
MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
@@ -410,6 +440,7 @@ Memory bandwidth Allocation specified in MBps
---------------------------------------------
Memory bandwidth domain is L3 cache.
+::
MB:<cache_id0>=bw_MBps0;<cache_id1>=bw_MBps1;...
@@ -418,17 +449,18 @@ Reading/writing the schemata file
Reading the schemata file will show the state of all resources
on all domains. When writing you only need to specify those values
which you wish to change. E.g.
+::
-# cat schemata
-L3DATA:0=fffff;1=fffff;2=fffff;3=fffff
-L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
-# echo "L3DATA:2=3c0;" > schemata
-# cat schemata
-L3DATA:0=fffff;1=fffff;2=3c0;3=fffff
-L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
+ # cat schemata
+ L3DATA:0=fffff;1=fffff;2=fffff;3=fffff
+ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
+ # echo "L3DATA:2=3c0;" > schemata
+ # cat schemata
+ L3DATA:0=fffff;1=fffff;2=3c0;3=fffff
+ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
Cache Pseudo-Locking
---------------------
+====================
CAT enables a user to specify the amount of cache space that an
application can fill. Cache pseudo-locking builds on the fact that a
CPU can still read and write data pre-allocated outside its current
@@ -442,6 +474,7 @@ a region of memory with reduced average read latency.
The creation of a cache pseudo-locked region is triggered by a request
from the user to do so that is accompanied by a schemata of the region
to be pseudo-locked. The cache pseudo-locked region is created as follows:
+
- Create a CAT allocation CLOSNEW with a CBM matching the schemata
from the user of the cache region that will contain the pseudo-locked
memory. This region must not overlap with any current CAT allocation/CLOS
@@ -480,6 +513,7 @@ initial mmap() handling, there is no enforcement afterwards and the
application self needs to ensure it remains affine to the correct cores.
Pseudo-locking is accomplished in two stages:
+
1) During the first stage the system administrator allocates a portion
of cache that should be dedicated to pseudo-locking. At this time an
equivalent portion of memory is allocated, loaded into allocated
@@ -506,7 +540,7 @@ by user space in order to obtain access to the pseudo-locked memory region.
An example of cache pseudo-locked region creation and usage can be found below.
Cache Pseudo-Locking Debugging Interface
----------------------------------------
+----------------------------------------
The pseudo-locking debugging interface is enabled by default (if
CONFIG_DEBUG_FS is enabled) and can be found in /sys/kernel/debug/resctrl.
@@ -514,6 +548,7 @@ There is no explicit way for the kernel to test if a provided memory
location is present in the cache. The pseudo-locking debugging interface uses
the tracing infrastructure to provide two ways to measure cache residency of
the pseudo-locked region:
+
1) Memory access latency using the pseudo_lock_mem_latency tracepoint. Data
from these measurements are best visualized using a hist trigger (see
example below). In this test the pseudo-locked region is traversed at
@@ -529,87 +564,97 @@ it in debugfs as /sys/kernel/debug/resctrl/<newdir>. A single
write-only file, pseudo_lock_measure, is present in this directory. The
measurement of the pseudo-locked region depends on the number written to this
debugfs file:
-1 - writing "1" to the pseudo_lock_measure file will trigger the latency
+
+1:
+ writing "1" to the pseudo_lock_measure file will trigger the latency
measurement captured in the pseudo_lock_mem_latency tracepoint. See
example below.
-2 - writing "2" to the pseudo_lock_measure file will trigger the L2 cache
+2:
+ writing "2" to the pseudo_lock_measure file will trigger the L2 cache
residency (cache hits and misses) measurement captured in the
pseudo_lock_l2 tracepoint. See example below.
-3 - writing "3" to the pseudo_lock_measure file will trigger the L3 cache
+3:
+ writing "3" to the pseudo_lock_measure file will trigger the L3 cache
residency (cache hits and misses) measurement captured in the
pseudo_lock_l3 tracepoint.
All measurements are recorded with the tracing infrastructure. This requires
the relevant tracepoints to be enabled before the measurement is triggered.
-Example of latency debugging interface:
+Example of latency debugging interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In this example a pseudo-locked region named "newlock" was created. Here is
how we can measure the latency in cycles of reading from this region and
visualize this data with a histogram that is available if CONFIG_HIST_TRIGGERS
-is set:
-# :> /sys/kernel/debug/tracing/trace
-# echo 'hist:keys=latency' > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/trigger
-# echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
-# echo 1 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
-# echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
-# cat /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/hist
-
-# event histogram
-#
-# trigger info: hist:keys=latency:vals=hitcount:sort=hitcount:size=2048 [active]
-#
-
-{ latency: 456 } hitcount: 1
-{ latency: 50 } hitcount: 83
-{ latency: 36 } hitcount: 96
-{ latency: 44 } hitcount: 174
-{ latency: 48 } hitcount: 195
-{ latency: 46 } hitcount: 262
-{ latency: 42 } hitcount: 693
-{ latency: 40 } hitcount: 3204
-{ latency: 38 } hitcount: 3484
-
-Totals:
- Hits: 8192
- Entries: 9
- Dropped: 0
-
-Example of cache hits/misses debugging:
+is set::
+
+ # :> /sys/kernel/debug/tracing/trace
+ # echo 'hist:keys=latency' > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/trigger
+ # echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
+ # echo 1 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
+ # echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/enable
+ # cat /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_mem_latency/hist
+
+ # event histogram
+ #
+ # trigger info: hist:keys=latency:vals=hitcount:sort=hitcount:size=2048 [active]
+ #
+
+ { latency: 456 } hitcount: 1
+ { latency: 50 } hitcount: 83
+ { latency: 36 } hitcount: 96
+ { latency: 44 } hitcount: 174
+ { latency: 48 } hitcount: 195
+ { latency: 46 } hitcount: 262
+ { latency: 42 } hitcount: 693
+ { latency: 40 } hitcount: 3204
+ { latency: 38 } hitcount: 3484
+
+ Totals:
+ Hits: 8192
+ Entries: 9
+ Dropped: 0
+
+Example of cache hits/misses debugging
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In this example a pseudo-locked region named "newlock" was created on the L2
cache of a platform. Here is how we can obtain details of the cache hits
and misses using the platform's precision counters.
+::
-# :> /sys/kernel/debug/tracing/trace
-# echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
-# echo 2 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
-# echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
-# cat /sys/kernel/debug/tracing/trace
+ # :> /sys/kernel/debug/tracing/trace
+ # echo 1 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
+ # echo 2 > /sys/kernel/debug/resctrl/newlock/pseudo_lock_measure
+ # echo 0 > /sys/kernel/debug/tracing/events/resctrl/pseudo_lock_l2/enable
+ # cat /sys/kernel/debug/tracing/trace
-# tracer: nop
-#
-# _-----=> irqs-off
-# / _----=> need-resched
-# | / _---=> hardirq/softirq
-# || / _--=> preempt-depth
-# ||| / delay
-# TASK-PID CPU# |||| TIMESTAMP FUNCTION
-# | | | |||| | |
- pseudo_lock_mea-1672 [002] .... 3132.860500: pseudo_lock_l2: hits=4097 miss=0
+ # tracer: nop
+ #
+ # _-----=> irqs-off
+ # / _----=> need-resched
+ # | / _---=> hardirq/softirq
+ # || / _--=> preempt-depth
+ # ||| / delay
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
+ # | | | |||| | |
+ pseudo_lock_mea-1672 [002] .... 3132.860500: pseudo_lock_l2: hits=4097 miss=0
-Examples for RDT allocation usage:
+Examples for RDT allocation usage
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1) Example 1
-Example 1
----------
On a two socket machine (one L3 cache per socket) with just four bits
for cache bit masks, minimum b/w of 10% with a memory bandwidth
-granularity of 10%
+granularity of 10%.
+::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
-# mkdir p0 p1
-# echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
+ # mkdir p0 p1
+ # echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
+ # echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
The default resource group is unmodified, so we have access to all parts
of all caches (its schemata file reads "L3:0=f;1=f").
@@ -628,100 +673,106 @@ the b/w accordingly.
If the MBA is specified in MB(megabytes) then user can enter the max b/w in MB
rather than the percentage values.
+::
-# echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata
+ # echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
+ # echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata
In the above example the tasks in "p1" and "p0" on socket 0 would use a max b/w
of 1024MB where as on socket 1 they would use 500MB.
-Example 2
----------
+2) Example 2
+
Again two sockets, but this time with a more realistic 20-bit mask.
Two real time tasks pid=1234 running on processor 0 and pid=5678 running on
processor 1 on socket 0 on a 2-socket and dual core machine. To avoid noisy
neighbors, each of the two real-time tasks exclusively occupies one quarter
of L3 cache on socket 0.
+::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
50% of the L3 cache on socket 0 and 50% of memory b/w cannot be used by
-ordinary tasks:
+ordinary tasks::
-# echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
+ # echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
Next we make a resource group for our first real time task and give
it access to the "top" 25% of the cache on socket 0.
+::
-# mkdir p0
-# echo "L3:0=f8000;1=fffff" > p0/schemata
+ # mkdir p0
+ # echo "L3:0=f8000;1=fffff" > p0/schemata
Finally we move our first real time task into this resource group. We
also use taskset(1) to ensure the task always runs on a dedicated CPU
on socket 0. Most uses of resource groups will also constrain which
processors tasks run on.
+::
-# echo 1234 > p0/tasks
-# taskset -cp 1 1234
+ # echo 1234 > p0/tasks
+ # taskset -cp 1 1234
-Ditto for the second real time task (with the remaining 25% of cache):
+Ditto for the second real time task (with the remaining 25% of cache)::
-# mkdir p1
-# echo "L3:0=7c00;1=fffff" > p1/schemata
-# echo 5678 > p1/tasks
-# taskset -cp 2 5678
+ # mkdir p1
+ # echo "L3:0=7c00;1=fffff" > p1/schemata
+ # echo 5678 > p1/tasks
+ # taskset -cp 2 5678
For the same 2 socket system with memory b/w resource and CAT L3 the
schemata would look like(Assume min_bandwidth 10 and bandwidth_gran is
10):
-For our first real time task this would request 20% memory b/w on socket
-0.
+For our first real time task this would request 20% memory b/w on socket 0.
+::
-# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+ # echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
For our second real time task this would request an other 20% memory b/w
on socket 0.
+::
-# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+ # echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
-Example 3
----------
+3) Example 3
A single socket system which has real-time tasks running on core 4-7 and
non real-time workload assigned to core 0-3. The real-time tasks share text
and data, so a per task association is not required and due to interaction
with the kernel it's desired that the kernel on these cores shares L3 with
the tasks.
+::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
50% of the L3 cache on socket 0, and 50% of memory bandwidth on socket 0
-cannot be used by ordinary tasks:
+cannot be used by ordinary tasks::
-# echo "L3:0=3ff\nMB:0=50" > schemata
+ # echo "L3:0=3ff\nMB:0=50" > schemata
Next we make a resource group for our real time cores and give it access
to the "top" 50% of the cache on socket 0 and 50% of memory bandwidth on
socket 0.
+::
-# mkdir p0
-# echo "L3:0=ffc00\nMB:0=50" > p0/schemata
+ # mkdir p0
+ # echo "L3:0=ffc00\nMB:0=50" > p0/schemata
Finally we move core 4-7 over to the new group and make sure that the
kernel and the tasks running there get 50% of the cache. They should
also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
siblings and only the real time threads are scheduled on the cores 4-7.
+::
-# echo F0 > p0/cpus
+ # echo F0 > p0/cpus
-Example 4
----------
+4) Example 4
The resource groups in previous examples were all in the default "shareable"
mode allowing sharing of their cache allocations. If one resource group
@@ -732,157 +783,168 @@ In this example a new exclusive resource group will be created on a L2 CAT
system with two L2 cache instances that can be configured with an 8-bit
capacity bitmask. The new exclusive resource group will be configured to use
25% of each cache instance.
+::
-# mount -t resctrl resctrl /sys/fs/resctrl/
-# cd /sys/fs/resctrl
+ # mount -t resctrl resctrl /sys/fs/resctrl/
+ # cd /sys/fs/resctrl
First, we observe that the default group is configured to allocate to all L2
-cache:
+cache::
-# cat schemata
-L2:0=ff;1=ff
+ # cat schemata
+ L2:0=ff;1=ff
We could attempt to create the new resource group at this point, but it will
-fail because of the overlap with the schemata of the default group:
-# mkdir p0
-# echo 'L2:0=0x3;1=0x3' > p0/schemata
-# cat p0/mode
-shareable
-# echo exclusive > p0/mode
--sh: echo: write error: Invalid argument
-# cat info/last_cmd_status
-schemata overlaps
+fail because of the overlap with the schemata of the default group::
+
+ # mkdir p0
+ # echo 'L2:0=0x3;1=0x3' > p0/schemata
+ # cat p0/mode
+ shareable
+ # echo exclusive > p0/mode
+ -sh: echo: write error: Invalid argument
+ # cat info/last_cmd_status
+ schemata overlaps
To ensure that there is no overlap with another resource group the default
resource group's schemata has to change, making it possible for the new
resource group to become exclusive.
-# echo 'L2:0=0xfc;1=0xfc' > schemata
-# echo exclusive > p0/mode
-# grep . p0/*
-p0/cpus:0
-p0/mode:exclusive
-p0/schemata:L2:0=03;1=03
-p0/size:L2:0=262144;1=262144
+::
+
+ # echo 'L2:0=0xfc;1=0xfc' > schemata
+ # echo exclusive > p0/mode
+ # grep . p0/*
+ p0/cpus:0
+ p0/mode:exclusive
+ p0/schemata:L2:0=03;1=03
+ p0/size:L2:0=262144;1=262144
A new resource group will on creation not overlap with an exclusive resource
-group:
-# mkdir p1
-# grep . p1/*
-p1/cpus:0
-p1/mode:shareable
-p1/schemata:L2:0=fc;1=fc
-p1/size:L2:0=786432;1=786432
-
-The bit_usage will reflect how the cache is used:
-# cat info/L2/bit_usage
-0=SSSSSSEE;1=SSSSSSEE
-
-A resource group cannot be forced to overlap with an exclusive resource group:
-# echo 'L2:0=0x1;1=0x1' > p1/schemata
--sh: echo: write error: Invalid argument
-# cat info/last_cmd_status
-overlaps with exclusive group
+group::
+
+ # mkdir p1
+ # grep . p1/*
+ p1/cpus:0
+ p1/mode:shareable
+ p1/schemata:L2:0=fc;1=fc
+ p1/size:L2:0=786432;1=786432
+
+The bit_usage will reflect how the cache is used::
+
+ # cat info/L2/bit_usage
+ 0=SSSSSSEE;1=SSSSSSEE
+
+A resource group cannot be forced to overlap with an exclusive resource group::
+
+ # echo 'L2:0=0x1;1=0x1' > p1/schemata
+ -sh: echo: write error: Invalid argument
+ # cat info/last_cmd_status
+ overlaps with exclusive group
Example of Cache Pseudo-Locking
--------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Lock portion of L2 cache from cache id 1 using CBM 0x3. Pseudo-locked
region is exposed at /dev/pseudo_lock/newlock that can be provided to
application for argument to mmap().
+::
-# mount -t resctrl resctrl /sys/fs/resctrl/
-# cd /sys/fs/resctrl
+ # mount -t resctrl resctrl /sys/fs/resctrl/
+ # cd /sys/fs/resctrl
Ensure that there are bits available that can be pseudo-locked, since only
unused bits can be pseudo-locked the bits to be pseudo-locked needs to be
-removed from the default resource group's schemata:
-# cat info/L2/bit_usage
-0=SSSSSSSS;1=SSSSSSSS
-# echo 'L2:1=0xfc' > schemata
-# cat info/L2/bit_usage
-0=SSSSSSSS;1=SSSSSS00
+removed from the default resource group's schemata::
+
+ # cat info/L2/bit_usage
+ 0=SSSSSSSS;1=SSSSSSSS
+ # echo 'L2:1=0xfc' > schemata
+ # cat info/L2/bit_usage
+ 0=SSSSSSSS;1=SSSSSS00
Create a new resource group that will be associated with the pseudo-locked
region, indicate that it will be used for a pseudo-locked region, and
-configure the requested pseudo-locked region capacity bitmask:
+configure the requested pseudo-locked region capacity bitmask::
-# mkdir newlock
-# echo pseudo-locksetup > newlock/mode
-# echo 'L2:1=0x3' > newlock/schemata
+ # mkdir newlock
+ # echo pseudo-locksetup > newlock/mode
+ # echo 'L2:1=0x3' > newlock/schemata
On success the resource group's mode will change to pseudo-locked, the
bit_usage will reflect the pseudo-locked region, and the character device
-exposing the pseudo-locked region will exist:
-
-# cat newlock/mode
-pseudo-locked
-# cat info/L2/bit_usage
-0=SSSSSSSS;1=SSSSSSPP
-# ls -l /dev/pseudo_lock/newlock
-crw------- 1 root root 243, 0 Apr 3 05:01 /dev/pseudo_lock/newlock
-
-/*
- * Example code to access one page of pseudo-locked cache region
- * from user space.
- */
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-/*
- * It is required that the application runs with affinity to only
- * cores associated with the pseudo-locked region. Here the cpu
- * is hardcoded for convenience of example.
- */
-static int cpuid = 2;
-
-int main(int argc, char *argv[])
-{
- cpu_set_t cpuset;
- long page_size;
- void *mapping;
- int dev_fd;
- int ret;
-
- page_size = sysconf(_SC_PAGESIZE);
-
- CPU_ZERO(&cpuset);
- CPU_SET(cpuid, &cpuset);
- ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
- if (ret < 0) {
- perror("sched_setaffinity");
- exit(EXIT_FAILURE);
- }
-
- dev_fd = open("/dev/pseudo_lock/newlock", O_RDWR);
- if (dev_fd < 0) {
- perror("open");
- exit(EXIT_FAILURE);
- }
-
- mapping = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
- dev_fd, 0);
- if (mapping == MAP_FAILED) {
- perror("mmap");
- close(dev_fd);
- exit(EXIT_FAILURE);
- }
-
- /* Application interacts with pseudo-locked memory @mapping */
-
- ret = munmap(mapping, page_size);
- if (ret < 0) {
- perror("munmap");
- close(dev_fd);
- exit(EXIT_FAILURE);
- }
-
- close(dev_fd);
- exit(EXIT_SUCCESS);
-}
+exposing the pseudo-locked region will exist::
+
+ # cat newlock/mode
+ pseudo-locked
+ # cat info/L2/bit_usage
+ 0=SSSSSSSS;1=SSSSSSPP
+ # ls -l /dev/pseudo_lock/newlock
+ crw------- 1 root root 243, 0 Apr 3 05:01 /dev/pseudo_lock/newlock
+
+::
+
+ /*
+ * Example code to access one page of pseudo-locked cache region
+ * from user space.
+ */
+ #define _GNU_SOURCE
+ #include <fcntl.h>
+ #include <sched.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <sys/mman.h>
+
+ /*
+ * It is required that the application runs with affinity to only
+ * cores associated with the pseudo-locked region. Here the cpu
+ * is hardcoded for convenience of example.
+ */
+ static int cpuid = 2;
+
+ int main(int argc, char *argv[])
+ {
+ cpu_set_t cpuset;
+ long page_size;
+ void *mapping;
+ int dev_fd;
+ int ret;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpuid, &cpuset);
+ ret = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ if (ret < 0) {
+ perror("sched_setaffinity");
+ exit(EXIT_FAILURE);
+ }
+
+ dev_fd = open("/dev/pseudo_lock/newlock", O_RDWR);
+ if (dev_fd < 0) {
+ perror("open");
+ exit(EXIT_FAILURE);
+ }
+
+ mapping = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ dev_fd, 0);
+ if (mapping == MAP_FAILED) {
+ perror("mmap");
+ close(dev_fd);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Application interacts with pseudo-locked memory @mapping */
+
+ ret = munmap(mapping, page_size);
+ if (ret < 0) {
+ perror("munmap");
+ close(dev_fd);
+ exit(EXIT_FAILURE);
+ }
+
+ close(dev_fd);
+ exit(EXIT_SUCCESS);
+ }
Locking between applications
----------------------------
@@ -921,86 +983,86 @@ Read lock:
B) If success read the directory structure.
C) funlock
-Example with bash:
-
-# Atomically read directory structure
-$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
-
-# Read directory contents and create new subdirectory
-
-$ cat create-dir.sh
-find /sys/fs/resctrl/ > output.txt
-mask = function-of(output.txt)
-mkdir /sys/fs/resctrl/newres/
-echo mask > /sys/fs/resctrl/newres/schemata
-
-$ flock /sys/fs/resctrl/ ./create-dir.sh
-
-Example with C:
-
-/*
- * Example code do take advisory locks
- * before accessing resctrl filesystem
- */
-#include <sys/file.h>
-#include <stdlib.h>
-
-void resctrl_take_shared_lock(int fd)
-{
- int ret;
-
- /* take shared lock on resctrl filesystem */
- ret = flock(fd, LOCK_SH);
- if (ret) {
- perror("flock");
- exit(-1);
- }
-}
-
-void resctrl_take_exclusive_lock(int fd)
-{
- int ret;
-
- /* release lock on resctrl filesystem */
- ret = flock(fd, LOCK_EX);
- if (ret) {
- perror("flock");
- exit(-1);
- }
-}
-
-void resctrl_release_lock(int fd)
-{
- int ret;
-
- /* take shared lock on resctrl filesystem */
- ret = flock(fd, LOCK_UN);
- if (ret) {
- perror("flock");
- exit(-1);
- }
-}
-
-void main(void)
-{
- int fd, ret;
-
- fd = open("/sys/fs/resctrl", O_DIRECTORY);
- if (fd == -1) {
- perror("open");
- exit(-1);
- }
- resctrl_take_shared_lock(fd);
- /* code to read directory contents */
- resctrl_release_lock(fd);
-
- resctrl_take_exclusive_lock(fd);
- /* code to read and write directory contents */
- resctrl_release_lock(fd);
-}
-
-Examples for RDT Monitoring along with allocation usage:
-
+Example with bash::
+
+ # Atomically read directory structure
+ $ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl
+
+ # Read directory contents and create new subdirectory
+
+ $ cat create-dir.sh
+ find /sys/fs/resctrl/ > output.txt
+ mask = function-of(output.txt)
+ mkdir /sys/fs/resctrl/newres/
+ echo mask > /sys/fs/resctrl/newres/schemata
+
+ $ flock /sys/fs/resctrl/ ./create-dir.sh
+
+Example with C::
+
+ /*
+ * Example code do take advisory locks
+ * before accessing resctrl filesystem
+ */
+ #include <sys/file.h>
+ #include <stdlib.h>
+
+ void resctrl_take_shared_lock(int fd)
+ {
+ int ret;
+
+ /* take shared lock on resctrl filesystem */
+ ret = flock(fd, LOCK_SH);
+ if (ret) {
+ perror("flock");
+ exit(-1);
+ }
+ }
+
+ void resctrl_take_exclusive_lock(int fd)
+ {
+ int ret;
+
+ /* release lock on resctrl filesystem */
+ ret = flock(fd, LOCK_EX);
+ if (ret) {
+ perror("flock");
+ exit(-1);
+ }
+ }
+
+ void resctrl_release_lock(int fd)
+ {
+ int ret;
+
+ /* take shared lock on resctrl filesystem */
+ ret = flock(fd, LOCK_UN);
+ if (ret) {
+ perror("flock");
+ exit(-1);
+ }
+ }
+
+ void main(void)
+ {
+ int fd, ret;
+
+ fd = open("/sys/fs/resctrl", O_DIRECTORY);
+ if (fd == -1) {
+ perror("open");
+ exit(-1);
+ }
+ resctrl_take_shared_lock(fd);
+ /* code to read directory contents */
+ resctrl_release_lock(fd);
+
+ resctrl_take_exclusive_lock(fd);
+ /* code to read and write directory contents */
+ resctrl_release_lock(fd);
+ }
+
+Examples for RDT Monitoring along with allocation usage
+=======================================================
Reading monitored data
----------------------
Reading an event file (for ex: mon_data/mon_L3_00/llc_occupancy) would
@@ -1009,17 +1071,17 @@ group or CTRL_MON group.
Example 1 (Monitor CTRL_MON group and subset of tasks in CTRL_MON group)
----------
+------------------------------------------------------------------------
On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks
+for cache bit masks::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
-# mkdir p0 p1
-# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
-# echo 5678 > p1/tasks
-# echo 5679 > p1/tasks
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
+ # mkdir p0 p1
+ # echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
+ # echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
+ # echo 5678 > p1/tasks
+ # echo 5679 > p1/tasks
The default resource group is unmodified, so we have access to all parts
of all caches (its schemata file reads "L3:0=f;1=f").
@@ -1029,47 +1091,51 @@ Tasks that are under the control of group "p0" may only allocate from the
Tasks in group "p1" use the "lower" 50% of cache on both sockets.
Create monitor groups and assign a subset of tasks to each monitor group.
+::
-# cd /sys/fs/resctrl/p1/mon_groups
-# mkdir m11 m12
-# echo 5678 > m11/tasks
-# echo 5679 > m12/tasks
+ # cd /sys/fs/resctrl/p1/mon_groups
+ # mkdir m11 m12
+ # echo 5678 > m11/tasks
+ # echo 5679 > m12/tasks
fetch data (data shown in bytes)
+::
-# cat m11/mon_data/mon_L3_00/llc_occupancy
-16234000
-# cat m11/mon_data/mon_L3_01/llc_occupancy
-14789000
-# cat m12/mon_data/mon_L3_00/llc_occupancy
-16789000
+ # cat m11/mon_data/mon_L3_00/llc_occupancy
+ 16234000
+ # cat m11/mon_data/mon_L3_01/llc_occupancy
+ 14789000
+ # cat m12/mon_data/mon_L3_00/llc_occupancy
+ 16789000
The parent ctrl_mon group shows the aggregated data.
+::
-# cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
-31234000
+ # cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
+ 31234000
Example 2 (Monitor a task from its creation)
----------
-On a two socket machine (one L3 cache per socket)
+--------------------------------------------
+On a two socket machine (one L3 cache per socket)::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
-# mkdir p0 p1
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
+ # mkdir p0 p1
An RMID is allocated to the group once its created and hence the <cmd>
below is monitored from its creation.
+::
-# echo $$ > /sys/fs/resctrl/p1/tasks
-# <cmd>
+ # echo $$ > /sys/fs/resctrl/p1/tasks
+ # <cmd>
-Fetch the data
+Fetch the data::
-# cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
-31789000
+ # cat /sys/fs/resctrl/p1/mon_data/mon_l3_00/llc_occupancy
+ 31789000
Example 3 (Monitor without CAT support or before creating CAT groups)
----------
+---------------------------------------------------------------------
Assume a system like HSW has only CQM and no CAT support. In this case
the resctrl will still mount but cannot create CTRL_MON directories.
@@ -1078,27 +1144,29 @@ able to monitor all tasks including kernel threads.
This can also be used to profile jobs cache size footprint before being
able to allocate them to different allocation groups.
+::
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
-# mkdir mon_groups/m01
-# mkdir mon_groups/m02
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
+ # mkdir mon_groups/m01
+ # mkdir mon_groups/m02
-# echo 3478 > /sys/fs/resctrl/mon_groups/m01/tasks
-# echo 2467 > /sys/fs/resctrl/mon_groups/m02/tasks
+ # echo 3478 > /sys/fs/resctrl/mon_groups/m01/tasks
+ # echo 2467 > /sys/fs/resctrl/mon_groups/m02/tasks
Monitor the groups separately and also get per domain data. From the
below its apparent that the tasks are mostly doing work on
domain(socket) 0.
+::
-# cat /sys/fs/resctrl/mon_groups/m01/mon_L3_00/llc_occupancy
-31234000
-# cat /sys/fs/resctrl/mon_groups/m01/mon_L3_01/llc_occupancy
-34555
-# cat /sys/fs/resctrl/mon_groups/m02/mon_L3_00/llc_occupancy
-31234000
-# cat /sys/fs/resctrl/mon_groups/m02/mon_L3_01/llc_occupancy
-32789
+ # cat /sys/fs/resctrl/mon_groups/m01/mon_L3_00/llc_occupancy
+ 31234000
+ # cat /sys/fs/resctrl/mon_groups/m01/mon_L3_01/llc_occupancy
+ 34555
+ # cat /sys/fs/resctrl/mon_groups/m02/mon_L3_00/llc_occupancy
+ 31234000
+ # cat /sys/fs/resctrl/mon_groups/m02/mon_L3_01/llc_occupancy
+ 32789
Example 4 (Monitor real time tasks)
@@ -1107,15 +1175,17 @@ Example 4 (Monitor real time tasks)
A single socket system which has real time tasks running on cores 4-7
and non real time tasks on other cpus. We want to monitor the cache
occupancy of the real time threads on these cores.
+::
+
+ # mount -t resctrl resctrl /sys/fs/resctrl
+ # cd /sys/fs/resctrl
+ # mkdir p1
-# mount -t resctrl resctrl /sys/fs/resctrl
-# cd /sys/fs/resctrl
-# mkdir p1
+Move the cpus 4-7 over to p1::
-Move the cpus 4-7 over to p1
-# echo f0 > p1/cpus
+ # echo f0 > p1/cpus
-View the llc occupancy snapshot
+View the llc occupancy snapshot::
-# cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
-11234000
+ # cat /sys/fs/resctrl/p1/mon_data/mon_L3_00/llc_occupancy
+ 11234000
diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.rst
index 6a0607b99ed8..82ec58ae63a8 100644
--- a/Documentation/x86/tlb.txt
+++ b/Documentation/x86/tlb.rst
@@ -1,5 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+The TLB
+=======
+
When the kernel unmaps or modified the attributes of a range of
memory, it has two choices:
+
1. Flush the entire TLB with a two-instruction sequence. This is
a quick operation, but it causes collateral damage: TLB entries
from areas other than the one we are trying to flush will be
@@ -10,6 +17,7 @@ memory, it has two choices:
damage to other TLB entries.
Which method to do depends on a few things:
+
1. The size of the flush being performed. A flush of the entire
address space is obviously better performed by flushing the
entire TLB than doing 2^48/PAGE_SIZE individual flushes.
@@ -33,7 +41,7 @@ well. There is essentially no "right" point to choose.
You may be doing too many individual invalidations if you see the
invlpg instruction (or instructions _near_ it) show up high in
profiles. If you believe that individual invalidations being
-called too often, you can lower the tunable:
+called too often, you can lower the tunable::
/sys/kernel/debug/x86/tlb_single_page_flush_ceiling
@@ -43,7 +51,7 @@ Setting it to 1 is a very conservative setting and it should
never need to be 0 under normal circumstances.
Despite the fact that a single individual flush on x86 is
-guaranteed to flush a full 2MB [1], hugetlbfs always uses the full
+guaranteed to flush a full 2MB [1]_, hugetlbfs always uses the full
flushes. THP is treated exactly the same as normal memory.
You might see invlpg inside of flush_tlb_mm_range() show up in
@@ -54,15 +62,15 @@ Essentially, you are balancing the cycles you spend doing invlpg
with the cycles that you spend refilling the TLB later.
You can measure how expensive TLB refills are by using
-performance counters and 'perf stat', like this:
+performance counters and 'perf stat', like this::
-perf stat -e
- cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
- cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
- cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
- cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
- cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
- cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
+ perf stat -e
+ cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
+ cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
+ cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
+ cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
+ cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
+ cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
That works on an IvyBridge-era CPU (i5-3320M). Different CPUs
may have differently-named counters, but they should at least
@@ -70,6 +78,6 @@ be there in some form. You can use pmu-tools 'ocperf list'
(https://github.com/andikleen/pmu-tools) to find the right
counters for a given CPU.
-1. A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
+.. [1] A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
says: "One execution of INVLPG is sufficient even for a page
with size greater than 4 KBytes."
diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.rst
index 06b3cdbc4048..6e28dbe818ab 100644
--- a/Documentation/x86/topology.txt
+++ b/Documentation/x86/topology.rst
@@ -1,3 +1,6 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
x86 Topology
============
@@ -33,14 +36,14 @@ The topology of a system is described in the units of:
- cores
- threads
-* Package:
-
- Packages contain a number of cores plus shared resources, e.g. DRAM
- controller, shared caches etc.
+Package
+=======
+Packages contain a number of cores plus shared resources, e.g. DRAM
+controller, shared caches etc.
- AMD nomenclature for package is 'Node'.
+AMD nomenclature for package is 'Node'.
- Package-related topology information in the kernel:
+Package-related topology information in the kernel:
- cpuinfo_x86.x86_max_cores:
@@ -66,40 +69,41 @@ The topology of a system is described in the units of:
- cpu_llc_id:
A per-CPU variable containing:
- - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
- Cache
- - On AMD, the Node ID or Core Complex ID containing the Last Level
- Cache. In general, it is a number identifying an LLC uniquely on the
- system.
+ - On Intel, the first APIC ID of the list of CPUs sharing the Last Level
+ Cache
-* Cores:
+ - On AMD, the Node ID or Core Complex ID containing the Last Level
+ Cache. In general, it is a number identifying an LLC uniquely on the
+ system.
- A core consists of 1 or more threads. It does not matter whether the threads
- are SMT- or CMT-type threads.
+Cores
+=====
+A core consists of 1 or more threads. It does not matter whether the threads
+are SMT- or CMT-type threads.
- AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
- "core".
+AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses
+"core".
- Core-related topology information in the kernel:
+Core-related topology information in the kernel:
- smp_num_siblings:
The number of threads in a core. The number of threads in a package can be
- calculated by:
+ calculated by::
threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
-* Threads:
+Threads
+=======
+A thread is a single scheduling unit. It's the equivalent to a logical Linux
+CPU.
- A thread is a single scheduling unit. It's the equivalent to a logical Linux
- CPU.
+AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always
+uses "thread".
- AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always
- uses "thread".
-
- Thread-related topology information in the kernel:
+Thread-related topology information in the kernel:
- topology_core_cpumask():
@@ -113,15 +117,15 @@ The topology of a system is described in the units of:
The cpumask contains all online threads in the core to which a thread
belongs.
- - topology_logical_package_id():
+ - topology_logical_package_id():
The logical package ID to which a thread belongs.
- - topology_physical_package_id():
+ - topology_physical_package_id():
The physical package ID to which a thread belongs.
- - topology_core_id();
+ - topology_core_id();
The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
"core_id."
@@ -129,41 +133,41 @@ The topology of a system is described in the units of:
System topology examples
+========================
-Note:
-
-The alternative Linux CPU enumeration depends on how the BIOS enumerates the
-threads. Many BIOSes enumerate all threads 0 first and then all threads 1.
-That has the "advantage" that the logical Linux CPU numbers of threads 0 stay
-the same whether threads are enabled or not. That's merely an implementation
-detail and has no practical impact.
+.. note::
+ The alternative Linux CPU enumeration depends on how the BIOS enumerates the
+ threads. Many BIOSes enumerate all threads 0 first and then all threads 1.
+ That has the "advantage" that the logical Linux CPU numbers of threads 0 stay
+ the same whether threads are enabled or not. That's merely an implementation
+ detail and has no practical impact.
-1) Single Package, Single Core
+1) Single Package, Single Core::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
2) Single Package, Dual Core
- a) One thread per core
+ a) One thread per core::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [core 1] -> [thread 0] -> Linux CPU 1
- b) Two threads per core
+ b) Two threads per core::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [thread 1] -> Linux CPU 1
-> [core 1] -> [thread 0] -> Linux CPU 2
-> [thread 1] -> Linux CPU 3
- Alternative enumeration:
+ Alternative enumeration::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [thread 1] -> Linux CPU 2
-> [core 1] -> [thread 0] -> Linux CPU 1
-> [thread 1] -> Linux CPU 3
- AMD nomenclature for CMT systems:
+ AMD nomenclature for CMT systems::
[node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
-> [Compute Unit Core 1] -> Linux CPU 1
@@ -172,7 +176,7 @@ detail and has no practical impact.
4) Dual Package, Dual Core
- a) One thread per core
+ a) One thread per core::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [core 1] -> [thread 0] -> Linux CPU 1
@@ -180,7 +184,7 @@ detail and has no practical impact.
[package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
-> [core 1] -> [thread 0] -> Linux CPU 3
- b) Two threads per core
+ b) Two threads per core::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [thread 1] -> Linux CPU 1
@@ -192,7 +196,7 @@ detail and has no practical impact.
-> [core 1] -> [thread 0] -> Linux CPU 6
-> [thread 1] -> Linux CPU 7
- Alternative enumeration:
+ Alternative enumeration::
[package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
-> [thread 1] -> Linux CPU 4
@@ -204,7 +208,7 @@ detail and has no practical impact.
-> [core 1] -> [thread 0] -> Linux CPU 3
-> [thread 1] -> Linux CPU 7
- AMD nomenclature for CMT systems:
+ AMD nomenclature for CMT systems::
[node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
-> [Compute Unit Core 1] -> Linux CPU 1
diff --git a/Documentation/x86/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.rst
index 1894cdfc69d9..e01c08b7c981 100644
--- a/Documentation/x86/usb-legacy-support.txt
+++ b/Documentation/x86/usb-legacy-support.rst
@@ -1,7 +1,11 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
USB Legacy support
-~~~~~~~~~~~~~~~~~~
+==================
-Vojtech Pavlik <vojtech@suse.cz>, January 2004
+:Author: Vojtech Pavlik <vojtech@suse.cz>, January 2004
Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a
@@ -27,18 +31,20 @@ It has several drawbacks, though:
Solutions:
-Problem 1) can be solved by loading the USB drivers prior to loading the
-PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into
-the kernel unconditionally, this means the USB drivers need to be
-compiled-in, too.
-
-Problem 2) can currently only be solved by either disabling HIGHMEM64G
-in the kernel config or USB Legacy support in the BIOS. A BIOS update
-could help, but so far no such update exists.
-
-Problem 3) is usually fixed by a BIOS update. Check the board
-manufacturers web site. If an update is not available, disable USB
-Legacy support in the BIOS. If this alone doesn't help, try also adding
-idle=poll on the kernel command line. The BIOS may be entering the SMM
-on the HLT instruction as well.
-
+Problem 1)
+ can be solved by loading the USB drivers prior to loading the
+ PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into
+ the kernel unconditionally, this means the USB drivers need to be
+ compiled-in, too.
+
+Problem 2)
+ can currently only be solved by either disabling HIGHMEM64G
+ in the kernel config or USB Legacy support in the BIOS. A BIOS update
+ could help, but so far no such update exists.
+
+Problem 3)
+ is usually fixed by a BIOS update. Check the board
+ manufacturers web site. If an update is not available, disable USB
+ Legacy support in the BIOS. If this alone doesn't help, try also adding
+ idle=poll on the kernel command line. The BIOS may be entering the SMM
+ on the HLT instruction as well.
diff --git a/Documentation/x86/x86_64/5level-paging.txt b/Documentation/x86/x86_64/5level-paging.rst
index 2432a5ef86d9..ab88a4514163 100644
--- a/Documentation/x86/x86_64/5level-paging.txt
+++ b/Documentation/x86/x86_64/5level-paging.rst
@@ -1,5 +1,11 @@
-== Overview ==
+.. SPDX-License-Identifier: GPL-2.0
+==============
+5-level paging
+==============
+
+Overview
+========
Original x86-64 was limited by 4-level paing to 256 TiB of virtual address
space and 64 TiB of physical address space. We are already bumping into
this limit: some vendors offers servers with 64 TiB of memory today.
@@ -16,16 +22,17 @@ QEMU 2.9 and later support 5-level paging.
Virtual memory layout for 5-level paging is described in
Documentation/x86/x86_64/mm.txt
-== Enabling 5-level paging ==
+Enabling 5-level paging
+=======================
CONFIG_X86_5LEVEL=y enables the feature.
Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
In this case additional page table level -- p4d -- will be folded at
runtime.
-== User-space and large virtual address space ==
-
+User-space and large virtual address space
+==========================================
On x86, 5-level paging enables 56-bit userspace virtual address space.
Not all user space is ready to handle wide addresses. It's known that
at least some JIT compilers use higher bits in pointers to encode their
@@ -58,4 +65,3 @@ One important case we need to handle here is interaction with MPX.
MPX (without MAWA extension) cannot handle addresses above 47-bit, so we
need to make sure that MPX cannot be enabled we already have VMA above
the boundary and forbid creating such VMAs once MPX is enabled.
-
diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
new file mode 100644
index 000000000000..2f69836b8445
--- /dev/null
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -0,0 +1,335 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+AMD64 Specific Boot Options
+===========================
+
+There are many others (usually documented in driver documentation), but
+only the AMD64 specific ones are listed here.
+
+Machine check
+=============
+Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
+ mce=off
+ Disable machine check
+ mce=no_cmci
+ Disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your hardware
+ is misbehaving.
+ Note that you'll get more problems without CMCI than with
+ due to the shared banks, i.e. you might get duplicated
+ error logs.
+ mce=dont_log_ce
+ Don't make logs for corrected errors. All events reported
+ as corrected are silently cleared by OS.
+ This option will be useful if you have no interest in any
+ of corrected errors.
+ mce=ignore_ce
+ Disable features for corrected errors, e.g. polling timer
+ and CMCI. All events reported as corrected are not cleared
+ by OS and remained in its error banks.
+ Usually this disablement is not recommended, however if
+ there is an agent checking/clearing corrected errors
+ (e.g. BIOS or hardware monitoring applications), conflicting
+ with OS's error handling, and you cannot deactivate the agent,
+ then this option will be a help.
+ mce=no_lmce
+ Do not opt-in to Local MCE delivery. Use legacy method
+ to broadcast MCEs.
+ mce=bootlog
+ Enable logging of machine checks left over from booting.
+ Disabled by default on AMD Fam10h and older because some BIOS
+ leave bogus ones.
+ If your BIOS doesn't do that it's a good idea to enable though
+ to make sure you log even machine check events that result
+ in a reboot. On Intel systems it is enabled by default.
+ mce=nobootlog
+ Disable boot machine check logging.
+ mce=tolerancelevel[,monarchtimeout] (number,number)
+ tolerance levels:
+ 0: always panic on uncorrected errors, log corrected errors
+ 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ 2: SIGBUS or log uncorrected errors, log corrected errors
+ 3: never panic or SIGBUS, log all errors (for testing only)
+ Default is 1
+ Can be also set using sysfs which is preferable.
+ monarchtimeout:
+ Sets the time in us to wait for other CPUs on machine checks. 0
+ to disable.
+ mce=bios_cmci_threshold
+ Don't overwrite the bios-set CMCI threshold. This boot option
+ prevents Linux from overwriting the CMCI threshold set by the
+ bios. Without this option, Linux always sets the CMCI
+ threshold to 1. Enabling this may make memory predictive failure
+ analysis less effective if the bios sets thresholds for memory
+ errors since we will not see details for all errors.
+ mce=recovery
+ Force-enable recoverable machine check code paths
+
+ nomce (for compatibility with i386)
+ same as mce=off
+
+ Everything else is in sysfs now.
+
+APICs
+=====
+
+ apic
+ Use IO-APIC. Default
+
+ noapic
+ Don't use the IO-APIC.
+
+ disableapic
+ Don't use the local APIC
+
+ nolapic
+ Don't use the local APIC (alias for i386 compatibility)
+
+ pirq=...
+ See Documentation/x86/i386/IO-APIC.txt
+
+ noapictimer
+ Don't set up the APIC timer
+
+ no_timer_check
+ Don't check the IO-APIC timer. This can work around
+ problems with incorrect timer initialization on some boards.
+
+ apicpmtimer
+ Do APIC timer calibration using the pmtimer. Implies
+ apicmaintimer. Useful when your PIT timer is totally broken.
+
+Timing
+======
+
+ notsc
+ Deprecated, use tsc=unstable instead.
+
+ nohpet
+ Don't use the HPET timer.
+
+Idle loop
+=========
+
+ idle=poll
+ Don't do power saving in the idle loop using HLT, but poll for rescheduling
+ event. This will make the CPUs eat a lot more power, but may be useful
+ to get slightly better performance in multiprocessor benchmarks. It also
+ makes some profiling using performance counters more accurate.
+ Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
+ CPUs) this option has no performance advantage over the normal idle loop.
+ It may also interact badly with hyperthreading.
+
+Rebooting
+=========
+
+ reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
+ bios
+ Use the CPU reboot vector for warm reset
+ warm
+ Don't set the cold reboot flag
+ cold
+ Set the cold reboot flag
+ triple
+ Force a triple fault (init)
+ kbd
+ Use the keyboard controller. cold reset (default)
+ acpi
+ Use the ACPI RESET_REG in the FADT. If ACPI is not configured or
+ the ACPI reset does not work, the reboot path attempts the reset
+ using the keyboard controller.
+ efi
+ Use efi reset_system runtime service. If EFI is not configured or
+ the EFI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
+
+ Using warm reset will be much faster especially on big memory
+ systems because the BIOS will not go through the memory check.
+ Disadvantage is that not all hardware will be completely reinitialized
+ on reboot so there may be boot problems on some systems.
+
+ reboot=force
+ Don't stop other CPUs on reboot. This can make reboot more reliable
+ in some cases.
+
+Non Executable Mappings
+=======================
+
+ noexec=on|off
+ on
+ Enable(default)
+ off
+ Disable
+
+NUMA
+====
+
+ numa=off
+ Only set up a single NUMA node spanning all memory.
+
+ numa=noacpi
+ Don't parse the SRAT table for NUMA setup
+
+ numa=fake=<size>[MG]
+ If given as a memory unit, fills all system RAM with nodes of
+ size interleaved over physical nodes.
+
+ numa=fake=<N>
+ If given as an integer, fills all system RAM with N fake nodes
+ interleaved over physical nodes.
+
+ numa=fake=<N>U
+ If given as an integer followed by 'U', it will divide each
+ physical node into N emulated nodes.
+
+ACPI
+====
+
+ acpi=off
+ Don't enable ACPI
+ acpi=ht
+ Use ACPI boot table parsing, but don't enable ACPI interpreter
+ acpi=force
+ Force ACPI on (currently not needed)
+ acpi=strict
+ Disable out of spec ACPI workarounds.
+ acpi_sci={edge,level,high,low}
+ Set up ACPI SCI interrupt.
+ acpi=noirq
+ Don't route interrupts
+ acpi=nocmcff
+ Disable firmware first mode for corrected errors. This
+ disables parsing the HEST CMC error source to check if
+ firmware has set the FF flag. This may result in
+ duplicate corrected error reports.
+
+PCI
+===
+
+ pci=off
+ Don't use PCI
+ pci=conf1
+ Use conf1 access.
+ pci=conf2
+ Use conf2 access.
+ pci=rom
+ Assign ROMs.
+ pci=assign-busses
+ Assign busses
+ pci=irqmask=MASK
+ Set PCI interrupt mask to MASK
+ pci=lastbus=NUMBER
+ Scan up to NUMBER busses, no matter what the mptable says.
+ pci=noacpi
+ Don't use ACPI to set up PCI interrupt routing.
+
+IOMMU (input/output memory management unit)
+===========================================
+Multiple x86-64 PCI-DMA mapping implementations exist, for example:
+
+ 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
+ (e.g. because you have < 3 GB memory).
+ Kernel boot message: "PCI-DMA: Disabling IOMMU"
+
+ 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
+ Kernel boot message: "PCI-DMA: using GART IOMMU"
+
+ 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
+ e.g. if there is no hardware IOMMU in the system and it is need because
+ you have >3GB memory or told the kernel to us it (iommu=soft))
+ Kernel boot message: "PCI-DMA: Using software bounce buffering
+ for IO (SWIOTLB)"
+
+ 4. <arch/x86_64/pci-calgary.c> : IBM Calgary hardware IOMMU. Used in IBM
+ pSeries and xSeries servers. This hardware IOMMU supports DMA address
+ mapping with memory protection, etc.
+ Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
+
+::
+
+ iommu=[<size>][,noagp][,off][,force][,noforce]
+ [,memaper[=<order>]][,merge][,fullflush][,nomerge]
+ [,noaperture][,calgary]
+
+General iommu options:
+
+ off
+ Don't initialize and use any kind of IOMMU.
+ noforce
+ Don't force hardware IOMMU usage when it is not needed. (default).
+ force
+ Force the use of the hardware IOMMU even when it is
+ not actually needed (e.g. because < 3 GB memory).
+ soft
+ Use software bounce buffering (SWIOTLB) (default for
+ Intel machines). This can be used to prevent the usage
+ of an available hardware IOMMU.
+
+iommu options only relevant to the AMD GART hardware IOMMU:
+
+ <size>
+ Set the size of the remapping area in bytes.
+ allowed
+ Overwrite iommu off workarounds for specific chipsets.
+ fullflush
+ Flush IOMMU on each allocation (default).
+ nofullflush
+ Don't use IOMMU fullflush.
+ memaper[=<order>]
+ Allocate an own aperture over RAM with size 32MB<<order.
+ (default: order=1, i.e. 64MB)
+ merge
+ Do scatter-gather (SG) merging. Implies "force" (experimental).
+ nomerge
+ Don't do scatter-gather (SG) merging.
+ noaperture
+ Ask the IOMMU not to touch the aperture for AGP.
+ noagp
+ Don't initialize the AGP driver and use full aperture.
+ panic
+ Always panic when IOMMU overflows.
+ calgary
+ Use the Calgary IOMMU if it is available
+
+iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
+implementation:
+
+ swiotlb=<pages>[,force]
+ <pages>
+ Prereserve that many 128K pages for the software IO bounce buffering.
+ force
+ Force all IO through the software TLB.
+
+Settings for the IBM Calgary hardware IOMMU currently found in IBM
+pSeries and xSeries machines
+
+ calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
+ Set the size of each PCI slot's translation table when using the
+ Calgary IOMMU. This is the size of the translation table itself
+ in main memory. The smallest table, 64k, covers an IO space of
+ 32MB; the largest, 8MB table, can cover an IO space of 4GB.
+ Normally the kernel will make the right choice by itself.
+ calgary=[translate_empty_slots]
+ Enable translation even on slots that have no devices attached to
+ them, in case a device will be hotplugged in the future.
+ calgary=[disable=<PCI bus number>]
+ Disable translation on a given PHB. For
+ example, the built-in graphics adapter resides on the first bridge
+ (PCI bus number 0); if translation (isolation) is enabled on this
+ bridge, X servers that access the hardware directly from user
+ space might stop working. Use this option if you have devices that
+ are accessed from userspace directly on some PCI host bridge.
+ panic
+ Always panic when IOMMU overflows
+
+
+Miscellaneous
+=============
+
+ nogbpages
+ Do not use GB pages for kernel direct mappings.
+ gbpages
+ Use GB pages for kernel direct mappings.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
deleted file mode 100644
index abc53886655e..000000000000
--- a/Documentation/x86/x86_64/boot-options.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-AMD64 specific boot options
-
-There are many others (usually documented in driver documentation), but
-only the AMD64 specific ones are listed here.
-
-Machine check
-
- Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
-
- mce=off
- Disable machine check
- mce=no_cmci
- Disable CMCI(Corrected Machine Check Interrupt) that
- Intel processor supports. Usually this disablement is
- not recommended, but it might be handy if your hardware
- is misbehaving.
- Note that you'll get more problems without CMCI than with
- due to the shared banks, i.e. you might get duplicated
- error logs.
- mce=dont_log_ce
- Don't make logs for corrected errors. All events reported
- as corrected are silently cleared by OS.
- This option will be useful if you have no interest in any
- of corrected errors.
- mce=ignore_ce
- Disable features for corrected errors, e.g. polling timer
- and CMCI. All events reported as corrected are not cleared
- by OS and remained in its error banks.
- Usually this disablement is not recommended, however if
- there is an agent checking/clearing corrected errors
- (e.g. BIOS or hardware monitoring applications), conflicting
- with OS's error handling, and you cannot deactivate the agent,
- then this option will be a help.
- mce=no_lmce
- Do not opt-in to Local MCE delivery. Use legacy method
- to broadcast MCEs.
- mce=bootlog
- Enable logging of machine checks left over from booting.
- Disabled by default on AMD Fam10h and older because some BIOS
- leave bogus ones.
- If your BIOS doesn't do that it's a good idea to enable though
- to make sure you log even machine check events that result
- in a reboot. On Intel systems it is enabled by default.
- mce=nobootlog
- Disable boot machine check logging.
- mce=tolerancelevel[,monarchtimeout] (number,number)
- tolerance levels:
- 0: always panic on uncorrected errors, log corrected errors
- 1: panic or SIGBUS on uncorrected errors, log corrected errors
- 2: SIGBUS or log uncorrected errors, log corrected errors
- 3: never panic or SIGBUS, log all errors (for testing only)
- Default is 1
- Can be also set using sysfs which is preferable.
- monarchtimeout:
- Sets the time in us to wait for other CPUs on machine checks. 0
- to disable.
- mce=bios_cmci_threshold
- Don't overwrite the bios-set CMCI threshold. This boot option
- prevents Linux from overwriting the CMCI threshold set by the
- bios. Without this option, Linux always sets the CMCI
- threshold to 1. Enabling this may make memory predictive failure
- analysis less effective if the bios sets thresholds for memory
- errors since we will not see details for all errors.
- mce=recovery
- Force-enable recoverable machine check code paths
-
- nomce (for compatibility with i386): same as mce=off
-
- Everything else is in sysfs now.
-
-APICs
-
- apic Use IO-APIC. Default
-
- noapic Don't use the IO-APIC.
-
- disableapic Don't use the local APIC
-
- nolapic Don't use the local APIC (alias for i386 compatibility)
-
- pirq=... See Documentation/x86/i386/IO-APIC.txt
-
- noapictimer Don't set up the APIC timer
-
- no_timer_check Don't check the IO-APIC timer. This can work around
- problems with incorrect timer initialization on some boards.
- apicpmtimer
- Do APIC timer calibration using the pmtimer. Implies
- apicmaintimer. Useful when your PIT timer is totally
- broken.
-
-Timing
-
- notsc
- Deprecated, use tsc=unstable instead.
-
- nohpet
- Don't use the HPET timer.
-
-Idle loop
-
- idle=poll
- Don't do power saving in the idle loop using HLT, but poll for rescheduling
- event. This will make the CPUs eat a lot more power, but may be useful
- to get slightly better performance in multiprocessor benchmarks. It also
- makes some profiling using performance counters more accurate.
- Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
- CPUs) this option has no performance advantage over the normal idle loop.
- It may also interact badly with hyperthreading.
-
-Rebooting
-
- reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
- bios Use the CPU reboot vector for warm reset
- warm Don't set the cold reboot flag
- cold Set the cold reboot flag
- triple Force a triple fault (init)
- kbd Use the keyboard controller. cold reset (default)
- acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
- ACPI reset does not work, the reboot path attempts the reset using
- the keyboard controller.
- efi Use efi reset_system runtime service. If EFI is not configured or the
- EFI reset does not work, the reboot path attempts the reset using
- the keyboard controller.
-
- Using warm reset will be much faster especially on big memory
- systems because the BIOS will not go through the memory check.
- Disadvantage is that not all hardware will be completely reinitialized
- on reboot so there may be boot problems on some systems.
-
- reboot=force
-
- Don't stop other CPUs on reboot. This can make reboot more reliable
- in some cases.
-
-Non Executable Mappings
-
- noexec=on|off
-
- on Enable(default)
- off Disable
-
-NUMA
-
- numa=off Only set up a single NUMA node spanning all memory.
-
- numa=noacpi Don't parse the SRAT table for NUMA setup
-
- numa=fake=<size>[MG]
- If given as a memory unit, fills all system RAM with nodes of
- size interleaved over physical nodes.
-
- numa=fake=<N>
- If given as an integer, fills all system RAM with N fake nodes
- interleaved over physical nodes.
-
- numa=fake=<N>U
- If given as an integer followed by 'U', it will divide each
- physical node into N emulated nodes.
-
-ACPI
-
- acpi=off Don't enable ACPI
- acpi=ht Use ACPI boot table parsing, but don't enable ACPI
- interpreter
- acpi=force Force ACPI on (currently not needed)
-
- acpi=strict Disable out of spec ACPI workarounds.
-
- acpi_sci={edge,level,high,low} Set up ACPI SCI interrupt.
-
- acpi=noirq Don't route interrupts
-
- acpi=nocmcff Disable firmware first mode for corrected errors. This
- disables parsing the HEST CMC error source to check if
- firmware has set the FF flag. This may result in
- duplicate corrected error reports.
-
-PCI
-
- pci=off Don't use PCI
- pci=conf1 Use conf1 access.
- pci=conf2 Use conf2 access.
- pci=rom Assign ROMs.
- pci=assign-busses Assign busses
- pci=irqmask=MASK Set PCI interrupt mask to MASK
- pci=lastbus=NUMBER Scan up to NUMBER busses, no matter what the mptable says.
- pci=noacpi Don't use ACPI to set up PCI interrupt routing.
-
-IOMMU (input/output memory management unit)
-
- Multiple x86-64 PCI-DMA mapping implementations exist, for example:
-
- 1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
- (e.g. because you have < 3 GB memory).
- Kernel boot message: "PCI-DMA: Disabling IOMMU"
-
- 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
- Kernel boot message: "PCI-DMA: using GART IOMMU"
-
- 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
- e.g. if there is no hardware IOMMU in the system and it is need because
- you have >3GB memory or told the kernel to us it (iommu=soft))
- Kernel boot message: "PCI-DMA: Using software bounce buffering
- for IO (SWIOTLB)"
-
- 4. <arch/x86_64/pci-calgary.c> : IBM Calgary hardware IOMMU. Used in IBM
- pSeries and xSeries servers. This hardware IOMMU supports DMA address
- mapping with memory protection, etc.
- Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
-
- iommu=[<size>][,noagp][,off][,force][,noforce]
- [,memaper[=<order>]][,merge][,fullflush][,nomerge]
- [,noaperture][,calgary]
-
- General iommu options:
- off Don't initialize and use any kind of IOMMU.
- noforce Don't force hardware IOMMU usage when it is not needed.
- (default).
- force Force the use of the hardware IOMMU even when it is
- not actually needed (e.g. because < 3 GB memory).
- soft Use software bounce buffering (SWIOTLB) (default for
- Intel machines). This can be used to prevent the usage
- of an available hardware IOMMU.
-
- iommu options only relevant to the AMD GART hardware IOMMU:
- <size> Set the size of the remapping area in bytes.
- allowed Overwrite iommu off workarounds for specific chipsets.
- fullflush Flush IOMMU on each allocation (default).
- nofullflush Don't use IOMMU fullflush.
- memaper[=<order>] Allocate an own aperture over RAM with size 32MB<<order.
- (default: order=1, i.e. 64MB)
- merge Do scatter-gather (SG) merging. Implies "force"
- (experimental).
- nomerge Don't do scatter-gather (SG) merging.
- noaperture Ask the IOMMU not to touch the aperture for AGP.
- noagp Don't initialize the AGP driver and use full aperture.
- panic Always panic when IOMMU overflows.
- calgary Use the Calgary IOMMU if it is available
-
- iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
- implementation:
- swiotlb=<pages>[,force]
- <pages> Prereserve that many 128K pages for the software IO
- bounce buffering.
- force Force all IO through the software TLB.
-
- Settings for the IBM Calgary hardware IOMMU currently found in IBM
- pSeries and xSeries machines:
-
- calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
- calgary=[translate_empty_slots]
- calgary=[disable=<PCI bus number>]
- panic Always panic when IOMMU overflows
-
- 64k,...,8M - Set the size of each PCI slot's translation table
- when using the Calgary IOMMU. This is the size of the translation
- table itself in main memory. The smallest table, 64k, covers an IO
- space of 32MB; the largest, 8MB table, can cover an IO space of
- 4GB. Normally the kernel will make the right choice by itself.
-
- translate_empty_slots - Enable translation even on slots that have
- no devices attached to them, in case a device will be hotplugged
- in the future.
-
- disable=<PCI bus number> - Disable translation on a given PHB. For
- example, the built-in graphics adapter resides on the first bridge
- (PCI bus number 0); if translation (isolation) is enabled on this
- bridge, X servers that access the hardware directly from user
- space might stop working. Use this option if you have devices that
- are accessed from userspace directly on some PCI host bridge.
-
-Miscellaneous
-
- nogbpages
- Do not use GB pages for kernel direct mappings.
- gbpages
- Use GB pages for kernel direct mappings.
diff --git a/Documentation/x86/x86_64/cpu-hotplug-spec b/Documentation/x86/x86_64/cpu-hotplug-spec.rst
index 3c23e0587db3..8d1c91f0c880 100644
--- a/Documentation/x86/x86_64/cpu-hotplug-spec
+++ b/Documentation/x86/x86_64/cpu-hotplug-spec.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
Firmware support for CPU hotplug under Linux/x86-64
----------------------------------------------------
+===================================================
Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to
know in advance of boot time the maximum number of CPUs that could be plugged
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
index 4b09f18831f8..74fbb78b3c67 100644
--- a/Documentation/x86/x86_64/fake-numa-for-cpusets
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets.rst
@@ -1,5 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Fake NUMA For CPUSets
+=====================
+
+:Author: David Rientjes <rientjes@cs.washington.edu>
+
Using numa=fake and CPUSets for Resource Management
-Written by David Rientjes <rientjes@cs.washington.edu>
This document describes how the numa=fake x86_64 command-line option can be used
in conjunction with cpusets for coarse memory management. Using this feature,
@@ -20,7 +27,7 @@ you become more familiar with using this combination for resource control,
you'll determine a better setup to minimize the number of nodes you have to deal
with.
-A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg::
Faking node 0 at 0000000000000000-0000000020000000 (512MB)
Faking node 1 at 0000000020000000-0000000040000000 (512MB)
@@ -34,7 +41,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
Now following the instructions for mounting the cpusets filesystem from
Documentation/cgroup-v1/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
-address spaces) to individual cpusets:
+address spaces) to individual cpusets::
[root@xroads /]# mkdir exampleset
[root@xroads /]# mount -t cpuset none exampleset
@@ -47,7 +54,7 @@ Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
memory allocations (1G).
You can now assign tasks to these cpusets to limit the memory resources
-available to them according to the fake nodes assigned as mems:
+available to them according to the fake nodes assigned as mems::
[root@xroads /exampleset/ddset]# echo $$ > tasks
[root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
@@ -57,9 +64,13 @@ Notice the difference between the system memory usage as reported by
/proc/meminfo between the restricted cpuset case above and the unrestricted
case (i.e. running the same 'dd' command without assigning it to a fake NUMA
cpuset):
- Unrestricted Restricted
- MemTotal: 3091900 kB 3091900 kB
- MemFree: 42113 kB 1513236 kB
+
+ ======== ============ ==========
+ Name Unrestricted Restricted
+ ======== ============ ==========
+ MemTotal 3091900 kB 3091900 kB
+ MemFree 42113 kB 1513236 kB
+ ======== ============ ==========
This allows for coarse memory management for the tasks you assign to particular
cpusets. Since cpusets can form a hierarchy, you can create some pretty
diff --git a/Documentation/x86/x86_64/index.rst b/Documentation/x86/x86_64/index.rst
new file mode 100644
index 000000000000..d6eaaa5a35fc
--- /dev/null
+++ b/Documentation/x86/x86_64/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+x86_64 Support
+==============
+
+.. toctree::
+ :maxdepth: 2
+
+ boot-options
+ uefi
+ mm
+ 5level-paging
+ fake-numa-for-cpusets
+ cpu-hotplug-spec
+ machinecheck
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck.rst
index d0648a74fceb..e189168406fa 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
-Configurable sysfs parameters for the x86-64 machine check code.
+===============================================================
+Configurable sysfs parameters for the x86-64 machine check code
+===============================================================
Machine checks report internal hardware error conditions detected
by the CPU. Uncorrected errors typically cause a machine check
@@ -16,14 +19,13 @@ log then mcelog should run to collect and decode machine check entries
from /dev/mcelog. Normally mcelog should be run regularly from a cronjob.
Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN
-(N = CPU number)
+(N = CPU number).
The directory contains some configurable entries:
-Entries:
-
bankNctl
-(N bank number)
+ (N bank number)
+
64bit Hex bitmask enabling/disabling specific subevents for bank N
When a bit in the bitmask is zero then the respective
subevent will not be reported.
diff --git a/Documentation/x86/x86_64/mm.rst b/Documentation/x86/x86_64/mm.rst
new file mode 100644
index 000000000000..267fc4808945
--- /dev/null
+++ b/Documentation/x86/x86_64/mm.rst
@@ -0,0 +1,161 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Memory Managment
+================
+
+Complete virtual memory map with 4-level page tables
+====================================================
+
+.. note::
+
+ - Negative addresses such as "-23 TB" are absolute addresses in bytes, counted down
+ from the top of the 64-bit address space. It's easier to understand the layout
+ when seen both in absolute addresses and in distance-from-top notation.
+
+ For example 0xffffe90000000000 == -23 TB, it's 23 TB lower than the top of the
+ 64-bit address space (ffffffffffffffff).
+
+ Note that as we get closer to the top of the address space, the notation changes
+ from TB to GB and then MB/KB.
+
+ - "16M TB" might look weird at first sight, but it's an easier to visualize size
+ notation than "16 EB", which few will recognize at first sight as 16 exabytes.
+ It also shows it nicely how incredibly large 64-bit address space is.
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -128 TB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
+ ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI
+ ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
+ ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole
+ ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base)
+ ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole
+ ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base)
+ ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole
+ ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 56-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
+ | | | | vaddr_end for KASLR
+ fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
+ ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
+ ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
+ ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
+ ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
+ ffffffff80000000 |-2048 MB | | |
+ ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
+ ffffffffff000000 | -16 MB | | |
+ FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
+ ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
+ ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
+ __________________|____________|__________________|_________|___________________________________________________________
+
+
+Complete virtual memory map with 5-level page tables
+====================================================
+
+.. note::
+
+ - With 56-bit addresses, user-space memory gets expanded by a factor of 512x,
+ from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PB starting
+ offset and many of the regions expand to support the much larger physical
+ memory supported.
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -64 PB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
+ ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
+ ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
+ ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
+ ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
+ ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole
+ ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base)
+ ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole
+ ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ | Identical layout to the 47-bit one from here on:
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
+ | | | | vaddr_end for KASLR
+ fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
+ ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
+ ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
+ ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
+ ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
+ ffffffff80000000 |-2048 MB | | |
+ ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
+ ffffffffff000000 | -16 MB | | |
+ FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
+ ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
+ ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
+ __________________|____________|__________________|_________|___________________________________________________________
+
+Architecture defines a 64-bit virtual address. Implementations can support
+less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
+
+The direct mapping covers all memory in the system up to the highest
+memory address (this means in some cases it can also include PCI memory
+holes).
+
+vmalloc space is lazily synchronized into the different PML4/PML5 pages of
+the processes using the page fault handler, with init_top_pgt as
+reference.
+
+We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
+memory window (this size is arbitrary, it can be raised later if needed).
+The mappings are not part of any other kernel PGD and are only available
+during EFI runtime calls.
+
+Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
+physical memory, vmalloc/ioremap space and virtual memory map are randomized.
+Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
+
+For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
+hole: ffffffffffff4111
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
deleted file mode 100644
index 6cbe652d7a49..000000000000
--- a/Documentation/x86/x86_64/mm.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-====================================================
-Complete virtual memory map with 4-level page tables
-====================================================
-
-Notes:
-
- - Negative addresses such as "-23 TB" are absolute addresses in bytes, counted down
- from the top of the 64-bit address space. It's easier to understand the layout
- when seen both in absolute addresses and in distance-from-top notation.
-
- For example 0xffffe90000000000 == -23 TB, it's 23 TB lower than the top of the
- 64-bit address space (ffffffffffffffff).
-
- Note that as we get closer to the top of the address space, the notation changes
- from TB to GB and then MB/KB.
-
- - "16M TB" might look weird at first sight, but it's an easier to visualize size
- notation than "16 EB", which few will recognize at first sight as 16 exabytes.
- It also shows it nicely how incredibly large 64-bit address space is.
-
-========================================================================================================================
- Start addr | Offset | End addr | Size | VM area description
-========================================================================================================================
- | | | |
- 0000000000000000 | 0 | 00007fffffffffff | 128 TB | user-space virtual memory, different per mm
-__________________|____________|__________________|_________|___________________________________________________________
- | | | |
- 0000800000000000 | +128 TB | ffff7fffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
- | | | | virtual memory addresses up to the -128 TB
- | | | | starting offset of kernel mappings.
-__________________|____________|__________________|_________|___________________________________________________________
- |
- | Kernel-space virtual memory, shared between all processes:
-____________________________________________________________|___________________________________________________________
- | | | |
- ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
- ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI
- ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
- ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole
- ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base)
- ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole
- ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base)
- ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole
- ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory
-__________________|____________|__________________|_________|____________________________________________________________
- |
- | Identical layout to the 56-bit one from here on:
-____________________________________________________________|____________________________________________________________
- | | | |
- fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
- | | | | vaddr_end for KASLR
- fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
- fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
- ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
- ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
- ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
- ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
- ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
- ffffffff80000000 |-2048 MB | | |
- ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
- ffffffffff000000 | -16 MB | | |
- FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
- ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
- ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
-__________________|____________|__________________|_________|___________________________________________________________
-
-
-====================================================
-Complete virtual memory map with 5-level page tables
-====================================================
-
-Notes:
-
- - With 56-bit addresses, user-space memory gets expanded by a factor of 512x,
- from 0.125 PB to 64 PB. All kernel mappings shift down to the -64 PB starting
- offset and many of the regions expand to support the much larger physical
- memory supported.
-
-========================================================================================================================
- Start addr | Offset | End addr | Size | VM area description
-========================================================================================================================
- | | | |
- 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
-__________________|____________|__________________|_________|___________________________________________________________
- | | | |
- 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
- | | | | virtual memory addresses up to the -64 PB
- | | | | starting offset of kernel mappings.
-__________________|____________|__________________|_________|___________________________________________________________
- |
- | Kernel-space virtual memory, shared between all processes:
-____________________________________________________________|___________________________________________________________
- | | | |
- ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
- ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
- ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
- ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
- ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
- ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole
- ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base)
- ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole
- ffdf000000000000 | -8.25 PB | fffffbffffffffff | ~8 PB | KASAN shadow memory
-__________________|____________|__________________|_________|____________________________________________________________
- |
- | Identical layout to the 47-bit one from here on:
-____________________________________________________________|____________________________________________________________
- | | | |
- fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
- | | | | vaddr_end for KASLR
- fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
- fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
- ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
- ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
- ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
- ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
- ffffffff80000000 | -2 GB | ffffffff9fffffff | 512 MB | kernel text mapping, mapped to physical address 0
- ffffffff80000000 |-2048 MB | | |
- ffffffffa0000000 |-1536 MB | fffffffffeffffff | 1520 MB | module mapping space
- ffffffffff000000 | -16 MB | | |
- FIXADDR_START | ~-11 MB | ffffffffff5fffff | ~0.5 MB | kernel-internal fixmap range, variable size and offset
- ffffffffff600000 | -10 MB | ffffffffff600fff | 4 kB | legacy vsyscall ABI
- ffffffffffe00000 | -2 MB | ffffffffffffffff | 2 MB | ... unused hole
-__________________|____________|__________________|_________|___________________________________________________________
-
-Architecture defines a 64-bit virtual address. Implementations can support
-less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are sign extended.
-This causes hole between user space and kernel addresses if you interpret them
-as unsigned.
-
-The direct mapping covers all memory in the system up to the highest
-memory address (this means in some cases it can also include PCI memory
-holes).
-
-vmalloc space is lazily synchronized into the different PML4/PML5 pages of
-the processes using the page fault handler, with init_top_pgt as
-reference.
-
-We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
-memory window (this size is arbitrary, it can be raised later if needed).
-The mappings are not part of any other kernel PGD and are only available
-during EFI runtime calls.
-
-Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
-physical memory, vmalloc/ioremap space and virtual memory map are randomized.
-Their order is preserved but their base will be offset early at boot time.
-
-Be very careful vs. KASLR when changing anything here. The KASLR address
-range must not overlap with anything except the KASAN shadow area, which is
-correct as KASAN disables KASLR.
-
-For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
-hole: ffffffffffff4111
diff --git a/Documentation/x86/x86_64/uefi.txt b/Documentation/x86/x86_64/uefi.rst
index a5e2b4fdb170..88c3ba32546f 100644
--- a/Documentation/x86/x86_64/uefi.txt
+++ b/Documentation/x86/x86_64/uefi.rst
@@ -1,5 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
General note on [U]EFI x86_64 support
--------------------------------------
+=====================================
The nomenclature EFI and UEFI are used interchangeably in this document.
@@ -14,29 +17,42 @@ with EFI firmware and specifications are listed below.
3. x86_64 platform with EFI/UEFI firmware.
-Mechanics:
+Mechanics
---------
-- Build the kernel with the following configuration.
+
+- Build the kernel with the following configuration::
+
CONFIG_FB_EFI=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+
If EFI runtime services are expected, the following configuration should
- be selected.
+ be selected::
+
CONFIG_EFI=y
CONFIG_EFI_VARS=y or m # optional
+
- Create a VFAT partition on the disk
- Copy the following to the VFAT partition:
+
elilo bootloader with x86_64 support, elilo configuration file,
kernel image built in first step and corresponding
initrd. Instructions on building elilo and its dependencies
can be found in the elilo sourceforge project.
+
- Boot to EFI shell and invoke elilo choosing the kernel image built
in first step.
- If some or all EFI runtime services don't work, you can try following
kernel command line parameters to turn off some or all EFI runtime
services.
- noefi turn off all EFI runtime services
- reboot_type=k turn off EFI reboot runtime service
+
+ noefi
+ turn off all EFI runtime services
+ reboot_type=k
+ turn off EFI reboot runtime service
+
- If the EFI memory map has additional entries not in the E820 map,
you can include those entries in the kernels memory map of available
physical RAM by using the following kernel command line parameter.
- add_efi_memmap include EFI memory map of available physical RAM
+
+ add_efi_memmap
+ include EFI memory map of available physical RAM
diff --git a/Documentation/x86/zero-page.rst b/Documentation/x86/zero-page.rst
new file mode 100644
index 000000000000..f088f5881666
--- /dev/null
+++ b/Documentation/x86/zero-page.rst
@@ -0,0 +1,45 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========
+Zero Page
+=========
+The additional fields in struct boot_params as a part of 32-bit boot
+protocol of kernel. These should be filled by bootloader or 16-bit
+real-mode setup code of the kernel. References/settings to it mainly
+are in::
+
+ arch/x86/include/uapi/asm/bootparam.h
+
+=========== ===== ======================= =================================================
+Offset/Size Proto Name Meaning
+
+000/040 ALL screen_info Text mode or frame buffer information
+ (struct screen_info)
+040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info)
+058/008 ALL tboot_addr Physical address of tboot shared page
+060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
+ (struct ist_info)
+080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
+090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!!
+0A0/010 ALL sys_desc_table System description table (struct sys_desc_table),
+ OBSOLETE!!
+0B0/010 ALL olpc_ofw_header OLPC's OpenFirmware CIF and friends
+0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits
+0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits
+0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
+140/080 ALL edid_info Video mode setup (struct edid_info)
+1C0/020 ALL efi_info EFI 32 information (struct efi_info)
+1E0/004 ALL alt_mem_k Alternative mem check, in KB
+1E4/004 ALL scratch Scratch field for the kernel setup code
+1E8/001 ALL e820_entries Number of entries in e820_table (below)
+1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
+1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer
+ (below)
+1EB/001 ALL kbd_status Numlock is enabled
+1EC/001 ALL secure_boot Secure boot is enabled in the firmware
+1EF/001 ALL sentinel Used to detect broken bootloaders
+290/040 ALL edd_mbr_sig_buffer EDD MBR signatures
+2D0/A00 ALL e820_table E820 memory map table
+ (array of struct e820_entry)
+D00/1EC ALL eddbuf EDD data (array of struct edd_info)
+=========== ===== ======================= =================================================
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
deleted file mode 100644
index 68aed077f7b6..000000000000
--- a/Documentation/x86/zero-page.txt
+++ /dev/null
@@ -1,40 +0,0 @@
-The additional fields in struct boot_params as a part of 32-bit boot
-protocol of kernel. These should be filled by bootloader or 16-bit
-real-mode setup code of the kernel. References/settings to it mainly
-are in:
-
- arch/x86/include/uapi/asm/bootparam.h
-
-
-Offset Proto Name Meaning
-/Size
-
-000/040 ALL screen_info Text mode or frame buffer information
- (struct screen_info)
-040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info)
-058/008 ALL tboot_addr Physical address of tboot shared page
-060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
- (struct ist_info)
-080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
-090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!!
-0A0/010 ALL sys_desc_table System description table (struct sys_desc_table),
- OBSOLETE!!
-0B0/010 ALL olpc_ofw_header OLPC's OpenFirmware CIF and friends
-0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits
-0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits
-0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
-140/080 ALL edid_info Video mode setup (struct edid_info)
-1C0/020 ALL efi_info EFI 32 information (struct efi_info)
-1E0/004 ALL alt_mem_k Alternative mem check, in KB
-1E4/004 ALL scratch Scratch field for the kernel setup code
-1E8/001 ALL e820_entries Number of entries in e820_table (below)
-1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
-1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer
- (below)
-1EB/001 ALL kbd_status Numlock is enabled
-1EC/001 ALL secure_boot Secure boot is enabled in the firmware
-1EF/001 ALL sentinel Used to detect broken bootloaders
-290/040 ALL edd_mbr_sig_buffer EDD MBR signatures
-2D0/A00 ALL e820_table E820 memory map table
- (array of struct e820_entry)
-D00/1EC ALL eddbuf EDD data (array of struct edd_info)
diff --git a/Documentation/xilinx/eemi.txt b/Documentation/xilinx/eemi.txt
index 0ab686c173be..5f39b4ffdcd4 100644
--- a/Documentation/xilinx/eemi.txt
+++ b/Documentation/xilinx/eemi.txt
@@ -41,8 +41,8 @@ Example of EEMI ops usage:
int ret;
eemi_ops = zynqmp_pm_get_eemi_ops();
- if (!eemi_ops)
- return -ENXIO;
+ if (IS_ERR(eemi_ops))
+ return PTR_ERR(eemi_ops);
ret = eemi_ops->query_data(qdata, ret_payload);