diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d401577b5a6a..f2875484795a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -446,6 +446,9 @@
 	arm64.nobti	[ARM64] Unconditionally disable Branch Target
 			Identification support
 
+	arm64.nogcs	[ARM64] Unconditionally disable Guarded Control Stack
+			support
+
 	arm64.nomops	[ARM64] Unconditionally disable Memory Copy and Memory
 			Set instructions support
 
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 8502bc174640..a58bd3f7e190 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -26,3 +26,4 @@ Performance monitor support
    meson-ddr-pmu
    cxl
    ampere_cspmu
+   mrvl-pem-pmu
diff --git a/Documentation/admin-guide/perf/mrvl-pem-pmu.rst b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
new file mode 100644
index 000000000000..c39007149b97
--- /dev/null
+++ b/Documentation/admin-guide/perf/mrvl-pem-pmu.rst
@@ -0,0 +1,56 @@
+=================================================================
+Marvell Odyssey PEM Performance Monitoring Unit (PMU UNCORE)
+=================================================================
+
+The PCI Express Interface Units (PEM) are associated with a corresponding
+monitoring unit. This includes performance counters to track various
+characteristics of the data that is transmitted over the PCIe link.
+
+The counters track inbound and outbound transactions, with separate
+counters for posted/non-posted/completion TLPs. Inbound and outbound
+memory read requests, along with their latencies, can also be monitored.
+Address Translation Services (ATS) events such as ATS Translation, ATS
+Page Request and ATS Invalidation, along with their corresponding
+latencies, are also tracked.
+
+There are separate 64-bit counters to measure posted/non-posted/completion
+TLPs in inbound and outbound transactions. ATS events are measured by
+different counters.
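Since the counters are exposed as standard perf events they can be used
directly with the perf tooling. As a hypothetical example (assuming the
first PEM instance enumerates as mrvl_pcie_rc_pmu_0), a system-wide count
of inbound reads over one second could be gathered with::

  # perf stat -a -e mrvl_pcie_rc_pmu_0/ib_reads/ -- sleep 1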
+
+The PMU driver exposes the available events and format options under sysfs,
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/events/
+/sys/bus/event_source/devices/mrvl_pcie_rc_pmu_<>/format/
+
+Examples::
+
+  # perf list | grep mrvl_pcie_rc_pmu
+  mrvl_pcie_rc_pmu_<>/ats_inv/                   [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ats_inv_latency/           [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ats_pri/                   [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ats_pri_latency/           [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ats_trans/                 [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ats_trans_latency/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_inflight/               [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_reads/                  [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ebus/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_req_no_ro_ncb/          [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_cpl_partid/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_cpl_partid/  [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_npr/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_dwords_pr/          [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_npr/                [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ib_tlp_pr/                 [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_inflight_partid/        [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_merges_cpl_partid/      [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_merges_npr_partid/      [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_merges_pr_partid/       [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_reads_partid/           [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_cpl_partid/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_cpl_partid/  [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_npr_partid/  [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_dwords_pr_partid/   [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_npr_partid/         [Kernel PMU event]
+  mrvl_pcie_rc_pmu_<>/ob_tlp_pr_partid/          [Kernel PMU event]
+
+
+  # perf stat -e ib_inflight,ib_reads,ib_req_no_ro_ebus,ib_req_no_ro_ncb
diff --git a/Documentation/arch/arm64/arm-cca.rst b/Documentation/arch/arm64/arm-cca.rst
new file mode 100644
index 000000000000..c48b7d4ab6bd
--- /dev/null
+++ b/Documentation/arch/arm64/arm-cca.rst
@@ -0,0 +1,69 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Arm Confidential Compute Architecture
+=====================================
+
+Arm systems that support the Realm Management Extension (RME) contain
+hardware to allow a VM guest to be run in a way which protects the code
+and data of the guest from the hypervisor. It extends the older "two
+world" model (Normal and Secure World) into four worlds: Normal, Secure,
+Root and Realm. Linux can then also be run as a guest to a monitor
+running in the Realm world.
+
+The monitor running in the Realm world is known as the Realm Management
+Monitor (RMM) and implements the Realm Management Monitor
+specification [1]. The monitor acts a bit like a hypervisor (e.g. it runs
+in EL2 and manages the stage 2 page tables etc. of the guests running in
+the Realm world); however, much of the control is handled by a hypervisor
+running in the Normal World. The Normal World hypervisor uses the Realm
+Management Interface (RMI) defined by the RMM specification to request
+the RMM to perform operations (e.g. mapping memory or executing a vCPU).
+
+The RMM defines an environment for guests where the address space (IPA)
+is split into two. The lower half is protected - any memory that is
+mapped in this half cannot be seen by the Normal World and the RMM
+restricts what operations the Normal World can perform on this memory
+(e.g.
the Normal World cannot replace pages in this region without the
+guest's cooperation). The upper half is shared: the Normal World is free
+to make changes to the pages in this region, and is also able to emulate
+MMIO devices in this region.
+
+A guest running in a Realm may also communicate with the RMM using the
+Realm Services Interface (RSI) to request changes in its environment or
+to perform attestation about its environment. In particular, it may
+request that areas of the protected address space are transitioned
+between 'RAM' and 'EMPTY' (in either direction). This allows a Realm
+guest to give up memory to be returned to the Normal World, or to
+request new memory from the Normal World. Without an explicit request
+from the Realm guest, the RMM will prevent the Normal World from making
+these changes.
+
+Linux as a Realm Guest
+----------------------
+
+To run Linux as a guest within a Realm, the following must be provided
+either by the VMM or by a `boot loader` run in the Realm before Linux:
+
+ * All protected RAM described to Linux (by DT or ACPI) must be marked
+   RIPAS RAM before handing control over to Linux.
+
+ * MMIO devices must be either unprotected (e.g. emulated by the Normal
+   World) or marked RIPAS DEV.
+
+ * MMIO devices emulated by the Normal World and used very early in boot
+   (specifically earlycon) must be specified in the upper half of IPA.
+   For earlycon this can be done by specifying the address on the
+   command line, e.g. with an IPA size of 33 bits and the base address
+   of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000``
+
+ * Linux will use bounce buffers for communicating with unprotected
+   devices. It will transition some protected memory to RIPAS EMPTY and
+   expect to be able to access unprotected pages at the same IPA address
+   but with the highest valid IPA bit set. The expectation is that the
+   VMM will remove the physical pages from the protected mapping and
+   provide those pages as unprotected pages.
+
+References
+----------
+[1] https://developer.arm.com/documentation/den0137/
diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index b57776a68f15..3278fb4bf219 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of the RAM
 in the machine, or any other method the boot loader designer sees fit.)
 
+For Arm Confidential Compute Realms this includes ensuring that all
+protected RAM has a Realm IPA state (RIPAS) of "RAM".
+
 2. Setup the device tree
 -------------------------
 
@@ -385,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
 
     - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
 
+    - HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
+      must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
+
   For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
 
   - If EL3 is present:
@@ -411,6 +417,38 @@ Before jumping into the kernel, the following conditions must be met:
 
     - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
 
+  - For CPUs with Guarded Control Stacks (FEAT_GCS):
+
+    - GCSCR_EL1 must be initialised to 0.
+
+    - GCSCRE0_EL1 must be initialised to 0.
+
+    - If EL3 is present:
+
+      - SCR_EL3.GCSEn (bit 39) must be initialised to 0b1.
+
+    - If EL2 is present:
+
+      - GCSCR_EL2 must be initialised to 0.
+
+    - If the kernel is entered at EL1 and EL2 is present:
+
+      - HCRX_EL2.GCSEn must be initialised to 0b1.
+
+      - HFGITR_EL2.nGCSEPP (bit 59) must be initialised to 0b1.
+
+      - HFGITR_EL2.nGCSSTR_EL1 (bit 58) must be initialised to 0b1.
+
+      - HFGITR_EL2.nGCSPUSHM_EL1 (bit 57) must be initialised to 0b1.
+
+      - HFGRTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+      - HFGRTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
+      - HFGWTR_EL2.nGCS_EL1 (bit 53) must be initialised to 0b1.
+
+      - HFGWTR_EL2.nGCS_EL0 (bit 52) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs. All CPUs must
 enter the kernel in the same exception level. Where the values documented
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index 694f67fa07d1..2ff922a406ad 100644
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
 kernel exposes the presence of these features to userspace through a set
 of flags called hwcaps, exposed in the auxiliary vector.
 
-Userspace software can test for features by acquiring the AT_HWCAP or
-AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
-flags are set, e.g.::
+Userspace software can test for features by acquiring the AT_HWCAP,
+AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
+whether the relevant flags are set, e.g.::
 
   bool floating_point_is_present(void)
   {
@@ -170,6 +170,10 @@ HWCAP_PACG
     ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
     Documentation/arch/arm64/pointer-authentication.rst.
 
+HWCAP_GCS
+    Functionality implied by ID_AA64PFR1_EL1.GCS == 0b1, as
+    described by Documentation/arch/arm64/gcs.rst.
+
 HWCAP2_DCPODP
     Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
 
diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst
new file mode 100644
index 000000000000..1f65a3193e77
--- /dev/null
+++ b/Documentation/arch/arm64/gcs.rst
@@ -0,0 +1,227 @@
+===============================================
+Guarded Control Stack support for AArch64 Linux
+===============================================
+
+This document briefly outlines the interface provided to userspace by Linux
+in order to support use of the ARM Guarded Control Stack (GCS) feature.
+
+This is an outline of the most important features and issues only, and is
+not intended to be exhaustive.
+
+
+
+1. General
+-----------
+
+* GCS is an architecture feature intended to provide greater protection
+  against return oriented programming (ROP) attacks and to simplify the
+  implementation of features that need to collect stack traces, such as
+  profiling.
+
+* When GCS is enabled a separate guarded control stack is maintained by the
+  PE which is writeable only through specific GCS operations. This stores
+  the call stack only: when a procedure call instruction is performed the
+  current PC is pushed onto the GCS, and on RET the address in the LR is
+  verified against the one on the top of the GCS.
+
+* When active the current GCS pointer is stored in the system register
+  GCSPR_EL0. This is readable by userspace but can only be updated
+  via specific GCS instructions (see the example below).
+
+* The architecture provides instructions for switching between guarded
+  control stacks with checks to ensure that the new stack is a valid
+  target for switching.
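For instance, a thread can sample its own GCS pointer directly. A minimal
illustrative helper, using the raw encoding of GCSPR_EL0 (S3_3_C2_C5_1, per
the Arm ARM) so that it assembles even without a GCS-aware toolchain::

  static inline unsigned long current_gcspr(void)
  {
          unsigned long val;

          /* MRS from GCSPR_EL0 via its raw system register encoding */
          asm volatile("mrs %0, s3_3_c2_c5_1" : "=r" (val));
          return val;
  }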
+
+* The functionality of GCS is similar to that provided by the x86 Shadow
+  Stack feature; due to the sharing of userspace interfaces the ABI refers
+  to shadow stacks rather than GCS.
+
+* Support for GCS is reported to userspace via HWCAP_GCS in the aux vector
+  AT_HWCAP entry.
+
+* GCS is enabled per thread. While there is support for disabling GCS
+  at runtime this should be done with great care.
+
+* GCS memory access faults are reported as normal memory access faults.
+
+* GCS specific errors (those reported with EC 0x2d) will be reported as
+  SIGSEGV with a si_code of SEGV_CPERR (control protection error).
+
+* GCS is supported only for AArch64.
+
+* On systems where GCS is supported GCSPR_EL0 is always readable by EL0
+  regardless of the GCS configuration for the thread.
+
+* The architecture supports enabling GCS without verifying that return
+  addresses in the LR match those in the GCS; in that case the LR will be
+  ignored. This is not supported by Linux.
+
+
+
+2. Enabling and disabling Guarded Control Stacks
+-------------------------------------------------
+
+* GCS is enabled and disabled for a thread via the PR_SET_SHADOW_STACK_STATUS
+  prctl(), which takes a single flags argument specifying which GCS features
+  should be used.
+
+* When set, the PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack
+  and enables GCS for the thread, enabling the functionality controlled by
+  GCSCRE0_EL1.{nTR, RVCHKEN, PCRSEL}.
+
+* When set, the PR_SHADOW_STACK_PUSH flag enables the functionality controlled
+  by GCSCRE0_EL1.PUSHMEn, allowing explicit GCS pushes.
+
+* When set, the PR_SHADOW_STACK_WRITE flag enables the functionality controlled
+  by GCSCRE0_EL1.STREn, allowing explicit stores to the Guarded Control Stack.
+
+* Any unknown flags will cause PR_SET_SHADOW_STACK_STATUS to return -EINVAL.
+
+* PR_LOCK_SHADOW_STACK_STATUS is passed a bitmask of features with the same
+  values as used for PR_SET_SHADOW_STACK_STATUS. Any future changes to the
+  status of the specified GCS mode bits will be rejected.
+
+* PR_LOCK_SHADOW_STACK_STATUS allows any bit to be locked; this allows
+  userspace to prevent changes to any future features.
+
+* There is no support for a process to remove a lock that has been set for
+  it.
+
+* PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS affect only the
+  thread that called them; any other running threads will be unaffected.
+
+* New threads inherit the GCS configuration of the thread that created them.
+
+* GCS is disabled on exec().
+
+* The current GCS configuration for a thread may be read with the
+  PR_GET_SHADOW_STACK_STATUS prctl(), which returns the same flags that
+  are passed to PR_SET_SHADOW_STACK_STATUS.
+
+* If GCS is disabled for a thread after having previously been enabled then
+  the stack will remain allocated for the lifetime of the thread. At present
+  any attempt to re-enable GCS for the thread will be rejected; this may be
+  revisited in future.
+
+* It should be noted that since enabling GCS will result in GCS becoming
+  active immediately it is not normally possible to return from the function
+  that invoked the prctl() that enabled GCS. It is expected that the normal
+  usage will be that GCS is enabled very early in execution of a program
+  (see the sketch below).
+
+
+
+3. Allocation of Guarded Control Stacks
+----------------------------------------
+
+* When GCS is enabled for a thread a new Guarded Control Stack will be
+  allocated for it of half the standard stack size or 2 gigabytes,
+  whichever is smaller.
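A sketch of the expected usage, assuming the PR_* constants from
<linux/prctl.h> and HWCAP_GCS from the uapi hwcap header. A raw,
always-inlined syscall is used rather than the libc prctl() wrapper since,
as noted above, a RET in any function entered before GCS was enabled would
be checked against the still-empty stack::

  #include <stdlib.h>
  #include <sys/auxv.h>
  #include <asm/hwcap.h>
  #include <asm/unistd.h>
  #include <linux/prctl.h>

  static inline __attribute__((always_inline))
  long raw_prctl(long op, long arg)
  {
          register long x8 asm("x8") = __NR_prctl;
          register long x0 asm("x0") = op;
          register long x1 asm("x1") = arg;
          register long x2 asm("x2") = 0;
          register long x3 asm("x3") = 0;
          register long x4 asm("x4") = 0;

          asm volatile("svc #0"
                       : "+r" (x0)
                       : "r" (x8), "r" (x1), "r" (x2), "r" (x3), "r" (x4)
                       : "memory");
          return x0;
  }

  int main(void)
  {
          if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
                  exit(1);

          if (raw_prctl(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE))
                  exit(1);

          /* Calls made from this point on return via the GCS, but do not
           * return from main() itself: its caller's return address is not
           * on the newly allocated stack. */
          exit(0);
  }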
+
+* When a new thread is created by a thread which has GCS enabled then a
+  new Guarded Control Stack will be allocated for the new thread with
+  half the size of the standard stack.
+
+* When a stack is allocated by enabling GCS or during thread creation then
+  the top 8 bytes of the stack will be initialised to 0 and GCSPR_EL0 will
+  be set to point to the address of this 0 value; this can be used to
+  detect the top of the stack.
+
+* Additional Guarded Control Stacks can be allocated using the
+  map_shadow_stack() system call.
+
+* Stacks allocated using map_shadow_stack() can optionally have an end of
+  stack marker and cap placed at the top of the stack. If the flag
+  SHADOW_STACK_SET_TOKEN is specified a cap will be placed on the stack:
+  if SHADOW_STACK_SET_MARKER is not specified the cap will be the top 8
+  bytes of the stack, and if it is specified then the cap will be the next
+  8 bytes. While specifying just SHADOW_STACK_SET_MARKER by itself is
+  valid, since the marker is all bits 0 it has no observable effect.
+
+* Stacks allocated using map_shadow_stack() must have a size which is a
+  multiple of 8 bytes and larger than 8 bytes, and must be 8 byte aligned.
+
+* An address can be specified to map_shadow_stack(); if one is provided
+  then it must be aligned to a page boundary.
+
+* When a thread is freed the Guarded Control Stack initially allocated for
+  that thread will be freed. Note carefully that if the stack has been
+  switched this may not be the stack currently in use by the thread.
+
+
+4. Signal handling
+--------------------
+
+* A new signal frame record gcs_context encodes the current GCS mode and
+  pointer for the interrupted context on signal delivery. This will always
+  be present on systems that support GCS.
+
+* The record contains a flag field which reports the current GCS
+  configuration for the interrupted context as PR_GET_SHADOW_STACK_STATUS
+  would.
+
+* The signal handler is run with the same GCS configuration as the
+  interrupted context.
+
+* When GCS is enabled for the interrupted thread a signal handling specific
+  GCS cap token will be written to the GCS; this is an architectural GCS cap
+  with the token type (bits 0..11) all clear. The GCSPR_EL0 reported in the
+  signal frame will point to this cap token.
+
+* The signal handler will use the same GCS as the interrupted context.
+
+* When GCS is enabled on signal entry a frame with the address of the signal
+  return handler will be pushed onto the GCS, allowing return from the signal
+  handler via RET as normal. This will not be reported in the gcs_context in
+  the signal frame.
+
+
+5. Signal return
+-----------------
+
+When returning from a signal handler:
+
+* If there is a gcs_context record in the signal frame then the GCS flags
+  and GCSPR_EL0 will be restored from that context prior to further
+  validation.
+
+* If there is no gcs_context record in the signal frame then the GCS
+  configuration will be unchanged.
+
+* If GCS is enabled on return from a signal handler then GCSPR_EL0 must
+  point to a valid GCS signal cap record; this will be popped from the
+  GCS prior to signal return.
+
+* If the GCS configuration is locked when returning from a signal then any
+  attempt to change the GCS configuration will be treated as an error. This
+  is true even if GCS was not enabled prior to signal entry.
+
+* GCS may be disabled via signal return but any attempt to enable GCS via
+  signal return will be rejected.
+
+
+6. ptrace extensions
+---------------------
+
+* A new regset NT_ARM_GCS is defined for use with PTRACE_GETREGSET and
+  PTRACE_SETREGSET.
+
+* The GCS mode, including enable and disable, may be configured via ptrace.
+  If GCS is enabled via ptrace no new GCS will be allocated for the thread.
+
+* Configuration via ptrace ignores locking of GCS mode bits.
+
+
+7. ELF coredump extensions
+---------------------------
+
+* NT_ARM_GCS notes will be added to each coredump for each thread of the
+  dumped process. The contents will be equivalent to the data that would
+  have been read if a PTRACE_GETREGSET of the corresponding type were
+  executed for each thread when the coredump was generated.
+
+
+
+8. /proc extensions
+--------------------
+
+* Guarded Control Stack pages will include "ss" in their VmFlags in
+  /proc/<pid>/smaps.
diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst
index 78544de0a8a9..6a012c98bdcd 100644
--- a/Documentation/arch/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@@ -10,16 +10,19 @@ ARM64 Architecture
    acpi_object_usage
    amu
    arm-acpi
+   arm-cca
    asymmetric-32bit
    booting
    cpu-feature-registers
    cpu-hotplug
    elf_hwcaps
+   gcs
    hugetlbpage
    kdump
    legacy_instructions
    memory
    memory-tagging-extension
+   mops
    perf
    pointer-authentication
    ptdump
diff --git a/Documentation/arch/arm64/mops.rst b/Documentation/arch/arm64/mops.rst
new file mode 100644
index 000000000000..2ef5b147f8dc
--- /dev/null
+++ b/Documentation/arch/arm64/mops.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Memory copy/set instructions (MOPS)
+===================================
+
+A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
+instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).
+
+A main or epilogue instruction can take a MOPS exception for various reasons,
+for example when a task is migrated to a CPU with a different MOPS
+implementation, or when the instruction's alignment and size requirements are
+not met. The software exception handler is then expected to reset the registers
+and restart execution from the prologue instruction. Normally this is handled
+by the kernel.
+
+For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
+the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
+
+.. _arm64_mops_hyp:
+
+Hypervisor requirements
+-----------------------
+
+A hypervisor running a Linux guest must handle all MOPS exceptions from the
+guest kernel, as Linux may not be able to handle the exception at all times.
+For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
+to another physical CPU with a different MOPS implementation.
+
+To do this, the hypervisor must:
+
+  - Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.
+
+  - Have an exception handler that implements the algorithm from the Arm ARM
+    rules CNTMJ and MWFQH.
+
+  - Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
+    potential step of the current instruction.
+
+    Note: Clearing PSTATE.SS is needed so that a single step exception is
+    taken on the next instruction (the prologue instruction). Otherwise the
+    prologue would get silently stepped over and the single step exception
+    would be taken on the main instruction. Note that if the guest
+    instruction is not being stepped then clearing PSTATE.SS has no effect.
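To make the prologue/main/epilogue structure concrete, here is an
illustrative (not kernel) userspace sketch of a MOPS-based memcpy. It
assumes an assembler that accepts ".arch_extension mops", the same
directive probed by the AS_HAS_MOPS symbol added elsewhere in this series::

  #include <stddef.h>

  static void *mops_memcpy(void *dst, const void *src, size_t n)
  {
          void *d = dst;

          /* CPYP/CPYM/CPYE update all three registers as they execute,
           * so the three instructions must be issued back to back on
           * the same register values. */
          asm volatile(".arch_extension mops\n"
                       "cpyp [%0]!, [%1]!, %2!\n"
                       "cpym [%0]!, [%1]!, %2!\n"
                       "cpye [%0]!, [%1]!, %2!\n"
                       : "+r" (d), "+r" (src), "+r" (n)
                       :
                       : "cc", "memory");
          return dst;
  }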
diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
index be317d457417..b2fa01f85cb5 100644
--- a/Documentation/arch/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@@ -346,6 +346,10 @@ The regset data starts with struct user_za_header, containing:
 
 * Writes to NT_ARM_ZT will set PSTATE.ZA to 1.
 
+* If any register data is provided along with SME_PT_VL_ONEXEC then the
+  register data will be interpreted with the current vector length, not
+  the vector length configured for use on exec.
+
 8. ELF coredump extensions
 ---------------------------
 
diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst
index 8d8837fc39ec..28152492c29c 100644
--- a/Documentation/arch/arm64/sve.rst
+++ b/Documentation/arch/arm64/sve.rst
@@ -402,6 +402,10 @@ The regset data starts with struct user_sve_header, containing:
   streaming mode and any SETREGSET of NT_ARM_SSVE will enter streaming
   mode if the target was not in streaming mode.
 
+* If any register data is provided along with SVE_PT_VL_ONEXEC then the
+  register data will be interpreted with the current vector length, not
+  the vector length configured for use on exec.
+
 * The effect of writing a partial, incomplete payload is unspecified.
 
diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml
index 528544d0a161..a148ff54f2b8 100644
--- a/Documentation/devicetree/bindings/arm/pmu.yaml
+++ b/Documentation/devicetree/bindings/arm/pmu.yaml
@@ -74,6 +74,7 @@ properties:
           - qcom,krait-pmu
           - qcom,scorpion-pmu
           - qcom,scorpion-mp-pmu
+          - samsung,mongoose-pmu
 
   interrupts:
     # Don't know how many CPUs, so no constraints to specify
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index 37e8b98f2cdc..8597ea625edb 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -31,7 +31,9 @@ properties:
           - const: fsl,imx8dxl-ddr-pmu
           - const: fsl,imx8-ddr-pmu
       - items:
-          - const: fsl,imx95-ddr-pmu
+          - enum:
+              - fsl,imx91-ddr-pmu
+              - fsl,imx95-ddr-pmu
           - const: fsl,imx93-ddr-pmu
 
   reg:
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index e834779d9611..6a882c57a7e7 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -579,7 +579,7 @@ encoded manner.
 The codes are the following:
 
     mt    arm64 MTE allocation tags are enabled
     um    userfaultfd missing tracking
     uw    userfaultfd wr-protect tracking
-    ss    shadow stack page
+    ss    shadow/guarded control stack page
     sl    sealed
 
     ==    =======================================
diff --git a/MAINTAINERS b/MAINTAINERS
index c58ec9f1ec2d..82161bc70b51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13816,6 +13816,12 @@ S:	Supported
 F:	Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
 F:	drivers/net/ethernet/marvell/octeontx2/af/
 
+MARVELL PEM PMU DRIVER
+M:	Linu Cherian
+M:	Gowthami Thiagarajan
+S:	Supported
+F:	drivers/perf/marvell_pem_pmu.c
+
 MARVELL PRESTERA ETHERNET SWITCH DRIVER
 M:	Taras Chornyi
 S:	Supported
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f63ba8986b24..2ec0e5e83fc9 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -212,6 +212,8 @@ static inline void write_pmuserenr(u32 val)
 	write_sysreg(val, PMUSERENR);
 }
 
+static inline void write_pmuacr(u64 val) {}
+
 static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
 static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
@@ -231,6 +233,7 @@ static inline void kvm_vcpu_pmu_resync_el0(void) {}
 #define ARMV8_PMU_DFR_VER_V3P1 0x4
 #define ARMV8_PMU_DFR_VER_V3P4 0x5
 #define ARMV8_PMU_DFR_VER_V3P5 0x6
+#define ARMV8_PMU_DFR_VER_V3P9 0x9
 #define ARMV8_PMU_DFR_VER_IMP_DEF 0xF
 
 static inline bool pmuv3_implemented(int pmuver)
@@ -249,6 +252,11 @@ static inline bool is_pmuv3p5(int pmuver)
 	return pmuver >= ARMV8_PMU_DFR_VER_V3P5;
 }
 
+static inline bool is_pmuv3p9(int pmuver)
+{
+	return pmuver >= ARMV8_PMU_DFR_VER_V3P9;
+}
+
 static inline u64 read_pmceid0(void)
 {
 	u64 val = read_sysreg(PMCEID0);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 22f8a7bca6d2..d743737bf9ce 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,6 +21,7 @@ config ARM64
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CACHE_LINE_SIZE
+	select ARCH_HAS_CC_PLATFORM
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -38,12 +39,15 @@ config ARM64
 	select ARCH_HAS_MEM_ENCRYPT
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_HW_PTE_YOUNG
 	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_SET_MEMORY
+	select ARCH_HAS_MEM_ENCRYPT
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select ARCH_STACKWALK
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
@@ -2159,6 +2163,9 @@ config ARM64_EPAN
 	  if the cpu does not implement the feature.
 endmenu # "ARMv8.7 architectural features"
 
+config AS_HAS_MOPS
+	def_bool $(as-instr,.arch_extension mops)
+
 menu "ARMv8.9 architectural features"
 
 config ARM64_POE
@@ -2180,8 +2187,44 @@ config ARCH_PKEY_BITS
 	int
 	default 3
 
+config ARM64_HAFT
+	bool "Support for Hardware managed Access Flag for Table Descriptors"
+	depends on ARM64_HW_AFDBM
+	default y
+	help
+	  The ARMv8.9/ARMv9.5 architecture introduces the Hardware managed
+	  Access Flag for Table descriptors feature. When enabled, an
+	  architecturally executed memory access will update the Access Flag
+	  in each Table descriptor which is accessed during the translation
+	  table walk and for which the Access Flag is 0.
+	  The Access Flag of the Table descriptor uses the same bit as
+	  PTE_AF.
+
+	  The feature will only be enabled if all the CPUs in the system
+	  support this feature. If unsure, say Y.
+
 endmenu # "ARMv8.9 architectural features"
 
+menu "v9.4 architectural features"
+
+config ARM64_GCS
+	bool "Enable support for Guarded Control Stack (GCS)"
+	default y
+	select ARCH_HAS_USER_SHADOW_STACK
+	select ARCH_USES_HIGH_VMA_FLAGS
+	depends on !UPROBES
+	help
+	  Guarded Control Stack (GCS) provides support for a separate
+	  stack with restricted access which contains only return
+	  addresses. This can be used to harden against some attacks
+	  by comparing the return address used by the program with what
+	  is stored in the GCS, and may also be used to efficiently obtain
+	  the call stack for applications such as profiling.
+
+	  The feature is detected at runtime, and will remain disabled
+	  if the system does not implement the feature.
+
+endmenu # "v9.4 architectural features"
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index 468a049bc63b..8a777dec8d88 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -152,6 +152,11 @@ static inline void write_pmuserenr(u32 val)
 	write_sysreg(val, pmuserenr_el0);
 }
 
+static inline void write_pmuacr(u64 val)
+{
+	write_sysreg_s(val, SYS_PMUACR_EL1);
+}
+
 static inline u64 read_pmceid0(void)
 {
 	return read_sysreg(pmceid0_el0);
@@ -178,4 +183,9 @@ static inline bool is_pmuv3p5(int pmuver)
 	return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;
 }
 
+static inline bool is_pmuv3p9(int pmuver)
+{
+	return pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P9;
+}
+
 #endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bc0b0d75acef..3d8d534a7a77 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -248,13 +248,6 @@ alternative_endif
 	ldr	\dst, [\dst, \tmp]
 	.endm
 
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
 /*
  * read_ctr - read CTR_EL0.
If the system has mismatched register fields, * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index a6e5b07b64fd..a08a1212ffbb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -42,6 +42,8 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_ARM64_BTI); case ARM64_HAS_TLB_RANGE: return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); + case ARM64_HAS_S1POE: + return IS_ENABLED(CONFIG_ARM64_POE); case ARM64_UNMAP_KERNEL_AT_EL0: return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); case ARM64_WORKAROUND_843419: diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 3d261cc123c1..3d63c20ccefc 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include #include -#define MAX_CPU_FEATURES 128 +#define MAX_CPU_FEATURES 192 #define cpu_feature(x) KERNEL_HWCAP_ ## x #define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0 @@ -438,6 +438,7 @@ void cpu_set_feature(unsigned int num); bool cpu_have_feature(unsigned int num); unsigned long cpu_get_elf_hwcap(void); unsigned long cpu_get_elf_hwcap2(void); +unsigned long cpu_get_elf_hwcap3(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) @@ -834,8 +835,19 @@ static inline bool system_supports_lpa2(void) static inline bool system_supports_poe(void) { - return IS_ENABLED(CONFIG_ARM64_POE) && - alternative_has_cap_unlikely(ARM64_HAS_S1POE); + return alternative_has_cap_unlikely(ARM64_HAS_S1POE); +} + +static inline bool system_supports_gcs(void) +{ + return IS_ENABLED(CONFIG_ARM64_GCS) && + alternative_has_cap_unlikely(ARM64_HAS_GCS); +} + +static inline bool system_supports_haft(void) +{ + return IS_ENABLED(CONFIG_ARM64_HAFT) && + cpus_have_final_cap(ARM64_HAFT); } int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2f..fbb5c99eb2f9 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -132,7 +132,7 @@ static inline void local_daif_inherit(struct pt_regs *regs) trace_hardirqs_on(); if (system_uses_irq_prio_masking()) - gic_write_pmr(regs->pmr_save); + gic_write_pmr(regs->pmr); /* * We can't use local_daif_restore(regs->pstate) here as diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 13d437bcbf58..8f6ba31b8658 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -105,6 +105,7 @@ void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); int kernel_active_single_step(void); void kernel_rewind_single_step(struct pt_regs *regs); +void kernel_fastforward_single_step(struct pt_regs *regs); #ifdef CONFIG_HAVE_HW_BREAKPOINT int reinstall_suspended_bps(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index e0ffdf13a18b..27086a81eae3 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -27,6 +27,14 @@ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ mov_q x0, HCRX_HOST_FLAGS + + /* Enable GCS if supported */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_GCSEn + +.Lset_hcrx_\@: 
msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: .endm @@ -200,6 +208,16 @@ orr x0, x0, #HFGxTR_EL2_nPOR_EL0 .Lskip_poe_fgt_\@: + /* GCS depends on PIE so we don't check it if PIE is absent */ + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable traps of access to GCS registers at EL0 and EL1 */ + orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK + orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK + +.Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 msr_s SYS_HFGITR_EL2, xzr @@ -215,6 +233,17 @@ .Lskip_fgt_\@: .endm +.macro __init_el2_gcs + mrs_s x1, SYS_ID_AA64PFR1_EL1 + ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 + cbz x1, .Lskip_gcs_\@ + + /* Ensure GCS is not enabled when we start trying to do BLs */ + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr +.Lskip_gcs_\@: +.endm + .macro __init_el2_nvhe_prepare_eret mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 @@ -240,6 +269,7 @@ __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt + __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index da6d2c1c0b03..d1b1a33f9a8b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -51,7 +51,8 @@ #define ESR_ELx_EC_FP_EXC32 UL(0x28) /* Unallocated EC: 0x29 - 0x2B */ #define ESR_ELx_EC_FP_EXC64 UL(0x2C) -/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_GCS UL(0x2D) +/* Unallocated EC: 0x2E */ #define ESR_ELx_EC_SERROR UL(0x2F) #define ESR_ELx_EC_BREAKPT_LOW UL(0x30) #define ESR_ELx_EC_BREAKPT_CUR UL(0x31) @@ -386,6 +387,31 @@ #define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) #define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) +/* ISS field definitions for GCS */ +#define ESR_ELx_ExType_SHIFT (20) +#define ESR_ELx_ExType_MASK GENMASK(23, 20) +#define ESR_ELx_Raddr_SHIFT (10) +#define ESR_ELx_Raddr_MASK GENMASK(14, 10) +#define ESR_ELx_Rn_SHIFT (5) +#define ESR_ELx_Rn_MASK GENMASK(9, 5) +#define ESR_ELx_Rvalue_SHIFT 5 +#define ESR_ELx_Rvalue_MASK GENMASK(9, 5) +#define ESR_ELx_IT_SHIFT (0) +#define ESR_ELx_IT_MASK GENMASK(4, 0) + +#define ESR_ELx_ExType_DATA_CHECK 0 +#define ESR_ELx_ExType_EXLOCK 1 +#define ESR_ELx_ExType_STR 2 + +#define ESR_ELx_IT_RET 0 +#define ESR_ELx_IT_GCSPOPM 1 +#define ESR_ELx_IT_RET_KEYA 2 +#define ESR_ELx_IT_RET_KEYB 3 +#define ESR_ELx_IT_GCSSS1 4 +#define ESR_ELx_IT_GCSSS2 5 +#define ESR_ELx_IT_GCSPOPCX 6 +#define ESR_ELx_IT_GCSPOPX 7 + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index f296662590c7..d48fc16584cd 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -57,6 +57,8 @@ void do_el0_undef(struct pt_regs *regs, unsigned long esr); void do_el1_undef(struct pt_regs *regs, unsigned long esr); void do_el0_bti(struct pt_regs *regs); void do_el1_bti(struct pt_regs *regs, unsigned long esr); +void do_el0_gcs(struct pt_regs *regs, unsigned long esr); +void do_el1_gcs(struct pt_regs *regs, unsigned long esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr, struct pt_regs *regs); void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs); @@ -73,6 +75,7 @@ void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); void do_el1_fpac(struct pt_regs *regs, unsigned long esr); void do_el0_mops(struct pt_regs *regs, unsigned long esr); +void do_el1_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct 
pt_regs *regs, unsigned long esr); void do_signal(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/gcs.h b/arch/arm64/include/asm/gcs.h new file mode 100644 index 000000000000..f50660603ecf --- /dev/null +++ b/arch/arm64/include/asm/gcs.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ +#ifndef __ASM_GCS_H +#define __ASM_GCS_H + +#include +#include + +struct kernel_clone_args; +struct ksignal; + +static inline void gcsb_dsync(void) +{ + asm volatile(".inst 0xd503227f" : : : "memory"); +} + +static inline void gcsstr(u64 *addr, u64 val) +{ + register u64 *_addr __asm__ ("x0") = addr; + register long _val __asm__ ("x1") = val; + + /* GCSSTTR x1, x0 */ + asm volatile( + ".inst 0xd91f1c01\n" + : + : "rZ" (_val), "r" (_addr) + : "memory"); +} + +static inline void gcsss1(u64 Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static inline u64 gcsss2(void) +{ + u64 Xt; + + asm volatile( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK \ + (PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH) + +#ifdef CONFIG_ARM64_GCS + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE; +} + +void gcs_set_el0_mode(struct task_struct *task); +void gcs_free(struct task_struct *task); +void gcs_preserve_current_state(void); +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); + +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + unsigned long cur_val = task->thread.gcs_el0_mode; + + cur_val &= task->thread.gcs_el0_locked; + new_val &= task->thread.gcs_el0_locked; + + if (cur_val != new_val) + return -EBUSY; + + return 0; +} + +#else + +static inline bool task_gcs_el0_enabled(struct task_struct *task) +{ + return false; +} + +static inline void gcs_set_el0_mode(struct task_struct *task) { } +static inline void gcs_free(struct task_struct *task) { } +static inline void gcs_preserve_current_state(void) { } +static inline unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + return -ENOTSUPP; +} +static inline int gcs_check_locked(struct task_struct *task, + unsigned long new_val) +{ + return 0; +} + +#endif + +#endif diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 293f880865e8..c6dff3e69539 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -11,6 +11,7 @@ #define __ASM_HUGETLB_H #include +#include #include #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION @@ -21,6 +22,13 @@ extern bool arch_hugetlb_migration_supported(struct hstate *h); static inline void arch_clear_hugetlb_flags(struct folio *folio) { clear_bit(PG_dcache_clean, &folio->flags); + +#ifdef CONFIG_ARM64_MTE + if (system_supports_mte()) { + clear_bit(PG_mte_tagged, &folio->flags); + clear_bit(PG_mte_lock, &folio->flags); + } +#endif } #define arch_clear_hugetlb_flags arch_clear_hugetlb_flags diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index a775adddecf2..2b6c61c608e2 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -92,6 +92,7 @@ #define KERNEL_HWCAP_SB __khwcap_feature(SB) #define KERNEL_HWCAP_PACA __khwcap_feature(PACA) #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) +#define 
KERNEL_HWCAP_GCS __khwcap_feature(GCS) #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) @@ -159,17 +160,21 @@ #define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2) #define KERNEL_HWCAP_POE __khwcap2_feature(POE) +#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) + /* * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. */ #define ELF_HWCAP cpu_get_elf_hwcap() #define ELF_HWCAP2 cpu_get_elf_hwcap2() +#define ELF_HWCAP3 cpu_get_elf_hwcap3() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) #define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) -extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; +#define COMPAT_ELF_HWCAP3 (compat_elf_hwcap3) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3; #endif enum { diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 8c0a36f72d6f..e390c432f546 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -353,6 +353,7 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) +__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) @@ -575,6 +576,11 @@ static __always_inline u32 aarch64_insn_gen_nop(void) return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); } +static __always_inline bool aarch64_insn_is_nop(u32 insn) +{ + return insn == aarch64_insn_gen_nop(); +} + u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 1ada23a6ec19..8688343b71f2 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * Generic IO read/write. These perform native-endian accesses. 
@@ -318,4 +319,11 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap +static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size) +{ + if (unlikely(is_realm_world())) + return __arm64_is_protected_mmio(phys_addr, size); + return false; +} + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index bf05a77873a4..fd5a08450b12 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -26,7 +26,6 @@ #define SWAPPER_SKIP_LEVEL 0 #endif #define SWAPPER_BLOCK_SIZE (UL(1) << SWAPPER_BLOCK_SHIFT) -#define SWAPPER_TABLE_SHIFT (SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3) #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL) #define INIT_IDMAP_PGTABLE_LEVELS (IDMAP_LEVELS - SWAPPER_SKIP_LEVEL) diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h index b0c9a86b13a4..f8f78f622dd2 100644 --- a/arch/arm64/include/asm/mem_encrypt.h +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -2,6 +2,8 @@ #ifndef __ASM_MEM_ENCRYPT_H #define __ASM_MEM_ENCRYPT_H +#include + struct arm64_mem_crypt_ops { int (*encrypt)(unsigned long addr, int numpages); int (*decrypt)(unsigned long addr, int numpages); @@ -12,4 +14,11 @@ int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); +int realm_register_memory_enc_ops(void); + +static inline bool force_dma_unencrypted(struct device *dev) +{ + return is_realm_world(); +} + #endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 798d965760d4..1d53022fc7e1 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -41,9 +41,12 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file, * backed by tags-capable memory. The vm_flags may be overridden by a * filesystem supporting MTE (RAM-based). */ - if (system_supports_mte() && - ((flags & MAP_ANONYMOUS) || shmem_file(file))) - return VM_MTE_ALLOWED; + if (system_supports_mte()) { + if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) + return VM_MTE_ALLOWED; + if (shmem_file(file)) + return VM_MTE_ALLOWED; + } return 0; } @@ -66,11 +69,26 @@ static inline bool arch_validate_prot(unsigned long prot, static inline bool arch_validate_flags(unsigned long vm_flags) { - if (!system_supports_mte()) - return true; + if (system_supports_mte()) { + /* + * only allow VM_MTE if VM_MTE_ALLOWED has been set + * previously + */ + if ((vm_flags & VM_MTE) && !(vm_flags & VM_MTE_ALLOWED)) + return false; + } + + if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { + /* An executable GCS isn't a good idea. 
*/ + if (vm_flags & VM_EXEC) + return false; + + /* The memory management core should prevent this */ + VM_WARN_ON(vm_flags & VM_SHARED); + } + + return true; - /* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */ - return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED); } #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 7c09d47e09cb..48b3d9553b67 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -311,6 +312,14 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return por_el0_allows_pkey(vma_pkey(vma), write, execute); } +#define deactivate_mm deactivate_mm +static inline void deactivate_mm(struct task_struct *tsk, + struct mm_struct *mm) +{ + gcs_free(tsk); +} + + #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 0f84518632b4..6567df8ec8ca 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -41,6 +41,8 @@ void mte_free_tag_storage(char *storage); static inline void set_page_mte_tagged(struct page *page) { + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + /* * Ensure that the tags written prior to this function are visible * before the page flags update. @@ -53,6 +55,8 @@ static inline bool page_mte_tagged(struct page *page) { bool ret = test_bit(PG_mte_tagged, &page->flags); + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + /* * If the page is tagged, ensure ordering with a likely subsequent * read of the tags. @@ -76,6 +80,8 @@ static inline bool page_mte_tagged(struct page *page) */ static inline bool try_page_mte_tagging(struct page *page) { + VM_WARN_ON_ONCE(folio_test_hugetlb(page_folio(page))); + if (!test_and_set_bit(PG_mte_lock, &page->flags)) return true; @@ -157,6 +163,67 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_ARM64_MTE) +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * Ensure that the tags written prior to this function are visible + * before the folio flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &folio->flags); + +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + bool ret = test_bit(PG_mte_tagged, &folio->flags); + + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + /* + * If the folio is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + VM_WARN_ON_ONCE(!folio_test_hugetlb(folio)); + + if (!test_and_set_bit(PG_mte_lock, &folio->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. 
+ */ + smp_cond_load_acquire(&folio->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} +#else +static inline void folio_set_hugetlb_mte_tagged(struct folio *folio) +{ +} + +static inline bool folio_test_hugetlb_mte_tagged(struct folio *folio) +{ + return false; +} + +static inline bool folio_try_hugetlb_mte_tagging(struct folio *folio) +{ + return false; +} +#endif + static inline void mte_disable_tco_entry(struct task_struct *task) { if (!system_supports_mte()) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 8ff5f2a2579e..e75422864d1b 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -28,7 +28,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - pudval_t pudval = PUD_TYPE_TABLE; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF; pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); @@ -50,7 +50,7 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - p4dval_t p4dval = P4D_TYPE_TABLE; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF; p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); @@ -79,7 +79,7 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp) { - pgdval_t pgdval = PGD_TYPE_TABLE; + pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_AF; pgdval |= (mm == &init_mm) ? PGD_TABLE_UXN : PGD_TABLE_PXN; __pgd_populate(pgdp, __pa(p4dp), pgdval); @@ -127,14 +127,16 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { VM_BUG_ON(mm == &init_mm); - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); + __pmd_populate(pmdp, page_to_phys(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN); } #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fd330c1db289..c78a988cca93 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -99,6 +99,7 @@ #define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0) #define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0) +#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59) #define PGD_TABLE_UXN (_AT(pgdval_t, 1) << 60) @@ -110,6 +111,7 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -121,6 +123,7 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) 
<< 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -131,6 +134,7 @@ #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2a11d0c10760..9f9cf13bbd95 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -35,7 +35,6 @@ #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_MAYBE_NG | PTE_MAYBE_SHARED | PTE_AF) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_MAYBE_NG | PMD_MAYBE_SHARED | PMD_SECT_AF) @@ -68,8 +67,12 @@ #include #include +#include extern bool arm64_use_ng_mappings; +extern unsigned long prot_ns_shared; + +#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0) #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) @@ -144,15 +147,23 @@ static inline bool __pure lpa2_is_enabled(void) /* 6: PTE_PXN | PTE_WRITE */ /* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ /* 8: PAGE_KERNEL_ROX PTE_UXN */ -/* 9: PTE_UXN | PTE_USER */ +/* 9: PAGE_GCS_RO PTE_UXN | PTE_USER */ /* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ -/* b: PTE_UXN | PTE_WRITE | PTE_USER */ +/* b: PAGE_GCS PTE_UXN | PTE_WRITE | PTE_USER */ /* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ /* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ /* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ /* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ +#define _PAGE_GCS (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_WRITE | PTE_USER) +#define _PAGE_GCS_RO (_PAGE_DEFAULT | PTE_NG | PTE_UXN | PTE_USER) + +#define PAGE_GCS __pgprot(_PAGE_GCS) +#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO) + #define PIE_E0 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \ @@ -160,6 +171,8 @@ static inline bool __pure lpa2_is_enabled(void) PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O)) #define PIE_E1 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c329ea061dc9..6986345b537a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -265,8 +265,7 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - pte = set_pte_bit(pte, __pgprot(PTE_CONT)); - return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); + return set_pte_bit(pte, __pgprot(PTE_CONT)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -338,7 +337,7 @@ static inline pte_t __ptep_get(pte_t *ptep) } extern void __sync_icache_dcache(pte_t pteval); -bool pgattr_change_is_safe(u64 old, u64 new); +bool pgattr_change_is_safe(pteval_t old, pteval_t new); /* * PTE bits configuration in the presence of hardware Dirty 
Bit Management @@ -438,11 +437,6 @@ static inline void __set_ptes(struct mm_struct *mm, } } -/* - * Huge pte definitions. - */ -#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) - /* * Hugetlb definitions. */ @@ -684,6 +678,11 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, #define pgprot_nx(prot) \ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) +#define pgprot_decrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED) +#define pgprot_encrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, 0) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -927,6 +926,9 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr) { + /* Ensure that 'p4dp' indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D); + return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr); } @@ -1051,6 +1053,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr) { + /* Ensure that 'pgdp' indexes a page table according to 'addr' */ + VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD); + return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr); } @@ -1259,15 +1264,17 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma, return young; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ + VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ static inline pte_t __ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) @@ -1502,6 +1509,10 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, */ #define arch_has_hw_pte_young cpu_has_hw_af +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG +#define arch_has_hw_nonleaf_pmd_young system_supports_haft +#endif + /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. 
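The pgtable.h hunks above wire FEAT_HAFT into the generic non-leaf PMD aging hooks: with CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG, pmdp_test_and_clear_young() may now be applied to PMD table entries, and arch_has_hw_nonleaf_pmd_young() reports whether the hardware actually maintains the table-descriptor Access Flag. As a minimal sketch of the intended use — the helper below is hypothetical and not part of this patch:

	/*
	 * Illustration only, assuming CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y and
	 * a CPU implementing FEAT_HAFT; the helper name is made up. A
	 * page-aging walker can skip a whole PMD range whose table entry has
	 * a clear hardware-managed Access Flag.
	 */
	static bool pmd_range_was_accessed(struct vm_area_struct *vma,
					   unsigned long addr, pmd_t *pmdp)
	{
		/* No HW non-leaf AF tracking: assume young and scan the PTEs. */
		if (!arch_has_hw_nonleaf_pmd_young())
			return true;

		/* Test and clear the Access Flag in the PMD table entry. */
		return pmdp_test_and_clear_young(vma, addr, pmdp);
	}
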
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 006946745352..d49368886309 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -9,21 +9,18 @@ #include -typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); -/* architecture specific copy of original instruction */ struct arch_probe_insn { - probe_opcode_t *insn; - pstate_check_t *pstate_cc; probes_handler_t *handler; - /* restore address after step xol */ - unsigned long restore; }; #ifdef CONFIG_KPROBES -typedef u32 kprobe_opcode_t; +typedef __le32 kprobe_opcode_t; struct arch_specific_insn { struct arch_probe_insn api; + kprobe_opcode_t *xol_insn; + /* restore address after step xol */ + unsigned long xol_restore; }; #endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 1438424f0064..1bf1a3b16e88 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -185,6 +185,13 @@ struct thread_struct { u64 svcr; u64 tpidr2_el0; u64 por_el0; +#ifdef CONFIG_ARM64_GCS + unsigned int gcs_el0_mode; + unsigned int gcs_el0_locked; + u64 gcspr_el0; + u64 gcs_base; + u64 gcs_size; +#endif }; static inline unsigned int thread_get_vl(struct thread_struct *thread, @@ -285,22 +292,44 @@ void tls_preserve_current_state(void); .fpsimd_cpu = NR_CPUS, \ } -static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) +static inline void start_thread_common(struct pt_regs *regs, unsigned long pc, + unsigned long pstate) { - s32 previous_syscall = regs->syscallno; - memset(regs, 0, sizeof(*regs)); - regs->syscallno = previous_syscall; - regs->pc = pc; + /* + * Ensure all GPRs are zeroed, and initialize PC + PSTATE. + * The SP (or compat SP) will be initialized later. + */ + regs->user_regs = (struct user_pt_regs) { + .pc = pc, + .pstate = pstate, + }; + /* + * To allow the syscalls:sys_exit_execve tracepoint we need to preserve + * syscallno, but do not need orig_x0 or the original GPRs. + */ + regs->orig_x0 = 0; + + /* + * An exec from a kernel thread won't have an existing PMR value. + */ if (system_uses_irq_prio_masking()) - regs->pmr_save = GIC_PRIO_IRQON; + regs->pmr = GIC_PRIO_IRQON; + + /* + * The pt_regs::stackframe field must remain valid throughout this + * function as a stacktrace can be taken at any time. Any user or + * kernel task should have a valid final frame. 
+ */ + WARN_ON_ONCE(regs->stackframe.record.fp != 0); + WARN_ON_ONCE(regs->stackframe.record.lr != 0); + WARN_ON_ONCE(regs->stackframe.type != FRAME_META_TYPE_FINAL); } static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_MODE_EL0t; + start_thread_common(regs, pc, PSR_MODE_EL0t); spectre_v4_enable_task_mitigation(current); regs->sp = sp; } @@ -309,15 +338,13 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - start_thread_common(regs, pc); - regs->pstate = PSR_AA32_MODE_USR; + unsigned long pstate = PSR_AA32_MODE_USR; if (pc & 1) - regs->pstate |= PSR_AA32_T_BIT; - -#ifdef __AARCH64EB__ - regs->pstate |= PSR_AA32_E_BIT; -#endif + pstate |= PSR_AA32_T_BIT; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + pstate |= PSR_AA32_E_BIT; + start_thread_common(regs, pc, pstate); spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 0abe975d68a8..47ff8654c5ec 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -98,6 +98,8 @@ #include #include +#include + /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -149,8 +151,7 @@ static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) /* * This struct defines the way the registers are stored on the stack during an - * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for - * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs. + * exception. struct user_pt_regs must form a prefix of struct pt_regs. */ struct pt_regs { union { @@ -163,23 +164,20 @@ struct pt_regs { }; }; u64 orig_x0; -#ifdef __AARCH64EB__ - u32 unused2; s32 syscallno; -#else - s32 syscallno; - u32 unused2; -#endif + u32 pmr; + u64 sdei_ttbr1; - /* Only valid when ARM64_HAS_GIC_PRIO_MASKING is enabled. */ - u64 pmr_save; - u64 stackframe[2]; + struct frame_record_meta stackframe; /* Only valid for some EL1 exceptions. */ u64 lockdep_hardirqs; u64 exit_rcu; }; +/* For correct stack alignment, pt_regs has to be a multiple of 16 bytes. */ +static_assert(IS_ALIGNED(sizeof(struct pt_regs), 16)); + static inline bool in_syscall(struct pt_regs const *regs) { return regs->syscallno != NO_SYSCALL; @@ -213,7 +211,7 @@ static inline void forget_syscall(struct pt_regs *regs) #define irqs_priority_unmasked(regs) \ (system_uses_irq_prio_masking() ? \ - (regs)->pmr_save == GIC_PRIO_IRQON : \ + (regs)->pmr == GIC_PRIO_IRQON : \ true) #define interrupts_enabled(regs) \ diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h new file mode 100644 index 000000000000..188cbb9b23f5 --- /dev/null +++ b/arch/arm64/include/asm/rsi.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 ARM Ltd. 
+ */ + +#ifndef __ASM_RSI_H_ +#define __ASM_RSI_H_ + +#include +#include +#include + +DECLARE_STATIC_KEY_FALSE(rsi_present); + +void __init arm64_rsi_init(void); + +bool __arm64_is_protected_mmio(phys_addr_t base, size_t size); + +static inline bool is_realm_world(void) +{ + return static_branch_unlikely(&rsi_present); +} + +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end, + enum ripas state, unsigned long flags) +{ + unsigned long ret; + phys_addr_t top; + + while (start != end) { + ret = rsi_set_addr_range_state(start, end, state, flags, &top); + if (ret || top < start || top > end) + return -EINVAL; + start = top; + } + + return 0; +} + +/* + * Convert the specified range to RAM. Do not use this if you rely on the + * contents of a page that may already be in RAM state. + */ +static inline int rsi_set_memory_range_protected(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_CHANGE_DESTROYED); +} + +/* + * Convert the specified range to RAM. Do not convert any pages that may have + * been DESTROYED, without our permission. + */ +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_NO_CHANGE_DESTROYED); +} + +static inline int rsi_set_memory_range_shared(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY, + RSI_CHANGE_DESTROYED); +} +#endif /* __ASM_RSI_H_ */ diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h new file mode 100644 index 000000000000..e6a211001bd3 --- /dev/null +++ b/arch/arm64/include/asm/rsi_cmds.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_CMDS_H +#define __ASM_RSI_CMDS_H + +#include + +#include + +#define RSI_GRANULE_SHIFT 12 +#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT) + +enum ripas { + RSI_RIPAS_EMPTY = 0, + RSI_RIPAS_RAM = 1, + RSI_RIPAS_DESTROYED = 2, + RSI_RIPAS_DEV = 3, +}; + +static inline unsigned long rsi_request_version(unsigned long req, + unsigned long *out_lower, + unsigned long *out_higher) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res); + + if (out_lower) + *out_lower = res.a1; + if (out_higher) + *out_higher = res.a2; + + return res.a0; +} + +static inline unsigned long rsi_get_realm_config(struct realm_config *cfg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg), + 0, 0, 0, 0, 0, 0, &res); + return res.a0; +} + +static inline unsigned long rsi_ipa_state_get(phys_addr_t start, + phys_addr_t end, + enum ripas *state, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_GET, + start, end, 0, 0, 0, 0, 0, + &res); + + if (res.a0 == RSI_SUCCESS) { + if (top) + *top = res.a1; + if (state) + *state = res.a2; + } + + return res.a0; +} + +static inline long rsi_set_addr_range_state(phys_addr_t start, + phys_addr_t end, + enum ripas state, + unsigned long flags, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state, + flags, 0, 0, 0, &res); + + if (top) + *top = res.a1; + + if (res.a2 != RSI_ACCEPT) + return -EPERM; + + return res.a0; +} + +/** + * rsi_attestation_token_init - Initialise the operation to retrieve an + * attestation token. + * + * @challenge: The challenge data to be used in the attestation token + * generation. 
+ * @size: Size of the challenge data in bytes.
+ *
+ * Initialises the attestation token generation and returns an upper bound
+ * on the attestation token size that can be used to allocate an adequate
+ * buffer. The caller is expected to subsequently call
+ * rsi_attestation_token_continue() to retrieve the attestation token data on
+ * the same CPU.
+ *
+ * Returns:
+ * On success, returns the upper limit of the attestation report size.
+ * Otherwise, -EINVAL
+ */
+static inline long
+rsi_attestation_token_init(const u8 *challenge, unsigned long size)
+{
+	struct arm_smccc_1_2_regs regs = { 0 };
+
+	/* The challenge must be at least 32 bytes and at most 64 bytes */
+	if (!challenge || size < 32 || size > 64)
+		return -EINVAL;
+
+	regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT;
+	memcpy(&regs.a1, challenge, size);
+	arm_smccc_1_2_smc(&regs, &regs);
+
+	if (regs.a0 == RSI_SUCCESS)
+		return regs.a1;
+
+	return -EINVAL;
+}
+
+/**
+ * rsi_attestation_token_continue - Continue the operation to retrieve an
+ *                                  attestation token.
+ *
+ * @granule: {I}PA of the Granule to which the token will be written.
+ * @offset:  Offset within Granule to start of buffer in bytes.
+ * @size:    The size of the buffer.
+ * @len:     The number of bytes written to the buffer.
+ *
+ * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller
+ * is expected to call rsi_attestation_token_init() before calling this
+ * function to retrieve the attestation token.
+ *
+ * Return:
+ * * %RSI_SUCCESS     - Attestation token retrieved successfully.
+ * * %RSI_INCOMPLETE  - Token generation is not complete.
+ * * %RSI_ERROR_INPUT - A parameter was not valid.
+ * * %RSI_ERROR_STATE - Attestation not in progress.
+ */
+static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule,
+							    unsigned long offset,
+							    unsigned long size,
+							    unsigned long *len)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE,
+			     granule, offset, size, 0, &res);
+
+	if (len)
+		*len = res.a1;
+	return res.a0;
+}
+
+#endif /* __ASM_RSI_CMDS_H */
diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h
new file mode 100644
index 000000000000..6cb070eca9e9
--- /dev/null
+++ b/arch/arm64/include/asm/rsi_smc.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_SMC_H_
+#define __ASM_RSI_SMC_H_
+
+#include <linux/arm-smccc.h>
+
+/*
+ * This file describes the Realm Services Interface (RSI) Application Binary
+ * Interface (ABI) for SMC calls made from within the Realm to the RMM and
+ * serviced by the RMM.
+ */
+
+/*
+ * The major version number of the RSI implementation. This is increased when
+ * the binary format or semantics of the SMC calls change.
+ */
+#define RSI_ABI_VERSION_MAJOR		UL(1)
+
+/*
+ * The minor version number of the RSI implementation. This is increased when
+ * a bug is fixed, or a feature is added without breaking binary compatibility.
+ */
+#define RSI_ABI_VERSION_MINOR		UL(0)
+
+#define RSI_ABI_VERSION			((RSI_ABI_VERSION_MAJOR << 16) | \
+					 RSI_ABI_VERSION_MINOR)
+
+#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16)
+#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF)
+
+#define RSI_SUCCESS		UL(0)
+#define RSI_ERROR_INPUT		UL(1)
+#define RSI_ERROR_STATE		UL(2)
+#define RSI_INCOMPLETE		UL(3)
+#define RSI_ERROR_UNKNOWN	UL(4)
+
+#define SMC_RSI_FID(n)		ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,	\
+						   ARM_SMCCC_SMC_64,	\
+						   ARM_SMCCC_OWNER_STANDARD, \
+						   n)
+
+/*
+ * Returns RSI version.
+ * + * arg1 == Requested interface revision + * ret0 == Status / error + * ret1 == Lower implemented interface revision + * ret2 == Higher implemented interface revision + */ +#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190) + +/* + * Read feature register. + * + * arg1 == Feature register index + * ret0 == Status / error + * ret1 == Feature register value + */ +#define SMC_RSI_FEATURES SMC_RSI_FID(0x191) + +/* + * Read measurement for the current Realm. + * + * arg1 == Index, which measurements slot to read + * ret0 == Status / error + * ret1 == Measurement value, bytes: 0 - 7 + * ret2 == Measurement value, bytes: 8 - 15 + * ret3 == Measurement value, bytes: 16 - 23 + * ret4 == Measurement value, bytes: 24 - 31 + * ret5 == Measurement value, bytes: 32 - 39 + * ret6 == Measurement value, bytes: 40 - 47 + * ret7 == Measurement value, bytes: 48 - 55 + * ret8 == Measurement value, bytes: 56 - 63 + */ +#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192) + +/* + * Extend Realm Extensible Measurement (REM) value. + * + * arg1 == Index, which measurements slot to extend + * arg2 == Size of realm measurement in bytes, max 64 bytes + * arg3 == Measurement value, bytes: 0 - 7 + * arg4 == Measurement value, bytes: 8 - 15 + * arg5 == Measurement value, bytes: 16 - 23 + * arg6 == Measurement value, bytes: 24 - 31 + * arg7 == Measurement value, bytes: 32 - 39 + * arg8 == Measurement value, bytes: 40 - 47 + * arg9 == Measurement value, bytes: 48 - 55 + * arg10 == Measurement value, bytes: 56 - 63 + * ret0 == Status / error + */ +#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193) + +/* + * Initialize the operation to retrieve an attestation token. + * + * arg1 == Challenge value, bytes: 0 - 7 + * arg2 == Challenge value, bytes: 8 - 15 + * arg3 == Challenge value, bytes: 16 - 23 + * arg4 == Challenge value, bytes: 24 - 31 + * arg5 == Challenge value, bytes: 32 - 39 + * arg6 == Challenge value, bytes: 40 - 47 + * arg7 == Challenge value, bytes: 48 - 55 + * arg8 == Challenge value, bytes: 56 - 63 + * ret0 == Status / error + * ret1 == Upper bound of token size in bytes + */ +#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194) + +/* + * Continue the operation to retrieve an attestation token. + * + * arg1 == The IPA of token buffer + * arg2 == Offset within the granule of the token buffer + * arg3 == Size of the granule buffer + * ret0 == Status / error + * ret1 == Length of token bytes copied to the granule buffer + */ +#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) + +#ifndef __ASSEMBLY__ + +struct realm_config { + union { + struct { + unsigned long ipa_bits; /* Width of IPA in bits */ + unsigned long hash_algo; /* Hash algorithm */ + }; + u8 pad[0x200]; + }; + union { + u8 rpv[64]; /* Realm Personalization Value */ + u8 pad2[0xe00]; + }; + /* + * The RMM requires the configuration structure to be aligned to a 4k + * boundary, ensure this happens by aligning this structure. + */ +} __aligned(0x1000); + +#endif /* __ASSEMBLY__ */ + +/* + * Read configuration for the current Realm. + * + * arg1 == struct realm_config addr + * ret0 == Status / error + */ +#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196) + +/* + * Request RIPAS of a target IPA range to be changed to a specified value. 
+ * + * arg1 == Base IPA address of target region + * arg2 == Top of the region + * arg3 == RIPAS value + * arg4 == flags + * ret0 == Status / error + * ret1 == Top of modified IPA range + * ret2 == Whether the Host accepted or rejected the request + */ +#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197) + +#define RSI_NO_CHANGE_DESTROYED UL(0) +#define RSI_CHANGE_DESTROYED UL(1) + +#define RSI_ACCEPT UL(0) +#define RSI_REJECT UL(1) + +/* + * Get RIPAS of a target IPA range. + * + * arg1 == Base IPA of target region + * arg2 == End of target IPA region + * ret0 == Status / error + * ret1 == Top of IPA region which has the reported RIPAS value + * ret2 == RIPAS value + */ +#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198) + +/* + * Make a Host call. + * + * arg1 == IPA of host call structure + * ret0 == Status / error + */ +#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199) + +#endif /* __ASM_RSI_SMC_H_ */ diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 2e010ea76be2..a76f9b387a26 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -46,8 +46,14 @@ static inline void dynamic_scs_init(void) static inline void dynamic_scs_init(void) {} #endif +enum { + EDYNSCS_INVALID_CIE_HEADER = 1, + EDYNSCS_INVALID_CIE_SDATA_SIZE = 2, + EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE = 3, + EDYNSCS_INVALID_CFA_OPCODE = 4, +}; + int __pi_scs_patch(const u8 eh_frame[], int size); -asmlinkage void __pi_scs_patch_vmlinux(void); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h index 917761feeffd..37774c793006 100644 --- a/arch/arm64/include/asm/set_memory.h +++ b/arch/arm64/include/asm/set_memory.h @@ -15,4 +15,7 @@ int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + #endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h index f63dc654e545..821a8fdd31af 100644 --- a/arch/arm64/include/asm/stacktrace/common.h +++ b/arch/arm64/include/asm/stacktrace/common.h @@ -60,13 +60,27 @@ static inline void unwind_init_common(struct unwind_state *state) state->stack = stackinfo_get_unknown(); } -static struct stack_info *unwind_find_next_stack(const struct unwind_state *state, - unsigned long sp, - unsigned long size) +/** + * unwind_find_stack() - Find the accessible stack which entirely contains an + * object. + * + * @state: the current unwind state. + * @sp: the base address of the object. + * @size: the size of the object. + * + * Return: a pointer to the relevant stack_info if found; NULL otherwise. + */ +static struct stack_info *unwind_find_stack(struct unwind_state *state, + unsigned long sp, + unsigned long size) { - for (int i = 0; i < state->nr_stacks; i++) { - struct stack_info *info = &state->stacks[i]; + struct stack_info *info = &state->stack; + if (stackinfo_on_stack(info, sp, size)) + return info; + + for (int i = 0; i < state->nr_stacks; i++) { + info = &state->stacks[i]; if (stackinfo_on_stack(info, sp, size)) return info; } @@ -75,36 +89,31 @@ static struct stack_info *unwind_find_next_stack(const struct unwind_state *stat } /** - * unwind_consume_stack() - Check if an object is on an accessible stack, - * updating stack boundaries so that future unwind steps cannot consume this - * object again. 
+ * unwind_consume_stack() - Update stack boundaries so that future unwind steps + * cannot consume this object again. * * @state: the current unwind state. + * @info: the stack_info of the stack containing the object. * @sp: the base address of the object. * @size: the size of the object. * * Return: 0 upon success, an error code otherwise. */ -static inline int unwind_consume_stack(struct unwind_state *state, - unsigned long sp, - unsigned long size) +static inline void unwind_consume_stack(struct unwind_state *state, + struct stack_info *info, + unsigned long sp, + unsigned long size) { - struct stack_info *next; - - if (stackinfo_on_stack(&state->stack, sp, size)) - goto found; - - next = unwind_find_next_stack(state, sp, size); - if (!next) - return -EINVAL; + struct stack_info tmp; /* * Stack transitions are strictly one-way, and once we've * transitioned from one stack to another, it's never valid to * unwind back to the old stack. * - * Remove the current stack from the list of stacks so that it cannot - * be found on a subsequent transition. + * Destroy the old stack info so that it cannot be found upon a + * subsequent transition. If the stack has not changed, we'll + * immediately restore the current stack info. * * Note that stacks can nest in several valid orders, e.g. * @@ -115,16 +124,15 @@ static inline int unwind_consume_stack(struct unwind_state *state, * ... so we do not check the specific order of stack * transitions. */ - state->stack = *next; - *next = stackinfo_get_unknown(); + tmp = *info; + *info = stackinfo_get_unknown(); + state->stack = tmp; -found: /* * Future unwind steps can only consume stack above this frame record. * Update the current stack to start immediately above it. */ state->stack.low = sp + size; - return 0; } /** @@ -137,21 +145,25 @@ static inline int unwind_consume_stack(struct unwind_state *state, static inline int unwind_next_frame_record(struct unwind_state *state) { + struct stack_info *info; + struct frame_record *record; unsigned long fp = state->fp; - int err; if (fp & 0x7) return -EINVAL; - err = unwind_consume_stack(state, fp, 16); - if (err) - return err; + info = unwind_find_stack(state, fp, sizeof(*record)); + if (!info) + return -EINVAL; + + unwind_consume_stack(state, info, fp, sizeof(*record)); /* * Record this frame record's values. */ - state->fp = READ_ONCE(*(unsigned long *)(fp)); - state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); + record = (struct frame_record *)fp; + state->fp = READ_ONCE(record->fp); + state->pc = READ_ONCE(record->lr); return 0; } diff --git a/arch/arm64/include/asm/stacktrace/frame.h b/arch/arm64/include/asm/stacktrace/frame.h new file mode 100644 index 000000000000..0ee0f6ba0fd8 --- /dev/null +++ b/arch/arm64/include/asm/stacktrace/frame.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_STACKTRACE_FRAME_H +#define __ASM_STACKTRACE_FRAME_H + +/* + * - FRAME_META_TYPE_NONE + * + * This value is reserved. + * + * - FRAME_META_TYPE_FINAL + * + * The record is the last entry on the stack. + * Unwinding should terminate successfully. + * + * - FRAME_META_TYPE_PT_REGS + * + * The record is embedded within a struct pt_regs, recording the registers at + * an arbitrary point in time. + * Unwinding should consume pt_regs::pc, followed by pt_regs::lr. + * + * Note: all other values are reserved and should result in unwinding + * terminating with an error. 
+ */
+#define FRAME_META_TYPE_NONE		0
+#define FRAME_META_TYPE_FINAL		1
+#define FRAME_META_TYPE_PT_REGS		2
+
+#ifndef __ASSEMBLY__
+/*
+ * A standard AAPCS64 frame record.
+ */
+struct frame_record {
+	u64 fp;
+	u64 lr;
+};
+
+/*
+ * A metadata frame record indicating a special unwind.
+ * The record::{fp,lr} fields must be zero to indicate the presence of
+ * metadata.
+ */
+struct frame_record_meta {
+	struct frame_record record;
+	u64 type;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_STACKTRACE_FRAME_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 9ea97dddefc4..9c98ff448bd9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1101,6 +1101,26 @@
 /* Initial value for Permission Overlay Extension for EL0 */
 #define POR_EL0_INIT	POE_RXW
 
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK		GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT		12
+#define GCS_CAP_ADDR_WIDTH		52
+#define GCS_CAP_ADDR(x)			FIELD_GET(GCS_CAP_ADDR_MASK, x)
+
+#define GCS_CAP_TOKEN_MASK		GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT		0
+#define GCS_CAP_TOKEN_WIDTH		12
+#define GCS_CAP_TOKEN(x)		FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN		0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN	0x5
+
+#define GCS_CAP(x)	((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+			 GCS_CAP_VALID_TOKEN)
+
 #define ARM64_FEATURE_FIELD_BITS	4
 
 /* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 95fbc8c05607..bc94e036a26b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -431,6 +431,23 @@ do {									\
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
 
+static inline bool __flush_tlb_range_limit_excess(unsigned long start,
+		unsigned long end, unsigned long pages, unsigned long stride)
+{
+	/*
+	 * When the system does not support TLB range based flush
+	 * operation, (MAX_DVM_OPS - 1) pages can be handled. But
+	 * with TLB range based operation, MAX_TLBI_RANGE_PAGES
+	 * pages can be handled.
+	 */
+	if ((!system_supports_tlb_range() &&
+	     (end - start) >= (MAX_DVM_OPS * stride)) ||
+	    pages > MAX_TLBI_RANGE_PAGES)
+		return true;
+
+	return false;
+}
+
 static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
@@ -442,15 +459,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 	end = round_up(end, stride);
 	pages = (end - start) >> PAGE_SHIFT;
 
-	/*
-	 * When not uses TLB range ops, we can handle up to
-	 * (MAX_DVM_OPS - 1) pages;
-	 * When uses TLB range ops, we can handle up to
-	 * MAX_TLBI_RANGE_PAGES pages.
- */ - if ((!system_supports_tlb_range() && - (end - start) >= (MAX_DVM_OPS * stride)) || - pages > MAX_TLBI_RANGE_PAGES) { + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_mm(vma->vm_mm); return; } @@ -492,19 +501,21 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long addr; + const unsigned long stride = PAGE_SIZE; + unsigned long pages; - if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) { + start = round_down(start, stride); + end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; + + if (__flush_tlb_range_limit_excess(start, end, pages, stride)) { flush_tlb_all(); return; } - start = __TLBI_VADDR(start, 0); - end = __TLBI_VADDR(end, 0); - dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaale1is, addr); + __flush_tlb_range_op(vaale1is, start, pages, stride, 0, + TLBI_TTL_UNKNOWN, false, lpa2_is_enabled()); dsb(ish); isb(); } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 1aa4ecb73429..5b91803201ef 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -502,4 +502,44 @@ static inline size_t probe_subpage_writeable(const char __user *uaddr, #endif /* CONFIG_ARCH_HAS_SUBPAGE_FAULTS */ +#ifdef CONFIG_ARM64_GCS + +static inline int gcssttr(unsigned long __user *addr, unsigned long val) +{ + register unsigned long __user *_addr __asm__ ("x0") = addr; + register unsigned long _val __asm__ ("x1") = val; + int err = 0; + + /* GCSSTTR x1, x0 */ + asm volatile( + "1: .inst 0xd91f1c01\n" + "2: \n" + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) + : "+r" (err) + : "rZ" (_val), "r" (_addr) + : "memory"); + + return err; +} + +static inline void put_user_gcs(unsigned long val, unsigned long __user *addr, + int *err) +{ + int ret; + + if (!access_ok((char __user *)addr, sizeof(u64))) { + *err = -EFAULT; + return; + } + + uaccess_ttbr0_enable(); + ret = gcssttr(addr, val); + if (ret != 0) + *err = ret; + uaccess_ttbr0_disable(); +} + + +#endif /* CONFIG_ARM64_GCS */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 055381b2c615..48d46b768eae 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -21,7 +21,7 @@ * HWCAP flags - for AT_HWCAP * * Bits 62 and 63 are reserved for use by libc. - * Bits 32-61 are unallocated for potential use by libc. + * Bits 33-61 are unallocated for potential use by libc. 
*/ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) @@ -55,6 +55,7 @@ #define HWCAP_SB (1 << 29) #define HWCAP_PACA (1 << 30) #define HWCAP_PACG (1UL << 31) +#define HWCAP_GCS (1UL << 32) /* * HWCAP2 flags - for AT_HWCAP2 @@ -124,4 +125,8 @@ #define HWCAP2_SME_SF8DP2 (1UL << 62) #define HWCAP2_POE (1UL << 63) +/* + * HWCAP3 flags - for AT_HWCAP3 + */ + #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7fa2f7036aa7..0f39ba4f3efd 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -324,6 +324,14 @@ struct user_za_header { #define ZA_PT_SIZE(vq) \ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq)) +/* GCS state (NT_ARM_GCS) */ + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI__ASM_PTRACE_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index bb7af77a30a7..d42f7a92238b 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -183,6 +183,15 @@ struct zt_context { __u16 __reserved[3]; }; +#define GCS_MAGIC 0x47435300 + +struct gcs_context { + struct _aarch64_ctx head; + __u64 gcspr; + __u64 features_enabled; + __u64 reserved; +}; + #endif /* !__ASSEMBLY__ */ #include diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2b112f3b7510..71c29a2a2f19 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -33,7 +33,8 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o idle.o patching.o pi/ + syscall.o proton-pack.o idle.o patching.o pi/ \ + rsi.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b21dd24b8efc..f89df36ca4bd 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -12,15 +12,12 @@ #include #include #include -#include #include -#include #include #include #include #include #include -#include #include #include #include @@ -28,8 +25,6 @@ int main(void) { - DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); - BLANK(); DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); @@ -79,8 +74,9 @@ int main(void) DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); - DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); + DEFINE(S_PMR, offsetof(struct pt_regs, pmr)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_STACKFRAME_TYPE, offsetof(struct pt_regs, stackframe.type)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -99,25 +95,6 @@ int main(void) DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); BLANK(); #endif -#ifdef CONFIG_COMPAT - DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); - DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); - BLANK(); -#endif - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); 
- BLANK(); - DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); - DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); - BLANK(); - DEFINE(VM_EXEC, VM_EXEC); - BLANK(); - DEFINE(PAGE_SZ, PAGE_SIZE); - BLANK(); - DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); - DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); - BLANK(); - DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); - BLANK(); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 718728a85430..351aa825ec40 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -103,6 +103,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; +unsigned int compat_elf_hwcap3 __read_mostly; #endif DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); @@ -228,6 +229,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_XS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), @@ -291,6 +293,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_GCS), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_GCS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), @@ -2358,6 +2362,14 @@ static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused) } #endif +#ifdef CONFIG_ARM64_GCS +static void cpu_enable_gcs(const struct arm64_cpu_capabilities *__unused) +{ + /* GCSPR_EL0 is always readable */ + write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); +} +#endif + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2590,6 +2602,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpus = &dbm_cpus, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, +#endif +#ifdef CONFIG_ARM64_HAFT + { + .desc = "Hardware managed Access Flag for Table Descriptors", + /* + * Contrary to the page/block access flag, the table access flag + * cannot be emulated in software (no access fault will occur). + * Therefore this should be used only if it's supported system + * wide. 
+ */ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAFT, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) + }, #endif { .desc = "CRC32 instructions", @@ -2889,6 +2916,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_poe, ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP) }, +#endif +#ifdef CONFIG_ARM64_GCS + { + .desc = "Guarded Control Stack (GCS)", + .capability = ARM64_HAS_GCS, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .cpu_enable = cpu_enable_gcs, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, GCS, IMP) + }, #endif {}, }; @@ -3005,6 +3042,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), +#endif +#ifdef CONFIG_ARM64_GCS + HWCAP_CAP(ID_AA64PFR1_EL1, GCS, IMP, CAP_HWCAP, KERNEL_HWCAP_GCS), #endif HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI @@ -3499,6 +3539,11 @@ unsigned long cpu_get_elf_hwcap2(void) return elf_hwcap[1]; } +unsigned long cpu_get_elf_hwcap3(void) +{ + return elf_hwcap[2]; +} + static void __init setup_boot_cpu_capabilities(void) { /* diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 44718d0482b3..f2f92c6b1c85 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -80,6 +80,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SB] = "sb", [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_GCS] = "gcs", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 024a7b245056..58f047de3e1c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -303,7 +303,6 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) { struct break_hook *hook; struct list_head *list; - int (*fn)(struct pt_regs *regs, unsigned long esr) = NULL; list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; @@ -313,10 +312,10 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) */ list_for_each_entry_rcu(hook, list, node) { if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm) - fn = hook->fn; + return hook->fn(regs, esr); } - return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; + return DBG_HOOK_ERROR; } NOKPROBE_SYMBOL(call_break_hook); @@ -441,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs) set_regs_spsr_ss(regs); } +void kernel_fastforward_single_step(struct pt_regs *regs) +{ + clear_regs_spsr_ss(regs); +} + /* ptrace API */ void user_enable_single_step(struct task_struct *task) { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 712718aed5dd..1d25d8899dbf 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -34,8 +34,16 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) u64 attr = md->attribute; u32 type = md->type; - if (type == EFI_MEMORY_MAPPED_IO) - return PROT_DEVICE_nGnRE; + if (type == EFI_MEMORY_MAPPED_IO) { + pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE); + + if (arm64_is_protected_mmio(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT)) + prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); + return pgprot_val(prot); + } if (region_is_misaligned(md)) { static bool __initdata code_is_misaligned; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3fcd9d080bf2..b260ddc4d3e9 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -463,6 +463,24 @@ static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr) +{ + enter_from_kernel_mode(regs); + local_daif_inherit(regs); + do_el1_gcs(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); +} + +static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr) +{ + enter_from_kernel_mode(regs); + local_daif_inherit(regs); + do_el1_mops(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -505,6 +523,12 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BTI: el1_bti(regs, esr); break; + case ESR_ELx_EC_GCS: + el1_gcs(regs, esr); + break; + case ESR_ELx_EC_MOPS: + el1_mops(regs, esr); + break; case ESR_ELx_EC_BREAKPT_CUR: case ESR_ELx_EC_SOFTSTP_CUR: case ESR_ELx_EC_WATCHPT_CUR: @@ -684,6 +708,14 @@ static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) exit_to_user_mode(regs); } +static void noinstr el0_gcs(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_gcs(regs, esr); + exit_to_user_mode(regs); +} + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -766,6 +798,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_MOPS: el0_mops(regs, esr); break; + case ESR_ELx_EC_GCS: + el0_gcs(regs, esr); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ef0e127b149..5ae2a34b50bd 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -284,15 +285,16 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * For exceptions from EL0, create a final frame record. - * For exceptions from EL1, create a synthetic frame record so the - * interrupted code shows up in the backtrace. + * Create a metadata frame record. 
The unwinder will use this to + * identify and unwind exception boundaries. */ - .if \el == 0 stp xzr, xzr, [sp, #S_STACKFRAME] + .if \el == 0 + mov x0, #FRAME_META_TYPE_FINAL .else - stp x29, x22, [sp, #S_STACKFRAME] + mov x0, #FRAME_META_TYPE_PT_REGS .endif + str x0, [sp, #S_STACKFRAME_TYPE] add x29, sp, #S_STACKFRAME #ifdef CONFIG_ARM64_SW_TTBR0_PAN @@ -315,7 +317,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING alternative_else_nop_endif mrs_s x20, SYS_ICC_PMR_EL1 - str x20, [sp, #S_PMR_SAVE] + str w20, [sp, #S_PMR] mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 @@ -342,7 +344,7 @@ alternative_if_not ARM64_HAS_GIC_PRIO_MASKING b .Lskip_pmr_restore\@ alternative_else_nop_endif - ldr x20, [sp, #S_PMR_SAVE] + ldr w20, [sp, #S_PMR] msr_s SYS_ICC_PMR_EL1, x20 /* Ensure priority change is seen by redistributor */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 6d21971ae559..8c4c1a2186cc 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -386,7 +386,7 @@ static void task_fpsimd_load(void) * fpsimd_save_user_state() or memory corruption, we * should always record an explicit format * when we save. We always at least have the - * memory allocated for FPSMID registers so + * memory allocated for FPSIMD registers so * try that and hope for the best. */ WARN_ON_ONCE(1); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cb68adcabe07..5ab1970ee543 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -199,6 +200,8 @@ SYM_CODE_END(preserve_boot_args) sub sp, sp, #PT_REGS_SIZE stp xzr, xzr, [sp, #S_STACKFRAME] + mov \tmp1, #FRAME_META_TYPE_FINAL + str \tmp1, [sp, #S_STACKFRAME_TYPE] add x29, sp, #S_STACKFRAME scs_load_current diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 7b11d84f533c..18749e9a6c2d 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -266,9 +266,15 @@ static int swsusp_mte_save_tags(void) max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { struct page *page = pfn_to_online_page(pfn); + struct folio *folio; if (!page) continue; + folio = page_folio(page); + + if (folio_test_hugetlb(folio) && + !folio_test_hugetlb_mte_tagged(folio)) + continue; if (!page_mte_tagged(page)) continue; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 36b25af56324..06bb680bfe97 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -462,14 +462,20 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { const Elf_Shdr *s; + int ret; + s = find_section(hdr, sechdrs, ".altinstructions"); if (s) apply_alternatives_module((void *)s->sh_addr, s->sh_size); if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); - if (s) - __pi_scs_patch((void *)s->sh_addr, s->sh_size); + if (s) { + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); + if (ret) + pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", + me->name, ret); + } } return module_init_ftrace_plt(hdr, sechdrs, me); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 6174671be7c1..2fbfd27ff5f2 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -38,7 +38,24 @@ EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); void mte_sync_tags(pte_t pte, unsigned int nr_pages) { struct page *page = pte_page(pte); - unsigned int i; + struct folio *folio = 
page_folio(page); + unsigned long i; + + if (folio_test_hugetlb(folio)) { + unsigned long nr = folio_nr_pages(folio); + + /* Hugetlb MTE flags are set for head page only */ + if (folio_try_hugetlb_mte_tagging(folio)) { + for (i = 0; i < nr; i++, page++) + mte_clear_page_tags(page_address(page)); + folio_set_hugetlb_mte_tagged(folio); + } + + /* ensure the tags are visible before the PTE is set */ + smp_wmb(); + + return; + } /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { @@ -410,6 +427,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, void *maddr; struct page *page = get_user_page_vma_remote(mm, addr, gup_flags, &vma); + struct folio *folio; if (IS_ERR(page)) { err = PTR_ERR(page); @@ -428,7 +446,12 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, put_page(page); break; } - WARN_ON_ONCE(!page_mte_tagged(page)); + + folio = page_folio(page); + if (folio_test_hugetlb(folio)) + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + else + WARN_ON_ONCE(!page_mte_tagged(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index 29d4b6244a6f..22159251eb3a 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -38,6 +38,15 @@ struct ftr_set_desc { #define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f } +static const struct ftr_set_desc mmfr0 __prel64_initconst = { + .name = "id_aa64mmfr0", + .override = &id_aa64mmfr0_override, + .fields = { + FIELD("ecv", ID_AA64MMFR0_EL1_ECV_SHIFT, NULL), + {} + }, +}; + static bool __init mmfr1_vh_filter(u64 val) { /* @@ -133,6 +142,7 @@ static const struct ftr_set_desc pfr1 __prel64_initconst = { .override = &id_aa64pfr1_override, .fields = { FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ), + FIELD("gcs", ID_AA64PFR1_EL1_GCS_SHIFT, NULL), FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), {} @@ -196,6 +206,7 @@ static const struct ftr_set_desc sw_features __prel64_initconst = { static const PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = { + { &mmfr0 }, { &mmfr1 }, { &mmfr2 }, { &pfr0 }, @@ -215,6 +226,7 @@ static const struct { { "arm64.nosve", "id_aa64pfr0.sve=0" }, { "arm64.nosme", "id_aa64pfr1.sme=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, + { "arm64.nogcs", "id_aa64pfr1.gcs=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.api=0 id_aa64isar1.apa=0 " diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c index 5410b2cac590..2b69e3beeef8 100644 --- a/arch/arm64/kernel/pi/map_range.c +++ b/arch/arm64/kernel/pi/map_range.c @@ -30,7 +30,7 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, u64 va_offset) { u64 cmask = (level == 3) ? 
CONT_PTE_SIZE - 1 : U64_MAX; - u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK; + pteval_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK; int lshift = (3 - level) * (PAGE_SHIFT - 3); u64 lmask = (PAGE_SIZE << lshift) - 1; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 49d8b40e61bc..55d0cd64ef71 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -50,6 +50,10 @@ bool dynamic_scs_is_enabled; #define DW_CFA_GNU_negative_offset_extended 0x2f #define DW_CFA_hi_user 0x3f +#define DW_EH_PE_sdata4 0x0b +#define DW_EH_PE_sdata8 0x0c +#define DW_EH_PE_pcrel 0x10 + enum { PACIASP = 0xd503233f, AUTIASP = 0xd50323bf, @@ -120,7 +124,12 @@ struct eh_frame { union { struct { // CIE u8 version; - u8 augmentation_string[]; + u8 augmentation_string[3]; + u8 code_alignment_factor; + u8 data_alignment_factor; + u8 return_address_register; + u8 augmentation_data_size; + u8 fde_pointer_format; }; struct { // FDE @@ -128,30 +137,39 @@ struct eh_frame { s32 range; u8 opcodes[]; }; + + struct { // FDE + s64 initial_loc64; + s64 range64; + u8 opcodes64[]; + }; }; }; static int scs_handle_fde_frame(const struct eh_frame *frame, - bool fde_has_augmentation_data, int code_alignment_factor, + bool use_sdata8, bool dry_run) { int size = frame->size - offsetof(struct eh_frame, opcodes) + 4; u64 loc = (u64)offset_to_ptr(&frame->initial_loc); const u8 *opcode = frame->opcodes; + int l; - if (fde_has_augmentation_data) { - int l; - - // assume single byte uleb128_t - if (WARN_ON(*opcode & BIT(7))) - return -ENOEXEC; - - l = *opcode++; - opcode += l; - size -= l + 1; + if (use_sdata8) { + loc = (u64)&frame->initial_loc64 + frame->initial_loc64; + opcode = frame->opcodes64; + size -= 8; } + // assume single byte uleb128_t for augmentation data size + if (*opcode & BIT(7)) + return EDYNSCS_INVALID_FDE_AUGM_DATA_SIZE; + + l = *opcode++; + opcode += l; + size -= l + 1; + /* * Starting from 'loc', apply the CFA opcodes that advance the location * pointer, and identify the locations of the PAC instructions. @@ -201,7 +219,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, break; default: - return -ENOEXEC; + return EDYNSCS_INVALID_CFA_OPCODE; } } return 0; @@ -209,12 +227,12 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, int scs_patch(const u8 eh_frame[], int size) { + int code_alignment_factor = 1; + bool fde_use_sdata8 = false; const u8 *p = eh_frame; while (size > 4) { const struct eh_frame *frame = (const void *)p; - bool fde_has_augmentation_data = true; - int code_alignment_factor = 1; int ret; if (frame->size == 0 || @@ -223,28 +241,47 @@ int scs_patch(const u8 eh_frame[], int size) break; if (frame->cie_id_or_pointer == 0) { - const u8 *p = frame->augmentation_string; - - /* a 'z' in the augmentation string must come first */ - fde_has_augmentation_data = *p == 'z'; + /* + * Require presence of augmentation data (z) with a + * specifier for the size of the FDE initial_loc and + * range fields (R), and nothing else. + */ + if (strcmp(frame->augmentation_string, "zR")) + return EDYNSCS_INVALID_CIE_HEADER; /* * The code alignment factor is a uleb128 encoded field * but given that the only sensible values are 1 or 4, - * there is no point in decoding the whole thing. + * there is no point in decoding the whole thing. Also + * sanity check the size of the data alignment factor + * field, and the values of the return address register + * and augmentation data size fields. 
	 */
-		p += strlen(p) + 1;
-		if (!WARN_ON(*p & BIT(7)))
-			code_alignment_factor = *p;
+			if ((frame->code_alignment_factor & BIT(7)) ||
+			    (frame->data_alignment_factor & BIT(7)) ||
+			    frame->return_address_register != 30 ||
+			    frame->augmentation_data_size != 1)
+				return EDYNSCS_INVALID_CIE_HEADER;
+
+			code_alignment_factor = frame->code_alignment_factor;
+
+			switch (frame->fde_pointer_format) {
+			case DW_EH_PE_pcrel | DW_EH_PE_sdata4:
+				fde_use_sdata8 = false;
+				break;
+			case DW_EH_PE_pcrel | DW_EH_PE_sdata8:
+				fde_use_sdata8 = true;
+				break;
+			default:
+				return EDYNSCS_INVALID_CIE_SDATA_SIZE;
+			}
 		} else {
-			ret = scs_handle_fde_frame(frame,
-						   fde_has_augmentation_data,
-						   code_alignment_factor,
-						   true);
+			ret = scs_handle_fde_frame(frame, code_alignment_factor,
+						   fde_use_sdata8, true);
 			if (ret)
 				return ret;
-			scs_handle_fde_frame(frame, fde_has_augmentation_data,
-					     code_alignment_factor, false);
+			scs_handle_fde_frame(frame, code_alignment_factor,
+					     fde_use_sdata8, false);
 		}
 		p += sizeof(frame->size) + frame->size;
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 3496d6169e59..6438bf62e753 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
 	 * Instructions which load PC relative literals are not going to work
 	 * when executed from an XOL slot. Instructions doing an exclusive
 	 * load/store are not going to complete successfully when single-step
-	 * exception handling happens in the middle of the sequence.
+	 * exception handling happens in the middle of the sequence. Memory
+	 * copy/set instructions require that all three instructions be placed
+	 * consecutively in memory.
 	 */
 	if (aarch64_insn_uses_literal(insn) ||
-	    aarch64_insn_is_exclusive(insn))
+	    aarch64_insn_is_exclusive(insn) ||
+	    aarch64_insn_is_mops(insn))
 		return false;
 
 	return true;
@@ -73,8 +76,17 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
  * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
  */
 enum probe_insn __kprobes
-arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
+arm_probe_decode_insn(u32 insn, struct arch_probe_insn *api)
 {
+	/*
+	 * While 'nop' instructions can execute in the out-of-line slot,
+	 * simulating them in the breakpoint handler offers better performance.
+	 */
+	if (aarch64_insn_is_nop(insn)) {
+		api->handler = simulate_nop;
+		return INSN_GOOD_NO_SLOT;
+	}
+
 	/*
 	 * Instructions reading or modifying the PC won't work from the XOL
 	 * slot.
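Taken together, the decode-insn.c hunks above leave the kprobes decode policy roughly as follows; this is a hedged consolidation for illustration, not code from the patch:

	/* Sketch of the resulting decode policy (illustrative only). */
	static enum probe_insn classify_insn(u32 insn, struct arch_probe_insn *api)
	{
		/* NOPs are simulated straight from the BRK handler: no XOL slot. */
		if (aarch64_insn_is_nop(insn)) {
			api->handler = simulate_nop;
			return INSN_GOOD_NO_SLOT;
		}

		/*
		 * Literal loads, exclusives and MOPS (CPYx/SETx) sequences
		 * cannot be single-stepped from an out-of-line slot.
		 */
		if (aarch64_insn_uses_literal(insn) ||
		    aarch64_insn_is_exclusive(insn) ||
		    aarch64_insn_is_mops(insn))
			return INSN_REJECTED;

		/* Anything else may be stepped from its XOL slot. */
		return INSN_GOOD;
	}
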
@@ -133,8 +145,8 @@ enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { enum probe_insn decoded; - probe_opcode_t insn = le32_to_cpu(*addr); - probe_opcode_t *scan_end = NULL; + u32 insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_end = NULL; unsigned long size = 0, offset = 0; struct arch_probe_insn *api = &asi->api; diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h index 8b758c5a2062..0e4195de8206 100644 --- a/arch/arm64/kernel/probes/decode-insn.h +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -28,6 +28,6 @@ enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); #endif enum probe_insn __kprobes -arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi); +arm_probe_decode_insn(u32 insn, struct arch_probe_insn *asi); #endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 4268678d0e86..48d88e07611d 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -43,7 +43,7 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { - kprobe_opcode_t *addr = p->ainsn.api.insn; + kprobe_opcode_t *addr = p->ainsn.xol_insn; /* * Prepare insn slot, Mark Rutland points out it depends on a coupe of @@ -64,20 +64,20 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) * the BRK exception handler, so it is unnecessary to generate * Contex-Synchronization-Event via ISB again. */ - aarch64_insn_patch_text_nosync(addr, p->opcode); + aarch64_insn_patch_text_nosync(addr, le32_to_cpu(p->opcode)); aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS); /* * Needs restoring of return address after stepping xol. */ - p->ainsn.api.restore = (unsigned long) p->addr + + p->ainsn.xol_restore = (unsigned long) p->addr + sizeof(kprobe_opcode_t); } static void __kprobes arch_prepare_simulate(struct kprobe *p) { /* This instructions is not executed xol. 
No need to adjust the PC */ - p->ainsn.api.restore = 0; + p->ainsn.xol_restore = 0; } static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) @@ -85,7 +85,7 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); if (p->ainsn.api.handler) - p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs); + p->ainsn.api.handler(le32_to_cpu(p->opcode), (long)p->addr, regs); /* single step simulated, now go for post processing */ post_kprobe_handler(p, kcb, regs); @@ -99,7 +99,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; /* copy instruction */ - p->opcode = le32_to_cpu(*p->addr); + p->opcode = *p->addr; if (search_exception_tables(probe_addr)) return -EINVAL; @@ -110,18 +110,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; case INSN_GOOD_NO_SLOT: /* insn need simulation */ - p->ainsn.api.insn = NULL; + p->ainsn.xol_insn = NULL; break; case INSN_GOOD: /* instruction uses slot */ - p->ainsn.api.insn = get_insn_slot(); - if (!p->ainsn.api.insn) + p->ainsn.xol_insn = get_insn_slot(); + if (!p->ainsn.xol_insn) return -ENOMEM; break; } /* prepare the instruction */ - if (p->ainsn.api.insn) + if (p->ainsn.xol_insn) arch_prepare_ss_slot(p); else arch_prepare_simulate(p); @@ -142,15 +142,16 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) void __kprobes arch_disarm_kprobe(struct kprobe *p) { void *addr = p->addr; + u32 insn = le32_to_cpu(p->opcode); - aarch64_insn_patch_text(&addr, &p->opcode, 1); + aarch64_insn_patch_text(&addr, &insn, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) { - if (p->ainsn.api.insn) { - free_insn_slot(p->ainsn.api.insn, 0); - p->ainsn.api.insn = NULL; + if (p->ainsn.xol_insn) { + free_insn_slot(p->ainsn.xol_insn, 0); + p->ainsn.xol_insn = NULL; } } @@ -205,9 +206,9 @@ static void __kprobes setup_singlestep(struct kprobe *p, } - if (p->ainsn.api.insn) { + if (p->ainsn.xol_insn) { /* prepare for single stepping */ - slot = (unsigned long)p->ainsn.api.insn; + slot = (unsigned long)p->ainsn.xol_insn; kprobes_save_local_irqflag(kcb, regs); instruction_pointer_set(regs, slot); @@ -245,8 +246,8 @@ static void __kprobes post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs) { /* return addr restore if non-branching insn */ - if (cur->ainsn.api.restore != 0) - instruction_pointer_set(regs, cur->ainsn.api.restore); + if (cur->ainsn.xol_restore != 0) + instruction_pointer_set(regs, cur->ainsn.xol_restore); /* restore back original saved kprobe variables and continue */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -348,7 +349,7 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned long esr) struct kprobe *cur = kprobe_running(); if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) && - ((unsigned long)&cur->ainsn.api.insn[1] == addr)) { + ((unsigned long)&cur->ainsn.xol_insn[1] == addr)) { kprobes_restore_local_irqflag(kcb, regs); post_kprobe_handler(cur, kcb, regs); diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c index b65334ab79d2..4c6d2d712fbd 100644 --- a/arch/arm64/kernel/probes/simulate-insn.c +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -196,3 +196,9 @@ simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) instruction_pointer_set(regs, instruction_pointer(regs) + 4); } + +void __kprobes +simulate_nop(u32 opcode, long addr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, 
AARCH64_INSN_SIZE); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h index e065dc92218e..efb2803ec943 100644 --- a/arch/arm64/kernel/probes/simulate-insn.h +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -16,5 +16,6 @@ void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_nop(u32 opcode, long addr, struct pt_regs *regs); #endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index a2f137a595fc..cb3d05af36e3 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -17,12 +17,20 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *xol_page_kaddr = kmap_atomic(page); void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK); + /* + * Initial cache maintenance of the xol page done via set_pte_at(). + * Subsequent CMOs only needed if the xol slot changes. + */ + if (!memcmp(dst, src, len)) + goto done; + /* Initialize the slot */ memcpy(dst, src, len); /* flush caches (dcache/icache) */ sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); +done: kunmap_atomic(xol_page_kaddr); } @@ -34,7 +42,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { - probe_opcode_t insn; + u32 insn; /* TODO: Currently we do not support AARCH32 instruction probing */ if (mm->context.flags & MMCF_AARCH32) @@ -102,7 +110,7 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - probe_opcode_t insn; + u32 insn; unsigned long addr; if (!auprobe->simulate) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 3e7c8c8195c3..2968a33bb3bc 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -227,7 +228,7 @@ void __show_regs(struct pt_regs *regs) printk("sp : %016llx\n", sp); if (system_uses_irq_prio_masking()) - printk("pmr_save: %08llx\n", regs->pmr_save); + printk("pmr: %08x\n", regs->pmr); i = top_reg; @@ -280,6 +281,51 @@ static void flush_poe(void) write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0); } +#ifdef CONFIG_ARM64_GCS + +static void flush_gcs(void) +{ + if (!system_supports_gcs()) + return; + + gcs_free(current); + current->thread.gcs_el0_mode = 0; + write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); + write_sysreg_s(0, SYS_GCSPR_EL0); +} + +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + unsigned long gcs; + + if (!system_supports_gcs()) + return 0; + + p->thread.gcs_base = 0; + p->thread.gcs_size = 0; + + gcs = gcs_alloc_thread_stack(p, args); + if (IS_ERR_VALUE(gcs)) + return PTR_ERR((void *)gcs); + + p->thread.gcs_el0_mode = current->thread.gcs_el0_mode; + p->thread.gcs_el0_locked = current->thread.gcs_el0_locked; + + return 0; +} + +#else + +static void flush_gcs(void) { } +static int copy_thread_gcs(struct task_struct *p, + const struct kernel_clone_args *args) +{ + return 0; +} + +#endif + void flush_thread(void) { fpsimd_flush_thread(); @@ -287,11 +333,13 @@ void flush_thread(void) 
flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); flush_poe(); + flush_gcs(); } void arch_release_task_struct(struct task_struct *tsk) { fpsimd_release_task(tsk); + gcs_free(tsk); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) @@ -355,6 +403,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long stack_start = args->stack; unsigned long tls = args->tls; struct pt_regs *childregs = task_pt_regs(p); + int ret; memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); @@ -399,6 +448,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.uw.tp_value = tls; p->thread.tpidr2_el0 = 0; } + + ret = copy_thread_gcs(p, args); + if (ret != 0) + return ret; } else { /* * A kthread has no context to ERET to, so ensure any buggy @@ -409,6 +462,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT; + childregs->stackframe.type = FRAME_META_TYPE_FINAL; p->thread.cpu_context.x19 = (unsigned long)args->fn; p->thread.cpu_context.x20 = (unsigned long)args->fn_arg; @@ -422,7 +476,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * For the benefit of the unwinder, set up childregs->stackframe * as the final frame for the new task. */ - p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; + p->thread.cpu_context.fp = (unsigned long)&childregs->stackframe; ptrace_hw_copy_thread(p); @@ -442,7 +496,7 @@ static void tls_thread_switch(struct task_struct *next) if (is_compat_thread(task_thread_info(next))) write_sysreg(next->thread.uw.tp_value, tpidrro_el0); - else if (!arm64_kernel_unmapped_at_el0()) + else write_sysreg(0, tpidrro_el0); write_sysreg(*task_user_tls(next), tpidr_el0); @@ -487,6 +541,46 @@ static void entry_task_switch(struct task_struct *next) __this_cpu_write(__entry_task, next); } +#ifdef CONFIG_ARM64_GCS + +void gcs_preserve_current_state(void) +{ + current->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); +} + +static void gcs_thread_switch(struct task_struct *next) +{ + if (!system_supports_gcs()) + return; + + /* GCSPR_EL0 is always readable */ + gcs_preserve_current_state(); + write_sysreg_s(next->thread.gcspr_el0, SYS_GCSPR_EL0); + + if (current->thread.gcs_el0_mode != next->thread.gcs_el0_mode) + gcs_set_el0_mode(next); + + /* + * Ensure that GCS memory effects of the 'prev' thread are + * ordered before other memory accesses with release semantics + * (or preceded by a DMB) on the current PE. In addition, any + * memory accesses with acquire semantics (or succeeded by a + * DMB) are ordered before GCS memory effects of the 'next' + * thread. This will ensure that the GCS memory effects are + * visible to other PEs in case of migration. 
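To make the migration case concrete, a hypothetical two-CPU timeline (CPU numbers and task made up)::

  CPU0:  task A runs with GCS enabled; BL/RET push and pop GCS entries
         __switch_to() -> gcs_thread_switch(): gcsb_dsync() orders those
         GCS stores before the release accesses that publish the switch
  CPU1:  task A is migrated here and resumes; acquire ordering on the
         switch-in path means its next RET observes the entry pushed on CPU0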
+ */ + if (task_gcs_el0_enabled(current) || task_gcs_el0_enabled(next)) + gcsb_dsync(); +} + +#else + +static void gcs_thread_switch(struct task_struct *next) +{ +} + +#endif + /* * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0} @@ -583,6 +677,7 @@ struct task_struct *__switch_to(struct task_struct *prev, cntkctl_thread_switch(prev, next); ptrauth_thread_switch_user(next); permission_overlay_switch(next); + gcs_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index b756578aeaee..e4437f62a2cd 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -898,7 +899,11 @@ static int sve_set_common(struct task_struct *target, if (ret) goto out; - /* Actual VL set may be less than the user asked for: */ + /* + * Actual VL set may be different from what the user asked + * for, or we may have configured the _ONEXEC VL not the + * current VL: + */ vq = sve_vq_from_vl(task_get_vl(target, type)); /* Enter/exit streaming mode */ @@ -1125,7 +1130,11 @@ static int za_set(struct task_struct *target, if (ret) goto out; - /* Actual VL set may be less than the user asked for: */ + /* + * Actual VL set may be different from what the user asked + * for, or we may have configured the _ONEXEC rather than + * current VL: + */ vq = sve_vq_from_vl(task_get_sme_vl(target)); /* Ensure there is some SVE storage for streaming mode */ @@ -1473,6 +1482,52 @@ static int poe_set(struct task_struct *target, const struct } #endif +#ifdef CONFIG_ARM64_GCS +static int gcs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + if (target == current) + gcs_preserve_current_state(); + + user_gcs.features_enabled = target->thread.gcs_el0_mode; + user_gcs.features_locked = target->thread.gcs_el0_locked; + user_gcs.gcspr_el0 = target->thread.gcspr_el0; + + return membuf_write(&to, &user_gcs, sizeof(user_gcs)); +} + +static int gcs_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + struct user_gcs user_gcs; + + if (!system_supports_gcs()) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); + if (ret) + return ret; + + if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + target->thread.gcs_el0_mode = user_gcs.features_enabled; + target->thread.gcs_el0_locked = user_gcs.features_locked; + target->thread.gcspr_el0 = user_gcs.gcspr_el0; + + return 0; +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1503,7 +1558,10 @@ enum aarch64_regset { REGSET_TAGGED_ADDR_CTRL, #endif #ifdef CONFIG_ARM64_POE - REGSET_POE + REGSET_POE, +#endif +#ifdef CONFIG_ARM64_GCS + REGSET_GCS, #endif }; @@ -1674,6 +1732,16 @@ static const struct user_regset aarch64_regsets[] = { .set = poe_set, }, #endif +#ifdef CONFIG_ARM64_GCS + [REGSET_GCS] = { + .core_note_type = NT_ARM_GCS, + .n = sizeof(struct user_gcs) / sizeof(u64), + .size = sizeof(u64), + .align = sizeof(u64), + .regset_get = gcs_get, + .set = gcs_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { diff --git a/arch/arm64/kernel/rsi.c 
b/arch/arm64/kernel/rsi.c new file mode 100644 index 000000000000..3031f25c32ef --- /dev/null +++ b/arch/arm64/kernel/rsi.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct realm_config config; + +unsigned long prot_ns_shared; +EXPORT_SYMBOL(prot_ns_shared); + +DEFINE_STATIC_KEY_FALSE_RO(rsi_present); +EXPORT_SYMBOL(rsi_present); + +bool cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return is_realm_world(); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(cc_platform_has); + +static bool rsi_version_matches(void) +{ + unsigned long ver_lower, ver_higher; + unsigned long ret = rsi_request_version(RSI_ABI_VERSION, + &ver_lower, + &ver_higher); + + if (ret == SMCCC_RET_NOT_SUPPORTED) + return false; + + if (ret != RSI_SUCCESS) { + pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n", + RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR, + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower), + RSI_ABI_VERSION_GET_MAJOR(ver_higher), + RSI_ABI_VERSION_GET_MINOR(ver_higher)); + return false; + } + + pr_info("RME: Using RSI version %lu.%lu\n", + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower)); + + return true; +} + +static void __init arm64_rsi_setup_memory(void) +{ + u64 i; + phys_addr_t start, end; + + /* + * Iterate over the available memory ranges and convert the state to + * protected memory. We should take extra care to ensure that we DO NOT + * permit any "DESTROYED" pages to be converted to "RAM". + * + * panic() is used because if the attempt to switch the memory to + * protected has failed here, then future accesses to the memory are + * simply going to be reflected as a SEA (Synchronous External Abort) + * which we can't handle. Bailing out early prevents the guest limping + * on and dying later. + */ + for_each_mem_range(i, &start, &end) { + if (rsi_set_memory_range_protected_safe(start, end)) { + panic("Failed to set memory range to protected: %pa-%pa", + &start, &end); + } + } +} + +bool __arm64_is_protected_mmio(phys_addr_t base, size_t size) +{ + enum ripas ripas; + phys_addr_t end, top; + + /* Overflow ? 
*/ + if (WARN_ON(base + size <= base)) + return false; + + end = ALIGN(base + size, RSI_GRANULE_SIZE); + base = ALIGN_DOWN(base, RSI_GRANULE_SIZE); + + while (base < end) { + if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top))) + break; + if (WARN_ON(top <= base)) + break; + if (ripas != RSI_RIPAS_DEV) + break; + base = top; + } + + return base >= end; +} +EXPORT_SYMBOL(__arm64_is_protected_mmio); + +static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot) +{ + if (__arm64_is_protected_mmio(phys, size)) + *prot = pgprot_encrypted(*prot); + else + *prot = pgprot_decrypted(*prot); + + return 0; +} + +void __init arm64_rsi_init(void) +{ + if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC) + return; + if (!rsi_version_matches()) + return; + if (WARN_ON(rsi_get_realm_config(&config))) + return; + prot_ns_shared = BIT(config.ipa_bits - 1); + + if (arm64_ioremap_prot_hook_register(realm_ioremap_hook)) + return; + + if (realm_register_memory_enc_ops()) + return; + + arm64_rsi_setup_memory(); + + static_branch_enable(&rsi_present); +} + diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b22d28ec8028..b5e1e306fa51 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -351,6 +352,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) else psci_acpi_init(); + arm64_rsi_init(); + init_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c7d311d8b92a..14ac6fdb872b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,15 @@ #include #include +#ifdef CONFIG_ARM64_GCS +#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) + +static bool gcs_signal_cap_valid(u64 addr, u64 val) +{ + return val == GCS_SIGNAL_CAP(addr); +} +#endif + /* * Do a signal return; undo the signal stack. These are aligned to 128-bit. */ @@ -43,11 +53,6 @@ struct rt_sigframe { struct ucontext uc; }; -struct frame_record { - u64 fp; - u64 lr; -}; - struct rt_sigframe_user_layout { struct rt_sigframe __user *sigframe; struct frame_record __user *next_frame; @@ -57,6 +62,7 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; + unsigned long gcs_offset; unsigned long sve_offset; unsigned long tpidr2_offset; unsigned long za_offset; @@ -79,7 +85,6 @@ struct user_access_state { u64 por_el0; }; -#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) #define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) #define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) @@ -242,6 +247,8 @@ struct user_ctxs { u32 fpmr_size; struct poe_context __user *poe; u32 poe_size; + struct gcs_context __user *gcs; + u32 gcs_size; }; static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) @@ -689,6 +696,82 @@ extern int restore_zt_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SME */ +#ifdef CONFIG_ARM64_GCS + +static int preserve_gcs_context(struct gcs_context __user *ctx) +{ + int err = 0; + u64 gcspr = read_sysreg_s(SYS_GCSPR_EL0); + + /* + * If GCS is enabled we will add a cap token to the frame, + * include it in the GCSPR_EL0 we report to support stack + * switching via sigreturn if GCS is enabled. 
We do not allow + * enabling via sigreturn so the token is only relevant for + * threads with GCS enabled. + */ + if (task_gcs_el0_enabled(current)) + gcspr -= 8; + + __put_user_error(GCS_MAGIC, &ctx->head.magic, err); + __put_user_error(sizeof(*ctx), &ctx->head.size, err); + __put_user_error(gcspr, &ctx->gcspr, err); + __put_user_error(0, &ctx->reserved, err); + __put_user_error(current->thread.gcs_el0_mode, + &ctx->features_enabled, err); + + return err; +} + +static int restore_gcs_context(struct user_ctxs *user) +{ + u64 gcspr, enabled; + int err = 0; + + if (user->gcs_size != sizeof(*user->gcs)) + return -EINVAL; + + __get_user_error(gcspr, &user->gcs->gcspr, err); + __get_user_error(enabled, &user->gcs->features_enabled, err); + if (err) + return err; + + /* Don't allow unknown modes */ + if (enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + err = gcs_check_locked(current, enabled); + if (err != 0) + return err; + + /* Don't allow enabling */ + if (!task_gcs_el0_enabled(current) && + (enabled & PR_SHADOW_STACK_ENABLE)) + return -EINVAL; + + /* If we are disabling disable everything */ + if (!(enabled & PR_SHADOW_STACK_ENABLE)) + enabled = 0; + + current->thread.gcs_el0_mode = enabled; + + /* + * We let userspace set GCSPR_EL0 to anything here, we will + * validate later in gcs_restore_signal(). + */ + write_sysreg_s(gcspr, SYS_GCSPR_EL0); + + return 0; +} + +#else /* ! CONFIG_ARM64_GCS */ + +/* Turn any non-optimised out attempts to use these into a link error: */ +extern int preserve_gcs_context(void __user *ctx); +extern int restore_gcs_context(struct user_ctxs *user); + +#endif /* ! CONFIG_ARM64_GCS */ + static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) { @@ -707,6 +790,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->zt = NULL; user->fpmr = NULL; user->poe = NULL; + user->gcs = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -823,6 +907,17 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpmr_size = size; break; + case GCS_MAGIC: + if (!system_supports_gcs()) + goto invalid; + + if (user->gcs) + goto invalid; + + user->gcs = (struct gcs_context __user *)head; + user->gcs_size = size; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -943,6 +1038,9 @@ static int restore_sigframe(struct pt_regs *regs, err = restore_fpsimd_context(&user); } + if (err == 0 && system_supports_gcs() && user.gcs) + err = restore_gcs_context(&user); + if (err == 0 && system_supports_tpidr2() && user.tpidr2) err = restore_tpidr2_context(&user); @@ -961,6 +1059,58 @@ static int restore_sigframe(struct pt_regs *regs, return err; } +#ifdef CONFIG_ARM64_GCS +static int gcs_restore_signal(void) +{ + unsigned long __user *gcspr_el0; + u64 cap; + int ret; + + if (!system_supports_gcs()) + return 0; + + if (!(current->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)) + return 0; + + gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + + /* + * Ensure that any changes to the GCS done via GCS operations + * are visible to the normal reads we do to validate the + * token. + */ + gcsb_dsync(); + + /* + * GCSPR_EL0 should be pointing at a capped GCS, read the cap. + * We don't enforce that this is in a GCS page, if it is not + * then faults will be generated on GCS operations - the main + * concern is to protect GCS pages. + */ + ret = copy_from_user(&cap, gcspr_el0, sizeof(cap)); + if (ret) + return -EFAULT; + + /* + * Check that the cap is the actual GCS before replacing it. 
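To illustrate the ``gcspr -= 8`` adjustment in preserve_gcs_context() with made-up addresses: the signal frame records the address the cap token will occupy once gcs_signal_entry() pushes it, so a sigreturn after a stack switch lands directly on the token::

  GCSPR_EL0 read at signal setup:      0x0000ffffa0001000
  value written to gcs_context.gcspr:  0x0000ffffa0000ff8  (gcspr - 8)
  cap token later pushed at:           0x0000ffffa0000ff8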
+ */ + if (!gcs_signal_cap_valid((u64)gcspr_el0, cap)) + return -EINVAL; + + /* Invalidate the token to prevent reuse */ + put_user_gcs(0, (__user void*)gcspr_el0, &ret); + if (ret != 0) + return -EFAULT; + + write_sysreg_s(gcspr_el0 + 1, SYS_GCSPR_EL0); + + return 0; +} + +#else +static int gcs_restore_signal(void) { return 0; } +#endif + SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); @@ -985,6 +1135,9 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_sigframe(regs, frame, &ua_state)) goto badframe; + if (gcs_restore_signal()) + goto badframe; + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -1024,6 +1177,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } +#ifdef CONFIG_ARM64_GCS + if (system_supports_gcs() && (add_all || current->thread.gcspr_el0)) { + err = sigframe_alloc(user, &user->gcs_offset, + sizeof(struct gcs_context)); + if (err) + return err; + } +#endif + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; @@ -1132,6 +1294,12 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } + if (system_supports_gcs() && err == 0 && user->gcs_offset) { + struct gcs_context __user *gcs_ctx = + apply_user_offset(user, user->gcs_offset); + err |= preserve_gcs_context(gcs_ctx); + } + /* Scalable Vector Extension state (including streaming), if present */ if ((system_supports_sve() || system_supports_sme()) && err == 0 && user->sve_offset) { @@ -1154,7 +1322,7 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_fpmr_context(fpmr_ctx); } - if (system_supports_poe() && err == 0 && user->poe_offset) { + if (system_supports_poe() && err == 0) { struct poe_context __user *poe_ctx = apply_user_offset(user, user->poe_offset); @@ -1249,7 +1417,48 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, return 0; } -static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, +#ifdef CONFIG_ARM64_GCS + +static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) +{ + unsigned long __user *gcspr_el0; + int ret = 0; + + if (!system_supports_gcs()) + return 0; + + if (!task_gcs_el0_enabled(current)) + return 0; + + /* + * We are entering a signal handler, current register state is + * active. + */ + gcspr_el0 = (unsigned long __user *)read_sysreg_s(SYS_GCSPR_EL0); + + /* + * Push a cap and the GCS entry for the trampoline onto the GCS. 
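Putting the two pushes together, the guarded control stack after gcs_signal_entry() looks like this (stack grows towards lower addresses; illustrative)::

  new GCSPR_EL0 -> [ sigtramp address ]   consumed by the handler's RET
                   [ signal cap token ]   validated and zeroed by
                                          gcs_restore_signal() at sigreturn
  old GCSPR_EL0 -> [ interrupted context's entries ... ]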
+ */ + put_user_gcs((unsigned long)sigtramp, gcspr_el0 - 2, &ret); + put_user_gcs(GCS_SIGNAL_CAP(gcspr_el0 - 1), gcspr_el0 - 1, &ret); + if (ret != 0) + return ret; + + gcspr_el0 -= 2; + write_sysreg_s((unsigned long)gcspr_el0, SYS_GCSPR_EL0); + + return 0; +} +#else + +static int gcs_signal_entry(__sigrestore_t sigtramp, struct ksignal *ksig) +{ + return 0; +} + +#endif + +static int setup_return(struct pt_regs *regs, struct ksignal *ksig, struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; @@ -1257,7 +1466,7 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->regs[0] = usig; regs->sp = (unsigned long)user->sigframe; regs->regs[29] = (unsigned long)&user->next_frame->fp; - regs->pc = (unsigned long)ka->sa.sa_handler; + regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* * Signal delivery is a (wacky) indirect function call in @@ -1297,12 +1506,14 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, sme_smstop(); } - if (ka->sa.sa_flags & SA_RESTORER) - sigtramp = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + sigtramp = ksig->ka.sa.sa_restorer; else sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); regs->regs[30] = (unsigned long)sigtramp; + + return gcs_signal_entry(sigtramp, ksig); } static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, @@ -1327,7 +1538,7 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigframe(&user, regs, set, &ua_state); if (err == 0) { - setup_return(regs, &ksig->ka, &user, usig); + err = setup_return(regs, ksig, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, &ksig->info); regs->regs[1] = (unsigned long)&frame->info; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 2729faaee4b4..caef85462acb 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,6 +20,23 @@ #include #include +enum kunwind_source { + KUNWIND_SOURCE_UNKNOWN, + KUNWIND_SOURCE_FRAME, + KUNWIND_SOURCE_CALLER, + KUNWIND_SOURCE_TASK, + KUNWIND_SOURCE_REGS_PC, + KUNWIND_SOURCE_REGS_LR, +}; + +union unwind_flags { + unsigned long all; + struct { + unsigned long fgraph : 1, + kretprobe : 1; + }; +}; + /* * Kernel unwind state * @@ -37,6 +54,9 @@ struct kunwind_state { #ifdef CONFIG_KRETPROBES struct llist_node *kr_cur; #endif + enum kunwind_source source; + union unwind_flags flags; + struct pt_regs *regs; }; static __always_inline void @@ -45,6 +65,9 @@ kunwind_init(struct kunwind_state *state, { unwind_init_common(&state->common); state->task = task; + state->source = KUNWIND_SOURCE_UNKNOWN; + state->flags.all = 0; + state->regs = NULL; } /* @@ -60,8 +83,10 @@ kunwind_init_from_regs(struct kunwind_state *state, { kunwind_init(state, current); + state->regs = regs; state->common.fp = regs->regs[29]; state->common.pc = regs->pc; + state->source = KUNWIND_SOURCE_REGS_PC; } /* @@ -79,6 +104,7 @@ kunwind_init_from_caller(struct kunwind_state *state) state->common.fp = (unsigned long)__builtin_frame_address(1); state->common.pc = (unsigned long)__builtin_return_address(0); + state->source = KUNWIND_SOURCE_CALLER; } /* @@ -99,6 +125,7 @@ kunwind_init_from_task(struct kunwind_state *state, state->common.fp = thread_saved_fp(task); state->common.pc = thread_saved_pc(task); + state->source = KUNWIND_SOURCE_TASK; } static __always_inline int @@ -114,6 +141,7 @@ 
kunwind_recover_return_address(struct kunwind_state *state) if (WARN_ON_ONCE(state->common.pc == orig_pc)) return -EINVAL; state->common.pc = orig_pc; + state->flags.fgraph = 1; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -124,12 +152,110 @@ kunwind_recover_return_address(struct kunwind_state *state) (void *)state->common.fp, &state->kr_cur); state->common.pc = orig_pc; + state->flags.kretprobe = 1; } #endif /* CONFIG_KRETPROBES */ return 0; } +static __always_inline +int kunwind_next_regs_pc(struct kunwind_state *state) +{ + struct stack_info *info; + unsigned long fp = state->common.fp; + struct pt_regs *regs; + + regs = container_of((u64 *)fp, struct pt_regs, stackframe.record.fp); + + info = unwind_find_stack(&state->common, (unsigned long)regs, sizeof(*regs)); + if (!info) + return -EINVAL; + + unwind_consume_stack(&state->common, info, (unsigned long)regs, + sizeof(*regs)); + + state->regs = regs; + state->common.pc = regs->pc; + state->common.fp = regs->regs[29]; + state->source = KUNWIND_SOURCE_REGS_PC; + return 0; +} + +static __always_inline int +kunwind_next_regs_lr(struct kunwind_state *state) +{ + /* + * The stack for the regs was consumed by kunwind_next_regs_pc(), so we + * cannot consume that again here, but we know the regs are safe to + * access. + */ + state->common.pc = state->regs->regs[30]; + state->common.fp = state->regs->regs[29]; + state->regs = NULL; + state->source = KUNWIND_SOURCE_REGS_LR; + + return 0; +} + +static __always_inline int +kunwind_next_frame_record_meta(struct kunwind_state *state) +{ + struct task_struct *tsk = state->task; + unsigned long fp = state->common.fp; + struct frame_record_meta *meta; + struct stack_info *info; + + info = unwind_find_stack(&state->common, fp, sizeof(*meta)); + if (!info) + return -EINVAL; + + meta = (struct frame_record_meta *)fp; + switch (READ_ONCE(meta->type)) { + case FRAME_META_TYPE_FINAL: + if (meta == &task_pt_regs(tsk)->stackframe) + return -ENOENT; + WARN_ON_ONCE(1); + return -EINVAL; + case FRAME_META_TYPE_PT_REGS: + return kunwind_next_regs_pc(state); + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} + +static __always_inline int +kunwind_next_frame_record(struct kunwind_state *state) +{ + unsigned long fp = state->common.fp; + struct frame_record *record; + struct stack_info *info; + unsigned long new_fp, new_pc; + + if (fp & 0x7) + return -EINVAL; + + info = unwind_find_stack(&state->common, fp, sizeof(*record)); + if (!info) + return -EINVAL; + + record = (struct frame_record *)fp; + new_fp = READ_ONCE(record->fp); + new_pc = READ_ONCE(record->lr); + + if (!new_fp && !new_pc) + return kunwind_next_frame_record_meta(state); + + unwind_consume_stack(&state->common, info, fp, sizeof(*record)); + + state->common.fp = new_fp; + state->common.pc = new_pc; + state->source = KUNWIND_SOURCE_FRAME; + + return 0; +} + /* * Unwind from one frame record (A) to the next frame record (B). 
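The record walk above reduces to the classic AAPCS64 frame-pointer chase plus the new all-zero terminator check. A stripped-down sketch, without the stack-bounds validation that unwind_find_stack()/unwind_consume_stack() provide in the real code::

  struct frame_record {
          unsigned long fp;   /* caller's x29 */
          unsigned long lr;   /* return address */
  };

  static void walk_frames(unsigned long fp, void (*emit)(unsigned long pc))
  {
          while (fp && !(fp & 0x7)) {       /* records are 8-byte aligned */
                  const struct frame_record *rec =
                          (const struct frame_record *)fp;

                  if (!rec->fp && !rec->lr) /* metadata/final frame */
                          break;

                  emit(rec->lr);
                  fp = rec->fp;
          }
  }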
* @@ -140,15 +266,24 @@ kunwind_recover_return_address(struct kunwind_state *state) static __always_inline int kunwind_next(struct kunwind_state *state) { - struct task_struct *tsk = state->task; - unsigned long fp = state->common.fp; int err; - /* Final frame; nothing to unwind */ - if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) - return -ENOENT; + state->flags.all = 0; + + switch (state->source) { + case KUNWIND_SOURCE_FRAME: + case KUNWIND_SOURCE_CALLER: + case KUNWIND_SOURCE_TASK: + case KUNWIND_SOURCE_REGS_LR: + err = kunwind_next_frame_record(state); + break; + case KUNWIND_SOURCE_REGS_PC: + err = kunwind_next_regs_lr(state); + break; + default: + err = -EINVAL; + } - err = unwind_next_frame_record(&state->common); if (err) return err; @@ -294,10 +429,33 @@ noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u6 kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL); } -static bool dump_backtrace_entry(void *arg, unsigned long where) +static const char *state_source_string(const struct kunwind_state *state) { + switch (state->source) { + case KUNWIND_SOURCE_FRAME: return NULL; + case KUNWIND_SOURCE_CALLER: return "C"; + case KUNWIND_SOURCE_TASK: return "T"; + case KUNWIND_SOURCE_REGS_PC: return "P"; + case KUNWIND_SOURCE_REGS_LR: return "L"; + default: return "U"; + } +} + +static bool dump_backtrace_entry(const struct kunwind_state *state, void *arg) +{ + const char *source = state_source_string(state); + union unwind_flags flags = state->flags; + bool has_info = source || flags.all; char *loglvl = arg; - printk("%s %pSb\n", loglvl, (void *)where); + + printk("%s %pSb%s%s%s%s%s\n", loglvl, + (void *)state->common.pc, + has_info ? " (" : "", + source ? source : "", + flags.fgraph ? "F" : "", + flags.kretprobe ? "K" : "", + has_info ? 
")" : ""); + return true; } @@ -316,7 +474,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, return; printk("%sCall trace:\n", loglvl); - arch_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); + kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs); put_task_stack(tsk); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 563cbce11126..ee318f6df647 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -506,6 +506,16 @@ void do_el1_bti(struct pt_regs *regs, unsigned long esr) die("Oops - BTI", regs, esr); } +void do_el0_gcs(struct pt_regs *regs, unsigned long esr) +{ + force_signal_inject(SIGSEGV, SEGV_CPERR, regs->pc, 0); +} + +void do_el1_gcs(struct pt_regs *regs, unsigned long esr) +{ + die("Oops - GCS", regs, esr); +} + void do_el0_fpac(struct pt_regs *regs, unsigned long esr) { force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr); @@ -531,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr) user_fastforward_single_step(current); } +void do_el1_mops(struct pt_regs *regs, unsigned long esr) +{ + arm64_mops_reset_regs(®s->user_regs, esr); + + kernel_fastforward_single_step(regs); +} + #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ res = -EFAULT; \ @@ -852,6 +869,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_MOPS] = "MOPS", [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", + [ESR_ELx_EC_GCS] = "Guarded Control Stack", [ESR_ELx_EC_SERROR] = "SError", [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)", [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)", diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 58d89d997d05..f84c71f04d9e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -287,6 +287,9 @@ SECTIONS __initdata_end = .; __init_end = .; + .data.rel.ro : { *(.data.rel.ro) } + ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!") + _data = .; _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) @@ -343,9 +346,6 @@ SECTIONS *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) } ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") - - .data.rel.ro : { *(.data.rel.ro) } - ASSERT(SIZEOF(.data.rel.ro) == 0, "Unexpected RELRO detected!") } #include "image-vars.h" diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 962f985977c2..e738a353b20e 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1055,6 +1055,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, void *maddr; unsigned long num_tags; struct page *page; + struct folio *folio; if (is_error_noslot_pfn(pfn)) { ret = -EFAULT; @@ -1068,10 +1069,13 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, ret = -EFAULT; goto out; } + folio = page_folio(page); maddr = page_address(page); if (!write) { - if (page_mte_tagged(page)) + if ((folio_test_hugetlb(folio) && + folio_test_hugetlb_mte_tagged(folio)) || + page_mte_tagged(page)) num_tags = mte_copy_tags_to_user(tags, maddr, MTE_GRANULES_PER_PAGE); else @@ -1085,14 +1089,20 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, * __set_ptes() in the VMM but still overriding the * tags, hence ignoring the return value. 
*/ - try_page_mte_tagging(page); + if (folio_test_hugetlb(folio)) + folio_try_hugetlb_mte_tagging(folio); + else + try_page_mte_tagging(page); num_tags = mte_copy_tags_from_user(maddr, tags, MTE_GRANULES_PER_PAGE); /* uaccess failed, don't leave stale tags */ if (num_tags != MTE_GRANULES_PER_PAGE) mte_clear_page_tags(maddr); - set_page_mte_tagged(page); + if (folio_test_hugetlb(folio)) + folio_set_hugetlb_mte_tagged(folio); + else + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0f7658aefa1a..56d9a7f414fe 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1402,10 +1402,21 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, { unsigned long i, nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(pfn); + struct folio *folio = page_folio(page); if (!kvm_has_mte(kvm)) return; + if (folio_test_hugetlb(folio)) { + /* Hugetlb has MTE flags set on head page only */ + if (folio_try_hugetlb_mte_tagging(folio)) { + for (i = 0; i < nr_pages; i++, page++) + mte_clear_page_tags(page_address(page)); + folio_set_hugetlb_mte_tagged(folio); + } + return; + } + for (i = 0; i < nr_pages; i++, page++) { if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 13e6a2829116..8e882f479d98 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -13,7 +13,7 @@ endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -obj-$(CONFIG_CRC32) += crc32.o +obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index ebde40e7fa2b..bd6f7d5eb6eb 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -15,6 +15,19 @@ * x0 - dest */ SYM_FUNC_START(__pi_clear_page) +#ifdef CONFIG_AS_HAS_MOPS + .arch_extension mops +alternative_if_not ARM64_HAS_MOPS + b .Lno_mops +alternative_else_nop_endif + + mov x1, #PAGE_SIZE + setpn [x0]!, x1!, xzr + setmn [x0]!, x1!, xzr + seten [x0]!, x1!, xzr + ret +.Lno_mops: +#endif mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 6a56d7cf309d..e6374e7e5511 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -18,6 +18,19 @@ * x1 - src */ SYM_FUNC_START(__pi_copy_page) +#ifdef CONFIG_AS_HAS_MOPS + .arch_extension mops +alternative_if_not ARM64_HAS_MOPS + b .Lno_mops +alternative_else_nop_endif + + mov x2, #PAGE_SIZE + cpypwn [x0]!, [x1]!, x2! + cpymwn [x0]!, [x1]!, x2! + cpyewn [x0]!, [x1]!, x2! 
+ ret +.Lno_mops: +#endif ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c new file mode 100644 index 000000000000..295ae3e6b997 --- /dev/null +++ b/arch/arm64/lib/crc32-glue.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include +#include +#include +#include + +#include + +// The minimum input length to consider the 4-way interleaved code path +static const size_t min_len = 1024; + +asmlinkage u32 crc32_le_arm64(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32c_le_arm64(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char const *p, size_t len); + +asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); +asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); + +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return crc32_le_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32_le_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32_le_arm64(crc, p, len); +} + +u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return __crc32c_le_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32c_le_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32c_le_arm64(crc, p, len); +} + +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) + return crc32_be_base(crc, p, len); + + if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + kernel_neon_begin(); + crc = crc32_be_arm64_4way(crc, p, len); + kernel_neon_end(); + + p += round_down(len, 64); + len %= 64; + + if (!len) + return crc; + } + + return crc32_be_arm64(crc, p, len); +} diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index 8340dccff46f..68825317460f 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -1,54 +1,60 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Accelerated CRC32(C) using AArch64 CRC instructions + * Accelerated CRC32(C) using AArch64 CRC and PMULL instructions * - * Copyright (C) 2016 - 2018 Linaro Ltd + * Copyright (C) 2016 - 2018 Linaro Ltd. 
+ * Copyright (C) 2024 Google LLC + * + * Author: Ard Biesheuvel */ #include -#include #include - .arch armv8-a+crc + .cpu generic+crc+crypto - .macro byteorder, reg, be - .if \be -CPU_LE( rev \reg, \reg ) - .else -CPU_BE( rev \reg, \reg ) - .endif + .macro bitle, reg .endm - .macro byteorder16, reg, be - .if \be -CPU_LE( rev16 \reg, \reg ) - .else -CPU_BE( rev16 \reg, \reg ) - .endif - .endm - - .macro bitorder, reg, be - .if \be + .macro bitbe, reg rbit \reg, \reg - .endif .endm - .macro bitorder16, reg, be - .if \be - rbit \reg, \reg - lsr \reg, \reg, #16 - .endif + .macro bytele, reg .endm - .macro bitorder8, reg, be - .if \be + .macro bytebe, reg rbit \reg, \reg lsr \reg, \reg, #24 - .endif .endm - .macro __crc32, c, be=0 - bitorder w0, \be + .macro hwordle, reg +CPU_BE( rev16 \reg, \reg ) + .endm + + .macro hwordbe, reg +CPU_LE( rev \reg, \reg ) + rbit \reg, \reg +CPU_BE( lsr \reg, \reg, #16 ) + .endm + + .macro le, regs:vararg + .irp r, \regs +CPU_BE( rev \r, \r ) + .endr + .endm + + .macro be, regs:vararg + .irp r, \regs +CPU_LE( rev \r, \r ) + .endr + .irp r, \regs + rbit \r, \r + .endr + .endm + + .macro __crc32, c, order=le + bit\order w0 cmp x2, #16 b.lt 8f // less than 16 bytes @@ -61,14 +67,7 @@ CPU_BE( rev16 \reg, \reg ) add x8, x8, x1 add x1, x1, x7 ldp x5, x6, [x8] - byteorder x3, \be - byteorder x4, \be - byteorder x5, \be - byteorder x6, \be - bitorder x3, \be - bitorder x4, \be - bitorder x5, \be - bitorder x6, \be + \order x3, x4, x5, x6 tst x7, #8 crc32\c\()x w8, w0, x3 @@ -96,65 +95,268 @@ CPU_BE( rev16 \reg, \reg ) 32: ldp x3, x4, [x1], #32 sub x2, x2, #32 ldp x5, x6, [x1, #-16] - byteorder x3, \be - byteorder x4, \be - byteorder x5, \be - byteorder x6, \be - bitorder x3, \be - bitorder x4, \be - bitorder x5, \be - bitorder x6, \be + \order x3, x4, x5, x6 crc32\c\()x w0, w0, x3 crc32\c\()x w0, w0, x4 crc32\c\()x w0, w0, x5 crc32\c\()x w0, w0, x6 cbnz x2, 32b -0: bitorder w0, \be +0: bit\order w0 ret 8: tbz x2, #3, 4f ldr x3, [x1], #8 - byteorder x3, \be - bitorder x3, \be + \order x3 crc32\c\()x w0, w0, x3 4: tbz x2, #2, 2f ldr w3, [x1], #4 - byteorder w3, \be - bitorder w3, \be + \order w3 crc32\c\()w w0, w0, w3 2: tbz x2, #1, 1f ldrh w3, [x1], #2 - byteorder16 w3, \be - bitorder16 w3, \be + hword\order w3 crc32\c\()h w0, w0, w3 1: tbz x2, #0, 0f ldrb w3, [x1] - bitorder8 w3, \be + byte\order w3 crc32\c\()b w0, w0, w3 -0: bitorder w0, \be +0: bit\order w0 ret .endm .align 5 -SYM_FUNC_START(crc32_le) -alternative_if_not ARM64_HAS_CRC32 - b crc32_le_base -alternative_else_nop_endif +SYM_FUNC_START(crc32_le_arm64) __crc32 -SYM_FUNC_END(crc32_le) +SYM_FUNC_END(crc32_le_arm64) .align 5 -SYM_FUNC_START(__crc32c_le) -alternative_if_not ARM64_HAS_CRC32 - b __crc32c_le_base -alternative_else_nop_endif +SYM_FUNC_START(crc32c_le_arm64) __crc32 c -SYM_FUNC_END(__crc32c_le) +SYM_FUNC_END(crc32c_le_arm64) .align 5 -SYM_FUNC_START(crc32_be) -alternative_if_not ARM64_HAS_CRC32 - b crc32_be_base -alternative_else_nop_endif - __crc32 be=1 -SYM_FUNC_END(crc32_be) +SYM_FUNC_START(crc32_be_arm64) + __crc32 order=be +SYM_FUNC_END(crc32_be_arm64) + + in .req x1 + len .req x2 + + /* + * w0: input CRC at entry, output CRC at exit + * x1: pointer to input buffer + * x2: length of input in bytes + */ + .macro crc4way, insn, table, order=le + bit\order w0 + lsr len, len, #6 // len := # of 64-byte blocks + + /* Process up to 64 blocks of 64 bytes at a time */ +.La\@: mov x3, #64 + cmp len, #64 + csel x3, x3, len, hi // x3 := min(len, 64) + sub len, len, x3 + + /* Divide the input into 4 
contiguous blocks */ + add x4, x3, x3, lsl #1 // x4 := 3 * x3 + add x7, in, x3, lsl #4 // x7 := in + 16 * x3 + add x8, in, x3, lsl #5 // x8 := in + 32 * x3 + add x9, in, x4, lsl #4 // x9 := in + 16 * x4 + + /* Load the folding coefficients from the lookup table */ + adr_l x5, \table - 12 // entry 0 omitted + add x5, x5, x4, lsl #2 // x5 += 12 * x3 + ldp s0, s1, [x5] + ldr s2, [x5, #8] + + /* Zero init partial CRCs for this iteration */ + mov w4, wzr + mov w5, wzr + mov w6, wzr + mov x17, xzr + +.Lb\@: sub x3, x3, #1 + \insn w6, w6, x17 + ldp x10, x11, [in], #16 + ldp x12, x13, [x7], #16 + ldp x14, x15, [x8], #16 + ldp x16, x17, [x9], #16 + + \order x10, x11, x12, x13, x14, x15, x16, x17 + + /* Apply the CRC transform to 4 16-byte blocks in parallel */ + \insn w0, w0, x10 + \insn w4, w4, x12 + \insn w5, w5, x14 + \insn w6, w6, x16 + \insn w0, w0, x11 + \insn w4, w4, x13 + \insn w5, w5, x15 + cbnz x3, .Lb\@ + + /* Combine the 4 partial results into w0 */ + mov v3.d[0], x0 + mov v4.d[0], x4 + mov v5.d[0], x5 + pmull v0.1q, v0.1d, v3.1d + pmull v1.1q, v1.1d, v4.1d + pmull v2.1q, v2.1d, v5.1d + eor v0.8b, v0.8b, v1.8b + eor v0.8b, v0.8b, v2.8b + mov x5, v0.d[0] + eor x5, x5, x17 + \insn w0, w6, x5 + + mov in, x9 + cbnz len, .La\@ + + bit\order w0 + ret + .endm + + .align 5 +SYM_FUNC_START(crc32c_le_arm64_4way) + crc4way crc32cx, .L0 +SYM_FUNC_END(crc32c_le_arm64_4way) + + .align 5 +SYM_FUNC_START(crc32_le_arm64_4way) + crc4way crc32x, .L1 +SYM_FUNC_END(crc32_le_arm64_4way) + + .align 5 +SYM_FUNC_START(crc32_be_arm64_4way) + crc4way crc32x, .L1, be +SYM_FUNC_END(crc32_be_arm64_4way) + + .section .rodata, "a", %progbits + .align 6 +.L0: .long 0xddc0152b, 0xba4fc28e, 0x493c7d27 + .long 0x0715ce53, 0x9e4addf8, 0xba4fc28e + .long 0xc96cfdc0, 0x0715ce53, 0xddc0152b + .long 0xab7aff2a, 0x0d3b6092, 0x9e4addf8 + .long 0x299847d5, 0x878a92a7, 0x39d3b296 + .long 0xb6dd949b, 0xab7aff2a, 0x0715ce53 + .long 0xa60ce07b, 0x83348832, 0x47db8317 + .long 0xd270f1a2, 0xb9e02b86, 0x0d3b6092 + .long 0x65863b64, 0xb6dd949b, 0xc96cfdc0 + .long 0xb3e32c28, 0xbac2fd7b, 0x878a92a7 + .long 0xf285651c, 0xce7f39f4, 0xdaece73e + .long 0x271d9844, 0xd270f1a2, 0xab7aff2a + .long 0x6cb08e5c, 0x2b3cac5d, 0x2162d385 + .long 0xcec3662e, 0x1b03397f, 0x83348832 + .long 0x8227bb8a, 0xb3e32c28, 0x299847d5 + .long 0xd7a4825c, 0xdd7e3b0c, 0xb9e02b86 + .long 0xf6076544, 0x10746f3c, 0x18b33a4e + .long 0x98d8d9cb, 0x271d9844, 0xb6dd949b + .long 0x57a3d037, 0x93a5f730, 0x78d9ccb7 + .long 0x3771e98f, 0x6b749fb2, 0xbac2fd7b + .long 0xe0ac139e, 0xcec3662e, 0xa60ce07b + .long 0x6f345e45, 0xe6fc4e6a, 0xce7f39f4 + .long 0xa2b73df1, 0xb0cd4768, 0x61d82e56 + .long 0x86d8e4d2, 0xd7a4825c, 0xd270f1a2 + .long 0xa90fd27a, 0x0167d312, 0xc619809d + .long 0xca6ef3ac, 0x26f6a60a, 0x2b3cac5d + .long 0x4597456a, 0x98d8d9cb, 0x65863b64 + .long 0xc9c8b782, 0x68bce87a, 0x1b03397f + .long 0x62ec6c6d, 0x6956fc3b, 0xebb883bd + .long 0x2342001e, 0x3771e98f, 0xb3e32c28 + .long 0xe8b6368b, 0x2178513a, 0x064f7f26 + .long 0x9ef68d35, 0x170076fa, 0xdd7e3b0c + .long 0x0b0bf8ca, 0x6f345e45, 0xf285651c + .long 0x02ee03b2, 0xff0dba97, 0x10746f3c + .long 0x135c83fd, 0xf872e54c, 0xc7a68855 + .long 0x00bcf5f6, 0x86d8e4d2, 0x271d9844 + .long 0x58ca5f00, 0x5bb8f1bc, 0x8e766a0c + .long 0xded288f8, 0xb3af077a, 0x93a5f730 + .long 0x37170390, 0xca6ef3ac, 0x6cb08e5c + .long 0xf48642e9, 0xdd66cbbb, 0x6b749fb2 + .long 0xb25b29f2, 0xe9e28eb4, 0x1393e203 + .long 0x45cddf4e, 0xc9c8b782, 0xcec3662e + .long 0xdfd94fb2, 0x93e106a4, 0x96c515bb + .long 0x021ac5ef, 0xd813b325, 0xe6fc4e6a + 
.long 0x8e1450f7, 0x2342001e, 0x8227bb8a + .long 0xe0cdcf86, 0x6d9a4957, 0xb0cd4768 + .long 0x613eee91, 0xd2c3ed1a, 0x39c7ff35 + .long 0xbedc6ba1, 0x9ef68d35, 0xd7a4825c + .long 0x0cd1526a, 0xf2271e60, 0x0ab3844b + .long 0xd6c3a807, 0x2664fd8b, 0x0167d312 + .long 0x1d31175f, 0x02ee03b2, 0xf6076544 + .long 0x4be7fd90, 0x363bd6b3, 0x26f6a60a + .long 0x6eeed1c9, 0x5fabe670, 0xa741c1bf + .long 0xb3a6da94, 0x00bcf5f6, 0x98d8d9cb + .long 0x2e7d11a7, 0x17f27698, 0x49c3cc9c + .long 0x889774e1, 0xaa7c7ad5, 0x68bce87a + .long 0x8a074012, 0xded288f8, 0x57a3d037 + .long 0xbd0bb25f, 0x6d390dec, 0x6956fc3b + .long 0x3be3c09b, 0x6353c1cc, 0x42d98888 + .long 0x465a4eee, 0xf48642e9, 0x3771e98f + .long 0x2e5f3c8c, 0xdd35bc8d, 0xb42ae3d9 + .long 0xa52f58ec, 0x9a5ede41, 0x2178513a + .long 0x47972100, 0x45cddf4e, 0xe0ac139e + .long 0x359674f7, 0xa51b6135, 0x170076fa + +.L1: .long 0xaf449247, 0x81256527, 0xccaa009e + .long 0x57c54819, 0x1d9513d7, 0x81256527 + .long 0x3f41287a, 0x57c54819, 0xaf449247 + .long 0xf5e48c85, 0x910eeec1, 0x1d9513d7 + .long 0x1f0c2cdd, 0x9026d5b1, 0xae0b5394 + .long 0x71d54a59, 0xf5e48c85, 0x57c54819 + .long 0x1c63267b, 0xfe807bbd, 0x0cbec0ed + .long 0xd31343ea, 0xe95c1271, 0x910eeec1 + .long 0xf9d9c7ee, 0x71d54a59, 0x3f41287a + .long 0x9ee62949, 0xcec97417, 0x9026d5b1 + .long 0xa55d1514, 0xf183c71b, 0xd1df2327 + .long 0x21aa2b26, 0xd31343ea, 0xf5e48c85 + .long 0x9d842b80, 0xeea395c4, 0x3c656ced + .long 0xd8110ff1, 0xcd669a40, 0xfe807bbd + .long 0x3f9e9356, 0x9ee62949, 0x1f0c2cdd + .long 0x1d6708a0, 0x0c30f51d, 0xe95c1271 + .long 0xef82aa68, 0xdb3935ea, 0xb918a347 + .long 0xd14bcc9b, 0x21aa2b26, 0x71d54a59 + .long 0x99cce860, 0x356d209f, 0xff6f2fc2 + .long 0xd8af8e46, 0xc352f6de, 0xcec97417 + .long 0xf1996890, 0xd8110ff1, 0x1c63267b + .long 0x631bc508, 0xe95c7216, 0xf183c71b + .long 0x8511c306, 0x8e031a19, 0x9b9bdbd0 + .long 0xdb3839f3, 0x1d6708a0, 0xd31343ea + .long 0x7a92fffb, 0xf7003835, 0x4470ac44 + .long 0x6ce68f2a, 0x00eba0c8, 0xeea395c4 + .long 0x4caaa263, 0xd14bcc9b, 0xf9d9c7ee + .long 0xb46f7cff, 0x9a1b53c8, 0xcd669a40 + .long 0x60290934, 0x81b6f443, 0x6d40f445 + .long 0x8e976a7d, 0xd8af8e46, 0x9ee62949 + .long 0xdcf5088a, 0x9dbdc100, 0x145575d5 + .long 0x1753ab84, 0xbbf2f6d6, 0x0c30f51d + .long 0x255b139e, 0x631bc508, 0xa55d1514 + .long 0xd784eaa8, 0xce26786c, 0xdb3935ea + .long 0x6d2c864a, 0x8068c345, 0x2586d334 + .long 0x02072e24, 0xdb3839f3, 0x21aa2b26 + .long 0x06689b0a, 0x5efd72f5, 0xe0575528 + .long 0x1e52f5ea, 0x4117915b, 0x356d209f + .long 0x1d3d1db6, 0x6ce68f2a, 0x9d842b80 + .long 0x3796455c, 0xb8e0e4a8, 0xc352f6de + .long 0xdf3a4eb3, 0xc55a2330, 0xb84ffa9c + .long 0x28ae0976, 0xb46f7cff, 0xd8110ff1 + .long 0x9764bc8d, 0xd7e7a22c, 0x712510f0 + .long 0x13a13e18, 0x3e9a43cd, 0xe95c7216 + .long 0xb8ee242e, 0x8e976a7d, 0x3f9e9356 + .long 0x0c540e7b, 0x753c81ff, 0x8e031a19 + .long 0x9924c781, 0xb9220208, 0x3edcde65 + .long 0x3954de39, 0x1753ab84, 0x1d6708a0 + .long 0xf32238b5, 0xbec81497, 0x9e70b943 + .long 0xbbd2cd2c, 0x0925d861, 0xf7003835 + .long 0xcc401304, 0xd784eaa8, 0xef82aa68 + .long 0x4987e684, 0x6044fbb0, 0x00eba0c8 + .long 0x3aa11427, 0x18fe3b4a, 0x87441142 + .long 0x297aad60, 0x02072e24, 0xd14bcc9b + .long 0xf60c5e51, 0x6ef6f487, 0x5b7fdd0a + .long 0x632d78c5, 0x3fc33de4, 0x9a1b53c8 + .long 0x25b8822a, 0x1e52f5ea, 0x99cce860 + .long 0xd4fc84bc, 0x1af62fb8, 0x81b6f443 + .long 0x5690aa32, 0xa91fdefb, 0x688a110e + .long 0x1357a093, 0x3796455c, 0xd8af8e46 + .long 0x798fdd33, 0xaaa18a37, 0x357b9517 + .long 0xc2815395, 0x54d42691, 0x9dbdc100 + .long 0x21cfc0f7, 
0x28ae0976, 0xf1996890 + .long 0xa0decef3, 0x7b4aa8b7, 0xbbf2f6d6 diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 4ab48d49c451..9b99106fb95f 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,7 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. */ -SYM_FUNC_START(__pi_memcpy) +SYM_FUNC_START_LOCAL(__pi_memcpy_generic) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -238,7 +238,24 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy_generic) + +#ifdef CONFIG_AS_HAS_MOPS + .arch_extension mops +SYM_FUNC_START(__pi_memcpy) +alternative_if_not ARM64_HAS_MOPS + b __pi_memcpy_generic +alternative_else_nop_endif + + mov dst, dstin + cpyp [dst]!, [src]!, count! + cpym [dst]!, [src]!, count! + cpye [dst]!, [src]!, count! + ret SYM_FUNC_END(__pi_memcpy) +#else +SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic) +#endif SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a5aebe82ad73..97157da65ec6 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -26,6 +26,7 @@ */ dstin .req x0 +val_x .req x1 val .req w1 count .req x2 tmp1 .req x3 @@ -42,7 +43,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START(__pi_memset) +SYM_FUNC_START_LOCAL(__pi_memset_generic) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -201,7 +202,24 @@ SYM_FUNC_START(__pi_memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret +SYM_FUNC_END(__pi_memset_generic) + +#ifdef CONFIG_AS_HAS_MOPS + .arch_extension mops +SYM_FUNC_START(__pi_memset) +alternative_if_not ARM64_HAS_MOPS + b __pi_memset_generic +alternative_else_nop_endif + + mov dst, dstin + setp [dst]!, count!, val_x + setm [dst]!, count!, val_x + sete [dst]!, count!, val_x + ret SYM_FUNC_END(__pi_memset) +#else +SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic) +#endif SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 2fc8c6dd0407..fc92170a8f37 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o +obj-$(CONFIG_ARM64_GCS) += gcs.o KASAN_SANITIZE_physaddr.o += n obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index a7bb20055ce0..87b3f1a25535 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -18,15 +18,40 @@ void copy_highpage(struct page *to, struct page *from) { void *kto = page_address(to); void *kfrom = page_address(from); + struct folio *src = page_folio(from); + struct folio *dst = page_folio(to); + unsigned int i, nr_pages; copy_page(kto, kfrom); if (kasan_hw_tags_enabled()) page_kasan_tag_reset(to); - if (system_supports_mte() && page_mte_tagged(from)) { + if (!system_supports_mte()) + return; + + if (folio_test_hugetlb(src) && + folio_test_hugetlb_mte_tagged(src)) { + if (!folio_try_hugetlb_mte_tagging(dst)) + return; + + /* + * Populate tags for all subpages. + * + * Don't assume the first page is head page since + * huge page copy may start from any subpage. 
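Returning to the MOPS conversions of memcpy()/memset() above: the prologue/main/epilogue triple can be exercised from C as well. A hedged sketch using inline assembly, assuming a MOPS-capable CPU and assembler (the kernel instead patches the library routines through the ARM64_HAS_MOPS alternative, as shown)::

  #include <stddef.h>

  static void mops_memset(void *dst, unsigned long val, size_t count)
  {
          /*
           * SETP/SETM/SETE must execute consecutively on the same
           * dst/count/val registers; the CPU chooses how much work each
           * step performs, updating dst and count as it goes.
           */
          asm volatile(
                  ".arch_extension mops\n"
                  "setp   [%0]!, %1!, %2\n"
                  "setm   [%0]!, %1!, %2\n"
                  "sete   [%0]!, %1!, %2\n"
                  : "+r" (dst), "+r" (count)
                  : "r" (val)
                  : "memory");
  }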
+ */ + nr_pages = folio_nr_pages(src); + for (i = 0; i < nr_pages; i++) { + kfrom = page_address(folio_page(src, i)); + kto = page_address(folio_page(dst, i)); + mte_copy_page_tags(kto, kfrom); + } + folio_set_hugetlb_mte_tagged(dst); + } else if (page_mte_tagged(from)) { /* It's a new page, shouldn't have been tagged yet */ WARN_ON_ONCE(!try_page_mte_tagging(to)); + mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8b281cf308b3..c2f89a678ac0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -504,6 +504,14 @@ static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, false); } +static bool is_gcs_fault(unsigned long esr) +{ + if (!esr_is_data_abort(esr)) + return false; + + return ESR_ELx_ISS2(esr) & ESR_ELx_GCS; +} + static bool is_el0_instruction_abort(unsigned long esr) { return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; @@ -518,6 +526,23 @@ static bool is_write_abort(unsigned long esr) return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); } +static bool is_invalid_gcs_access(struct vm_area_struct *vma, u64 esr) +{ + if (!system_supports_gcs()) + return false; + + if (unlikely(is_gcs_fault(esr))) { + /* GCS accesses must be performed on a GCS page */ + if (!(vma->vm_flags & VM_SHADOW_STACK)) + return true; + } else if (unlikely(vma->vm_flags & VM_SHADOW_STACK)) { + /* Only GCS operations can write to a GCS page */ + return esr_is_data_abort(esr) && is_write_abort(esr); + } + + return false; +} + static int __kprobes do_page_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { @@ -554,6 +579,14 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, /* It was exec fault */ vm_flags = VM_EXEC; mm_flags |= FAULT_FLAG_INSTRUCTION; + } else if (is_gcs_fault(esr)) { + /* + * The GCS permission on a page implies both read and + * write so always handle any GCS fault as a write fault, + * we need to trigger CoW even for GCS reads. 
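Taken together, is_invalid_gcs_access() above and the write-fault forcing here give a small decision table (derived from the code; illustrative layout)::

  fault type    VM_SHADOW_STACK vma            ordinary vma
  ----------    -------------------            ------------
  GCS access    handled, forced to VM_WRITE    SEGV_ACCERR
                (so CoW triggers on reads)
  plain write   SEGV_ACCERR                    normal handling
  plain read    permitted                      normal handling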
+ */ + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; } else if (is_write_abort(esr)) { /* It was write fault */ vm_flags = VM_WRITE; @@ -587,6 +620,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, if (!vma) goto lock_mmap; + if (is_invalid_gcs_access(vma, esr)) { + vma_end_read(vma); + fault = 0; + si_code = SEGV_ACCERR; + goto bad_area; + } + if (!(vma->vm_flags & vm_flags)) { vma_end_read(vma); fault = 0; diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index de1e09d986ad..c5c5425791da 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -47,7 +47,8 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) if (pmd_none(pmd)) { ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; - __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + __pmd_populate(pmdp, __pa_symbol(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF); } } @@ -59,7 +60,8 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, pmd_t *pmdp; if (pud_none(pud)) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), + PUD_TYPE_TABLE | PUD_TABLE_AF); pmdp = pmd_offset_kimg(pudp, addr); do { @@ -86,7 +88,8 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, } if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), + P4D_TYPE_TABLE | P4D_TABLE_AF); pudp = pud_offset_kimg(p4dp, addr); early_fixmap_init_pmd(pudp, addr, end); diff --git a/arch/arm64/mm/gcs.c b/arch/arm64/mm/gcs.c new file mode 100644 index 000000000000..5c46ec527b1c --- /dev/null +++ b/arch/arm64/mm/gcs.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/types.h> + +#include <asm/cmpxchg.h> +#include <asm/cpufeature.h> +#include <asm/gcs.h> +#include <asm/page.h> + +static unsigned long alloc_gcs(unsigned long addr, unsigned long size) +{ + int flags = MAP_ANONYMOUS | MAP_PRIVATE; + struct mm_struct *mm = current->mm; + unsigned long mapped_addr, unused; + + if (addr) + flags |= MAP_FIXED_NOREPLACE; + + mmap_write_lock(mm); + mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags, + VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL); + mmap_write_unlock(mm); + + return mapped_addr; +} + +static unsigned long gcs_size(unsigned long size) +{ + if (size) + return PAGE_ALIGN(size); + + /* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */ + size = PAGE_ALIGN(min_t(unsigned long long, + rlimit(RLIMIT_STACK) / 2, SZ_2G)); + return max(PAGE_SIZE, size); + } + +unsigned long gcs_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args) +{ + unsigned long addr, size; + + if (!system_supports_gcs()) + return 0; + + if (!task_gcs_el0_enabled(tsk)) + return 0; + + if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) { + tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0); + return 0; + } + + size = args->stack_size / 2; + + size = gcs_size(size); + addr = alloc_gcs(0, size); + if (IS_ERR_VALUE(addr)) + return addr; + + tsk->thread.gcs_base = addr; + tsk->thread.gcs_size = size; + tsk->thread.gcspr_el0 = addr + size - sizeof(u64); + + return addr; +} + +SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) +{ + unsigned long alloc_size; + unsigned long __user *cap_ptr; + unsigned long cap_val; + int ret = 0; + int cap_offset; + + if (!system_supports_gcs()) + return -EOPNOTSUPP; + + if (flags & ~(SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER)) + return -EINVAL; + + if (!PAGE_ALIGNED(addr)) +
return -EINVAL; + + if (size == 8 || !IS_ALIGNED(size, 8)) + return -EINVAL; + + /* + * An overflow would result in attempting to write the restore token + * to the wrong location. Not catastrophic, but just return the right + * error code and block it. + */ + alloc_size = PAGE_ALIGN(size); + if (alloc_size < size) + return -EOVERFLOW; + + addr = alloc_gcs(addr, alloc_size); + if (IS_ERR_VALUE(addr)) + return addr; + + /* + * Put a cap token at the end of the allocated region so it + * can be switched to. + */ + if (flags & SHADOW_STACK_SET_TOKEN) { + /* Leave an extra empty frame as a top of stack marker? */ + if (flags & SHADOW_STACK_SET_MARKER) + cap_offset = 2; + else + cap_offset = 1; + + cap_ptr = (unsigned long __user *)(addr + size - + (cap_offset * sizeof(unsigned long))); + cap_val = GCS_CAP(cap_ptr); + + put_user_gcs(cap_val, cap_ptr, &ret); + if (ret != 0) { + vm_munmap(addr, size); + return -EFAULT; + } + + /* + * Ensure the new cap is ordered before standard + * memory accesses to the same location. + */ + gcsb_dsync(); + } + + return addr; +}
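To make the cap/marker layout concrete, a hypothetical user of the new syscall follows. The syscall number and flag values mirror the uapi additions this series depends on and should be taken from installed headers; the fallback defines are assumptions, and the token arithmetic mirrors the cap_offset logic above::

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_map_shadow_stack
    #define __NR_map_shadow_stack	453	/* assumption: asm-generic value */
    #endif
    #ifndef SHADOW_STACK_SET_TOKEN
    #define SHADOW_STACK_SET_TOKEN	(1UL << 0)	/* add a restore token */
    #define SHADOW_STACK_SET_MARKER	(1UL << 1)	/* add a top-of-stack marker */
    #endif

    int main(void)
    {
        unsigned long size = 64 * 1024;	/* must be a multiple of 8 */
        long gcs = syscall(__NR_map_shadow_stack, 0UL, size,
                           SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER);

        if (gcs == -1) {
            perror("map_shadow_stack");
            return 1;
        }
        /* With both flags set cap_offset is 2, so the cap token sits
         * one slot below the (empty) top-of-stack marker at the end. */
        printf("GCS at %#lx, cap token at %#lx\n",
               (unsigned long)gcs, (unsigned long)gcs + size - 16);
        return 0;
    }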
+ +/* + * Apply the GCS mode configured for the specified task to the + * hardware. + */ +void gcs_set_el0_mode(struct task_struct *task) +{ + u64 gcscre0_el1 = GCSCRE0_EL1_nTR; + + if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE) + gcscre0_el1 |= GCSCRE0_EL1_RVCHKEN | GCSCRE0_EL1_PCRSEL; + + if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_WRITE) + gcscre0_el1 |= GCSCRE0_EL1_STREn; + + if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_PUSH) + gcscre0_el1 |= GCSCRE0_EL1_PUSHMEn; + + write_sysreg_s(gcscre0_el1, SYS_GCSCRE0_EL1); +} + +void gcs_free(struct task_struct *task) +{ + if (!system_supports_gcs()) + return; + + /* + * When fork() with CLONE_VM fails, the child (tsk) already + * has a GCS allocated, and exit_thread() calls this function + * to free it. In this case the parent (current) and the + * child share the same mm struct. + */ + if (!task->mm || task->mm != current->mm) + return; + + if (task->thread.gcs_base) + vm_munmap(task->thread.gcs_base, task->thread.gcs_size); + + task->thread.gcspr_el0 = 0; + task->thread.gcs_base = 0; + task->thread.gcs_size = 0; +} + +int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg) +{ + unsigned long gcs, size; + int ret; + + if (!system_supports_gcs()) + return -EINVAL; + + if (is_compat_thread(task_thread_info(task))) + return -EINVAL; + + /* Reject unknown flags */ + if (arg & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) + return -EINVAL; + + ret = gcs_check_locked(task, arg); + if (ret != 0) + return ret; + + /* If we are enabling GCS then make sure we have a stack */ + if (arg & PR_SHADOW_STACK_ENABLE && + !task_gcs_el0_enabled(task)) { + /* Do not allow GCS to be reenabled */ + if (task->thread.gcs_base || task->thread.gcspr_el0) + return -EINVAL; + + if (task != current) + return -EBUSY; + + size = gcs_size(0); + gcs = alloc_gcs(0, size); + if (!gcs) + return -ENOMEM; + + task->thread.gcspr_el0 = gcs + size - sizeof(u64); + task->thread.gcs_base = gcs; + task->thread.gcs_size = size; + if (task == current) + write_sysreg_s(task->thread.gcspr_el0, + SYS_GCSPR_EL0); + } + + task->thread.gcs_el0_mode = arg; + if (task == current) + gcs_set_el0_mode(task); + + return 0; +} + +int arch_get_shadow_stack_status(struct task_struct *task, + unsigned long __user *arg) +{ + if (!system_supports_gcs()) + return -EINVAL; + + if (is_compat_thread(task_thread_info(task))) + return -EINVAL; + + return put_user(task->thread.gcs_el0_mode, arg); +} + +int arch_lock_shadow_stack_status(struct task_struct *task, + unsigned long arg) +{ + if (!system_supports_gcs()) + return -EINVAL; + + if (is_compat_thread(task_thread_info(task))) + return -EINVAL; + + /* + * We support locking unknown bits so applications can prevent + * any changes in a future-proof manner. + */ + task->thread.gcs_el0_locked |= arg; + + return 0; +}
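A thread drives the functions above through the shadow-stack prctl()s; a hedged sketch follows (the PR_* values track the prctl interface this series builds on and should be verified against linux/prctl.h)::

    #include <sys/prctl.h>

    #ifndef PR_SET_SHADOW_STACK_STATUS
    #define PR_GET_SHADOW_STACK_STATUS	74	/* assumption: linux/prctl.h values */
    #define PR_SET_SHADOW_STACK_STATUS	75
    #define PR_LOCK_SHADOW_STACK_STATUS	76
    #define PR_SHADOW_STACK_ENABLE		(1UL << 0)
    #endif

    static int enable_then_lock_gcs(void)
    {
        /* Reaches arch_set_shadow_stack_status(): allocates a GCS and
         * points GCSPR_EL0 at it for the calling thread. */
        if (prctl(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE,
                  0, 0, 0))
            return -1;

        /* Reaches arch_lock_shadow_stack_status(): the enable bit can
         * then never be changed again by this task. */
        return prctl(PR_LOCK_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE,
                     0, 0, 0);
    }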
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 5f1e2103888b..3215adf48a1b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -361,14 +361,25 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) { size_t pagesize = 1UL << shift; - entry = pte_mkhuge(entry); - if (pagesize == CONT_PTE_SIZE) { - entry = pte_mkcont(entry); - } else if (pagesize == CONT_PMD_SIZE) { + switch (pagesize) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + entry = pud_pte(pud_mkhuge(pte_pud(entry))); + break; +#endif + case CONT_PMD_SIZE: entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); - } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + fallthrough; + case PMD_SIZE: + entry = pmd_pte(pmd_mkhuge(pte_pmd(entry))); + break; + case CONT_PTE_SIZE: + entry = pte_mkcont(entry); + break; + default: pr_warn("%s: unrecognized huge page size 0x%lx\n", __func__, pagesize); + break; } return entry; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 27a32ff15412..d21f67d67cf5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -366,8 +367,14 @@ void __init bootmem_init(void) */ void __init mem_init(void) { + unsigned int flags = SWIOTLB_VERBOSE; bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); + if (is_realm_world()) { + swiotlb = true; + flags |= SWIOTLB_FORCE; + } + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) { /* * If no bouncing needed for ZONE_DMA, reduce the swiotlb @@ -379,7 +386,8 @@ void __init mem_init(void) swiotlb = true; } - swiotlb_init(swiotlb, SWIOTLB_VERBOSE); + swiotlb_init(swiotlb, flags); + swiotlb_update_mem_attributes(); /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7e3ad97e27d8..07aeab8a7606 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -83,8 +83,15 @@ arch_initcall(adjust_protection_map); pgprot_t vm_get_page_prot(unsigned long vm_flags) { - pteval_t prot = pgprot_val(protection_map[vm_flags & + pteval_t prot; + + /* Short-circuit GCS to avoid bloating the table.
*/ + if (system_supports_gcs() && (vm_flags & VM_SHADOW_STACK)) { + prot = _PAGE_GCS_RO; + } else { + prot = pgprot_val(protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]); + } if (vm_flags & VM_ARM64_BTI) prot |= PTE_GP; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e55b02fbddc8..e2739b69e11b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -119,7 +119,7 @@ static phys_addr_t __init early_pgtable_alloc(int shift) return phys; } -bool pgattr_change_is_safe(u64 old, u64 new) +bool pgattr_change_is_safe(pteval_t old, pteval_t new) { /* * The following mapping attributes may be updated in live @@ -201,7 +201,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { - pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF; phys_addr_t pte_phys; if (flags & NO_EXEC_MAPPINGS) @@ -288,7 +288,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { - pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF; phys_addr_t pmd_phys; if (flags & NO_EXEC_MAPPINGS) @@ -333,7 +333,7 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, pud_t *pudp; if (p4d_none(p4d)) { - p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF; phys_addr_t pud_phys; if (flags & NO_EXEC_MAPPINGS) @@ -391,7 +391,7 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t *p4dp; if (pgd_none(pgd)) { - pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN; + pgdval_t pgdval = PGD_TYPE_TABLE | PGD_TABLE_UXN | PGD_TABLE_AF; phys_addr_t p4d_phys; if (flags & NO_EXEC_MAPPINGS) diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0e270a1c51e6..6ae6ae806454 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -5,10 +5,12 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -23,14 +25,16 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED bool can_set_direct_map(void) { /* - * rodata_full and DEBUG_PAGEALLOC require linear map to be - * mapped at page granularity, so that it is possible to + * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear + * map to be mapped at page granularity, so that it is possible to * protect/unprotect single pages. * * KFENCE pool requires page-granular mapping if initialized late. + * + * Realms need to make pages shared/protected at page granularity. */ return rodata_full || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + arm64_kfence_can_set_direct_map() || is_realm_world(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -60,7 +64,13 @@ static int __change_memory_common(unsigned long start, unsigned long size, ret = apply_to_page_range(&init_mm, start, size, change_page_range, &data); - flush_tlb_kernel_range(start, start + size); + /* + * If the memory is being made valid without changing any other bits + * then a TLBI isn't required as a non-valid entry cannot be cached in + * the TLB. 
+ */ + if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask)) + flush_tlb_kernel_range(start, start + size); return ret; } @@ -192,6 +202,86 @@ int set_direct_map_default_noflush(struct page *page) PAGE_SIZE, change_page_range, &data); } +static int __set_memory_enc_dec(unsigned long addr, + int numpages, + bool encrypt) +{ + unsigned long set_prot = 0, clear_prot = 0; + phys_addr_t start, end; + int ret; + + if (!is_realm_world()) + return 0; + + if (!__is_lm_address(addr)) + return -EINVAL; + + start = __virt_to_phys(addr); + end = start + numpages * PAGE_SIZE; + + if (encrypt) + clear_prot = PROT_NS_SHARED; + else + set_prot = PROT_NS_SHARED; + + /* + * Break the mapping before we make any changes to avoid stale TLB + * entries or Synchronous External Aborts caused by RIPAS_EMPTY. + */ + ret = __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(set_prot), + __pgprot(clear_prot | PTE_VALID)); + + if (ret) + return ret; + + if (encrypt) + ret = rsi_set_memory_range_protected(start, end); + else + ret = rsi_set_memory_range_shared(start, end); + + if (ret) + return ret; + + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(PTE_VALID), + __pgprot(0)); +} + +static int realm_set_memory_encrypted(unsigned long addr, int numpages) +{ + int ret = __set_memory_enc_dec(addr, numpages, true); + + /* + * If the request to change state fails, then the only sensible course + * of action for the caller is to leak the memory. + */ + WARN(ret, "Failed to encrypt memory, %d pages will be leaked", + numpages); + + return ret; +} + +static int realm_set_memory_decrypted(unsigned long addr, int numpages) +{ + int ret = __set_memory_enc_dec(addr, numpages, false); + + WARN(ret, "Failed to decrypt memory, %d pages will be leaked", + numpages); + + return ret; +} + +static const struct arm64_mem_crypt_ops realm_crypt_ops = { + .encrypt = realm_set_memory_encrypted, + .decrypt = realm_set_memory_decrypted, +}; + +int realm_register_memory_enc_ops(void) +{ + return arm64_mem_crypt_ops_register(&realm_crypt_ops); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8abdc7fed321..b8edc5765441 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -465,10 +465,12 @@ SYM_FUNC_START(__cpu_setup) */ mair .req x17 tcr .req x16 + tcr2 .req x15 mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \ TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS + mov tcr2, xzr tcr_clear_errata_bits tcr, x9, x5 @@ -493,9 +495,14 @@ alternative_else_nop_endif * via capabilities.
*/ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update +#ifdef CONFIG_ARM64_HAFT + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT + b.lt 1f + orr tcr2, tcr2, TCR2_EL1x_HAFT +#endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair @@ -525,11 +532,16 @@ alternative_else_nop_endif #undef PTE_MAYBE_NG #undef PTE_MAYBE_SHARED - mov x0, TCR2_EL1x_PIE - msr REG_TCR2_EL1, x0 + orr tcr2, tcr2, TCR2_EL1x_PIE .Lskip_indirection: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4 + cbz x1, 1f + msr REG_TCR2_EL1, tcr2 +1: + /* * Prepare SCTLR */ @@ -538,4 +550,5 @@ alternative_else_nop_endif .unreq mair .unreq tcr + .unreq tcr2 SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 264c5f9b97d8..688fbe0271ca 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -80,10 +80,10 @@ static const struct ptdump_prot_bits pte_bits[] = { .set = "CON", .clear = " ", }, { - .mask = PTE_TABLE_BIT, - .val = PTE_TABLE_BIT, - .set = " ", - .clear = "BLK", + .mask = PTE_TABLE_BIT | PTE_VALID, + .val = PTE_VALID, + .set = "BLK", + .clear = " ", }, { .mask = PTE_UXN, .val = PTE_UXN, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index eedb5acc21ed..8dfb2fa51d12 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -29,6 +29,7 @@ HAS_EVT HAS_FPMR HAS_FGT HAS_FPSIMD +HAS_GCS HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 @@ -56,6 +57,7 @@ HAS_TLB_RANGE HAS_VA52 HAS_VIRT_HOST_EXTN HAS_WFXT +HAFT HW_DBM KVM_HVHE KVM_PROTECTED_MODE diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 8d637ac4b7c6..283279af932c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1238,6 +1238,7 @@ UnsignedEnum 11:8 PMUVer 0b0110 V3P5 0b0111 V3P7 0b1000 V3P8 + 0b1001 V3P9 0b1111 IMP_DEF EndEnum UnsignedEnum 7:4 TraceVer @@ -1648,6 +1649,8 @@ EndEnum UnsignedEnum 39:36 ETS 0b0000 NI 0b0001 IMP + 0b0010 ETS2 + 0b0011 ETS3 EndEnum UnsignedEnum 35:32 TWED 0b0000 NI @@ -1688,6 +1691,8 @@ UnsignedEnum 3:0 HAFDBS 0b0000 NI 0b0001 AF 0b0010 DBM + 0b0011 HAFT + 0b0100 HDBSS EndEnum EndSysreg @@ -2178,6 +2183,13 @@ Field 4 P Field 3:0 ALIGN EndSysreg +Sysreg PMUACR_EL1 3 0 9 14 4 +Res0 63:33 +Field 32 F0 +Field 31 C +Field 30:0 P +EndSysreg + Sysreg PMSELR_EL0 3 3 9 12 5 Res0 63:5 Field 4:0 SEL diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b9a7e8f39ac..ffa2bd12a78a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1954,6 +1954,7 @@ config X86_USER_SHADOW_STACK depends on AS_WRUSS depends on X86_64 select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_HAS_USER_SHADOW_STACK select X86_CET help Shadow stack protection is a hardware feature that detects function diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index 46cdc941f958..ac1e6277212b 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -5,9 +5,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #define MAP_ABOVE4G 0x80 /* only map above 4GB */ -/* Flags for map_shadow_stack(2) */ -#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ - #include #endif /* _ASM_X86_MMAN_H */ diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index c0e77c1c8e09..3561553eff8b 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -36,19 
+36,25 @@ struct acpi_gtdt_descriptor { static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata; -static inline __init void *next_platform_timer(void *platform_timer) +static __init bool platform_timer_valid(void *platform_timer) { struct acpi_gtdt_header *gh = platform_timer; - platform_timer += gh->length; - if (platform_timer < acpi_gtdt_desc.gtdt_end) - return platform_timer; + return (platform_timer >= (void *)(acpi_gtdt_desc.gtdt + 1) && + platform_timer < acpi_gtdt_desc.gtdt_end && + gh->length != 0 && + platform_timer + gh->length <= acpi_gtdt_desc.gtdt_end); +} - return NULL; +static __init void *next_platform_timer(void *platform_timer) +{ + struct acpi_gtdt_header *gh = platform_timer; + + return platform_timer + gh->length; } #define for_each_platform_timer(_g) \ - for (_g = acpi_gtdt_desc.platform_timer; _g; \ + for (_g = acpi_gtdt_desc.platform_timer; platform_timer_valid(_g);\ _g = next_platform_timer(_g)) static inline bool is_timer_block(void *platform_timer) @@ -157,6 +163,7 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, { void *platform_timer; struct acpi_table_gtdt *gtdt; + int cnt = 0; gtdt = container_of(table, struct acpi_table_gtdt, header); acpi_gtdt_desc.gtdt = gtdt; @@ -176,12 +183,16 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, return 0; } - platform_timer = (void *)gtdt + gtdt->platform_timer_offset; - if (platform_timer < (void *)table + sizeof(struct acpi_table_gtdt)) { + acpi_gtdt_desc.platform_timer = (void *)gtdt + gtdt->platform_timer_offset; + for_each_platform_timer(platform_timer) + cnt++; + + if (cnt != gtdt->platform_timer_count) { + acpi_gtdt_desc.platform_timer = NULL; pr_err(FW_BUG "invalid timer data.\n"); return -EINVAL; } - acpi_gtdt_desc.platform_timer = platform_timer; + if (platform_timer_count) *platform_timer_count = gtdt->platform_timer_count; @@ -283,7 +294,7 @@ static int __init gtdt_parse_timer_block(struct acpi_gtdt_timer_block *block, if (frame->virt_irq > 0) acpi_unregister_gsi(gtdt_frame->virtual_timer_interrupt); frame->virt_irq = 0; - } while (i-- >= 0 && gtdt_frame--); + } while (i-- > 0 && gtdt_frame--); return -EINVAL; } @@ -352,7 +363,7 @@ static int __init gtdt_import_sbsa_gwdt(struct acpi_gtdt_watchdog *wd, } irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags); - res[2] = (struct resource)DEFINE_RES_IRQ(irq); + res[2] = DEFINE_RES_IRQ(irq); if (irq <= 0) { pr_warn("failed to map the Watchdog interrupt.\n"); nr_res--; diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index bab8ba64162f..4e268de351c4 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -284,4 +284,11 @@ config CXL_PMU If unsure say 'm'. +config MARVELL_PEM_PMU + tristate "MARVELL PEM PMU Support" + depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) + help + Enable support for PCIe Interface performance monitoring + on Marvell platforms.
+ endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 8268f38e42c5..de71d2574857 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o +obj-$(CONFIG_MARVELL_PEM_PMU) += marvell_pem_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c index c6ff1bc7d336..99a0ef9817e0 100644 --- a/drivers/perf/alibaba_uncore_drw_pmu.c +++ b/drivers/perf/alibaba_uncore_drw_pmu.c @@ -782,7 +782,7 @@ static struct platform_driver ali_drw_pmu_driver = { .acpi_match_table = ali_drw_acpi_match, }, .probe = ali_drw_pmu_probe, - .remove_new = ali_drw_pmu_remove, + .remove = ali_drw_pmu_remove, }; static int __init ali_drw_pmu_init(void) diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c index 99cc791892bc..f33e9a456e85 100644 --- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -379,7 +379,7 @@ MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match); static struct platform_driver g12_ddr_pmu_driver = { .probe = g12_ddr_pmu_probe, - .remove_new = g12_ddr_pmu_remove, + .remove = g12_ddr_pmu_remove, .driver = { .name = "meson-g12-ddr-pmu", diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index c76bac668dea..1cc3214d6b6d 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1705,7 +1705,7 @@ static struct platform_driver cci_pmu_driver = { .suppress_bind_attrs = true, }, .probe = cci_pmu_probe, - .remove_new = cci_pmu_remove, + .remove = cci_pmu_remove, }; module_platform_driver(cci_pmu_driver); diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 5c66b9278862..d5fcea3d4328 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1529,7 +1529,7 @@ static struct platform_driver arm_ccn_driver = { .suppress_bind_attrs = true, }, .probe = arm_ccn_probe, - .remove_new = arm_ccn_remove, + .remove = arm_ccn_remove, }; static int __init arm_ccn_init(void) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 397a46410f7c..49bd811c6fd6 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2662,7 +2662,7 @@ static struct platform_driver arm_cmn_driver = { .acpi_match_table = ACPI_PTR(arm_cmn_acpi_match), }, .probe = arm_cmn_probe, - .remove_new = arm_cmn_remove, + .remove = arm_cmn_remove, }; static int __init arm_cmn_init(void) diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 2158a5975c90..81e8b97e9353 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -1282,7 +1282,7 @@ static struct platform_driver arm_cspmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_cspmu_device_probe, - .remove_new = arm_cspmu_device_remove, + .remove = arm_cspmu_device_remove, .id_table = arm_cspmu_id, }; diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 7e5f1d4fca0f..619cf937602f 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -750,7 +750,7 @@ static struct platform_driver dmc620_pmu_driver = { .suppress_bind_attrs = true, }, .probe = dmc620_pmu_device_probe, - .remove_new 
= dmc620_pmu_device_remove, + .remove = dmc620_pmu_device_remove, }; static int __init dmc620_pmu_init(void) diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index f2bd25a3470a..cb4fb59fe04b 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -787,7 +787,7 @@ static struct platform_driver dsu_pmu_driver = { .suppress_bind_attrs = true, }, .probe = dsu_pmu_device_probe, - .remove_new = dsu_pmu_device_remove, + .remove = dsu_pmu_device_remove, }; static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 0afe02f879b4..b5cc11abc962 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -770,18 +770,27 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) int i; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - /* Clear any unused counters to avoid leaking their contents */ - for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask, - ARMPMU_MAX_HWEVENTS) { - if (i == ARMV8_PMU_CYCLE_IDX) - write_pmccntr(0); - else if (i == ARMV8_PMU_INSTR_IDX) - write_pmicntr(0); - else - armv8pmu_write_evcntr(i, 0); + if (is_pmuv3p9(cpu_pmu->pmuver)) { + u64 mask = 0; + for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) { + if (armv8pmu_event_has_user_read(cpuc->events[i])) + mask |= BIT(i); + } + write_pmuacr(mask); + } else { + /* Clear any unused counters to avoid leaking their contents */ + for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask, + ARMPMU_MAX_HWEVENTS) { + if (i == ARMV8_PMU_CYCLE_IDX) + write_pmccntr(0); + else if (i == ARMV8_PMU_INSTR_IDX) + write_pmicntr(0); + else + armv8pmu_write_evcntr(i, 0); + } } - update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); + update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_UEN); } static void armv8pmu_enable_event(struct perf_event *event) @@ -1364,6 +1373,8 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v3ae) PMUV3_INIT_SIMPLE(armv8_nvidia_carmel) PMUV3_INIT_SIMPLE(armv8_nvidia_denver) +PMUV3_INIT_SIMPLE(armv8_samsung_mongoose) + PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event) PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event) PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event) @@ -1409,6 +1420,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init}, {.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init}, {.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init}, + {.compatible = "samsung,mongoose-pmu", .data = armv8_samsung_mongoose_pmu_init}, {}, }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index d5fa92ba8373..b1510f660c7a 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -996,7 +996,7 @@ static struct platform_driver smmu_pmu_driver = { .suppress_bind_attrs = true, }, .probe = smmu_pmu_probe, - .remove_new = smmu_pmu_remove, + .remove = smmu_pmu_remove, .shutdown = smmu_pmu_shutdown, }; diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 3569050f9cf3..fd5b78732603 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -1280,7 +1280,7 @@ static struct platform_driver arm_spe_pmu_driver = { .suppress_bind_attrs = true, }, .probe = arm_spe_pmu_device_probe, - .remove_new = arm_spe_pmu_device_remove, + .remove = arm_spe_pmu_device_remove, }; static int __init arm_spe_pmu_init(void) diff 
--git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index 43d68b69e630..bee4b5b52ec6 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -354,7 +354,7 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf, CXL_PMU_GID_D2H_REQ, BIT(13)), CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv, CXL_PMU_GID_D2H_REQ, BIT(14)), CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed, CXL_PMU_GID_D2H_REQ, BIT(16)), - /* CXL rev 3.0 Table 3-20 - D2H Repsonse Encodings */ + /* CXL rev 3.0 Table 3-20 - D2H Response Encodings */ CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti, CXL_PMU_GID_D2H_RSP, BIT(4)), CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv, CXL_PMU_GID_D2H_RSP, BIT(6)), CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse, CXL_PMU_GID_D2H_RSP, BIT(5)), @@ -377,12 +377,14 @@ static struct attribute *cxl_pmu_event_attrs[] = { /* CXL rev 3.0 Table 13-5 directly lists these */ CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data, CXL_PMU_GID_CACHE_DATA, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data, CXL_PMU_GID_CACHE_DATA, BIT(1)), - /* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */ + /* CXL rev 3.1 Table 3-35 M2S Req Memory Opcodes */ CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv, CXL_PMU_GID_M2S_REQ, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd, CXL_PMU_GID_M2S_REQ, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata, CXL_PMU_GID_M2S_REQ, BIT(2)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd, CXL_PMU_GID_M2S_REQ, BIT(3)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd, CXL_PMU_GID_M2S_REQ, BIT(4)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdtee, CXL_PMU_GID_M2S_REQ, BIT(5)), + CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddatatee, CXL_PMU_GID_M2S_REQ, BIT(6)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd, CXL_PMU_GID_M2S_REQ, BIT(8)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt, CXL_PMU_GID_M2S_REQ, BIT(9)), CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict, CXL_PMU_GID_M2S_REQ, BIT(10)), @@ -404,10 +406,11 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk, CXL_PMU_GID_S2M_BISNP, BIT(4)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk, CXL_PMU_GID_S2M_BISNP, BIT(5)), CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk, CXL_PMU_GID_S2M_BISNP, BIT(6)), - /* CXL rev 3.0 Table 3-43 S2M NDR Opcopdes */ + /* CXL rev 3.1 Table 3-50 S2M NDR Opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpm, CXL_PMU_GID_S2M_NDR, BIT(3)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)), /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)), diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c index 4ca50f9b6dfe..9cbea9675e21 100644 --- a/drivers/perf/dwc_pcie_pmu.c +++ b/drivers/perf/dwc_pcie_pmu.c @@ -82,7 +82,6 @@ struct dwc_pcie_pmu { u16 ras_des_offset; u32 nr_lanes; - struct list_head pmu_node; struct hlist_node cpuhp_node; struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX]; int on_cpu; @@ -107,6 +106,7 @@ struct dwc_pcie_vendor_id { static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = { {.vendor_id = PCI_VENDOR_ID_ALIBABA }, + {.vendor_id = PCI_VENDOR_ID_AMPERE }, {.vendor_id = PCI_VENDOR_ID_QCOM }, {} /* terminator */ }; @@ -203,10 +203,10 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09), /* Group #1 */ - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload,
0x20), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22), - DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_pcie_tlp_data_payload, 0x20), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_pcie_tlp_data_payload, 0x21), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(tx_ccix_tlp_data_payload, 0x22), + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(rx_ccix_tlp_data_payload, 0x23), /* * Leave it to the user to specify the lane ID to avoid generating @@ -216,9 +216,9 @@ static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601), DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602), DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603), - DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604), - DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605), - DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nullified_tlp, 0x604), + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nullified_tlp, 0x605), + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tlp, 0x606), DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700), DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701), DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702), diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 746b92330ca7..b989ffa95d69 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -846,7 +846,7 @@ static struct platform_driver imx_ddr_pmu_driver = { .suppress_bind_attrs = true, }, .probe = ddr_perf_probe, - .remove_new = ddr_perf_remove, + .remove = ddr_perf_remove, }; module_platform_driver(imx_ddr_pmu_driver); diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c index 69f920b1caf2..3c856d9a4e97 100644 --- a/drivers/perf/fsl_imx9_ddr_perf.c +++ b/drivers/perf/fsl_imx9_ddr_perf.c @@ -81,6 +81,10 @@ struct ddr_pmu { int id; }; +static const struct imx_ddr_devtype_data imx91_devtype_data = { + .identifier = "imx91", +}; + static const struct imx_ddr_devtype_data imx93_devtype_data = { .identifier = "imx93", }; @@ -100,6 +104,7 @@ static inline bool is_imx95(struct ddr_pmu *pmu) } static const struct of_device_id imx_ddr_pmu_dt_ids[] = { + { .compatible = "fsl,imx91-ddr-pmu", .data = &imx91_devtype_data }, { .compatible = "fsl,imx93-ddr-pmu", .data = &imx93_devtype_data }, { .compatible = "fsl,imx95-ddr-pmu", .data = &imx95_devtype_data }, { /* sentinel */ } @@ -848,7 +853,7 @@ static struct platform_driver imx_ddr_pmu_driver = { .suppress_bind_attrs = true, }, .probe = ddr_perf_probe, - .remove_new = ddr_perf_remove, + .remove = ddr_perf_remove, }; module_platform_driver(imx_ddr_pmu_driver); diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 0e923f94fa5b..3f3fb1de11f5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -358,7 +358,7 @@ static struct platform_driver hisi_cpa_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_cpa_pmu_probe, - .remove_new = hisi_cpa_pmu_remove, + .remove = hisi_cpa_pmu_remove, }; static int __init hisi_cpa_pmu_module_init(void) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index b804e3738113..a6ebf2ec99d3 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -547,7 +547,7 @@ static 
struct platform_driver hisi_ddrc_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_ddrc_pmu_probe, - .remove_new = hisi_ddrc_pmu_remove, + .remove = hisi_ddrc_pmu_remove, }; static int __init hisi_ddrc_pmu_module_init(void) diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 21e69b1cdd4d..32624872596f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -550,7 +550,7 @@ static struct platform_driver hisi_hha_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_hha_pmu_probe, - .remove_new = hisi_hha_pmu_remove, + .remove = hisi_hha_pmu_remove, }; static int __init hisi_hha_pmu_module_init(void) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 51ba76871097..c235b46ce873 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -584,7 +584,7 @@ static struct platform_driver hisi_l3c_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_l3c_pmu_probe, - .remove_new = hisi_l3c_pmu_remove, + .remove = hisi_l3c_pmu_remove, }; static int __init hisi_l3c_pmu_module_init(void) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 3cdb35c741f9..c0f5d7c73e06 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -538,7 +538,7 @@ static struct platform_driver hisi_pa_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_pa_pmu_probe, - .remove_new = hisi_pa_pmu_remove, + .remove = hisi_pa_pmu_remove, }; static int __init hisi_pa_pmu_module_init(void) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 765bbd61db26..c5f4764ee888 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -476,7 +476,7 @@ static struct platform_driver hisi_sllc_pmu_driver = { .suppress_bind_attrs = true, }, .probe = hisi_sllc_pmu_probe, - .remove_new = hisi_sllc_pmu_remove, + .remove = hisi_sllc_pmu_remove, }; static int __init hisi_sllc_pmu_module_init(void) diff --git a/drivers/perf/marvell_cn10k_ddr_pmu.c b/drivers/perf/marvell_cn10k_ddr_pmu.c index 94f1ebcd2a27..8860d9f687ae 100644 --- a/drivers/perf/marvell_cn10k_ddr_pmu.c +++ b/drivers/perf/marvell_cn10k_ddr_pmu.c @@ -732,7 +732,7 @@ static struct platform_driver cn10k_ddr_pmu_driver = { .suppress_bind_attrs = true, }, .probe = cn10k_ddr_perf_probe, - .remove_new = cn10k_ddr_perf_remove, + .remove = cn10k_ddr_perf_remove, }; static int __init cn10k_ddr_pmu_init(void) diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c index 9e635f355470..cda55ee35eee 100644 --- a/drivers/perf/marvell_cn10k_tad_pmu.c +++ b/drivers/perf/marvell_cn10k_tad_pmu.c @@ -383,7 +383,7 @@ static struct platform_driver tad_pmu_driver = { .suppress_bind_attrs = true, }, .probe = tad_pmu_probe, - .remove_new = tad_pmu_remove, + .remove = tad_pmu_remove, }; static int tad_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c new file mode 100644 index 000000000000..29fbcd1848e4 --- /dev/null +++ b/drivers/perf/marvell_pem_pmu.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Marvell PEM(PCIe RC) Performance Monitor Driver + * + * Copyright (C) 2024 Marvell. 
+ */ + +#include <linux/acpi.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> + +/* + * Each of these events maps to a free-running 64-bit counter + * with no event control, but can be reset. + */ +enum pem_events { + IB_TLP_NPR, + IB_TLP_PR, + IB_TLP_CPL, + IB_TLP_DWORDS_NPR, + IB_TLP_DWORDS_PR, + IB_TLP_DWORDS_CPL, + IB_INFLIGHT, + IB_READS, + IB_REQ_NO_RO_NCB, + IB_REQ_NO_RO_EBUS, + OB_TLP_NPR, + OB_TLP_PR, + OB_TLP_CPL, + OB_TLP_DWORDS_NPR, + OB_TLP_DWORDS_PR, + OB_TLP_DWORDS_CPL, + OB_INFLIGHT, + OB_READS, + OB_MERGES_NPR, + OB_MERGES_PR, + OB_MERGES_CPL, + ATS_TRANS, + ATS_TRANS_LATENCY, + ATS_PRI, + ATS_PRI_LATENCY, + ATS_INV, + ATS_INV_LATENCY, + PEM_EVENTIDS_MAX +}; + +static u64 eventid_to_offset_table[] = { + [IB_TLP_NPR] = 0x0, + [IB_TLP_PR] = 0x8, + [IB_TLP_CPL] = 0x10, + [IB_TLP_DWORDS_NPR] = 0x100, + [IB_TLP_DWORDS_PR] = 0x108, + [IB_TLP_DWORDS_CPL] = 0x110, + [IB_INFLIGHT] = 0x200, + [IB_READS] = 0x300, + [IB_REQ_NO_RO_NCB] = 0x400, + [IB_REQ_NO_RO_EBUS] = 0x408, + [OB_TLP_NPR] = 0x500, + [OB_TLP_PR] = 0x508, + [OB_TLP_CPL] = 0x510, + [OB_TLP_DWORDS_NPR] = 0x600, + [OB_TLP_DWORDS_PR] = 0x608, + [OB_TLP_DWORDS_CPL] = 0x610, + [OB_INFLIGHT] = 0x700, + [OB_READS] = 0x800, + [OB_MERGES_NPR] = 0x900, + [OB_MERGES_PR] = 0x908, + [OB_MERGES_CPL] = 0x910, + [ATS_TRANS] = 0x2D18, + [ATS_TRANS_LATENCY] = 0x2D20, + [ATS_PRI] = 0x2D28, + [ATS_PRI_LATENCY] = 0x2D30, + [ATS_INV] = 0x2D38, + [ATS_INV_LATENCY] = 0x2D40, +}; + +struct pem_pmu { + struct pmu pmu; + void __iomem *base; + unsigned int cpu; + struct device *dev; + struct hlist_node node; +}; + +#define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu) + +static int eventid_to_offset(int eventid) +{ + return eventid_to_offset_table[eventid]; +} + +/* Events */ +static ssize_t pem_pmu_event_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define PEM_EVENT_ATTR(_name, _id) \ + (&((struct perf_pmu_events_attr[]) { \ + { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL), \ + .id = _id, } \ + })[0].attr.attr) + +static struct attribute *pem_perf_events_attrs[] = { + PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR), + PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR), + PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL), + PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR), + PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR), + PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL), + PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT), + PEM_EVENT_ATTR(ib_reads, IB_READS), + PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB), + PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS), + PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR), + PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR), + PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL), + PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR), + PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR), + PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL), + PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT), + PEM_EVENT_ATTR(ob_reads_partid, OB_READS), + PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR), + PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR), + PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL), + PEM_EVENT_ATTR(ats_trans, ATS_TRANS), + PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY), + PEM_EVENT_ATTR(ats_pri, ATS_PRI), + PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY), + PEM_EVENT_ATTR(ats_inv,
ATS_INV), + PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY), + NULL +}; + +static struct attribute_group pem_perf_events_attr_group = { + .name = "events", + .attrs = pem_perf_events_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-5"); + +static struct attribute *pem_perf_format_attrs[] = { + &format_attr_event.attr, + NULL +}; + +static struct attribute_group pem_perf_format_attr_group = { + .name = "format", + .attrs = pem_perf_format_attrs, +}; + +/* cpumask */ +static ssize_t pem_perf_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pem_pmu *pmu = dev_get_drvdata(dev); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu)); +} + +static struct device_attribute pem_perf_cpumask_attr = + __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL); + +static struct attribute *pem_perf_cpumask_attrs[] = { + &pem_perf_cpumask_attr.attr, + NULL +}; + +static struct attribute_group pem_perf_cpumask_attr_group = { + .attrs = pem_perf_cpumask_attrs, +}; + +static const struct attribute_group *pem_perf_attr_groups[] = { + &pem_perf_events_attr_group, + &pem_perf_cpumask_attr_group, + &pem_perf_format_attr_group, + NULL +}; + +static int pem_perf_event_init(struct perf_event *event) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct perf_event *sibling; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (event->attr.config >= PEM_EVENTIDS_MAX) + return -EINVAL; + + if (is_sampling_event(event) || + event->attach_state & PERF_ATTACH_TASK) { + return -EOPNOTSUPP; + } + + if (event->cpu < 0) + return -EOPNOTSUPP; + + /* We must NOT create groups containing mixed PMUs */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) + return -EINVAL; + } + /* + * Set ownership of the event to one CPU; the same event cannot be + * observed on multiple CPUs at the same time.
+ */ + event->cpu = pmu->cpu; + hwc->idx = -1; + return 0; +} + +static u64 pem_perf_read_counter(struct pem_pmu *pmu, + struct perf_event *event, int eventid) +{ + return readq_relaxed(pmu->base + eventid_to_offset(eventid)); +} + +static void pem_perf_event_update(struct perf_event *event) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 prev_count, new_count; + + do { + prev_count = local64_read(&hwc->prev_count); + new_count = pem_perf_read_counter(pmu, event, hwc->idx); + } while (local64_xchg(&hwc->prev_count, new_count) != prev_count); + + local64_add((new_count - prev_count), &event->count); +} + +static void pem_perf_event_start(struct perf_event *event, int flags) +{ + struct pem_pmu *pmu = to_pem_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int eventid = hwc->idx; + + /* + * All counters are free-running and associated with + * a fixed event to track in Hardware + */ + local64_set(&hwc->prev_count, + pem_perf_read_counter(pmu, event, eventid)); + + hwc->state = 0; +} + +static int pem_perf_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = event->attr.config; + if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX)) + return -EINVAL; + hwc->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + pem_perf_event_start(event, flags); + + return 0; +} + +static void pem_perf_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_UPDATE) + pem_perf_event_update(event); + + hwc->state |= PERF_HES_STOPPED; +} + +static void pem_perf_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + pem_perf_event_stop(event, PERF_EF_UPDATE); + hwc->idx = -1; +} + +static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node); + unsigned int target; + + if (cpu != pmu->cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&pmu->pmu, cpu, target); + pmu->cpu = target; + return 0; +} + +static int pem_perf_probe(struct platform_device *pdev) +{ + struct pem_pmu *pem_pmu; + struct resource *res; + void __iomem *base; + char *name; + int ret; + + pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL); + if (!pem_pmu) + return -ENOMEM; + + pem_pmu->dev = &pdev->dev; + platform_set_drvdata(pdev, pem_pmu); + + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(base)) + return PTR_ERR(base); + + pem_pmu->base = base; + + pem_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .task_ctx_nr = perf_invalid_context, + .attr_groups = pem_perf_attr_groups, + .event_init = pem_perf_event_init, + .add = pem_perf_event_add, + .del = pem_perf_event_del, + .start = pem_perf_event_start, + .stop = pem_perf_event_stop, + .read = pem_perf_event_update, + }; + + /* Choose this cpu to collect perf data */ + pem_pmu->cpu = raw_smp_processor_id(); + + name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx", + res->start); + if (!name) + return -ENOMEM; + + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + + ret = perf_pmu_register(&pem_pmu->pmu, name, -1); + if (ret) + goto error; + + return 0; +error: + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + return ret; +} + 
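The read path above is the standard idiom for free-running counters that cannot be started, stopped, or interrupted on overflow: snapshot the hardware value when the event is added, then fold unsigned deltas into event->count on every update. Restated in isolation (an illustrative sketch in plain C, not driver code)::

    #include <stdint.h>

    struct fr_counter {
        uint64_t prev;	/* hardware snapshot from the last update */
        uint64_t total;	/* accumulated delta reported to perf */
    };

    static void fr_update(struct fr_counter *c, uint64_t hw_now)
    {
        /* Unsigned subtraction still yields the correct delta if the
         * 64-bit hardware counter wrapped since the last snapshot. */
        c->total += hw_now - c->prev;
        c->prev = hw_now;
    }

The local64_xchg() loop in pem_perf_event_update() is the lock-free form of the same update, retrying when a concurrent reader has moved prev_count underneath it.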
+static void pem_perf_remove(struct platform_device *pdev) +{ + struct pem_pmu *pem_pmu = platform_get_drvdata(pdev); + + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + &pem_pmu->node); + + perf_pmu_unregister(&pem_pmu->pmu); +} + +#ifdef CONFIG_ACPI +static const struct acpi_device_id pem_pmu_acpi_match[] = { + {"MRVL000E", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match); +#endif + +static struct platform_driver pem_pmu_driver = { + .driver = { + .name = "pem-pmu", + .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match), + .suppress_bind_attrs = true, + }, + .probe = pem_perf_probe, + .remove = pem_perf_remove, +}; + +static int __init pem_pmu_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, + "perf/marvell/pem:online", NULL, + pem_pmu_offline_cpu); + if (ret) + return ret; + + ret = platform_driver_register(&pem_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE); + return ret; +} + +static void __exit pem_pmu_exit(void) +{ + platform_driver_unregister(&pem_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE); +} + +module_init(pem_pmu_init); +module_exit(pem_pmu_exit); + +MODULE_DESCRIPTION("Marvell PEM Perf driver"); +MODULE_AUTHOR("Gowthami Thiagarajan "); +MODULE_LICENSE("GPL"); diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 980e3051edd7..ea8c85729937 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -981,7 +981,7 @@ static struct platform_driver l2_cache_pmu_driver = { .suppress_bind_attrs = true, }, .probe = l2_cache_pmu_probe, - .remove_new = l2_cache_pmu_remove, + .remove = l2_cache_pmu_remove, }; static int __init register_l2_cache_pmu_driver(void) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index faf763d2c95c..cadd60221b8f 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -1010,7 +1010,7 @@ static struct platform_driver tx2_uncore_driver = { .suppress_bind_attrs = true, }, .probe = tx2_uncore_probe, - .remove_new = tx2_uncore_remove, + .remove = tx2_uncore_remove, }; static int __init tx2_uncore_driver_init(void) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index c01466ae1e3d..33b5497bdc06 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1943,7 +1943,7 @@ static void xgene_pmu_remove(struct platform_device *pdev) static struct platform_driver xgene_pmu_driver = { .probe = xgene_pmu_probe, - .remove_new = xgene_pmu_remove, + .remove = xgene_pmu_remove, .driver = { .name = "xgene-pmu", .of_match_table = xgene_pmu_of_match, diff --git a/drivers/virt/coco/Kconfig b/drivers/virt/coco/Kconfig index d9ff676bf48d..ff869d883d95 100644 --- a/drivers/virt/coco/Kconfig +++ b/drivers/virt/coco/Kconfig @@ -14,3 +14,5 @@ source "drivers/virt/coco/pkvm-guest/Kconfig" source "drivers/virt/coco/sev-guest/Kconfig" source "drivers/virt/coco/tdx-guest/Kconfig" + +source "drivers/virt/coco/arm-cca-guest/Kconfig" diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile index b69c30c1c720..c3d07cfc087e 100644 --- a/drivers/virt/coco/Makefile +++ b/drivers/virt/coco/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_EFI_SECRET) += efi_secret/ obj-$(CONFIG_ARM_PKVM_GUEST) += pkvm-guest/ obj-$(CONFIG_SEV_GUEST) += sev-guest/ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/ +obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/ diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig new file 
mode 100644 index 000000000000..9dd27c3ee215 --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/Kconfig @@ -0,0 +1,11 @@ +config ARM_CCA_GUEST + tristate "Arm CCA Guest driver" + depends on ARM64 + default m + select TSM_REPORTS + help + The driver provides a userspace interface to request an + attestation report from the Realm Management Monitor (RMM). + + If you choose 'M' here, this module will be called + arm-cca-guest. diff --git a/drivers/virt/coco/arm-cca-guest/Makefile b/drivers/virt/coco/arm-cca-guest/Makefile new file mode 100644 index 000000000000..69eeba08e98a --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest.o diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c new file mode 100644 index 000000000000..488153879ec9 --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include <linux/arm-smccc.h> +#include <linux/cc_platform.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/tsm.h> +#include <linux/types.h> + +#include <asm/rsi.h> + +/** + * struct arm_cca_token_info - a descriptor for the token buffer. + * @challenge: Pointer to the challenge data + * @challenge_size: Size of the challenge data + * @granule: PA of the granule to which the token will be written + * @offset: Offset within granule to start of buffer in bytes + * @result: result of rsi_attestation_token_continue operation + */ +struct arm_cca_token_info { + void *challenge; + unsigned long challenge_size; + phys_addr_t granule; + unsigned long offset; + unsigned long result; +}; + +static void arm_cca_attestation_init(void *param) +{ + struct arm_cca_token_info *info; + + info = (struct arm_cca_token_info *)param; + + info->result = rsi_attestation_token_init(info->challenge, + info->challenge_size); +} + +/** + * arm_cca_attestation_continue - Retrieve the attestation token data. + * + * @param: pointer to the arm_cca_token_info + * + * Attestation token generation is a long-running operation and therefore + * the token data may not be retrieved in a single call. Moreover, the + * token retrieval operation must be requested on the same CPU on which the + * attestation token generation was initialised. + * This helper function is therefore scheduled on the same CPU multiple + * times until the entire token data is retrieved. + */ +static void arm_cca_attestation_continue(void *param) +{ + unsigned long len; + unsigned long size; + struct arm_cca_token_info *info; + + info = (struct arm_cca_token_info *)param; + + size = RSI_GRANULE_SIZE - info->offset; + info->result = rsi_attestation_token_continue(info->granule, + info->offset, size, &len); + info->offset += len; +} + +/** + * arm_cca_report_new - Generate a new attestation token. + * + * @report: pointer to the TSM report context information. + * @data: pointer to the context-specific data for this module. + * + * Initialise the attestation token generation using the challenge data + * passed in the TSM descriptor. Allocate memory for the attestation token + * and schedule calls to retrieve the attestation token on the same CPU + * on which the attestation token generation was initialised. + * + * The challenge data must be at least 32 bytes and no more than 64 bytes. If + * less than 64 bytes are provided it will be zero-padded to 64 bytes. + * + * Return: + * * %0 - Attestation token generated successfully. + * * %-EINVAL - A parameter was not valid.
+ * %-ENOMEM - Out of memory. + * %-EFAULT - Failed to get IPA for memory page(s). + * A negative status code as returned by smp_call_function_single(). + */ +static int arm_cca_report_new(struct tsm_report *report, void *data) +{ + int ret; + int cpu; + long max_size; + unsigned long token_size = 0; + struct arm_cca_token_info info; + void *buf; + u8 *token __free(kvfree) = NULL; + struct tsm_desc *desc = &report->desc; + + if (desc->inblob_len < 32 || desc->inblob_len > 64) + return -EINVAL; + + /* + * The attestation token 'init' and 'continue' calls must be + * performed on the same CPU. smp_call_function_single() is used + * instead of simply calling get_cpu() because of the need to + * allocate outblob based on the returned value from the 'init' + * call and that cannot be done in an atomic context. + */ + cpu = smp_processor_id(); + + info.challenge = desc->inblob; + info.challenge_size = desc->inblob_len; + + ret = smp_call_function_single(cpu, arm_cca_attestation_init, + &info, true); + if (ret) + return ret; + max_size = info.result; + + if (max_size <= 0) + return -EINVAL; + + /* Allocate outblob */ + token = kvzalloc(max_size, GFP_KERNEL); + if (!token) + return -ENOMEM; + + /* + * Since the outblob may not be physically contiguous, use a page + * to bounce the buffer from RMM. + */ + buf = alloc_pages_exact(RSI_GRANULE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Get the PA of the memory page(s) that were allocated */ + info.granule = (unsigned long)virt_to_phys(buf); + + /* Loop until the token is ready or there is an error */ + do { + /* Retrieve one RSI_GRANULE_SIZE data per loop iteration */ + info.offset = 0; + do { + /* + * Schedule a call to retrieve a sub-granule chunk + * of data per loop iteration. + */ + ret = smp_call_function_single(cpu, + arm_cca_attestation_continue, + (void *)&info, true); + if (ret != 0) { + token_size = 0; + goto exit_free_granule_page; + } + } while (info.result == RSI_INCOMPLETE && + info.offset < RSI_GRANULE_SIZE); + + if (info.result != RSI_SUCCESS) { + ret = -ENXIO; + token_size = 0; + goto exit_free_granule_page; + } + + /* + * Copy the retrieved token data from the granule + * to the token buffer, ensuring that the RMM doesn't + * overflow the buffer. + */ + if (WARN_ON(token_size + info.offset > max_size)) + break; + memcpy(&token[token_size], buf, info.offset); + token_size += info.offset; + } while (info.result == RSI_INCOMPLETE); + + report->outblob = no_free_ptr(token); +exit_free_granule_page: + report->outblob_len = token_size; + free_pages_exact(buf, RSI_GRANULE_SIZE); + return ret; +} + +static const struct tsm_ops arm_cca_tsm_ops = { + .name = KBUILD_MODNAME, + .report_new = arm_cca_report_new, +}; + +/** + * arm_cca_guest_init - Register with the Trusted Security Module (TSM) + * interface. + * + * Return: + * * %0 - Registered successfully with the TSM interface. + * * %-ENODEV - The execution context is not an Arm Realm. + * * %-EBUSY - Already registered. + */ +static int __init arm_cca_guest_init(void) +{ + int ret; + + if (!is_realm_world()) + return -ENODEV; + + ret = tsm_register(&arm_cca_tsm_ops, NULL); + if (ret < 0) + pr_err("Error %d registering with TSM\n", ret); + + return ret; +} +module_init(arm_cca_guest_init);
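Once the driver has registered, token generation is driven entirely from user space through the configfs-tsm report interface; a hypothetical consumer follows (the path layout is per Documentation/ABI/testing/configfs-tsm, the report name is arbitrary, and error handling is trimmed for brevity)::

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    #define REPORT "/sys/kernel/config/tsm/report/cca0"

    int main(void)
    {
        unsigned char challenge[64] = { 0 };	/* 32..64 bytes of caller data */
        char buf[4096];
        ssize_t n;
        int fd;

        if (mkdir(REPORT, 0700) && errno != EEXIST)
            return 1;

        fd = open(REPORT "/inblob", O_WRONLY);	/* becomes report->desc.inblob */
        if (fd < 0 || write(fd, challenge, sizeof(challenge)) < 0)
            return 1;
        close(fd);

        fd = open(REPORT "/outblob", O_RDONLY);	/* triggers arm_cca_report_new() */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, n, stdout);	/* the CCA attestation token */
        close(fd);
        return 0;
    }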
+ */ +static void __exit arm_cca_guest_exit(void) +{ + tsm_unregister(&arm_cca_tsm_ops); +} +module_exit(arm_cca_guest_exit); + +MODULE_AUTHOR("Sami Mujawar "); +MODULE_DESCRIPTION("Arm CCA Guest TSM Driver"); +MODULE_LICENSE("GPL"); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 06dc4a57ba78..3039a6b7aba4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -257,6 +257,12 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 4fe5bb9f1b1f..31d253bd3961 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,6 +623,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 8f0af4f62631..d5ef5469e4e6 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -80,6 +80,16 @@ #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 #endif +#ifdef COMPAT_ELF_HWCAP3 +#undef ELF_HWCAP3 +#define ELF_HWCAP3 COMPAT_ELF_HWCAP3 +#endif + +#ifdef COMPAT_ELF_HWCAP4 +#undef ELF_HWCAP4 +#define ELF_HWCAP4 COMPAT_ELF_HWCAP4 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2dea122e5b93..1bbf783b244a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -113,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap unwinds (may be important on powerpc * and ia64). 
*/ - vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); + vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND | VM_MTE_ALLOWED); vma->vm_ops = &hugetlb_vm_ops; ret = seal_check_write(info->seals, vma); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7eb010de39fe..38a5a3e9cba2 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -978,7 +978,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR [ilog2(VM_UFFD_MINOR)] = "ui", #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ -#ifdef CONFIG_X86_USER_SHADOW_STACK +#ifdef CONFIG_ARCH_HAS_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2361ed4d2b15..61d9a66d1807 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -227,6 +227,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, diff --git a/include/linux/mm.h b/include/linux/mm.h index 61fff5d34ed5..feb5c8021bef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -329,12 +329,14 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS @@ -365,7 +367,17 @@ extern unsigned int kobjsize(const void *objp); * for more details on the guard size. */ # define VM_SHADOW_STACK VM_HIGH_ARCH_5 -#else +#endif + +#if defined(CONFIG_ARM64_GCS) +/* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. 
+ */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_6 +#endif + +#ifndef VM_SHADOW_STACK # define VM_SHADOW_STACK VM_NONE #endif @@ -4220,4 +4232,8 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old) } #endif /* CONFIG_MEM_ALLOC_PROFILING */ +int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); +int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); +int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + #endif /* _LINUX_MM_H */ diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 3372c1b56486..d698efba28a2 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -257,6 +257,7 @@ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_UEN (1 << 4) /* Fine grained per counter access at EL0 */ /* Mask for writable bits */ #define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index 57e8195d0b53..5e3d61ddbd8c 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b9935988da5c..9adc218fb6df 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -443,6 +443,7 @@ typedef struct elf64_shdr { #define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ #define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 35791791a879..557a3d2ac1d4 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -328,4 +328,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. 
+ */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index ebe10c27a9f4..c4c701c6f0b4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2319,6 +2319,21 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, return -EINVAL; } +int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) +{ + return -EINVAL; +} + +int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + +int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) #ifdef CONFIG_ANON_VMA_NAME @@ -2779,6 +2794,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_RISCV_SET_ICACHE_FLUSH_CTX: error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); break; + case PR_GET_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2); + break; + case PR_SET_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_set_shadow_stack_status(me, arg2); + break; + case PR_LOCK_SHADOW_STACK_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_lock_shadow_stack_status(me, arg2); + break; default: error = -EINVAL; break; diff --git a/mm/Kconfig b/mm/Kconfig index 33fa51d608dc..84000b016808 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1295,6 +1295,12 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. +config ARCH_HAS_USER_SHADOW_STACK + bool + help + The architecture has hardware support for userspace shadow call + stacks (eg, x86 CET, arm64 GCS or RISC-V Zicfiss). 
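Taken together, the hunks above are the whole userspace contract: PR_SET_SHADOW_STACK_STATUS to enable, PR_GET_SHADOW_STACK_STATUS to query, and PR_LOCK_SHADOW_STACK_STATUS to seal the configuration. As a minimal sketch of how a program might drive them (not part of this series; the raw_prctl2() helper is illustrative, and the constants simply mirror the <linux/prctl.h> hunk above), note that the enable has to be issued at register level rather than through a libc wrapper, both because the kernel rejects non-zero unused arguments and because returning from any frame entered before the switch would pop a GCS entry that was never pushed::

    #include <stdio.h>
    #include <sys/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Mirror the <linux/prctl.h> additions above for older headers. */
    #ifndef PR_GET_SHADOW_STACK_STATUS
    #define PR_GET_SHADOW_STACK_STATUS  74
    #define PR_SET_SHADOW_STACK_STATUS  75
    #define PR_LOCK_SHADOW_STACK_STATUS 76
    #define PR_SHADOW_STACK_ENABLE      (1UL << 0)
    #endif

    /*
     * arm64 raw prctl(). Must be inlined: a real call frame entered
     * before the enable would fault on its return. All five argument
     * registers are set explicitly since the kernel validates that the
     * unused ones are zero.
     */
    static inline long __attribute__((always_inline))
    raw_prctl2(long op, unsigned long arg)
    {
        register long x8 __asm__("x8") = __NR_prctl;
        register long x0 __asm__("x0") = op;
        register long x1 __asm__("x1") = (long)arg;
        register long x2 __asm__("x2") = 0;
        register long x3 __asm__("x3") = 0;
        register long x4 __asm__("x4") = 0;

        __asm__ volatile("svc #0"
                         : "+r"(x0)
                         : "r"(x8), "r"(x1), "r"(x2), "r"(x3), "r"(x4)
                         : "memory", "cc");
        return x0;
    }

    int main(void)
    {
        unsigned long mode;

        if (raw_prctl2(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE))
            return 1;   /* GCS still off, returning is safe */

        /* Calls made after the enable are balanced, libc is fine here. */
        if (prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0) == 0)
            printf("shadow stack mode: %#lx\n", mode);

        /* Seal the configuration against further changes. */
        prctl(PR_LOCK_SHADOW_STACK_STATUS, mode, 0, 0, 0);

        /* main()'s own frame predates the enable, so never return. */
        _exit(0);
    }

After the lock, any further PR_SET_SHADOW_STACK_STATUS call fails with -EBUSY, which is exactly the behaviour the gcs-locking selftest below asserts.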
+ source "mm/damon/Kconfig" endmenu diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 28b93cab8c0d..22029e60eff3 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index f2d6007a2b98..0029ed9c5c9a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -98,6 +98,17 @@ static void fpmr_sigill(void) asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); } +static void gcs_sigill(void) +{ + unsigned long *gcspr; + + asm volatile( + "mrs %0, S3_3_C2_C5_1" + : "=r" (gcspr) + : + : "cc"); +} + static void ilrcpc_sigill(void) { /* LDAPUR W0, [SP, #8] */ @@ -361,8 +372,8 @@ static void sveaes_sigill(void) static void sveb16b16_sigill(void) { - /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ - asm volatile(".inst 0xC1E41C00" : : : ); + /* BFADD Z0.H, Z0.H, Z0.H */ + asm volatile(".inst 0x65000000" : : : ); } static void svepmull_sigill(void) @@ -490,7 +501,7 @@ static const struct hwcap_data { .name = "F8DP2", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_F8DP2, - .cpuinfo = "f8dp4", + .cpuinfo = "f8dp2", .sigill_fn = f8dp2_sigill, }, { @@ -534,6 +545,14 @@ static const struct hwcap_data { .sigill_fn = fpmr_sigill, .sigill_reliable = true, }, + { + .name = "GCS", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_GCS, + .cpuinfo = "gcs", + .sigill_fn = gcs_sigill, + .sigill_reliable = true, + }, { .name = "JSCVT", .at_hwcap = AT_HWCAP, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d704511a0955..5ec9a18ec802 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -81,7 +81,7 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t s */ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) { if (gpr_in[i] != gpr_out[i]) { - ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n", + ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %lx != %lx\n", cfg->name, sve_vl, i, gpr_in[i], gpr_out[i]); errors++; @@ -112,7 +112,7 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, if (!sve_vl && !(svcr & SVCR_SM_MASK)) { for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { if (fpr_in[i] != fpr_out[i]) { - ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", + ksft_print_msg("%s Q%d/%d mismatch %lx != %lx\n", cfg->name, i / 2, i % 2, fpr_in[i], fpr_out[i]); @@ -294,13 +294,13 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, int errors = 0; if (svcr_out & SVCR_SM_MASK) { - ksft_print_msg("%s Still in SM, SVCR %llx\n", + ksft_print_msg("%s Still in SM, SVCR %lx\n", cfg->name, svcr_out); errors++; } if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { - ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", + ksft_print_msg("%s PSTATE.ZA changed, SVCR %lx != %lx\n", cfg->name, svcr_in, svcr_out); errors++; } diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 9b38a0da407d..1fc46a5642c2 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -65,4 
+65,19 @@ endfunction bl puts .endm +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +.macro enable_gcs + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 +.endm + #endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S index 7ad59d92d02b..82c3ab70e1cf 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S +++ b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S @@ -15,10 +15,7 @@ // Load and save register values with pauses for ptrace // -// x0 - SVE in use -// x1 - SME in use -// x2 - SME2 in use -// x3 - FA64 supported +// x0 - HAVE_ flags indicating which features are in use .globl load_and_save load_and_save: @@ -44,7 +41,7 @@ load_and_save: ldp q30, q31, [x7, #16 * 30] // SME? - cbz x1, check_sve_in + tbz x0, #HAVE_SME_SHIFT, check_sve_in adrp x7, svcr_in ldr x7, [x7, :lo12:svcr_in] @@ -64,7 +61,7 @@ load_and_save: bne 1b // ZT? - cbz x2, check_sm_in + tbz x0, #HAVE_SME2_SHIFT, check_sm_in adrp x6, zt_in add x6, x6, :lo12:zt_in _ldr_zt 6 @@ -72,12 +69,14 @@ load_and_save: // In streaming mode? check_sm_in: tbz x7, #SVCR_SM_SHIFT, check_sve_in - mov x4, x3 // Load FFR if we have FA64 + + // Load FFR if we have FA64 + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b load_sve // SVE? check_sve_in: - cbz x0, wait_for_writes + tbz x0, #HAVE_SVE_SHIFT, check_fpmr_in mov x4, #1 load_sve: @@ -142,6 +141,13 @@ load_sve: ldr p14, [x7, #14, MUL VL] ldr p15, [x7, #15, MUL VL] + // This has to come after we set PSTATE.SM +check_fpmr_in: + tbz x0, #HAVE_FPMR_SHIFT, wait_for_writes + adrp x7, fpmr_in + ldr x7, [x7, :lo12:fpmr_in] + msr REG_FPMR, x7 + wait_for_writes: // Wait for the parent brk #0 @@ -165,8 +171,13 @@ wait_for_writes: stp q28, q29, [x7, #16 * 28] stp q30, q31, [x7, #16 * 30] - // SME? - cbz x1, check_sve_out + tbz x0, #HAVE_FPMR_SHIFT, check_sme_out + mrs x7, REG_FPMR + adrp x6, fpmr_out + str x7, [x6, :lo12:fpmr_out] + +check_sme_out: + tbz x0, #HAVE_SME_SHIFT, check_sve_out rdsvl 11, 1 adrp x6, sme_vl_out @@ -187,7 +198,7 @@ wait_for_writes: bne 1b // ZT? - cbz x2, check_sm_out + tbz x0, #HAVE_SME2_SHIFT, check_sm_out adrp x6, zt_out add x6, x6, :lo12:zt_out _str_zt 6 @@ -195,12 +206,14 @@ wait_for_writes: // In streaming mode? check_sm_out: tbz x7, #SVCR_SM_SHIFT, check_sve_out - mov x4, x3 // FFR? + + // Do we have FA64 and FFR? + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b read_sve // SVE? 
check_sve_out: - cbz x0, wait_for_reads + tbz x0, #HAVE_SVE_SHIFT, wait_for_reads mov x4, #1 rdvl x7, #1 @@ -271,7 +284,7 @@ wait_for_reads: brk #0 // Ensure we don't leave ourselves in streaming mode - cbz x1, out + tbz x0, #HAVE_SME_SHIFT, out msr S3_3_C4_C2_2, xzr out: diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index c7ceafe5f471..4930e03a7b99 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -31,6 +31,14 @@ #include "fp-ptrace.h" +#include + +#define FPMR_LSCALE2_MASK GENMASK(37, 32) +#define FPMR_NSCALE_MASK GENMASK(31, 24) +#define FPMR_LSCALE_MASK GENMASK(22, 16) +#define FPMR_OSC_MASK GENMASK(15, 15) +#define FPMR_OSM_MASK GENMASK(14, 14) + /* and don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -48,11 +56,22 @@ #define NT_ARM_ZT 0x40d #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e +#endif + #define ARCH_VQ_MAX 256 /* VL 128..2048 in powers of 2 */ #define MAX_NUM_VLS 5 +/* + * FPMR bits we can set without doing feature checks to see if values + * are valid. + */ +#define FPMR_SAFE_BITS (FPMR_LSCALE2_MASK | FPMR_NSCALE_MASK | \ + FPMR_LSCALE_MASK | FPMR_OSC_MASK | FPMR_OSM_MASK) + #define NUM_FPR 32 __uint128_t v_in[NUM_FPR]; __uint128_t v_expected[NUM_FPR]; @@ -78,11 +97,13 @@ char zt_in[ZT_SIG_REG_BYTES]; char zt_expected[ZT_SIG_REG_BYTES]; char zt_out[ZT_SIG_REG_BYTES]; +uint64_t fpmr_in, fpmr_expected, fpmr_out; + uint64_t sve_vl_out; uint64_t sme_vl_out; uint64_t svcr_in, svcr_expected, svcr_out; -void load_and_save(int sve, int sme, int sme2, int fa64); +void load_and_save(int flags); static bool got_alarm; @@ -128,6 +149,11 @@ static bool fa64_supported(void) return getauxval(AT_HWCAP2) & HWCAP2_SME_FA64; } +static bool fpmr_supported(void) +{ + return getauxval(AT_HWCAP2) & HWCAP2_FPMR; +} + static bool compare_buffer(const char *name, void *out, void *expected, size_t size) { @@ -198,7 +224,7 @@ static int vl_expected(struct test_config *config) static void run_child(struct test_config *config) { - int ret; + int ret, flags; /* Let the parent attach to us */ ret = ptrace(PTRACE_TRACEME, 0, 0, 0); @@ -224,8 +250,19 @@ static void run_child(struct test_config *config) } /* Load values and wait for the parent */ - load_and_save(sve_supported(), sme_supported(), - sme2_supported(), fa64_supported()); + flags = 0; + if (sve_supported()) + flags |= HAVE_SVE; + if (sme_supported()) + flags |= HAVE_SME; + if (sme2_supported()) + flags |= HAVE_SME2; + if (fa64_supported()) + flags |= HAVE_FA64; + if (fpmr_supported()) + flags |= HAVE_FPMR; + + load_and_save(flags); exit(0); } @@ -312,6 +349,14 @@ static void read_child_regs(pid_t child) iov_child.iov_len = sizeof(zt_out); read_one_child_regs(child, "ZT", &iov_parent, &iov_child); } + + if (fpmr_supported()) { + iov_parent.iov_base = &fpmr_out; + iov_parent.iov_len = sizeof(fpmr_out); + iov_child.iov_base = &fpmr_out; + iov_child.iov_len = sizeof(fpmr_out); + read_one_child_regs(child, "FPMR", &iov_parent, &iov_child); + } } static bool continue_breakpoint(pid_t child, @@ -586,6 +631,26 @@ static bool check_ptrace_values_zt(pid_t child, struct test_config *config) return compare_buffer("initial ZT", buf, zt_in, ZT_SIG_REG_BYTES); } +static bool check_ptrace_values_fpmr(pid_t child, struct test_config *config) +{ + uint64_t val; + struct iovec iov; + int ret; + + if (!fpmr_supported()) + return true; + + iov.iov_base = &val; + iov.iov_len = sizeof(val); + ret = 
ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read initial FPMR: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return compare_buffer("initial FPMR", &val, &fpmr_in, sizeof(val)); +} static bool check_ptrace_values(pid_t child, struct test_config *config) { @@ -620,6 +685,9 @@ static bool check_ptrace_values(pid_t child, struct test_config *config) if (!check_ptrace_values_zt(child, config)) pass = false; + if (!check_ptrace_values_fpmr(child, config)) + pass = false; + return pass; } @@ -823,11 +891,18 @@ static void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); + bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; + if (sme_supported() && + (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) + sm_change = true; + else + sm_change = false; + fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -874,6 +949,21 @@ static void set_initial_values(struct test_config *config) memset(zt_expected, 0, ZT_SIG_REG_BYTES); memset(zt_out, 0, sizeof(zt_out)); } + + if (fpmr_supported()) { + fill_random(&fpmr_in, sizeof(fpmr_in)); + fpmr_in &= FPMR_SAFE_BITS; + + /* Entering or exiting streaming mode clears FPMR */ + if (sm_change) + fpmr_expected = 0; + else + fpmr_expected = fpmr_in; + } else { + fpmr_in = 0; + fpmr_expected = 0; + fpmr_out = 0; + } } static bool check_memory_values(struct test_config *config) @@ -924,6 +1014,12 @@ static bool check_memory_values(struct test_config *config) if (!compare_buffer("saved ZT", zt_out, zt_expected, ZT_SIG_REG_BYTES)) pass = false; + if (fpmr_out != fpmr_expected) { + ksft_print_msg("Mismatch in saved FPMR: %lx != %lx\n", + fpmr_out, fpmr_expected); + pass = false; + } + return pass; } @@ -1001,6 +1097,36 @@ static void fpsimd_write(pid_t child, struct test_config *test_config) strerror(errno), errno); } +static bool fpmr_write_supported(struct test_config *config) +{ + if (!fpmr_supported()) + return false; + + if (!sve_sme_same(config)) + return false; + + return true; +} + +static void fpmr_write_expected(struct test_config *config) +{ + fill_random(&fpmr_expected, sizeof(fpmr_expected)); + fpmr_expected &= FPMR_SAFE_BITS; +} + +static void fpmr_write(pid_t child, struct test_config *config) +{ + struct iovec iov; + int ret; + + iov.iov_len = sizeof(fpmr_expected); + iov.iov_base = &fpmr_expected; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) + ksft_print_msg("Failed to write FPMR: %s (%d)\n", + strerror(errno), errno); +} + static void sve_write_expected(struct test_config *config) { int vl = vl_expected(config); @@ -1069,21 +1195,19 @@ static void sve_write(pid_t child, struct test_config *config) static bool za_write_supported(struct test_config *config) { - if (config->svcr_expected & SVCR_SM) { - if (!(config->svcr_in & SVCR_SM)) + if (config->sme_vl_in != config->sme_vl_expected) { + /* Changing the SME VL exits streaming mode. 
*/ + if (config->svcr_expected & SVCR_SM) { return false; - - /* Changing the SME VL exits streaming mode */ - if (config->sme_vl_in != config->sme_vl_expected) { + } + } else { + /* Otherwise we can't change streaming mode */ + if ((config->svcr_in & SVCR_SM) != + (config->svcr_expected & SVCR_SM)) { return false; } } - /* Can't disable SM outside a VL change */ - if ((config->svcr_in & SVCR_SM) && - !(config->svcr_expected & SVCR_SM)) - return false; - return true; } @@ -1259,6 +1383,12 @@ static struct test_definition base_test_defs[] = { .set_expected_values = fpsimd_write_expected, .modify_values = fpsimd_write, }, + { + .name = "FPMR write", + .supported = fpmr_write_supported, + .set_expected_values = fpmr_write_expected, + .modify_values = fpmr_write, + }, }; static struct test_definition sve_test_defs[] = { @@ -1468,6 +1598,9 @@ int main(void) if (fa64_supported()) ksft_print_msg("FA64 supported\n"); + if (fpmr_supported()) + ksft_print_msg("FPMR supported\n"); + ksft_set_plan(tests); /* Get signal handers ready before we start any children */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.h b/tools/testing/selftests/arm64/fp/fp-ptrace.h index db4f2c4d750c..c06919aaf1f7 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.h +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.h @@ -10,4 +10,16 @@ #define SVCR_SM (1 << SVCR_SM_SHIFT) #define SVCR_ZA (1 << SVCR_ZA_SHIFT) +#define HAVE_SVE_SHIFT 0 +#define HAVE_SME_SHIFT 1 +#define HAVE_SME2_SHIFT 2 +#define HAVE_FA64_SHIFT 3 +#define HAVE_FPMR_SHIFT 4 + +#define HAVE_SVE (1 << HAVE_SVE_SHIFT) +#define HAVE_SME (1 << HAVE_SME_SHIFT) +#define HAVE_SME2 (1 << HAVE_SME2_SHIFT) +#define HAVE_FA64 (1 << HAVE_FA64_SHIFT) +#define HAVE_FPMR (1 << HAVE_FPMR_SHIFT) + #endif diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index faac24bdefeb..74e23208b94c 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -28,6 +28,9 @@ #define MAX_VLS 16 +#define SIGNAL_INTERVAL_MS 25 +#define LOG_INTERVALS (1000 / SIGNAL_INTERVAL_MS) + struct child_data { char *name, *output; pid_t pid; @@ -79,7 +82,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(pipefd[1], 1); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -89,7 +92,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(startup_pipe[0], 3); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -107,16 +110,15 @@ static void child_start(struct child_data *child, const char *program) */ ret = read(3, &i, sizeof(i)); if (ret < 0) - fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", - strerror(errno), errno); + printf("read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); if (ret > 0) - fprintf(stderr, "%d bytes of data on startup pipe\n", - ret); + printf("%d bytes of data on startup pipe\n", ret); close(3); ret = execl(program, program, NULL); - fprintf(stderr, "execl(%s) failed: %d (%s)\n", - program, errno, strerror(errno)); + printf("execl(%s) failed: %d (%s)\n", + program, errno, strerror(errno)); exit(EXIT_FAILURE); } else { @@ -221,7 +223,7 @@ static void child_output(struct child_data *child, uint32_t events, static void child_tickle(struct child_data *child) { if (child->output_seen && !child->exited) - kill(child->pid, SIGUSR2); + kill(child->pid, SIGUSR1); } 
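The fp-ptrace changes above exercise FPMR through the new NT_ARM_FPMR regset using the usual iovec-based PTRACE_GETREGSET/PTRACE_SETREGSET calls. A condensed sketch of that access pattern for a stopped tracee (the helper names are hypothetical; the NT_ARM_FPMR fallback mirrors the fp-ptrace.c definition above, and as with FPMR_SAFE_BITS only architecturally defined fields should be written)::

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    #ifndef NT_ARM_FPMR
    #define NT_ARM_FPMR 0x40e   /* as in the <linux/elf.h> hunk above */
    #endif

    /* Read the stopped tracee's FPMR; returns 0 on success. */
    static int read_fpmr(pid_t child, uint64_t *val)
    {
        struct iovec iov = {
            .iov_base = val,
            .iov_len = sizeof(*val),
        };

        if (ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov) != 0) {
            fprintf(stderr, "FPMR read failed: %s (%d)\n",
                    strerror(errno), errno);
            return -1;
        }

        return 0;
    }

    /* Write FPMR back; keep to valid bits (cf. FPMR_SAFE_BITS above). */
    static int write_fpmr(pid_t child, uint64_t val)
    {
        struct iovec iov = {
            .iov_base = &val,
            .iov_len = sizeof(val),
        };

        return ptrace(PTRACE_SETREGSET, child, NT_ARM_FPMR, &iov) ? -1 : 0;
    }

The fp-ptrace tests wrap exactly these calls in feature checks (fpmr_supported(), fpmr_write_supported()) before relying on them.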
static void child_stop(struct child_data *child) @@ -449,7 +451,8 @@ static const struct option options[] = { int main(int argc, char **argv) { int ret; - int timeout = 10; + int timeout = 10 * (1000 / SIGNAL_INTERVAL_MS); + int poll_interval = 5000; int cpus, i, j, c; int sve_vl_count, sme_vl_count; bool all_children_started = false; @@ -505,7 +508,7 @@ int main(int argc, char **argv) have_sme2 ? "present" : "absent"); if (timeout > 0) - ksft_print_msg("Will run for %ds\n", timeout); + ksft_print_msg("Will run for %d\n", timeout); else ksft_print_msg("Will run until terminated\n"); @@ -578,14 +581,14 @@ int main(int argc, char **argv) break; /* - * Timeout is counted in seconds with no output, the - * tests print during startup then are silent when - * running so this should ensure they all ran enough - * to install the signal handler, this is especially - * useful in emulation where we will both be slow and - * likely to have a large set of VLs. + * Timeout is counted in poll intervals with no + * output, the tests print during startup then are + * silent when running so this should ensure they all + * ran enough to install the signal handler, this is + * especially useful in emulation where we will both + * be slow and likely to have a large set of VLs. */ - ret = epoll_wait(epoll_fd, evs, tests, 1000); + ret = epoll_wait(epoll_fd, evs, tests, poll_interval); if (ret < 0) { if (errno == EINTR) continue; @@ -623,10 +626,12 @@ int main(int argc, char **argv) } all_children_started = true; + poll_interval = SIGNAL_INTERVAL_MS; } - ksft_print_msg("Sending signals, timeout remaining: %d\n", - timeout); + if ((timeout % LOG_INTERVALS) == 0) + ksft_print_msg("Sending signals, timeout remaining: %d\n", + timeout); for (i = 0; i < num_children; i++) child_tickle(&children[i]); @@ -651,7 +656,5 @@ int main(int argc, char **argv) drain_output(true); - ksft_print_cnts(); - - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 8b960d01ed2e..f89d67894c2e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -134,8 +134,7 @@ function check_vreg b memcmp endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. 
+// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -143,7 +142,6 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random V-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #7 movi v9.16b, #9 movi v31.8b, #31 @@ -215,6 +213,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index e8da3b4cbd23..859345379044 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -267,6 +267,10 @@ int main(void) strerror(errno), errno); sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR1, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR1 handler: %s (%d)\n", + strerror(errno), errno); ret = sigaction(SIGUSR2, &sa, NULL); if (ret < 0) printf("Failed to install SIGUSR2 handler: %s (%d)\n", diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h index 9292bba5400b..85b9184e0835 100644 --- a/tools/testing/selftests/arm64/fp/sme-inst.h +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -5,6 +5,8 @@ #ifndef SME_INST_H #define SME_INST_H +#define REG_FPMR S3_3_C4_C4_2 + /* * RDSVL X\nx, #\imm */ diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 6d61992fe8a0..577b6e05e860 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -82,10 +82,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -340,7 +342,7 @@ static void ptrace_set_sve_get_sve_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -441,7 +443,7 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -545,7 +547,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, read_sve = read_buf; if (read_sve->vl != vl) { - ksft_test_result_fail("Child VL != expected VL %d\n", + ksft_test_result_fail("Child VL != expected VL: %u != %u\n", read_sve->vl, vl); goto out; } @@ -555,7 +557,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_FPSIMD: expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld 
bytes, expected %ld\n", read_sve_size, expected_size); goto out; } @@ -571,7 +573,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_SVE: expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld bytes, expected %ld\n", read_sve_size, expected_size); goto out; } diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index fff60e2a25ad..80e072f221cd 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -291,8 +291,7 @@ function check_ffr #endif endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. +// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -300,13 +299,12 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random Z-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #1 movi v9.16b, #2 movi v31.8b, #3 -#ifndef SSVE // And P0 - rdffr p0.b + ptrue p0.d +#ifndef SSVE // And FFR wrffr p15.b #endif @@ -378,6 +376,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // Irritation signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c index ac27d87396fc..08c777f87ea2 100644 --- a/tools/testing/selftests/arm64/fp/za-ptrace.c +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -48,10 +48,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -201,7 +203,7 @@ static void ptrace_set_get_data(pid_t child, unsigned int vl) data_size = ZA_PT_SIZE(vq); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for VL %u\n", data_size, vl); return; } diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 095b45531640..9c33e13e9dc4 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -148,21 +148,16 @@ function check_za b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. 
+// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZA data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZA to all bits 0 + smstop + smstart_za ret endfunction @@ -231,6 +226,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c index 996d9614a131..584b8d59b7ea 100644 --- a/tools/testing/selftests/arm64/fp/zt-ptrace.c +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -43,10 +43,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -231,7 +233,7 @@ static void ptrace_enable_za_via_zt(pid_t child) /* Should have register data */ if (za_out->size < ZA_PT_SIZE(vq)) { ksft_print_msg("ZA data less than expected: %u < %u\n", - za_out->size, ZA_PT_SIZE(vq)); + za_out->size, (unsigned int)ZA_PT_SIZE(vq)); fail = true; vq = 0; } diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index b5c81e81a379..38080f3c3280 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -117,21 +117,16 @@ function check_zt b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. +// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZT data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZT to all bits 0 + smstop + smstart_za ret endfunction @@ -200,6 +195,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore new file mode 100644 index 000000000000..bbb8e40a7e52 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/.gitignore @@ -0,0 +1,7 @@ +basic-gcs +libc-gcs +gcs-locking +gcs-stress +gcs-stress-thread +gcspushm +gcsstr diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile new file mode 100644 index 000000000000..d2f3497a9103 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2023 ARM Limited +# +# In order to avoid interaction with the toolchain and dynamic linker the +# portions of these tests that interact with the GCS are implemented using +# nolibc. 
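+# (A concrete example of the hazard being avoided: enabling GCS through
+# an ordinary libc prctl() wrapper can fault as soon as the wrapper
+# returns, because frames entered before the mode switch have no
+# matching entries on the new GCS. Building -nostdlib/-static against
+# nolibc.h, or as bare assembly, keeps every post-enable call and
+# return balanced.)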
+# + +TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress gcspushm gcsstr +TEST_GEN_PROGS_EXTENDED := gcs-stress-thread + +LDLIBS+=-lpthread + +include ../../lib.mk + +$(OUTPUT)/basic-gcs: basic-gcs.c + $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -static -include ../../../../include/nolibc/nolibc.h \ + -I../../../../../usr/include \ + -std=gnu99 -I../.. -g \ + -ffreestanding -Wall $^ -o $@ -lgcc + +$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcspushm: gcspushm.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcsstr: gcsstr.S + $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/gcs/asm-offsets.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c new file mode 100644 index 000000000000..3fb9742342a3 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + */ + +#include +#include + +#include + +#include +#include +#include + +#include "kselftest.h" +#include "gcs-util.h" + +/* nolibc doesn't have sysconf(), just hard code the maximum */ +static size_t page_size = 65536; + +static __attribute__((noinline)) void valid_gcs_function(void) +{ + /* Do something the compiler can't optimise out */ + my_syscall1(__NR_prctl, PR_SVE_GET_VL); +} + +static inline int gcs_set_status(unsigned long mode) +{ + bool enabling = mode & PR_SHADOW_STACK_ENABLE; + int ret; + unsigned long new_mode; + + /* + * The prctl takes 1 argument but we need to ensure that the + * other 3 values passed in registers to the syscall are zero + * since the kernel validates them. + */ + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, + 0, 0, 0); + + if (ret == 0) { + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &new_mode, 0, 0, 0); + if (ret == 0) { + if (new_mode != mode) { + ksft_print_msg("Mode set to %lx not %lx\n", + new_mode, mode); + ret = -EINVAL; + } + } else { + ksft_print_msg("Failed to validate mode: %d\n", ret); + } + + if (enabling != chkfeat_gcs()) { + ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n", + enabling ? "" : "not ", + chkfeat_gcs() ? 
"" : "not "); + ret = -EINVAL; + } + } + + return ret; +} + +/* Try to read the status */ +static bool read_status(void) +{ + unsigned long state; + int ret; + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &state, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("Failed to read state: %d\n", ret); + return false; + } + + return state & PR_SHADOW_STACK_ENABLE; +} + +/* Just a straight enable */ +static bool base_enable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret); + return false; + } + + return true; +} + +/* Check we can read GCSPR_EL0 when GCS is enabled */ +static bool read_gcspr_el0(void) +{ + unsigned long *gcspr_el0; + + ksft_print_msg("GET GCSPR\n"); + gcspr_el0 = get_gcspr(); + ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0); + + return true; +} + +/* Also allow writes to stack */ +static bool enable_writeable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Also allow writes to stack */ +static bool enable_push_pop(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Enable GCS and allow everything */ +static bool enable_all(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH | + PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +static bool enable_invalid(void) +{ + int ret = gcs_set_status(ULONG_MAX); + if (ret == 0) { + ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX); + return false; + } + + return true; +} + +/* Map a GCS */ +static bool map_guarded_stack(void) +{ + int ret; + uint64_t *buf; + uint64_t expected_cap; + int elem; + bool pass = true; + + buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size, + SHADOW_STACK_SET_MARKER | + SHADOW_STACK_SET_TOKEN); + if (buf == MAP_FAILED) { + ksft_print_msg("Failed to map %lu byte GCS: %d\n", + page_size, errno); + return false; + } + ksft_print_msg("Mapped GCS at %p-%p\n", buf, + (void *)((uint64_t)buf + page_size)); + + /* The top of the newly allocated region should be 0 */ + elem = (page_size / sizeof(uint64_t)) - 1; + if (buf[elem]) { + ksft_print_msg("Last entry is 0x%llx not 0x0\n", buf[elem]); + pass = false; + } + + /* Then a valid cap token */ + elem--; + expected_cap = ((uint64_t)buf + page_size - 16); + expected_cap &= GCS_CAP_ADDR_MASK; + expected_cap |= GCS_CAP_VALID_TOKEN; + if (buf[elem] != expected_cap) { + ksft_print_msg("Cap entry is 0x%llx not 0x%llx\n", + buf[elem], expected_cap); + pass = false; + } + ksft_print_msg("cap token is 0x%llx\n", buf[elem]); + + /* The rest should be zeros */ + for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) { + if (!buf[elem]) 
+			continue;
+		ksft_print_msg("GCS slot %d is 0x%llx not 0x0\n",
+			       elem, buf[elem]);
+		pass = false;
+	}
+
+	ret = munmap(buf, page_size);
+	if (ret != 0) {
+		ksft_print_msg("Failed to unmap %ld byte GCS: %d\n",
+			       page_size, errno);
+		pass = false;
+	}
+
+	return pass;
+}
+
+/* A fork()ed process can run */
+static bool test_fork(void)
+{
+	unsigned long child_mode;
+	int ret, status;
+	pid_t pid;
+	bool pass = true;
+
+	pid = fork();
+	if (pid == -1) {
+		ksft_print_msg("fork() failed: %d\n", errno);
+		pass = false;
+		goto out;
+	}
+	if (pid == 0) {
+		/* In child, make sure we can call a function, read
+		 * the GCS pointer and status and then exit */
+		valid_gcs_function();
+		get_gcspr();
+
+		ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+				  &child_mode, 0, 0, 0);
+		if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) {
+			ksft_print_msg("GCS not enabled in child\n");
+			ret = -EINVAL;
+		}
+
+		exit(ret);
+	}
+
+	/*
+	 * In parent, check we can still do function calls then block
+	 * for the child.
+	 */
+	valid_gcs_function();
+
+	ksft_print_msg("Waiting for child %d\n", pid);
+
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		ksft_print_msg("Failed to wait for child: %d\n",
+			       errno);
+		return false;
+	}
+
+	if (!WIFEXITED(status)) {
+		ksft_print_msg("Child exited due to signal %d\n",
+			       WTERMSIG(status));
+		pass = false;
+	} else {
+		if (WEXITSTATUS(status)) {
+			ksft_print_msg("Child exited with status %d\n",
+				       WEXITSTATUS(status));
+			pass = false;
+		}
+	}
+
+out:
+
+	return pass;
+}
+
+typedef bool (*gcs_test)(void);
+
+static struct {
+	char *name;
+	gcs_test test;
+	bool needs_enable;
+} tests[] = {
+	{ "read_status", read_status },
+	{ "base_enable", base_enable, true },
+	{ "read_gcspr_el0", read_gcspr_el0 },
+	{ "enable_writeable", enable_writeable, true },
+	{ "enable_push_pop", enable_push_pop, true },
+	{ "enable_all", enable_all, true },
+	{ "enable_invalid", enable_invalid, true },
+	{ "map_guarded_stack", map_guarded_stack },
+	{ "fork", test_fork },
+};
+
+int main(void)
+{
+	int i, ret;
+	unsigned long gcs_mode;
+
+	ksft_print_header();
+
+	/*
+	 * We don't have getauxval() with nolibc so treat a failure to
+	 * read GCS state as a lack of support and skip.
+	 */
+	ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
+			  &gcs_mode, 0, 0, 0);
+	if (ret != 0)
+		ksft_exit_skip("Failed to read GCS state: %d\n", ret);
+
+	if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
+		gcs_mode = PR_SHADOW_STACK_ENABLE;
+		ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
+				  gcs_mode, 0, 0, 0);
+		if (ret != 0)
+			ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret);
+	}
+
+	ksft_set_plan(ARRAY_SIZE(tests));
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name);
+	}
+
+	/* One last test: disable GCS, which we can only do once */
+	ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+	if (ret != 0)
+		ksft_print_msg("Failed to disable GCS: %d\n", ret);
+
+	ksft_finished();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
new file mode 100644
index 000000000000..989f75a491b7
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ *
+ * Tests for GCS mode locking. These tests rely on both having GCS
+ * unconfigured on entry and on the kselftest harness running each
+ * test in a fork()ed process which will have its own mode.
+ */ + +#include + +#include +#include + +#include + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* No mode bits are rejected for locking */ +TEST(lock_all_modes) +{ + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0); + ASSERT_EQ(ret, 0); +} + +FIXTURE(valid_modes) +{ +}; + +FIXTURE_VARIANT(valid_modes) +{ + unsigned long mode; +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable) +{ + .mode = PR_SHADOW_STACK_ENABLE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | + PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_SETUP(valid_modes) +{ +} + +FIXTURE_TEARDOWN(valid_modes) +{ +} + +/* We can set the mode at all */ +TEST_F(valid_modes, set) +{ + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + _exit(0); +} + +/* Enabling, locking then disabling is rejected */ +TEST_F(valid_modes, enable_lock_disable) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0); + ASSERT_EQ(ret, -EBUSY); + + _exit(0); +} + +/* Locking then enabling is rejected */ +TEST_F(valid_modes, lock_enable) +{ + unsigned long mode; + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, -EBUSY); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, 0); + + _exit(0); +} + +/* Locking then changing other modes is fine */ +TEST_F(valid_modes, lock_enable_disable_others) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + PR_SHADOW_STACK_ALL_MODES); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); + + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + _exit(0); +} + +int main(int argc, char 
**argv) +{ + unsigned long mode; + int ret; + + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (mode & PR_SHADOW_STACK_ENABLE) { + ksft_print_msg("GCS was enabled, test unsupported\n"); + return KSFT_SKIP; + } + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S new file mode 100644 index 000000000000..b88b25217da5 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S @@ -0,0 +1,311 @@ +// Program that loops for ever doing lots of recursions and system calls, +// intended to be used as part of a stress test for GCS context switching. +// +// Copyright 2015-2023 Arm Ltd + +#include + +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 + +#define si_code 8 + +#define SIGINT 2 +#define SIGABRT 6 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGTERM 15 +#define SEGV_CPERR 10 + +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 + +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. 
+function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b abort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + + +function tickle_handler + // Perhaps collect GCSPR_EL0 here in future? + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error\n" + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +function segv_handler + // stash the siginfo_t * + mov x20, x1 + + // Disable GCS, we don't want additional faults logging things + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + + puts "Got SIGSEGV code " + + ldr x21, [x20, #si_code] + mov x0, x21 + bl putdec + + // GCS faults should have si_code SEGV_CPERR + cmp x21, #SEGV_CPERR + bne 1f + + puts " (GCS violation)" +1: + mov x0, '\n' + bl putc + b abort +endfunction + +// Recurse x20 times +.macro recurse id +function recurse\id + stp x29, x30, [sp, #-16]! + mov x29, sp + + cmp x20, 0 + beq 1f + sub x20, x20, 1 + bl recurse\id + +1: + ldp x29, x30, [sp], #16 + + // Do a syscall immediately prior to returning to try to provoke + // scheduling and migration at a point where coherency issues + // might trigger. + mov x8, #__NR_getpid + svc #0 + + ret +endfunction +.endm + +// Generate and use two copies so we're changing the GCS contents +recurse 1 +recurse 2 + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS\n" + b abort +1: + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGSEGV + adr x1, segv_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + puts "Running\n" + +loop: + // Small recursion depth so we're frequently flipping between + // the two recursors and changing what's on the stack + mov x20, #5 + bl recurse1 + mov x20, #5 + bl recurse2 + b loop +endfunction + +abort: + mov x0, #255 + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c new file mode 100644 index 000000000000..bbc7f4950c13 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022-3 ARM Limited. 
+ */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 199309L + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +struct child_data { + char *name, *output; + pid_t pid; + int stdout; + bool output_seen; + bool exited; + int exit_status; + int exit_signal; +}; + +static int epoll_fd; +static struct child_data *children; +static struct epoll_event *evs; +static int tests; +static int num_children; +static bool terminate; + +static int startup_pipe[2]; + +static int num_processors(void) +{ + long nproc = sysconf(_SC_NPROCESSORS_CONF); + if (nproc < 0) { + perror("Unable to read number of processors\n"); + exit(EXIT_FAILURE); + } + + return nproc; +} + +static void start_thread(struct child_data *child, int id) +{ + int ret, pipefd[2], i; + struct epoll_event ev; + + ret = pipe(pipefd); + if (ret != 0) + ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", + strerror(errno), errno); + + child->pid = fork(); + if (child->pid == -1) + ksft_exit_fail_msg("fork() failed: %s (%d)\n", + strerror(errno), errno); + + if (!child->pid) { + /* + * In child, replace stdout with the pipe, errors to + * stderr from here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Duplicate the read side of the startup pipe to + * FD 3 so we can close everything else. + */ + ret = dup2(startup_pipe[0], 3); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Very dumb mechanism to clean open FDs other than + * stdio. We don't want O_CLOEXEC for the pipes... + */ + for (i = 4; i < 8192; i++) + close(i); + + /* + * Read from the startup pipe, there should be no data + * and we should block until it is closed. We just + * carry on on error since this isn't super critical. + */ + ret = read(3, &i, sizeof(i)); + if (ret < 0) + fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); + if (ret > 0) + fprintf(stderr, "%d bytes of data on startup pipe\n", + ret); + close(3); + + ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL); + fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n", + errno, strerror(errno)); + + exit(EXIT_FAILURE); + } else { + /* + * In parent, remember the child and close our copy of the + * write side of stdout. 
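+		 * If we kept our copy of the write side open the read
+		 * side would never report EPOLLHUP when the child
+		 * exits.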
+ */ + close(pipefd[1]); + child->stdout = pipefd[0]; + child->output = NULL; + child->exited = false; + child->output_seen = false; + + ev.events = EPOLLIN | EPOLLHUP; + ev.data.ptr = child; + + ret = asprintf(&child->name, "Thread-%d", id); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); + if (ret < 0) { + ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", + child->name, strerror(errno), errno); + } + } + + ksft_print_msg("Started %s\n", child->name); + num_children++; +} + +static bool child_output_read(struct child_data *child) +{ + char read_data[1024]; + char work[1024]; + int ret, len, cur_work, cur_read; + + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + if (errno == EINTR) + return true; + + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), + errno); + return false; + } + len = ret; + + child->output_seen = true; + + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } + + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; + + if (work[cur_work] == '\n') { + work[cur_work] = '\0'; + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; + } + } + + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + + return false; +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + bool read_more; + + if (events & EPOLLIN) { + do { + read_more = child_output_read(child); + } while (read_more); + } + + if (events & EPOLLHUP) { + close(child->stdout); + child->stdout = -1; + flush = true; + } + + if (flush && child->output) { + ksft_print_msg("%s: %s\n", child->name, child->output); + free(child->output); + child->output = NULL; + } +} + +static void child_tickle(struct child_data *child) +{ + if (child->output_seen && !child->exited) + kill(child->pid, SIGUSR1); +} + +static void child_stop(struct child_data *child) +{ + if (!child->exited) + kill(child->pid, SIGTERM); +} + +static void child_cleanup(struct child_data *child) +{ + pid_t ret; + int status; + bool fail = false; + + if (!child->exited) { + do { + ret = waitpid(child->pid, &status, 0); + if (ret == -1 && errno == EINTR) + continue; + + if (ret == -1) { + ksft_print_msg("waitpid(%d) failed: %s (%d)\n", + child->pid, strerror(errno), + errno); + fail = true; + break; + } + + if (WIFEXITED(status)) { + child->exit_status = WEXITSTATUS(status); + child->exited = true; + } + + if (WIFSIGNALED(status)) { + child->exit_signal = WTERMSIG(status); + ksft_print_msg("%s: Exited due to signal %d\n", + child->name, child->exit_signal); + fail = true; + child->exited = true; + } + } while (!child->exited); + } + + if (!child->output_seen) { + ksft_print_msg("%s no output seen\n", child->name); + fail = true; + } + + if (child->exit_status != 0) { + ksft_print_msg("%s exited with error code %d\n", + child->name, child->exit_status); + fail = true; + } + + ksft_test_result(!fail, "%s\n", child->name); +} + +static void handle_child_signal(int sig, siginfo_t *info, void *context) +{ + int i; + bool found = false; + + for (i = 0; i < num_children; i++) { + if (children[i].pid == info->si_pid) { + children[i].exited = true; + 
children[i].exit_status = info->si_status; + found = true; + break; + } + } + + if (!found) + ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n", + info->si_pid, info->si_status); +} + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + int i; + + /* If we're already exiting then don't signal again */ + if (terminate) + return; + + ksft_print_msg("Got signal, exiting...\n"); + + terminate = true; + + /* + * This should be redundant, the main loop should clean up + * after us, but for safety stop everything we can here. + */ + for (i = 0; i < num_children; i++) + child_stop(&children[i]); +} + +/* Handle any pending output without blocking */ +static void drain_output(bool flush) +{ + int ret = 1; + int i; + + while (ret > 0) { + ret = epoll_wait(epoll_fd, evs, tests, 0); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_print_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + for (i = 0; i < ret; i++) + child_output(evs[i].data.ptr, evs[i].events, flush); + } +} + +static const struct option options[] = { + { "timeout", required_argument, NULL, 't' }, + { } +}; + +int main(int argc, char **argv) +{ + int seen_children; + bool all_children_started = false; + int gcs_threads; + int timeout = 10; + int ret, cpus, i, c; + struct sigaction sa; + + while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { + switch (c) { + case 't': + ret = sscanf(optarg, "%d", &timeout); + if (ret != 1) + ksft_exit_fail_msg("Failed to parse timeout %s\n", + optarg); + break; + default: + ksft_exit_fail_msg("Unknown argument\n"); + } + } + + cpus = num_processors(); + tests = 0; + + if (getauxval(AT_HWCAP) & HWCAP_GCS) { + /* One extra thread, trying to trigger migrations */ + gcs_threads = cpus + 1; + tests += gcs_threads; + } else { + gcs_threads = 0; + } + + ksft_print_header(); + ksft_set_plan(tests); + + ksft_print_msg("%d CPUs, %d GCS threads\n", + cpus, gcs_threads); + + if (!tests) + ksft_exit_skip("No tests scheduled\n"); + + if (timeout > 0) + ksft_print_msg("Will run for %ds\n", timeout); + else + ksft_print_msg("Will run until terminated\n"); + + children = calloc(sizeof(*children), tests); + if (!children) + ksft_exit_fail_msg("Unable to allocate child data\n"); + + ret = epoll_create1(EPOLL_CLOEXEC); + if (ret < 0) + ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", + strerror(errno), ret); + epoll_fd = ret; + + /* Create a pipe which children will block on before execing */ + ret = pipe(startup_pipe); + if (ret != 0) + ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n", + strerror(errno), errno); + + /* Get signal handers ready before we start any children */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGINT, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n", + strerror(errno), errno); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + sa.sa_sigaction = handle_child_signal; + ret = sigaction(SIGCHLD, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", + strerror(errno), errno); + + evs = calloc(tests, sizeof(*evs)); + if (!evs) + ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + tests); + + for (i = 0; i < gcs_threads; i++) + start_thread(&children[i], i); + + /* + * All children started, close the startup pipe 
and let them + * run. + */ + close(startup_pipe[0]); + close(startup_pipe[1]); + + timeout *= 10; + for (;;) { + /* Did we get a signal asking us to exit? */ + if (terminate) + break; + + /* + * Timeout is counted in 100ms with no output, the + * tests print during startup then are silent when + * running so this should ensure they all ran enough + * to install the signal handler, this is especially + * useful in emulation where we will both be slow and + * likely to have a large set of VLs. + */ + ret = epoll_wait(epoll_fd, evs, tests, 100); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + /* Output? */ + if (ret > 0) { + for (i = 0; i < ret; i++) { + child_output(evs[i].data.ptr, evs[i].events, + false); + } + continue; + } + + /* Otherwise epoll_wait() timed out */ + + /* + * If the child processes have not produced output they + * aren't actually running the tests yet. + */ + if (!all_children_started) { + seen_children = 0; + + for (i = 0; i < num_children; i++) + if (children[i].output_seen || + children[i].exited) + seen_children++; + + if (seen_children != num_children) { + ksft_print_msg("Waiting for %d children\n", + num_children - seen_children); + continue; + } + + all_children_started = true; + } + + ksft_print_msg("Sending signals, timeout remaining: %d00ms\n", + timeout); + + for (i = 0; i < num_children; i++) + child_tickle(&children[i]); + + /* Negative timeout means run indefinitely */ + if (timeout < 0) + continue; + if (--timeout == 0) + break; + } + + ksft_print_msg("Finishing up...\n"); + terminate = true; + + for (i = 0; i < tests; i++) + child_stop(&children[i]); + + drain_output(false); + + for (i = 0; i < tests; i++) + child_cleanup(&children[i]); + + drain_output(true); + + ksft_finished(); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h new file mode 100644 index 000000000000..c99a6b39ac14 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-util.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Limited. 
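+ *
+ * Shared definitions for the GCS selftests: prctl constants, the cap
+ * token layout and small wrappers around the GCS stack switch and
+ * feature check instructions.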
+ */
+
+#ifndef GCS_UTIL_H
+#define GCS_UTIL_H
+
+#include
+
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+#endif
+
+#ifndef __NR_prctl
+#define __NR_prctl 167
+#endif
+
+#ifndef NT_ARM_GCS
+#define NT_ARM_GCS 0x410
+
+struct user_gcs {
+	__u64 features_enabled;
+	__u64 features_locked;
+	__u64 gcspr_el0;
+};
+#endif
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS	74
+#define PR_SET_SHADOW_STACK_STATUS	75
+#define PR_LOCK_SHADOW_STACK_STATUS	76
+
+# define PR_SHADOW_STACK_ENABLE		(1UL << 0)
+# define PR_SHADOW_STACK_WRITE		(1UL << 1)
+# define PR_SHADOW_STACK_PUSH		(1UL << 2)
+
+#define PR_SHADOW_STACK_ALL_MODES \
+	(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
+
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0)	/* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1)	/* Set up a top of stack marker in the shadow stack */
+
+#define GCS_CAP_ADDR_MASK	(0xfffffffffffff000UL)
+#define GCS_CAP_TOKEN_MASK	(0x0000000000000fffUL)
+#define GCS_CAP_VALID_TOKEN	1
+#define GCS_CAP_IN_PROGRESS_TOKEN	5
+
+#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \
+		    GCS_CAP_VALID_TOKEN)
+
+static inline unsigned long *get_gcspr(void)
+{
+	unsigned long *gcspr;
+
+	asm volatile(
+		"mrs	%0, S3_3_C2_C5_1"
+		: "=r" (gcspr)
+		:
+		: "cc");
+
+	return gcspr;
+}
+
+static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt)
+{
+	asm volatile (
+		"sys #3, C7, C7, #2, %0\n"
+		:
+		: "rZ" (Xt)
+		: "memory");
+}
+
+static inline unsigned long __attribute__((always_inline)) *gcsss2(void)
+{
+	unsigned long *Xt;
+
+	asm volatile(
+		"SYSL %0, #3, C7, C7, #3\n"
+		: "=r" (Xt)
+		:
+		: "memory");
+
+	return Xt;
+}
+
+static inline bool chkfeat_gcs(void)
+{
+	register long val __asm__ ("x16") = 1;
+
+	/* CHKFEAT x16 */
+	asm volatile(
+		"hint #0x28\n"
+		: "=r" (val)
+		: "r" (val));
+
+	return val != 1;
+}
+
+#endif
diff --git a/tools/testing/selftests/arm64/gcs/gcspushm.S b/tools/testing/selftests/arm64/gcs/gcspushm.S
new file mode 100644
index 000000000000..bbe17c1325ac
--- /dev/null
+++ b/tools/testing/selftests/arm64/gcs/gcspushm.S
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright 2024 Arm Limited
+//
+// Give ourselves GCS push permissions then use them
+
+#include
+
+/* Shadow Stack/Guarded Control Stack interface */
+#define PR_GET_SHADOW_STACK_STATUS	74
+#define PR_SET_SHADOW_STACK_STATUS	75
+#define PR_LOCK_SHADOW_STACK_STATUS	76
+
+# define PR_SHADOW_STACK_ENABLE		(1UL << 0)
+# define PR_SHADOW_STACK_WRITE		(1UL << 1)
+# define PR_SHADOW_STACK_PUSH		(1UL << 2)
+
+#define KSFT_SKIP 4
+
+.macro function name
+	.macro endfunction
+		.type \name, @function
+		.purgem endfunction
+	.endm
+\name:
+.endm
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+ + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with push permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + sys #3, c7, c7, #0, x0 // GCSPUSHM + sysl x0, #3, c7, c7, #1 // GCSPOPM + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcsstr.S b/tools/testing/selftests/arm64/gcs/gcsstr.S new file mode 100644 index 000000000000..a42bba6e30b1 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcsstr.S @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright 2024 Arm Limited +// +// Give ourselves GCS write permissions then use them + +#include + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +#define KSFT_SKIP 4 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with write permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + mrs x0, GCSPR_EL0 + sub x0, x0, #8 + .inst 0xd91f1c01 // GCSSTR x1, x0 + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c new file mode 100644 index 000000000000..17b2fabfec38 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. 
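+ *
+ * Tests for the GCS prctl() interface, map_shadow_stack(), stack
+ * pivoting and the ptrace interface, run under the kselftest harness.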
+ */ + +#define _GNU_SOURCE + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +static noinline void gcs_recurse(int depth) +{ + if (depth) + gcs_recurse(depth - 1); + + /* Prevent tail call optimization so we actually recurse */ + asm volatile("dsb sy" : : : "memory"); +} + +/* Smoke test that a function call and return works*/ +TEST(can_call_function) +{ + gcs_recurse(0); +} + +static void *gcs_test_thread(void *arg) +{ + int ret; + unsigned long mode; + + /* + * Some libcs don't seem to fill unused arguments with 0 but + * the kernel validates this so we supply all 5 arguments. + */ + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret); + return NULL; + } + + if (!(mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in thread, mode is %lu\n", + mode); + return NULL; + } + + /* Just in case... */ + gcs_recurse(0); + + /* Use a non-NULL value to indicate a pass */ + return &gcs_test_thread; +} + +/* Verify that if we start a new thread it has GCS enabled */ +TEST(gcs_enabled_thread) +{ + pthread_t thread; + void *thread_ret; + int ret; + + ret = pthread_create(&thread, NULL, gcs_test_thread, NULL); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ret = pthread_join(thread, &thread_ret); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ASSERT_TRUE(thread_ret != NULL); +} + +/* Read the GCS until we find the terminator */ +TEST(gcs_find_terminator) +{ + unsigned long *gcs, *cur; + + gcs = get_gcspr(); + cur = gcs; + while (*cur) + cur++; + + ksft_print_msg("GCS in use from %p-%p\n", gcs, cur); + + /* + * We should have at least whatever called into this test so + * the two pointer should differ. + */ + ASSERT_TRUE(gcs != cur); +} + +/* + * We can access a GCS via ptrace + * + * This could usefully have a fixture but note that each test is + * fork()ed into a new child whcih causes issues. Might be better to + * lift at least some of this out into a separate, non-harness, test + * program. + */ +TEST(ptrace_read_write) +{ + pid_t child, pid; + int ret, status; + siginfo_t si; + uint64_t val, rval, gcspr; + struct user_gcs child_gcs; + struct iovec iov, local_iov, remote_iov; + + child = fork(); + if (child == -1) { + ksft_print_msg("fork() failed: %d (%s)\n", + errno, strerror(errno)); + ASSERT_NE(child, -1); + } + + if (child == 0) { + /* + * In child, make sure there's something on the stack and + * ask to be traced. 
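+		 * PTRACE_TRACEME followed by raise(SIGSTOP) is the
+		 * usual handshake; the parent waits for the stop
+		 * before inspecting our GCS.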
+ */ + gcs_recurse(0); + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME %s", + strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP) %s", + strerror(errno)); + + return; + } + + ksft_print_msg("Child: %d\n", child); + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + ksft_print_msg("wait() failed: %s", + strerror(errno)); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_print_msg("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno)); + goto error; + } + } + + /* Where is the child GCS? */ + iov.iov_base = &child_gcs; + iov.iov_len = sizeof(child_gcs); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read child GCS state: %s (%d)\n", + strerror(errno), errno); + goto error; + } + + /* We should have inherited GCS over fork(), confirm */ + if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) { + ASSERT_TRUE(child_gcs.features_enabled & + PR_SHADOW_STACK_ENABLE); + goto error; + } + + gcspr = child_gcs.gcspr_el0; + ksft_print_msg("Child GCSPR 0x%lx, flags %llx, locked %llx\n", + gcspr, child_gcs.features_enabled, + child_gcs.features_locked); + + /* Ideally we'd cross check with the child memory map */ + + errno = 0; + val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL); + ret = errno; + if (ret != 0) + ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n", + strerror(ret), ret); + EXPECT_EQ(ret, 0); + + /* The child should be in a function, the GCSPR shouldn't be 0 */ + EXPECT_NE(val, 0); + + /* Same thing via process_vm_readv() */ + local_iov.iov_base = &rval; + local_iov.iov_len = sizeof(rval); + remote_iov.iov_base = (void *)gcspr; + remote_iov.iov_len = sizeof(rval); + ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0); + if (ret == -1) + ksft_print_msg("process_vm_readv() failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, sizeof(rval)); + EXPECT_EQ(val, rval); + + /* Write data via a peek */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* Restore what we had before */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* That's all, folks */ + kill(child, SIGKILL); + return; + +error: + kill(child, SIGKILL); + ASSERT_FALSE(true); +} + +FIXTURE(map_gcs) +{ + unsigned long *stack; +}; + +FIXTURE_VARIANT(map_gcs) +{ + size_t stack_size; + unsigned long flags; +}; + 
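+/*
+ * The variants below sweep stack sizes from 2k to 128k against each
+ * combination of SHADOW_STACK_SET_MARKER and SHADOW_STACK_SET_TOKEN.
+ * For a stack of size sz the cap token, when requested, lands at index
+ * sz / sizeof(unsigned long) - 1, or - 2 when a terminator marker is
+ * also placed above it; stack_capped below checks exactly this.
+ */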
+FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker)
+{
+	.stack_size = 2 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k_cap)
+{
+	.stack_size = 2 * 1024,
+	.flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k_marker)
+{
+	.stack_size = 2 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s2k)
+{
+	.stack_size = 2 * 1024,
+	.flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker)
+{
+	.stack_size = 4 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k_cap)
+{
+	.stack_size = 4 * 1024,
+	.flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k_marker)
+{
+	.stack_size = 4 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s4k)
+{
+	.stack_size = 4 * 1024,
+	.flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker)
+{
+	.stack_size = 16 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_cap)
+{
+	.stack_size = 16 * 1024,
+	.flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k_marker)
+{
+	.stack_size = 16 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s16k)
+{
+	.stack_size = 16 * 1024,
+	.flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker)
+{
+	.stack_size = 64 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_cap)
+{
+	.stack_size = 64 * 1024,
+	.flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k_marker)
+{
+	.stack_size = 64 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s64k)
+{
+	.stack_size = 64 * 1024,
+	.flags = 0,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker)
+{
+	.stack_size = 128 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_cap)
+{
+	.stack_size = 128 * 1024,
+	.flags = SHADOW_STACK_SET_TOKEN,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k_marker)
+{
+	.stack_size = 128 * 1024,
+	.flags = SHADOW_STACK_SET_MARKER,
+};
+
+FIXTURE_VARIANT_ADD(map_gcs, s128k)
+{
+	.stack_size = 128 * 1024,
+	.flags = 0,
+};
+
+FIXTURE_SETUP(map_gcs)
+{
+	self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
+				      variant->stack_size,
+				      variant->flags);
+	ASSERT_FALSE(self->stack == MAP_FAILED);
+	ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
+		       (char *)self->stack + variant->stack_size);
+}
+
+FIXTURE_TEARDOWN(map_gcs)
+{
+	int ret;
+
+	if (self->stack != MAP_FAILED) {
+		ret = munmap(self->stack, variant->stack_size);
+		ASSERT_EQ(ret, 0);
+	}
+}
+
+/* The stack has a cap token */
+TEST_F(map_gcs, stack_capped)
+{
+	unsigned long *stack = self->stack;
+	size_t cap_index;
+
+	cap_index = (variant->stack_size / sizeof(unsigned long));
+
+	switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+	case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+		cap_index -= 2;
+		break;
+	case SHADOW_STACK_SET_TOKEN:
+		cap_index -= 1;
+		break;
+	case SHADOW_STACK_SET_MARKER:
+	case 0:
+		/* No cap, no test */
+		return;
+	}
+
+	ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index]));
+}
+
+/* The top of the stack is 0 */
+TEST_F(map_gcs, stack_terminated)
+{
+	unsigned long *stack = self->stack;
+	size_t term_index;
+
+	if (!(variant->flags & SHADOW_STACK_SET_MARKER))
+		return;
+
+	term_index = (variant->stack_size / sizeof(unsigned long)) - 1;
+
+	ASSERT_EQ(stack[term_index], 0);
+}
+
+/* Writes should fault */
+TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV)
+{
+	self->stack[0] = 0;
+}
+
+/* Put it all together, we can safely switch to and from the stack */
+TEST_F(map_gcs, stack_switch)
+{
+	size_t cap_index;
+	unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
+
+	cap_index = variant->stack_size / sizeof(unsigned long);
+
+	/* Skip over the stack terminator and point at the cap */
+	switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+	case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+		cap_index -= 2;
+		break;
+	case SHADOW_STACK_SET_TOKEN:
+		cap_index -= 1;
+		break;
+	case SHADOW_STACK_SET_MARKER:
+	case 0:
+		/* No cap, no test */
+		return;
+	}
+	pivot_gcspr_el0 = &self->stack[cap_index];
+
+	/* Pivot to the new GCS */
+	ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
+		       pivot_gcspr_el0, get_gcspr(),
+		       *pivot_gcspr_el0);
+	gcsss1(pivot_gcspr_el0);
+	orig_gcspr_el0 = gcsss2();
+	ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
+		       get_gcspr(), orig_gcspr_el0,
+		       *pivot_gcspr_el0);
+
+	ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
+
+	/* New GCS must be in the new buffer */
+	ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
+	ASSERT_TRUE((unsigned long)get_gcspr() <=
+		    (unsigned long)self->stack + variant->stack_size);
+
+	/* We should be able to use all but 2 slots of the new stack */
+	ksft_print_msg("Recursing %zu levels\n", cap_index - 1);
+	gcs_recurse(cap_index - 1);
+
+	/* Pivot back to the original GCS */
+	gcsss1(orig_gcspr_el0);
+	pivot_gcspr_el0 = gcsss2();
+
+	gcs_recurse(0);
+	ksft_print_msg("Pivoted back to GCSPR_EL0 %p\n", get_gcspr());
+}
+
+/* We fault if we try to go beyond the end of the stack */
+TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV)
+{
+	size_t cap_index;
+	unsigned long *orig_gcspr_el0, *pivot_gcspr_el0;
+
+	cap_index = variant->stack_size / sizeof(unsigned long);
+
+	/* Skip over the stack terminator and point at the cap */
+	switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) {
+	case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN:
+		cap_index -= 2;
+		break;
+	case SHADOW_STACK_SET_TOKEN:
+		cap_index -= 1;
+		break;
+	case SHADOW_STACK_SET_MARKER:
+	case 0:
+		/* No cap, no test but we need to SEGV to avoid a false fail */
+		orig_gcspr_el0 = get_gcspr();
+		*orig_gcspr_el0 = 0;
+		return;
+	}
+	pivot_gcspr_el0 = &self->stack[cap_index];
+
+	/* Pivot to the new GCS */
+	ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n",
+		       pivot_gcspr_el0, get_gcspr(),
+		       *pivot_gcspr_el0);
+	gcsss1(pivot_gcspr_el0);
+	orig_gcspr_el0 = gcsss2();
+	ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n",
+		       pivot_gcspr_el0, orig_gcspr_el0,
+		       *pivot_gcspr_el0);
+
+	ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr());
+
+	/* New GCS must be in the new buffer */
+	ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack);
+	ASSERT_TRUE((unsigned long)get_gcspr() <=
+		    (unsigned long)self->stack + variant->stack_size);
+
+	/* Now try to recurse; we should fault doing this. */
+	ksft_print_msg("Recursing %zu levels...\n", cap_index + 1);
+	gcs_recurse(cap_index + 1);
+	ksft_print_msg("...done\n");
+
+	/*
+	 * Clean up properly to try to guard against spurious passes.
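+	 * If the recursion somehow returned without faulting, pivot
+	 * back to the original GCS so the harness reports an ordinary
+	 * failure rather than crashing on the way out.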
+	 */
+	gcsss1(orig_gcspr_el0);
+	pivot_gcspr_el0 = gcsss2();
+	ksft_print_msg("Pivoted back to GCSPR_EL0 %p\n", get_gcspr());
+}
+
+FIXTURE(map_invalid_gcs)
+{
+};
+
+FIXTURE_VARIANT(map_invalid_gcs)
+{
+	size_t stack_size;
+};
+
+FIXTURE_SETUP(map_invalid_gcs)
+{
+}
+
+FIXTURE_TEARDOWN(map_invalid_gcs)
+{
+}
+
+/* GCS must be larger than 16 bytes */
+FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small)
+{
+	.stack_size = 8,
+};
+
+/* GCS size must be 16 byte aligned */
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_1) { .stack_size = 1024 + 1 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_2) { .stack_size = 1024 + 2 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_3) { .stack_size = 1024 + 3 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_4) { .stack_size = 1024 + 4 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_5) { .stack_size = 1024 + 5 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_6) { .stack_size = 1024 + 6 };
+FIXTURE_VARIANT_ADD(map_invalid_gcs, unaligned_7) { .stack_size = 1024 + 7 };
+
+TEST_F(map_invalid_gcs, do_map)
+{
+	void *stack;
+
+	stack = (void *)syscall(__NR_map_shadow_stack, 0,
+				variant->stack_size, 0);
+	ASSERT_TRUE(stack == MAP_FAILED);
+	if (stack != MAP_FAILED)
+		munmap(stack, variant->stack_size);
+}
+
+FIXTURE(invalid_mprotect)
+{
+	unsigned long *stack;
+	size_t stack_size;
+};
+
+FIXTURE_VARIANT(invalid_mprotect)
+{
+	unsigned long flags;
+};
+
+FIXTURE_SETUP(invalid_mprotect)
+{
+	self->stack_size = sysconf(_SC_PAGE_SIZE);
+	self->stack = (void *)syscall(__NR_map_shadow_stack, 0,
+				      self->stack_size, 0);
+	ASSERT_FALSE(self->stack == MAP_FAILED);
+	ksft_print_msg("Allocated stack from %p-%p\n", self->stack,
+		       (char *)self->stack + self->stack_size);
+}
+
+FIXTURE_TEARDOWN(invalid_mprotect)
+{
+	int ret;
+
+	if (self->stack != MAP_FAILED) {
+		ret = munmap(self->stack, self->stack_size);
+		ASSERT_EQ(ret, 0);
+	}
+}
+
+FIXTURE_VARIANT_ADD(invalid_mprotect, exec)
+{
+	.flags = PROT_EXEC,
+};
+
+TEST_F(invalid_mprotect, do_map)
+{
+	int ret;
+
+	ret = mprotect(self->stack, self->stack_size, variant->flags);
+	ASSERT_EQ(ret, -1);
+}
+
+TEST_F(invalid_mprotect, do_map_read)
+{
+	int ret;
+
+	ret = mprotect(self->stack, self->stack_size,
+		       variant->flags | PROT_READ);
+	ASSERT_EQ(ret, -1);
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long gcs_mode;
+	int ret;
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+		ksft_exit_skip("SKIP GCS not supported\n");
+
+	/*
+	 * Force shadow stacks on; our tests *should* be fine with or
+	 * without libc support and with or without this having ended
+	 * up tagged for GCS and enabled by the dynamic linker.  We
+	 * can't use the libc prctl() function since we can't return
+	 * from enabling the stack.
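+	 * (The map_shadow_stack() calls in the fixtures above are raw
+	 * syscall()s for a similar reason: we do not assume libc
+	 * provides a wrapper for it yet.)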
+ */ + ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { + gcs_mode = PR_SHADOW_STACK_ENABLE; + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + gcs_mode); + if (ret) { + ksft_print_msg("Failed to configure GCS: %d\n", ret); + return EXIT_FAILURE; + } + } + + /* Avoid returning in case libc doesn't understand GCS */ + exit(test_harness_run(argc, argv)); +} diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 1dbbbd47dd50..2ee7f114d7fa 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -91,7 +91,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } @@ -189,7 +189,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c new file mode 100644 index 000000000000..303260a6dc65 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Ampere Computing LLC + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +static bool is_hugetlb_allocated(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return false; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugetlb: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + + if (hps > 0) + return true; + + return false; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void allocate_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 2); +} + +static void free_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 0); +} + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + 
mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0; i < size; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag (%d) mismatch\n", i); + fault = 1; + goto check_child_tag_inheritance_err; + } + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)map_ptr, map_size); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)map_ptr, map_size); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, map_size, mode, tag_check); + mte_clear_tags((void *)ptr, map_size); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *map_ptr; + int prot_flag, result; + unsigned long map_size; + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_size = default_huge_page_size(); + map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(map_ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, + 0, 0); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(map_ptr, map_size, mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, 0, 0); + if (result != KSFT_PASS) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, map_size, mode); + mte_free_memory_tag_range((void *)ptr, 
map_size, mem_type, 0, 0); + if (result == KSFT_FAIL) + return result; + + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + allocate_hugetlb(); + + if (!is_hugetlb_allocated()) { + ksft_print_msg("ERR: Unable allocate hugetlb pages\n"); + return KSFT_FAIL; + } + + /* Set test plan */ + ksft_set_plan(12); + + mte_enable_pstate_tco(); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, no error mode, mmap memory and tag check off\n"); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + free_hugetlb(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? 
KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index f139a33a43ef..4c89e9538ca0 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -85,7 +85,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", - (ret & PR_MTE_TCF_MASK), mask); + (ret & (int)PR_MTE_TCF_MASK), mask); ksft_test_result_fail("%s\n", name); } } diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 2b1425b92b69..a3d1e23fe02a 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -65,7 +65,7 @@ static int check_single_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%x\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAG(tag)); result = KSFT_FAIL; @@ -97,7 +97,7 @@ static int check_multiple_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%lx\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAGS(excl_mask)); result = KSFT_FAIL; diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 00ffd34c66d3..a1dc2fe5285b 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -38,7 +38,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) if (cur_mte_cxt.trig_si_code == si->si_code) cur_mte_cxt.fault_valid = true; else - ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n", + ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=%llx, fault addr=%lx\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr); return; @@ -64,7 +64,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) exit(1); } } else if (signum == SIGBUS) { - ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && @@ -100,7 +100,7 @@ void *mte_insert_tags(void *ptr, size_t size) int align_size; if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return NULL; } align_size = MT_ALIGN_UP(size); @@ -112,7 +112,7 @@ void *mte_insert_tags(void *ptr, size_t size) void mte_clear_tags(void *ptr, size_t size) { if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return; } size = MT_ALIGN_UP(size); @@ -150,13 +150,13 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, map_flag |= MAP_PRIVATE; ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); if (ptr == MAP_FAILED) { - 
ksft_print_msg("FAIL: mmap allocation\n"); + ksft_perror("mmap()"); return NULL; } if (mem_type == USE_MPROTECT) { if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + ksft_perror("mprotect(PROT_MTE)"); munmap(ptr, size); - ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); return NULL; } } @@ -190,13 +190,13 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, size - index) != size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); @@ -217,12 +217,12 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, map_size - index) != map_size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, @@ -319,10 +319,9 @@ int mte_default_setup(void) unsigned long en = 0; int ret; - if (!(hwcaps2 & HWCAP2_MTE)) { - ksft_print_msg("SKIP: MTE features unavailable\n"); - return KSFT_SKIP; - } + if (!(hwcaps2 & HWCAP2_MTE)) + ksft_exit_skip("MTE features unavailable\n"); + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -359,7 +358,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { - perror(filename); + ksft_perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 2d3e71724e55..a0017a303beb 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -77,13 +77,13 @@ static inline void evaluate_test(int err, const char *msg) { switch (err) { case KSFT_PASS: - ksft_test_result_pass(msg); + ksft_test_result_pass("%s", msg); break; case KSFT_FAIL: - ksft_test_result_fail(msg); + ksft_test_result_fail("%s", msg); break; case KSFT_SKIP: - ksft_test_result_skip(msg); + ksft_test_result_skip("%s", msg); break; default: ksft_test_result_error("Unknown return code %d from %s", diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 72e290b0b10c..b5a1c80e0ead 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -7,8 +7,14 @@ CC := $(CROSS_COMPILE)gcc endif CFLAGS += -mbranch-protection=pac-ret + +# All supported LLVMs have PAC, test for GCC +ifeq ($(LLVM),1) +pauth_cc_support := 1 +else # check if the compiler supports ARMv8.3 and branch protection with PAuth pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) +endif ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac diff --git a/tools/testing/selftests/arm64/pauth/pac.c 
b/tools/testing/selftests/arm64/pauth/pac.c index b743daa772f5..6d21b2fc758d 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -13,7 +13,7 @@ #include "../../kselftest_harness.h" #include "helper.h" -#define PAC_COLLISION_ATTEMPTS 10 +#define PAC_COLLISION_ATTEMPTS 1000 /* * The kernel sets TBID by default. So bits 55 and above should remain * untouched no matter what. @@ -182,6 +182,9 @@ int exec_sign_all(struct signatures *signed_vals, size_t val) return -1; } + close(new_stdin[1]); + close(new_stdout[0]); + return 0; } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index b2f2bfd5c6aa..b257db665a35 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -3,6 +3,7 @@ mangle_* fake_sigreturn_* fpmr_* poe_* +gcs_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index edb3613513b8..1381039fb36f 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -2,7 +2,7 @@ # Copyright (C) 2019 ARM Limited # Additional include paths needed by kselftest.h and local headers -CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) PROGS := $(patsubst %.c,%,$(SRCS)) diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h index 50948ce471cc..ca133b93375f 100644 --- a/tools/testing/selftests/arm64/signal/sve_helpers.h +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -18,4 +18,17 @@ extern unsigned int nvls; int sve_fill_vls(bool use_sme, int min_vls); +static inline uint64_t get_svcr(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, S3_3_C4_C2_2\n" + : "=r"(val) + : + : "cc"); + + return val; +} + #endif diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index 00051b40d71e..1304c8ec0f2f 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -7,6 +7,10 @@ * Each test provides its own tde struct tdescr descriptor to link with * this wrapper. Framework provides common helpers. */ + +#include +#include + #include #include "test_signals.h" @@ -16,6 +20,16 @@ struct tdescr *current = &tde; int main(int argc, char *argv[]) { + /* + * Ensure GCS is at least enabled throughout the tests if + * supported, otherwise the inability to return from the + * function that enabled GCS makes it very inconvenient to set + * up test cases. The prctl() may fail if GCS was locked by + * libc setup code. 
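+	 * gcs_set_state() (test_signals_utils.h) issues the prctl via
+	 * a raw svc rather than through libc so that no libc function
+	 * has to return with the new shadow stack already active.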
+ */ + if (getauxval(AT_HWCAP) & HWCAP_GCS) + gcs_set_state(PR_SHADOW_STACK_ENABLE); + ksft_print_msg("%s :: %s\n", current->name, current->descr); if (test_setup(current) && test_init(current)) { test_run(current); @@ -23,5 +37,6 @@ int main(int argc, char *argv[]) } test_result(current); - return current->result; + /* Do not return in case GCS was enabled */ + exit(current->result); } diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index 1e6273d81575..ee75a2c25ce7 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -35,6 +35,7 @@ enum { FSME_BIT, FSME_FA64_BIT, FSME2_BIT, + FGCS_BIT, FMAX_END }; @@ -43,6 +44,7 @@ enum { #define FEAT_SME (1UL << FSME_BIT) #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) #define FEAT_SME2 (1UL << FSME2_BIT) +#define FEAT_GCS (1UL << FGCS_BIT) /* * A descriptor used to describe and configure a test case. @@ -69,6 +71,10 @@ struct tdescr { * Zero when no signal is expected on success */ int sig_ok; + /* + * expected si_code for sig_ok, or 0 to not check + */ + int sig_ok_code; /* signum expected on unsupported CPU features. */ int sig_unsupp; /* a timeout in second for test completion */ diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 0dc948db3a4a..5d3621921cfe 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -30,6 +30,7 @@ static char const *const feats_names[FMAX_END] = { " SME ", " FA64 ", " SME2 ", + " GCS ", }; #define MAX_FEATS_SZ 128 @@ -142,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td, "current->token ZEROED...test is probably broken!\n"); abort(); } - /* - * Trying to narrow down the SEGV to the ones generated by Kernel itself - * via arm64_notify_segfault(). This is a best-effort check anyway, and - * the si_code check may need to change if this aspect of the kernel - * ABI changes. - */ - if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { - fprintf(stdout, - "si_code != SEGV_ACCERR...test is probably broken!\n"); - abort(); + if (td->sig_ok_code) { + if (si->si_code != td->sig_ok_code) { + fprintf(stdout, "si_code is %d not %d\n", + si->si_code, td->sig_ok_code); + abort(); + } + } else { + /* + * Trying to narrow down the SEGV to the ones + * generated by Kernel itself via + * arm64_notify_segfault(). This is a best-effort + * check anyway, and the si_code check may need to + * change if this aspect of the kernel ABI changes. 
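+			 * Tests expecting some other code set
+			 * td->sig_ok_code instead; the GCS tests, for
+			 * example, use it to check for SEGV_CPERR.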
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 762c8fe9c54a..36fc12b3cd60 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -6,6 +6,7 @@
 
 #include <assert.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <string.h>
 
 #include <linux/compiler.h>
@@ -18,6 +19,44 @@ void test_cleanup(struct tdescr *td);
 int test_run(struct tdescr *td);
 void test_result(struct tdescr *td);
 
+#ifndef __NR_prctl
+#define __NR_prctl 167
+#endif
+
+/*
+ * The prctl takes 1 argument but we need to ensure that the other
+ * values passed in registers to the syscall are zero since the kernel
+ * validates them.
+ */
+#define gcs_set_state(state)					\
+	({							\
+		register long _num __asm__ ("x8") = __NR_prctl;	\
+		register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \
+		register long _arg2 __asm__ ("x1") = (long)(state); \
+		register long _arg3 __asm__ ("x2") = 0;		\
+		register long _arg4 __asm__ ("x3") = 0;		\
+		register long _arg5 __asm__ ("x4") = 0;		\
+								\
+		__asm__ volatile (				\
+			"svc #0\n"				\
+			: "=r"(_arg1)				\
+			: "r"(_arg1), "r"(_arg2),		\
+			  "r"(_arg3), "r"(_arg4),		\
+			  "r"(_arg5), "r"(_num)			\
+			: "memory", "cc"			\
+		);						\
+		_arg1;						\
+	})
+
+static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void)
+{
+	uint64_t val;
+
+	asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val));
+
+	return val;
+}
+
 static inline bool feats_ok(struct tdescr *td)
 {
 	if (td->feats_incompatible & td->feats_supported)
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
new file mode 100644
index 000000000000..6228448b2ae7
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+/*
+ * We should get this from asm/siginfo.h but the testsuite is being
+ * clever with redefining siginfo_t.
+ */
+#ifndef SEGV_CPERR
+#define SEGV_CPERR 10
+#endif
+
+static inline void gcsss1(uint64_t Xt)
+{
+	asm volatile (
+		"sys #3, C7, C7, #2, %0\n"
+		:
+		: "rZ" (Xt)
+		: "memory");
+}
+
+static int gcs_op_fault_trigger(struct tdescr *td)
+{
+	/*
+	 * The slot below our current GCS should be in a valid GCS but
+	 * must not have a valid cap in it.
+	 */
+	gcsss1(get_gcspr_el0() - 8);
+
+	return 0;
+}
+
+static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si,
+			       ucontext_t *uc)
+{
+	ASSERT_GOOD_CONTEXT(uc);
+
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "Invalid GCS operation",
+	.descr = "An invalid GCS operation generates the expected signal",
+	.feats_required = FEAT_GCS,
+	.timeout = 3,
+	.sig_ok = SIGSEGV,
+	.sig_ok_code = SEGV_CPERR,
+	.sanity_disabled = true,
+	.trigger = gcs_op_fault_trigger,
+	.run = gcs_op_fault_signal,
+};
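gcs_set_state() is deliberately an inline assembly macro rather than a call
through a libc wrapper: the unused argument registers must be zero because
the kernel validates them, and, as the main() comment above notes, no
function return may cross the point where GCS becomes live, since that
function's return address was never pushed to the new shadow stack. Usage is
therefore a direct expansion at the call site; a sketch::

  long ret;

  /* Expands to a bare svc, so GCS can switch on mid-expression safely */
  ret = gcs_set_state(PR_SHADOW_STACK_ENABLE);
  if (ret)
  	ksft_print_msg("GCS enable failed: %ld\n", ret);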
diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
new file mode 100644
index 000000000000..b405d82321da
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+	ucontext_t uc;
+	char buf[1024 * 64];
+} context;
+
+static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	size_t offset;
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+	struct gcs_context *gcs;
+	unsigned long expected, gcspr;
+	uint64_t *u64_val;
+	int ret;
+
+	ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0);
+	if (ret != 0) {
+		fprintf(stderr, "Unable to query GCS status\n");
+		return 1;
+	}
+
+	/* We expect a cap to be added to the GCS in the signal frame */
+	gcspr = get_gcspr_el0();
+	gcspr -= 8;
+	fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr);
+
+	if (!get_current_context(td, &context.uc, sizeof(context))) {
+		fprintf(stderr, "Failed getting context\n");
+		return 1;
+	}
+
+	/* Ensure that the signal restore token was consumed */
+	u64_val = (uint64_t *)get_gcspr_el0() + 1;
+	if (*u64_val) {
+		fprintf(stderr, "GCS value at %p is %lx not 0\n",
+			u64_val, *u64_val);
+		return 1;
+	}
+
+	fprintf(stderr, "Got context\n");
+
+	head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context),
+			  &offset);
+	if (!head) {
+		fprintf(stderr, "No GCS context\n");
+		return 1;
+	}
+
+	gcs = (struct gcs_context *)head;
+
+	/* Basic size validation is done in get_current_context() */
+
+	if (gcs->features_enabled != expected) {
+		fprintf(stderr, "Features enabled %llx but expected %lx\n",
+			gcs->features_enabled, expected);
+		return 1;
+	}
+
+	if (gcs->gcspr != gcspr) {
+		fprintf(stderr, "Got GCSPR %llx but expected %lx\n",
+			gcs->gcspr, gcspr);
+		return 1;
+	}
+
+	fprintf(stderr, "GCS context validated\n");
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "GCS basics",
+	.descr = "Validate a GCS signal context",
+	.feats_required = FEAT_GCS,
+	.timeout = 3,
+	.run = gcs_regs,
+};
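gcs_frame.c walks the sigcontext reserved area looking for a GCS_MAGIC
record. A sketch of the record it parses, inferred from the field accesses in
the test and the layout conventions of the other arm64 context records (the
authoritative definition is in the kernel's uapi sigcontext.h)::

  struct gcs_context {
  	struct _aarch64_ctx head;	/* head.magic == GCS_MAGIC */
  	__u64 gcspr;			/* GCSPR_EL0 at signal delivery */
  	__u64 features_enabled;		/* as per PR_GET_SHADOW_STACK_STATUS */
  	__u64 reserved;
  };

The test expects the saved gcspr to be 8 bytes below the pre-signal value
because the kernel pushes a cap onto the shadow stack when it builds the
signal frame.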
+		fprintf(stderr, "Failed to map %ld byte GCS: %d\n",
+			page_size, errno);
+		return false;
+	}
+
+	return true;
+}
+
+static int gcs_write_fault_trigger(struct tdescr *td)
+{
+	/* Verify that the page is readable (ie, not completely unmapped) */
+	fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]);
+
+	/* A regular write should trigger a fault */
+	gcs_page[0] = EINVAL;
+
+	return 0;
+}
+
+static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si,
+				  ucontext_t *uc)
+{
+	ASSERT_GOOD_CONTEXT(uc);
+
+	return 1;
+}
+
+
+struct tdescr tde = {
+	.name = "GCS write fault",
+	.descr = "Normal writes to a GCS segfault",
+	.feats_required = FEAT_GCS,
+	.timeout = 3,
+	.sig_ok = SIGSEGV,
+	.sanity_disabled = true,
+	.init = alloc_gcs,
+	.trigger = gcs_write_fault_trigger,
+	.run = gcs_write_fault_signal,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
index 6dbe48cf8b09..1dbca9afb13c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -85,6 +85,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 	fprintf(stderr, "Got expected size %u and VL %d\n",
 		head->size, ssve->vl);
 
+	if (get_svcr() != 0) {
+		fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr());
+		return 1;
+	}
+
 	return 0;
 }
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index e6daa94fcd2e..0c1a6b26afac 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -198,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 				*err = "Bad size for fpmr_context";
 			new_flags |= FPMR_CTX;
 			break;
+		case GCS_MAGIC:
+			if (flags & GCS_CTX)
+				*err = "Multiple GCS_MAGIC";
+			if (head->size != sizeof(struct gcs_context))
+				*err = "Bad size for gcs_context";
+			new_flags |= GCS_CTX;
+			break;
 		case EXTRA_MAGIC:
 			if (flags & EXTRA_CTX)
 				*err = "Multiple EXTRA_MAGIC";
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index 9872b8912714..98b97efdda23 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -20,6 +20,7 @@
 #define EXTRA_CTX (1 << 3)
 #define ZT_CTX (1 << 4)
 #define FPMR_CTX (1 << 5)
+#define GCS_CTX (1 << 6)
 
 #define KSFT_BAD_MAGIC 0xdeadbeef
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
index b9e13f27f1f9..badaead5326a 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -91,6 +91,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 		return 1;
 	}
 
+	if (get_svcr() != 0) {
+		fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr());
+		return 1;
+	}
+
 	return 0;
 }
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..0f8c110e0805 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -105,12 +105,12 @@ endif
 ifeq ($(CAN_BUILD_X86_64),1)
 TEST_GEN_FILES += $(BINARIES_64)
 endif
-else
-ifneq (,$(filter $(ARCH),arm64 powerpc))
+else ifeq ($(ARCH),arm64)
+TEST_GEN_FILES += protection_keys
+TEST_GEN_FILES += pkey_sighandler_tests
+else ifeq ($(ARCH),powerpc)
 TEST_GEN_FILES += protection_keys
-endif
-
 endif
 
 ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390))
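alloc_gcs() above obtains its page with the map_shadow_stack() syscall rather
than mmap(), since an ordinary mapping cannot carry the GCS attribute. A
hedged sketch of a slightly more general helper, assuming syscall number 453
as the test itself does::

  #include <sys/mman.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  #ifndef __NR_map_shadow_stack
  #define __NR_map_shadow_stack 453
  #endif

  /* addr = 0 lets the kernel choose placement; size must be page aligned */
  static void *alloc_shadow_stack(size_t size)
  {
  	void *gcs = (void *)syscall(__NR_map_shadow_stack, 0UL, size, 0UL);

  	return gcs == MAP_FAILED ? NULL : gcs;
  }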
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
index 580e1b0bb38e..d9d2100eafc0 100644
--- a/tools/testing/selftests/mm/pkey-arm64.h
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -31,6 +31,7 @@
 #define NR_RESERVED_PKEYS	1 /* pkey-0 */
 
 #define PKEY_ALLOW_ALL		0x77777777
+#define PKEY_REG_ALLOW_NONE	0x0
 
 #define PKEY_BITS_PER_PKEY	4
 #define PAGE_SIZE		sysconf(_SC_PAGESIZE)
@@ -126,7 +127,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
 	return 0;
 }
 
-static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
 {
 	struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
 	struct poe_context *poe_ctx =
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 9ab6a3ee153b..f7cfe163b0ff 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -112,6 +112,13 @@ void record_pkey_malloc(void *ptr, long size, int prot);
 #define PKEY_MASK	(PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
 #endif
 
+/*
+ * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged.
+ */
+#ifndef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED 0x0
+#endif
+
 #ifndef set_pkey_bits
 static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
 {
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 5f28e26a2511..ac91777c8917 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -34,6 +34,8 @@
 #define PAGE_SIZE		4096
 #define MB			(1<<20)
 
+#define PKEY_REG_ALLOW_NONE	0x55555555
+
 static inline void __page_o_noops(void)
 {
 	/* 8-bytes of instruction * 512 bytes = 1 page */
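The two PKEY_REG_ALLOW_NONE values added above differ because the register
formats differ: x86's PKRU has two bits per key (access-disable and
write-disable), so denying everything is 0x55555555 across 16 keys, while
arm64's POR_EL0 has a four-bit permission field per key where 0 means no
access. The pkey_sighandler_tests.c changes that follow compose test values
from this per-architecture baseline instead of hard-coding PKRU constants,
along these lines::

  u64 pkey_reg = pkey_reg_restrictive_default();

  /* Re-enable data access through pkey 2 only, on top of the baseline */
  pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
  __write_pkey_reg(pkey_reg);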
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
index a8088b645ad6..c593a426341c 100644
--- a/tools/testing/selftests/mm/pkey_sighandler_tests.c
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -11,6 +11,7 @@
  */
 #define _GNU_SOURCE
 #define __SANE_USERSPACE_TYPES__
+#include
 #include
 #include
 #include
@@ -59,12 +60,58 @@ long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6)
 		     : "=a"(ret)
 		     : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5)
 		     : "memory");
+#elif defined __aarch64__
+	register long x0 asm("x0") = a1;
+	register long x1 asm("x1") = a2;
+	register long x2 asm("x2") = a3;
+	register long x3 asm("x3") = a4;
+	register long x4 asm("x4") = a5;
+	register long x5 asm("x5") = a6;
+	register long x8 asm("x8") = n;
+	asm volatile ("svc #0"
+		      : "=r"(x0)
+		      : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8)
+		      : "memory");
+	ret = x0;
 #else
 # error syscall_raw() not implemented
 #endif
 
 	return ret;
 }
 
+static inline long clone_raw(unsigned long flags, void *stack,
+			     int *parent_tid, int *child_tid)
+{
+	long a1 = flags;
+	long a2 = (long)stack;
+	long a3 = (long)parent_tid;
+#if defined(__x86_64__) || defined(__i386)
+	long a4 = (long)child_tid;
+	long a5 = 0;
+#elif defined(__aarch64__)
+	long a4 = 0;
+	long a5 = (long)child_tid;
+#else
+# error clone_raw() not implemented
+#endif
+
+	return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0);
+}
+
+/*
+ * Returns the most restrictive pkey register value that can be used by the
+ * tests.
+ */
+static inline u64 pkey_reg_restrictive_default(void)
+{
+	/*
+	 * Disallow everything except execution on pkey 0, so that each caller
+	 * doesn't need to enable it explicitly (the selftest code runs with
+	 * its code mapped with pkey 0).
+	 */
+	return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS);
+}
+
 static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext)
 {
 	pthread_mutex_lock(&mutex);
@@ -113,7 +160,7 @@ static void raise_sigusr2(void)
 static void *thread_segv_with_pkey0_disabled(void *ptr)
 {
 	/* Disable MPK 0 (and all others too) */
-	__write_pkey_reg(0x55555555);
+	__write_pkey_reg(pkey_reg_restrictive_default());
 
 	/* Segfault (with SEGV_MAPERR) */
 	*(int *) (0x1) = 1;
@@ -123,7 +170,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr)
 static void *thread_segv_pkuerr_stack(void *ptr)
 {
 	/* Disable MPK 0 (and all others too) */
-	__write_pkey_reg(0x55555555);
+	__write_pkey_reg(pkey_reg_restrictive_default());
 
 	/* After we disable MPK 0, we can't access the stack to return */
 	return NULL;
@@ -133,6 +180,7 @@ static void *thread_segv_maperr_ptr(void *ptr)
 {
 	stack_t *stack = ptr;
 	int *bad = (int *)1;
+	u64 pkey_reg;
 
 	/*
 	 * Setup alternate signal stack, which should be pkey_mprotect()ed by
@@ -142,7 +190,9 @@ static void *thread_segv_maperr_ptr(void *ptr)
 	syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
 
 	/* Disable MPK 0. Only MPK 1 is enabled. */
-	__write_pkey_reg(0x55555551);
+	pkey_reg = pkey_reg_restrictive_default();
+	pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+	__write_pkey_reg(pkey_reg);
 
 	/* Segfault */
 	*bad = 1;
@@ -240,6 +290,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 	int pkey;
 	int parent_pid = 0;
 	int child_pid = 0;
+	u64 pkey_reg;
 
 	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
 
@@ -257,7 +308,10 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 	assert(stack != MAP_FAILED);
 
 	/* Allow access to MPK 0 and MPK 1 */
-	__write_pkey_reg(0x55555550);
+	pkey_reg = pkey_reg_restrictive_default();
+	pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+	pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+	__write_pkey_reg(pkey_reg);
 
 	/* Protect the new stack with MPK 1 */
 	pkey = pkey_alloc(0, 0);
@@ -272,14 +326,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 	memset(&siginfo, 0, sizeof(siginfo));
 
 	/* Use clone to avoid newer glibcs using rseq on new threads */
-	long ret = syscall_raw(SYS_clone,
-			       CLONE_VM | CLONE_FS | CLONE_FILES |
-			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
-			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
-			       CLONE_DETACHED,
-			       (long) ((char *)(stack) + STACK_SIZE),
-			       (long) &parent_pid,
-			       (long) &child_pid, 0, 0);
+	long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+			     CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			     CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			     CLONE_DETACHED,
+			     stack + STACK_SIZE,
+			     &parent_pid,
+			     &child_pid);
 
 	if (ret < 0) {
 		errno = -ret;
@@ -307,7 +360,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 static void test_pkru_preserved_after_sigusr1(void)
 {
 	struct sigaction sa;
-	unsigned long pkru = 0x45454544;
+	u64 pkey_reg;
+
+	/* Allow access to MPK 0 and an arbitrary set of keys */
+	pkey_reg = pkey_reg_restrictive_default();
+	pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+	pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED);
+	pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED);
 
 	sa.sa_flags = SA_SIGINFO;
 
@@ -320,7 +379,7 @@ static void test_pkru_preserved_after_sigusr1(void)
 
 	memset(&siginfo, 0, sizeof(siginfo));
 
-	__write_pkey_reg(pkru);
+	__write_pkey_reg(pkey_reg);
 
 	raise(SIGUSR1);
 
@@ -330,7 +389,7 @@ static void test_pkru_preserved_after_sigusr1(void)
 	pthread_mutex_unlock(&mutex);
 
 	/* Ensure the pkru value is the same after returning from signal. */
-	ksft_test_result(pkru == __read_pkey_reg() &&
+	ksft_test_result(pkey_reg == __read_pkey_reg() &&
 			 siginfo.si_signo == SIGUSR1, "%s\n", __func__);
 }
 
@@ -347,6 +406,7 @@ static noinline void *thread_sigusr2_self(void *ptr)
 		'S', 'I', 'G', 'U', 'S', 'R', '2',
 		'.', '.', '.', '\n', '\0'};
 	stack_t *stack = ptr;
+	u64 pkey_reg;
 
 	/*
 	 * Setup alternate signal stack, which should be pkey_mprotect()ed by
@@ -356,7 +416,9 @@ static noinline void *thread_sigusr2_self(void *ptr)
 	syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
 
 	/* Disable MPK 0. Only MPK 2 is enabled. */
-	__write_pkey_reg(0x55555545);
+	pkey_reg = pkey_reg_restrictive_default();
+	pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+	__write_pkey_reg(pkey_reg);
 
 	raise_sigusr2();
 
@@ -384,6 +446,7 @@ static void test_pkru_sigreturn(void)
 	int pkey;
 	int parent_pid = 0;
 	int child_pid = 0;
+	u64 pkey_reg;
 
 	sa.sa_handler = SIG_DFL;
 	sa.sa_flags = 0;
@@ -418,7 +481,10 @@ static void test_pkru_sigreturn(void)
 	 * the current thread's stack is protected by the default MPK 0. Hence
 	 * both need to be enabled.
	 */
-	__write_pkey_reg(0x55555544);
+	pkey_reg = pkey_reg_restrictive_default();
+	pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+	pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+	__write_pkey_reg(pkey_reg);
 
 	/* Protect the stack with MPK 2 */
 	pkey = pkey_alloc(0, 0);
@@ -431,14 +497,13 @@ static void test_pkru_sigreturn(void)
 	sigstack.ss_size = STACK_SIZE;
 
 	/* Use clone to avoid newer glibcs using rseq on new threads */
-	long ret = syscall_raw(SYS_clone,
-			       CLONE_VM | CLONE_FS | CLONE_FILES |
-			       CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
-			       CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
-			       CLONE_DETACHED,
-			       (long) ((char *)(stack) + STACK_SIZE),
-			       (long) &parent_pid,
-			       (long) &child_pid, 0, 0);
+	long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+			     CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+			     CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+			     CLONE_DETACHED,
+			     stack + STACK_SIZE,
+			     &parent_pid,
+			     &child_pid);
 
 	if (ret < 0) {
 		errno = -ret;