diff mbox series

[v5,05/19] arm64: Detect if in a realm and set RIPAS RAM

Message ID 20240819131924.372366-6-steven.price@arm.com (mailing list archive)
State New
Headers show
Series arm64: Support for running as a guest in Arm CCA | expand

Commit Message

Steven Price Aug. 19, 2024, 1:19 p.m. UTC
From: Suzuki K Poulose <suzuki.poulose@arm.com>

Detect that the VM is a realm guest by the presence of the RSI
interface.

If in a realm then all memory needs to be marked as RIPAS RAM initially,
the loader may or may not have done this for us. To be sure iterate over
all RAM and mark it as such. Any failure is fatal as that implies the
RAM regions passed to Linux are incorrect - which would mean failing
later when attempting to access non-existent RAM.

Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
---
Changes since v4:
 * Minor tidy ups.
Changes since v3:
 * Provide safe/unsafe versions for converting memory to protected,
   using the safer version only for the early boot.
 * Use the new psci_early_test_conduit() function to avoid calling an
   SMC if EL3 is not present (or not configured to handle an SMC).
Changes since v2:
 * Use DECLARE_STATIC_KEY_FALSE rather than "extern struct
   static_key_false".
 * Rename set_memory_range() to rsi_set_memory_range().
 * Downgrade some BUG()s to WARN()s and handle the condition by
   propagating up the stack. Comment the remaining case that ends in a
   BUG() to explain why.
 * Rely on the return from rsi_request_version() rather than checking
   the version the RMM claims to support.
 * Rename the generic sounding arm64_setup_memory() to
   arm64_rsi_setup_memory() and move the call site to setup_arch().
---
 arch/arm64/include/asm/rsi.h | 65 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/Makefile   |  3 +-
 arch/arm64/kernel/rsi.c      | 78 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c    |  8 ++++
 4 files changed, 153 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/rsi.h
 create mode 100644 arch/arm64/kernel/rsi.c

Comments

Suzuki K Poulose Aug. 19, 2024, 2:04 p.m. UTC | #1
Hi Steven

On 19/08/2024 14:19, Steven Price wrote:
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Detect that the VM is a realm guest by the presence of the RSI
> interface.
> 
> If in a realm then all memory needs to be marked as RIPAS RAM initially,
> the loader may or may not have done this for us. To be sure iterate over
> all RAM and mark it as such. Any failure is fatal as that implies the
> RAM regions passed to Linux are incorrect - which would mean failing
> later when attempting to access non-existent RAM.
> 
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Co-developed-by: Steven Price <steven.price@arm.com>
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
> Changes since v4:
>   * Minor tidy ups.
> Changes since v3:
>   * Provide safe/unsafe versions for converting memory to protected,
>     using the safer version only for the early boot.
>   * Use the new psci_early_test_conduit() function to avoid calling an
>     SMC if EL3 is not present (or not configured to handle an SMC).
> Changes since v2:
>   * Use DECLARE_STATIC_KEY_FALSE rather than "extern struct
>     static_key_false".
>   * Rename set_memory_range() to rsi_set_memory_range().
>   * Downgrade some BUG()s to WARN()s and handle the condition by
>     propagating up the stack. Comment the remaining case that ends in a
>     BUG() to explain why.
>   * Rely on the return from rsi_request_version() rather than checking
>     the version the RMM claims to support.
>   * Rename the generic sounding arm64_setup_memory() to
>     arm64_rsi_setup_memory() and move the call site to setup_arch().
> ---
>   arch/arm64/include/asm/rsi.h | 65 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/Makefile   |  3 +-
>   arch/arm64/kernel/rsi.c      | 78 ++++++++++++++++++++++++++++++++++++
>   arch/arm64/kernel/setup.c    |  8 ++++
>   4 files changed, 153 insertions(+), 1 deletion(-)
>   create mode 100644 arch/arm64/include/asm/rsi.h
>   create mode 100644 arch/arm64/kernel/rsi.c
> 
> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
> new file mode 100644
> index 000000000000..2bc013badbc3
> --- /dev/null
> +++ b/arch/arm64/include/asm/rsi.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2024 ARM Ltd.
> + */
> +
> +#ifndef __ASM_RSI_H_
> +#define __ASM_RSI_H_
> +
> +#include <linux/jump_label.h>
> +#include <asm/rsi_cmds.h>
> +
> +DECLARE_STATIC_KEY_FALSE(rsi_present);
> +
> +void __init arm64_rsi_init(void);
> +void __init arm64_rsi_setup_memory(void);
> +static inline bool is_realm_world(void)
> +{
> +	return static_branch_unlikely(&rsi_present);
> +}
> +
> +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end,
> +				       enum ripas state, unsigned long flags)
> +{
> +	unsigned long ret;
> +	phys_addr_t top;
> +
> +	while (start != end) {
> +		ret = rsi_set_addr_range_state(start, end, state, flags, &top);
> +		if (WARN_ON(ret || top < start || top > end))
> +			return -EINVAL;
> +		start = top;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Convert the specified range to RAM. Do not use this if you rely on the
> + * contents of a page that may already be in RAM state.
> + */
> +static inline int rsi_set_memory_range_protected(phys_addr_t start,
> +						 phys_addr_t end)
> +{
> +	return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
> +				    RSI_CHANGE_DESTROYED);
> +}
> +
> +/*
> + * Convert the specified range to RAM. Do not convert any pages that may have
> + * been DESTROYED, without our permission.
> + */
> +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
> +						      phys_addr_t end)
> +{
> +	return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
> +				    RSI_NO_CHANGE_DESTROYED);
> +}
> +
> +static inline int rsi_set_memory_range_shared(phys_addr_t start,
> +					      phys_addr_t end)
> +{
> +	return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
> +				    RSI_NO_CHANGE_DESTROYED);

I think this should be RSI_CHANGE_DESTROYED, as we are transitioning a 
page to "shared" (i.e, IPA state to EMPTY) and we do not expect the data
to be retained over the transition. Thus we do not care if the IPA was
in RIPAS_DESTROYED.

Rest looks good to me.


Suzuki
Steven Price Aug. 19, 2024, 2:10 p.m. UTC | #2
On 19/08/2024 15:04, Suzuki K Poulose wrote:
> Hi Steven
> 
> On 19/08/2024 14:19, Steven Price wrote:
>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>>
>> Detect that the VM is a realm guest by the presence of the RSI
>> interface.
>>
>> If in a realm then all memory needs to be marked as RIPAS RAM initially,
>> the loader may or may not have done this for us. To be sure iterate over
>> all RAM and mark it as such. Any failure is fatal as that implies the
>> RAM regions passed to Linux are incorrect - which would mean failing
>> later when attempting to access non-existent RAM.
>>
>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Co-developed-by: Steven Price <steven.price@arm.com>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>> Changes since v4:
>>   * Minor tidy ups.
>> Changes since v3:
>>   * Provide safe/unsafe versions for converting memory to protected,
>>     using the safer version only for the early boot.
>>   * Use the new psci_early_test_conduit() function to avoid calling an
>>     SMC if EL3 is not present (or not configured to handle an SMC).
>> Changes since v2:
>>   * Use DECLARE_STATIC_KEY_FALSE rather than "extern struct
>>     static_key_false".
>>   * Rename set_memory_range() to rsi_set_memory_range().
>>   * Downgrade some BUG()s to WARN()s and handle the condition by
>>     propagating up the stack. Comment the remaining case that ends in a
>>     BUG() to explain why.
>>   * Rely on the return from rsi_request_version() rather than checking
>>     the version the RMM claims to support.
>>   * Rename the generic sounding arm64_setup_memory() to
>>     arm64_rsi_setup_memory() and move the call site to setup_arch().
>> ---
>>   arch/arm64/include/asm/rsi.h | 65 ++++++++++++++++++++++++++++++
>>   arch/arm64/kernel/Makefile   |  3 +-
>>   arch/arm64/kernel/rsi.c      | 78 ++++++++++++++++++++++++++++++++++++
>>   arch/arm64/kernel/setup.c    |  8 ++++
>>   4 files changed, 153 insertions(+), 1 deletion(-)
>>   create mode 100644 arch/arm64/include/asm/rsi.h
>>   create mode 100644 arch/arm64/kernel/rsi.c
>>
>> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
>> new file mode 100644
>> index 000000000000..2bc013badbc3
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/rsi.h
>> @@ -0,0 +1,65 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2024 ARM Ltd.
>> + */
>> +
>> +#ifndef __ASM_RSI_H_
>> +#define __ASM_RSI_H_
>> +
>> +#include <linux/jump_label.h>
>> +#include <asm/rsi_cmds.h>
>> +
>> +DECLARE_STATIC_KEY_FALSE(rsi_present);
>> +
>> +void __init arm64_rsi_init(void);
>> +void __init arm64_rsi_setup_memory(void);
>> +static inline bool is_realm_world(void)
>> +{
>> +    return static_branch_unlikely(&rsi_present);
>> +}
>> +
>> +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t
>> end,
>> +                       enum ripas state, unsigned long flags)
>> +{
>> +    unsigned long ret;
>> +    phys_addr_t top;
>> +
>> +    while (start != end) {
>> +        ret = rsi_set_addr_range_state(start, end, state, flags, &top);
>> +        if (WARN_ON(ret || top < start || top > end))
>> +            return -EINVAL;
>> +        start = top;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Convert the specified range to RAM. Do not use this if you rely on
>> the
>> + * contents of a page that may already be in RAM state.
>> + */
>> +static inline int rsi_set_memory_range_protected(phys_addr_t start,
>> +                         phys_addr_t end)
>> +{
>> +    return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
>> +                    RSI_CHANGE_DESTROYED);
>> +}
>> +
>> +/*
>> + * Convert the specified range to RAM. Do not convert any pages that
>> may have
>> + * been DESTROYED, without our permission.
>> + */
>> +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
>> +                              phys_addr_t end)
>> +{
>> +    return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
>> +                    RSI_NO_CHANGE_DESTROYED);
>> +}
>> +
>> +static inline int rsi_set_memory_range_shared(phys_addr_t start,
>> +                          phys_addr_t end)
>> +{
>> +    return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
>> +                    RSI_NO_CHANGE_DESTROYED);
> 
> I think this should be RSI_CHANGE_DESTROYED, as we are transitioning a
> page to "shared" (i.e, IPA state to EMPTY) and we do not expect the data
> to be retained over the transition. Thus we do not care if the IPA was
> in RIPAS_DESTROYED.

Fair point - although something has gone wrong if the VMM has destroyed
the memory we're calling this on. But it's not going to cause problems
using RSI_CHANGE_DESTROYED and might be (slightly) more efficient.

Thanks,

Steve

> Rest looks good to me.
> 
> 
> Suzuki
Catalin Marinas Aug. 26, 2024, 10:03 a.m. UTC | #3
On Mon, Aug 19, 2024 at 02:19:10PM +0100, Steven Price wrote:
> +static bool rsi_version_matches(void)
> +{
> +	unsigned long ver_lower, ver_higher;
> +	unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
> +						&ver_lower,
> +						&ver_higher);
> +
> +	if (ret == SMCCC_RET_NOT_SUPPORTED)
> +		return false;
> +
> +	if (ret != RSI_SUCCESS) {
> +		pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
> +		       RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
> +		       RSI_ABI_VERSION_GET_MAJOR(ver_lower),
> +		       RSI_ABI_VERSION_GET_MINOR(ver_lower),
> +		       RSI_ABI_VERSION_GET_MAJOR(ver_higher),
> +		       RSI_ABI_VERSION_GET_MINOR(ver_higher));
> +		return false;
> +	}
> +
> +	pr_info("RME: Using RSI version %lu.%lu\n",
> +		RSI_ABI_VERSION_GET_MAJOR(ver_lower),
> +		RSI_ABI_VERSION_GET_MINOR(ver_lower));
> +
> +	return true;
> +}

I don't have the spec at hand now (on a plane) but given the possibility
of a 1.0 guest regressing on later RMM versions, I wonder whether we
should simply bail out if it's not an exact version match. I forgot what
the spec says about returned ranges (they were pretty confusing last
time I checked).

> +
> +void __init arm64_rsi_setup_memory(void)
> +{
> +	u64 i;
> +	phys_addr_t start, end;
> +
> +	if (!is_realm_world())
> +		return;
> +
> +	/*
> +	 * Iterate over the available memory ranges and convert the state to
> +	 * protected memory. We should take extra care to ensure that we DO NOT
> +	 * permit any "DESTROYED" pages to be converted to "RAM".
> +	 *
> +	 * BUG_ON is used because if the attempt to switch the memory to
> +	 * protected has failed here, then future accesses to the memory are
> +	 * simply going to be reflected as a SEA (Synchronous External Abort)
> +	 * which we can't handle.  Bailing out early prevents the guest limping
> +	 * on and dying later.
> +	 */
> +	for_each_mem_range(i, &start, &end) {
> +		BUG_ON(rsi_set_memory_range_protected_safe(start, end));
> +	}

Would it help debugging if we print the memory ranges as well rather
than just a BUG_ON()?
Steven Price Aug. 30, 2024, 3:54 p.m. UTC | #4
On 26/08/2024 11:03, Catalin Marinas wrote:
> On Mon, Aug 19, 2024 at 02:19:10PM +0100, Steven Price wrote:
>> +static bool rsi_version_matches(void)
>> +{
>> +	unsigned long ver_lower, ver_higher;
>> +	unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
>> +						&ver_lower,
>> +						&ver_higher);
>> +
>> +	if (ret == SMCCC_RET_NOT_SUPPORTED)
>> +		return false;
>> +
>> +	if (ret != RSI_SUCCESS) {
>> +		pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
>> +		       RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
>> +		       RSI_ABI_VERSION_GET_MAJOR(ver_lower),
>> +		       RSI_ABI_VERSION_GET_MINOR(ver_lower),
>> +		       RSI_ABI_VERSION_GET_MAJOR(ver_higher),
>> +		       RSI_ABI_VERSION_GET_MINOR(ver_higher));
>> +		return false;
>> +	}
>> +
>> +	pr_info("RME: Using RSI version %lu.%lu\n",
>> +		RSI_ABI_VERSION_GET_MAJOR(ver_lower),
>> +		RSI_ABI_VERSION_GET_MINOR(ver_lower));
>> +
>> +	return true;
>> +}
> 
> I don't have the spec at hand now (on a plane) but given the possibility
> of a 1.0 guest regressing on later RMM versions, I wonder whether we
> should simply bail out if it's not an exact version match. I forgot what
> the spec says about returned ranges (they were pretty confusing last
> time I checked).

Well the idea at least is that the RMM can tell us that it is providing
a 1.0 compatible interface. So it might be supporting 1.x but it's
promising that what it's providing is 1.0 compatible.

Indeed the spec allows the RMM to emulate 1.0 while supporting a higher
(incompatible) interface as well - which is where the version ranges
come in. So in the future we might negotiate versions with the RMM, or
opportunistically use newer features if the RMM provides them. But
obviously for now the guest is only 1.0.

I'd prefer not to add an exact version match because then upgrading the
RMM would break existing guests and would probably lead to pressure for
the RMM to simply lie to guests to avoid them breaking on upgrade.

>> +
>> +void __init arm64_rsi_setup_memory(void)
>> +{
>> +	u64 i;
>> +	phys_addr_t start, end;
>> +
>> +	if (!is_realm_world())
>> +		return;
>> +
>> +	/*
>> +	 * Iterate over the available memory ranges and convert the state to
>> +	 * protected memory. We should take extra care to ensure that we DO NOT
>> +	 * permit any "DESTROYED" pages to be converted to "RAM".
>> +	 *
>> +	 * BUG_ON is used because if the attempt to switch the memory to
>> +	 * protected has failed here, then future accesses to the memory are
>> +	 * simply going to be reflected as a SEA (Synchronous External Abort)
>> +	 * which we can't handle.  Bailing out early prevents the guest limping
>> +	 * on and dying later.
>> +	 */
>> +	for_each_mem_range(i, &start, &end) {
>> +		BUG_ON(rsi_set_memory_range_protected_safe(start, end));
>> +	}
> 
> Would it help debugging if we print the memory ranges as well rather
> than just a BUG_ON()?
> 

Yes that would probably be useful - I'll fix that.

Thanks,
Steve
Shanker Donthineni Sept. 6, 2024, 6:58 p.m. UTC | #5
Hi Steven,

On 8/19/24 08:19, Steven Price wrote:
> External email: Use caution opening links or attachments
> 
> 
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Detect that the VM is a realm guest by the presence of the RSI
> interface.
> 
> If in a realm then all memory needs to be marked as RIPAS RAM initially,
> the loader may or may not have done this for us. To be sure iterate over
> all RAM and mark it as such. Any failure is fatal as that implies the
> RAM regions passed to Linux are incorrect - which would mean failing
> later when attempting to access non-existent RAM.
> 
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Co-developed-by: Steven Price <steven.price@arm.com>
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
> Changes since v4:
>   * Minor tidy ups.
> Changes since v3:
>   * Provide safe/unsafe versions for converting memory to protected,
>     using the safer version only for the early boot.
>   * Use the new psci_early_test_conduit() function to avoid calling an
>     SMC if EL3 is not present (or not configured to handle an SMC).
> Changes since v2:
>   * Use DECLARE_STATIC_KEY_FALSE rather than "extern struct
>     static_key_false".
>   * Rename set_memory_range() to rsi_set_memory_range().
>   * Downgrade some BUG()s to WARN()s and handle the condition by
>     propagating up the stack. Comment the remaining case that ends in a
>     BUG() to explain why.
>   * Rely on the return from rsi_request_version() rather than checking
>     the version the RMM claims to support.
>   * Rename the generic sounding arm64_setup_memory() to
>     arm64_rsi_setup_memory() and move the call site to setup_arch().
> ---
>   arch/arm64/include/asm/rsi.h | 65 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/Makefile   |  3 +-
>   arch/arm64/kernel/rsi.c      | 78 ++++++++++++++++++++++++++++++++++++
>   arch/arm64/kernel/setup.c    |  8 ++++
>   4 files changed, 153 insertions(+), 1 deletion(-)
>   create mode 100644 arch/arm64/include/asm/rsi.h
>   create mode 100644 arch/arm64/kernel/rsi.c
> 
> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
> new file mode 100644
> index 000000000000..2bc013badbc3
> --- /dev/null
> +++ b/arch/arm64/include/asm/rsi.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2024 ARM Ltd.
> + */
> +
> +#ifndef __ASM_RSI_H_
> +#define __ASM_RSI_H_
> +
> +#include <linux/jump_label.h>
> +#include <asm/rsi_cmds.h>
> +
> +DECLARE_STATIC_KEY_FALSE(rsi_present);
> +
> +void __init arm64_rsi_init(void);
> +void __init arm64_rsi_setup_memory(void);
> +static inline bool is_realm_world(void)
> +{
> +       return static_branch_unlikely(&rsi_present);
> +}
> +
> +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end,
> +                                      enum ripas state, unsigned long flags)
> +{
> +       unsigned long ret;
> +       phys_addr_t top;
> +
> +       while (start != end) {
> +               ret = rsi_set_addr_range_state(start, end, state, flags, &top);
> +               if (WARN_ON(ret || top < start || top > end))
> +                       return -EINVAL;
> +               start = top;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Convert the specified range to RAM. Do not use this if you rely on the
> + * contents of a page that may already be in RAM state.
> + */
> +static inline int rsi_set_memory_range_protected(phys_addr_t start,
> +                                                phys_addr_t end)
> +{
> +       return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
> +                                   RSI_CHANGE_DESTROYED);
> +}
> +
> +/*
> + * Convert the specified range to RAM. Do not convert any pages that may have
> + * been DESTROYED, without our permission.
> + */
> +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
> +                                                     phys_addr_t end)
> +{
> +       return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
> +                                   RSI_NO_CHANGE_DESTROYED);
> +}
> +
> +static inline int rsi_set_memory_range_shared(phys_addr_t start,
> +                                             phys_addr_t end)
> +{
> +       return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
> +                                   RSI_NO_CHANGE_DESTROYED);
> +}
> +#endif /* __ASM_RSI_H_ */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 2b112f3b7510..71c29a2a2f19 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -33,7 +33,8 @@ obj-y                 := debug-monitors.o entry.o irq.o fpsimd.o              \
>                             return_address.o cpuinfo.o cpu_errata.o              \
>                             cpufeature.o alternative.o cacheinfo.o               \
>                             smp.o smp_spin_table.o topology.o smccc-call.o       \
> -                          syscall.o proton-pack.o idle.o patching.o pi/
> +                          syscall.o proton-pack.o idle.o patching.o pi/        \
> +                          rsi.o
> 
>   obj-$(CONFIG_COMPAT)                   += sys32.o signal32.o                   \
>                                             sys_compat.o
> diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
> new file mode 100644
> index 000000000000..128a9a05a96b
> --- /dev/null
> +++ b/arch/arm64/kernel/rsi.c
> @@ -0,0 +1,78 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 ARM Ltd.
> + */
> +
> +#include <linux/jump_label.h>
> +#include <linux/memblock.h>
> +#include <linux/psci.h>
> +#include <asm/rsi.h>
> +
> +DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
> +EXPORT_SYMBOL(rsi_present);
> +
> +static bool rsi_version_matches(void)
> +{
> +       unsigned long ver_lower, ver_higher;
> +       unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
> +                                               &ver_lower,
> +                                               &ver_higher);
> +
> +       if (ret == SMCCC_RET_NOT_SUPPORTED)
> +               return false;
> +
> +       if (ret != RSI_SUCCESS) {
> +               pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
> +                      RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
> +                      RSI_ABI_VERSION_GET_MAJOR(ver_lower),
> +                      RSI_ABI_VERSION_GET_MINOR(ver_lower),
> +                      RSI_ABI_VERSION_GET_MAJOR(ver_higher),
> +                      RSI_ABI_VERSION_GET_MINOR(ver_higher));
> +               return false;
> +       }
> +
> +       pr_info("RME: Using RSI version %lu.%lu\n",
> +               RSI_ABI_VERSION_GET_MAJOR(ver_lower),
> +               RSI_ABI_VERSION_GET_MINOR(ver_lower));
> +
> +       return true;
> +}
> +
> +void __init arm64_rsi_setup_memory(void)
> +{
> +       u64 i;
> +       phys_addr_t start, end;
> +
> +       if (!is_realm_world())
> +               return;
> +
> +       /*
> +        * Iterate over the available memory ranges and convert the state to
> +        * protected memory. We should take extra care to ensure that we DO NOT
> +        * permit any "DESTROYED" pages to be converted to "RAM".
> +        *
> +        * BUG_ON is used because if the attempt to switch the memory to
> +        * protected has failed here, then future accesses to the memory are
> +        * simply going to be reflected as a SEA (Synchronous External Abort)
> +        * which we can't handle.  Bailing out early prevents the guest limping
> +        * on and dying later.
> +        */
> +       for_each_mem_range(i, &start, &end) {
> +               BUG_ON(rsi_set_memory_range_protected_safe(start, end));
> +       }
> +}
> +
> +void __init arm64_rsi_init(void)
> +{
> +       /*
> +        * If PSCI isn't using SMC, RMM isn't present. Don't try to execute an
> +        * SMC as it could be UNDEFINED.
> +        */
> +       if (!psci_early_test_conduit(SMCCC_CONDUIT_SMC))
> +               return;
For ACPI-based kernel boot flow, control never reaches this point because the above
function does not check the PSCI conduit method when the kernel is booted via UEFI.

As a result, the boot process fails when using ACPI. It works fine with DTB based
boot.

> +       if (!rsi_version_matches())
> +               return;
> +
> +       static_branch_enable(&rsi_present);
> +}
> +
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index a096e2451044..143f87615af0 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -43,6 +43,7 @@
>   #include <asm/cpu_ops.h>
>   #include <asm/kasan.h>
>   #include <asm/numa.h>
> +#include <asm/rsi.h>
>   #include <asm/scs.h>
>   #include <asm/sections.h>
>   #include <asm/setup.h>
> @@ -293,6 +294,11 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
>           * cpufeature code and early parameters.
>           */
>          jump_label_init();
> +       /*
> +        * Init RSI before early param so that "earlycon" console uses the
> +        * shared alias when in a realm
> +        */
> +       arm64_rsi_init();
>          parse_early_param();
> 
>          dynamic_scs_init();
> @@ -328,6 +334,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
> 
>          arm64_memblock_init();
> 
> +       arm64_rsi_setup_memory();
> +
>          paging_init();
> 
>          acpi_table_upgrade();
> --
> 2.34.1
>
Shanker Donthineni Sept. 9, 2024, 3:15 p.m. UTC | #6
Hi Steven,

On 8/19/24 09:10, Steven Price wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 19/08/2024 15:04, Suzuki K Poulose wrote:
>> Hi Steven
>>
>> On 19/08/2024 14:19, Steven Price wrote:
>>> From: Suzuki K Poulose <suzuki.poulose@arm.com>
>>>
>>> Detect that the VM is a realm guest by the presence of the RSI
>>> interface.
>>>
>>> If in a realm then all memory needs to be marked as RIPAS RAM initially,
>>> the loader may or may not have done this for us. To be sure iterate over
>>> all RAM and mark it as such. Any failure is fatal as that implies the
>>> RAM regions passed to Linux are incorrect - which would mean failing
>>> later when attempting to access non-existent RAM.
>>>
>>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>>> Co-developed-by: Steven Price <steven.price@arm.com>
>>> Signed-off-by: Steven Price <steven.price@arm.com>
>>> ---
>>> Changes since v4:
>>>    * Minor tidy ups.
>>> Changes since v3:
>>>    * Provide safe/unsafe versions for converting memory to protected,
>>>      using the safer version only for the early boot.
>>>    * Use the new psci_early_test_conduit() function to avoid calling an
>>>      SMC if EL3 is not present (or not configured to handle an SMC).
>>> Changes since v2:
>>>    * Use DECLARE_STATIC_KEY_FALSE rather than "extern struct
>>>      static_key_false".
>>>    * Rename set_memory_range() to rsi_set_memory_range().
>>>    * Downgrade some BUG()s to WARN()s and handle the condition by
>>>      propagating up the stack. Comment the remaining case that ends in a
>>>      BUG() to explain why.
>>>    * Rely on the return from rsi_request_version() rather than checking
>>>      the version the RMM claims to support.
>>>    * Rename the generic sounding arm64_setup_memory() to
>>>      arm64_rsi_setup_memory() and move the call site to setup_arch().
>>> ---
>>>    arch/arm64/include/asm/rsi.h | 65 ++++++++++++++++++++++++++++++
>>>    arch/arm64/kernel/Makefile   |  3 +-
>>>    arch/arm64/kernel/rsi.c      | 78 ++++++++++++++++++++++++++++++++++++
>>>    arch/arm64/kernel/setup.c    |  8 ++++
>>>    4 files changed, 153 insertions(+), 1 deletion(-)
>>>    create mode 100644 arch/arm64/include/asm/rsi.h
>>>    create mode 100644 arch/arm64/kernel/rsi.c
>>>
>>> diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
>>> new file mode 100644
>>> index 000000000000..2bc013badbc3
>>> --- /dev/null
>>> +++ b/arch/arm64/include/asm/rsi.h
>>> @@ -0,0 +1,65 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>> +/*
>>> + * Copyright (C) 2024 ARM Ltd.
>>> + */
>>> +
>>> +#ifndef __ASM_RSI_H_
>>> +#define __ASM_RSI_H_
>>> +
>>> +#include <linux/jump_label.h>
>>> +#include <asm/rsi_cmds.h>

The error number macros are used in this file, but the header file
'<linux/errno.h>' is not included.


>>> +
>>> +DECLARE_STATIC_KEY_FALSE(rsi_present);
>>> +
>>> +void __init arm64_rsi_init(void);
>>> +void __init arm64_rsi_setup_memory(void);
>>> +static inline bool is_realm_world(void)
>>> +{
>>> +    return static_branch_unlikely(&rsi_present);
>>> +}
>>> +
>>> +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t
>>> end,
>>> +                       enum ripas state, unsigned long flags)
>>> +{
>>> +    unsigned long ret;
>>> +    phys_addr_t top;
>>> +
>>> +    while (start != end) {
>>> +        ret = rsi_set_addr_range_state(start, end, state, flags, &top);
>>> +        if (WARN_ON(ret || top < start || top > end))
>>> +            return -EINVAL;
>>> +        start = top;
>>> +    }
>>> +
>>> +    return 0;

-Shanker
Gavin Shan Sept. 10, 2024, 12:09 a.m. UTC | #7
On 8/31/24 1:54 AM, Steven Price wrote:
> On 26/08/2024 11:03, Catalin Marinas wrote:
>> On Mon, Aug 19, 2024 at 02:19:10PM +0100, Steven Price wrote:

[...]

>>> +
>>> +void __init arm64_rsi_setup_memory(void)
>>> +{
>>> +	u64 i;
>>> +	phys_addr_t start, end;
>>> +
>>> +	if (!is_realm_world())
>>> +		return;
>>> +
>>> +	/*
>>> +	 * Iterate over the available memory ranges and convert the state to
>>> +	 * protected memory. We should take extra care to ensure that we DO NOT
>>> +	 * permit any "DESTROYED" pages to be converted to "RAM".
>>> +	 *
>>> +	 * BUG_ON is used because if the attempt to switch the memory to
>>> +	 * protected has failed here, then future accesses to the memory are
>>> +	 * simply going to be reflected as a SEA (Synchronous External Abort)
>>> +	 * which we can't handle.  Bailing out early prevents the guest limping
>>> +	 * on and dying later.
>>> +	 */
>>> +	for_each_mem_range(i, &start, &end) {
>>> +		BUG_ON(rsi_set_memory_range_protected_safe(start, end));
>>> +	}
>>
>> Would it help debugging if we print the memory ranges as well rather
>> than just a BUG_ON()?
>>
> 
> Yes that would probably be useful - I'll fix that.
> 

One potential issue I'm seeing is WARN_ON() followed by BUG_ON(). They're a bit
duplicate. I would suggest to remove the WARN_ON() and print informative messages
in rsi_set_memory_range().

   setup_arch
   arm64_rsi_setup_memory                    // BUG_ON(error)
   rsi_set_memory_range_protected_safe
   rsi_set_memory_range                      // WARN_ON(error)

Thanks,
Gavin
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
new file mode 100644
index 000000000000..2bc013badbc3
--- /dev/null
+++ b/arch/arm64/include/asm/rsi.h
@@ -0,0 +1,65 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 ARM Ltd.
+ */
+
+#ifndef __ASM_RSI_H_
+#define __ASM_RSI_H_
+
+#include <linux/jump_label.h>
+#include <asm/rsi_cmds.h>
+
+DECLARE_STATIC_KEY_FALSE(rsi_present);
+
+void __init arm64_rsi_init(void);
+void __init arm64_rsi_setup_memory(void);
+static inline bool is_realm_world(void)
+{
+	return static_branch_unlikely(&rsi_present);
+}
+
+static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end,
+				       enum ripas state, unsigned long flags)
+{
+	unsigned long ret;
+	phys_addr_t top;
+
+	while (start != end) {
+		ret = rsi_set_addr_range_state(start, end, state, flags, &top);
+		if (WARN_ON(ret || top < start || top > end))
+			return -EINVAL;
+		start = top;
+	}
+
+	return 0;
+}
+
+/*
+ * Convert the specified range to RAM. Do not use this if you rely on the
+ * contents of a page that may already be in RAM state.
+ */
+static inline int rsi_set_memory_range_protected(phys_addr_t start,
+						 phys_addr_t end)
+{
+	return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+				    RSI_CHANGE_DESTROYED);
+}
+
+/*
+ * Convert the specified range to RAM. Do not convert any pages that may have
+ * been DESTROYED, without our permission.
+ */
+static inline int rsi_set_memory_range_protected_safe(phys_addr_t start,
+						      phys_addr_t end)
+{
+	return rsi_set_memory_range(start, end, RSI_RIPAS_RAM,
+				    RSI_NO_CHANGE_DESTROYED);
+}
+
+static inline int rsi_set_memory_range_shared(phys_addr_t start,
+					      phys_addr_t end)
+{
+	return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY,
+				    RSI_NO_CHANGE_DESTROYED);
+}
+#endif /* __ASM_RSI_H_ */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2b112f3b7510..71c29a2a2f19 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -33,7 +33,8 @@  obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o idle.o patching.o pi/
+			   syscall.o proton-pack.o idle.o patching.o pi/	\
+			   rsi.o
 
 obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o			\
 					   sys_compat.o
diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c
new file mode 100644
index 000000000000..128a9a05a96b
--- /dev/null
+++ b/arch/arm64/kernel/rsi.c
@@ -0,0 +1,78 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/jump_label.h>
+#include <linux/memblock.h>
+#include <linux/psci.h>
+#include <asm/rsi.h>
+
+DEFINE_STATIC_KEY_FALSE_RO(rsi_present);
+EXPORT_SYMBOL(rsi_present);
+
+static bool rsi_version_matches(void)
+{
+	unsigned long ver_lower, ver_higher;
+	unsigned long ret = rsi_request_version(RSI_ABI_VERSION,
+						&ver_lower,
+						&ver_higher);
+
+	if (ret == SMCCC_RET_NOT_SUPPORTED)
+		return false;
+
+	if (ret != RSI_SUCCESS) {
+		pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n",
+		       RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR,
+		       RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+		       RSI_ABI_VERSION_GET_MINOR(ver_lower),
+		       RSI_ABI_VERSION_GET_MAJOR(ver_higher),
+		       RSI_ABI_VERSION_GET_MINOR(ver_higher));
+		return false;
+	}
+
+	pr_info("RME: Using RSI version %lu.%lu\n",
+		RSI_ABI_VERSION_GET_MAJOR(ver_lower),
+		RSI_ABI_VERSION_GET_MINOR(ver_lower));
+
+	return true;
+}
+
+void __init arm64_rsi_setup_memory(void)
+{
+	u64 i;
+	phys_addr_t start, end;
+
+	if (!is_realm_world())
+		return;
+
+	/*
+	 * Iterate over the available memory ranges and convert the state to
+	 * protected memory. We should take extra care to ensure that we DO NOT
+	 * permit any "DESTROYED" pages to be converted to "RAM".
+	 *
+	 * BUG_ON is used because if the attempt to switch the memory to
+	 * protected has failed here, then future accesses to the memory are
+	 * simply going to be reflected as a SEA (Synchronous External Abort)
+	 * which we can't handle.  Bailing out early prevents the guest limping
+	 * on and dying later.
+	 */
+	for_each_mem_range(i, &start, &end) {
+		BUG_ON(rsi_set_memory_range_protected_safe(start, end));
+	}
+}
+
+void __init arm64_rsi_init(void)
+{
+	/*
+	 * If PSCI isn't using SMC, RMM isn't present. Don't try to execute an
+	 * SMC as it could be UNDEFINED.
+	 */
+	if (!psci_early_test_conduit(SMCCC_CONDUIT_SMC))
+		return;
+	if (!rsi_version_matches())
+		return;
+
+	static_branch_enable(&rsi_present);
+}
+
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index a096e2451044..143f87615af0 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,6 +43,7 @@ 
 #include <asm/cpu_ops.h>
 #include <asm/kasan.h>
 #include <asm/numa.h>
+#include <asm/rsi.h>
 #include <asm/scs.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -293,6 +294,11 @@  void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	 * cpufeature code and early parameters.
 	 */
 	jump_label_init();
+	/*
+	 * Init RSI before early param so that "earlycon" console uses the
+	 * shared alias when in a realm
+	 */
+	arm64_rsi_init();
 	parse_early_param();
 
 	dynamic_scs_init();
@@ -328,6 +334,8 @@  void __init __no_sanitize_address setup_arch(char **cmdline_p)
 
 	arm64_memblock_init();
 
+	arm64_rsi_setup_memory();
+
 	paging_init();
 
 	acpi_table_upgrade();