diff mbox

[RFC] arm-acpi: Hide SMMU from IORT for hardware domain

Message ID 1409218d-78bd-6371-64d7-b25b423ed04d@caviumnetworks.com (mailing list archive)
State New, archived
Headers show

Commit Message

Manish Jaggi June 8, 2017, 12:38 p.m. UTC
This patch disables the smmu node in IORT table for hardware domain.
Also patches the output_base of pci_rc id_array with output_base of
smmu node id_array.

Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
---
  xen/arch/arm/domain_build.c | 142 
+++++++++++++++++++++++++++++++++++++++++++-
  xen/include/acpi/actbl2.h   |   3 +-
  xen/include/asm-arm/acpi.h  |   1 +
  3 files changed, 144 insertions(+), 2 deletions(-)

Comments

Julien Grall June 8, 2017, 1:09 p.m. UTC | #1
Hi Manish,

On 08/06/17 13:38, Manish Jaggi wrote:
>

Spurious line.

> This patch disables the smmu node in IORT table for hardware domain.
> Also patches the output_base of pci_rc id_array with output_base of
> smmu node id_array.

I would have appreciated a bit more description in the commit message to 
explain your logic.

>
> Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
> ---
>  xen/arch/arm/domain_build.c | 142
> +++++++++++++++++++++++++++++++++++++++++++-

domain_build.c is starting to be really big. I think it is time to move 
some acpi bits outside domain_build.c.

>  xen/include/acpi/actbl2.h   |   3 +-
>  xen/include/asm-arm/acpi.h  |   1 +
>  3 files changed, 144 insertions(+), 2 deletions(-)
>
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index d6d6c94..9f41d0e 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -32,6 +32,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
>  int dom0_11_mapping = 1;
>
>  static u64 __initdata dom0_mem;
>+static u8 *iort_base_ptr;

Looking at the code, I don't see any reason to have this global.

>
>  static void __init parse_dom0_mem(const char *s)
>  {
> @@ -1336,6 +1337,96 @@ static int prepare_dtb(struct domain *d, struct
> kernel_info *kinfo)
>  #ifdef CONFIG_ACPI
>  #define ACPI_DOM0_FDT_MIN_SIZE 4096
>
> +static void patch_output_ref(struct acpi_iort_id_mapping *pci_idmap,
> +                      struct acpi_iort_node *smmu_node)
> +{
> +    struct acpi_iort_id_mapping *idmap = NULL;
> +    int i;

Newline.

> +    for (i=0; i < smmu_node->mapping_count; i++) {

Please respect Xen coding style... I expect you to fix *all* the place 
in the next version.

Also, there is a latent lack of comments within the patch to explain the 
logic.

> +        if(!idmap)
> +            idmap = (struct acpi_iort_id_mapping*)((u8*)smmu_node
> +                                          + smmu_node->mapping_offset);
> +        else
> +            idmap++;
> +
> +        if (pci_idmap->output_base == idmap->input_base) {
> +            pci_idmap->output_base = idmap->output_base;
> +            pci_idmap->output_reference = idmap->output_reference;

As I pointed out on the previous thread, you assume that one PCI ID 
mapping will end up to be translated to one Device ID mapping and not 
split across multiple one. For instance:

RC A
  // doesn't use SMMU 0 so just outputs DeviceIDs to ITS GROUP 0
  // Input ID --> Output reference: Output ID
0x0000-0xffff --> ITS GROUP 0 : 0x0000->0xffff

SMMU 0
// Note that range of StreamIDs that map to DeviceIDs excludes
// the NIC 0 DeviceID as it does not generate MSIs
  // Input ID --> Output reference: Output ID
0x0000-0x01ff --> ITS GROUP 0 : 0x10000->0x101ff
0x0200-0xffff --> ITS GROUP 0 : 0x20000->0x207ff

// SMMU 0 Control interrupt is MSI based
  // Input ID --> Output reference: Output ID
N/A --> ITS GROUP 0 : 0x200001

I still don't see anything in the spec preventing that. And I would like 
clarification from your side before going forward. *hint* The spec 
should be quoted *hint*

[...]

> diff --git a/xen/include/acpi/actbl2.h b/xen/include/acpi/actbl2.h
> index 42beac4..f180ea5 100644
> --- a/xen/include/acpi/actbl2.h
> +++ b/xen/include/acpi/actbl2.h
> @@ -591,7 +591,8 @@ enum acpi_iort_node_type {
>      ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
>      ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
>      ACPI_IORT_NODE_SMMU = 0x03,
> -    ACPI_IORT_NODE_SMMU_V3 = 0x04
> +    ACPI_IORT_NODE_SMMU_V3 = 0x04,
> +    ACPI_IORT_NODE_RESERVED = 0xff

This is likely a call to a separate patch.

>  };
>
>  struct acpi_iort_id_mapping {
> diff --git a/xen/include/asm-arm/acpi.h b/xen/include/asm-arm/acpi.h
> index 9f954d3..1cc0167 100644
> --- a/xen/include/asm-arm/acpi.h
> +++ b/xen/include/asm-arm/acpi.h
> @@ -36,6 +36,7 @@ typedef enum {
>      TBL_FADT,
>      TBL_MADT,
>      TBL_STAO,
> +    TBL_IORT,
>      TBL_XSDT,
>      TBL_RSDP,
>      TBL_EFIT,

Cheers,
Manish Jaggi June 9, 2017, 7:13 a.m. UTC | #2
On 6/8/2017 6:39 PM, Julien Grall wrote:
> Hi Manish,
>
Hi Julien,
> On 08/06/17 13:38, Manish Jaggi wrote:
>>
>
> Spurious line.
>
>> This patch disables the smmu node in IORT table for hardware domain.
>> Also patches the output_base of pci_rc id_array with output_base of
>> smmu node id_array.
>
> I would have appreciated a bit more description in the commit message 
> to explain your logic.
>
I will add it.

>>
>> Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
>> ---
>>  xen/arch/arm/domain_build.c | 142
>> +++++++++++++++++++++++++++++++++++++++++++-
>
> domain_build.c is starting to be really big. I think it is time to 
> move some acpi bits outside domain_build.c.
>
You are right, I also thought that
How about 3 files
domain_build.c
acpi_domain_build.c
dt_domain_build.c
>>  xen/include/acpi/actbl2.h   |   3 +-
>>  xen/include/asm-arm/acpi.h  |   1 +
>>  3 files changed, 144 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index d6d6c94..9f41d0e 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -32,6 +32,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
>>  int dom0_11_mapping = 1;
>>
>>  static u64 __initdata dom0_mem;
>> +static u8 *iort_base_ptr;
>
> Looking at the code, I don't see any reason to have this global.
If you look a bit closer this is used at multiple places
see fixup_pcirc_node, hide_smmu_iort.
>
>>
>>  static void __init parse_dom0_mem(const char *s)
>>  {
>> @@ -1336,6 +1337,96 @@ static int prepare_dtb(struct domain *d, struct
>> kernel_info *kinfo)
>>  #ifdef CONFIG_ACPI
>>  #define ACPI_DOM0_FDT_MIN_SIZE 4096
>>
>> +static void patch_output_ref(struct acpi_iort_id_mapping *pci_idmap,
>> +                      struct acpi_iort_node *smmu_node)
>> +{
>> +    struct acpi_iort_id_mapping *idmap = NULL;
>> +    int i;
>
> Newline.
Sure.
>
>> +    for (i=0; i < smmu_node->mapping_count; i++) {
>
> Please respect Xen coding style... I expect you to fix *all* the place 
> in the next version.
>
> Also, there is a latent lack of comments within the patch to explain 
> the logic.
>
I will add detail comments.
>> +        if(!idmap)
>> +            idmap = (struct acpi_iort_id_mapping*)((u8*)smmu_node
>> +                                          + smmu_node->mapping_offset);
>> +        else
>> +            idmap++;
>> +
>> +        if (pci_idmap->output_base == idmap->input_base) {
>> +            pci_idmap->output_base = idmap->output_base;
>> +            pci_idmap->output_reference = idmap->output_reference;
>
> As I pointed out on the previous thread, you assume that one PCI ID 
> mapping will end up to be translated to one Device ID mapping and not 
> split across multiple one. For instance:
>
The  assumption is based on the ACPI tables on two platforms ThunderX 
and ThunderX2.
While the spec does not deny it but would there be a use case as such 
where a PCI node id array would split the
range into the same smmu.


> RC A
>  // doesn't use SMMU 0 so just outputs DeviceIDs to ITS GROUP 0
>  // Input ID --> Output reference: Output ID
> 0x0000-0xffff --> ITS GROUP 0 : 0x0000->0xffff
>
This is not relevant as this code wont touch RC A.
> SMMU 0
> // Note that range of StreamIDs that map to DeviceIDs excludes
> // the NIC 0 DeviceID as it does not generate MSIs
>  // Input ID --> Output reference: Output ID
> 0x0000-0x01ff --> ITS GROUP 0 : 0x10000->0x101ff
> 0x0200-0xffff --> ITS GROUP 0 : 0x20000->0x207ff
>
It can be from 2 different RC's and not from same RC.
> // SMMU 0 Control interrupt is MSI based
>  // Input ID --> Output reference: Output ID
> N/A --> ITS GROUP 0 : 0x200001
>
> I still don't see anything in the spec preventing that. And I would 
> like clarification from your side before going forward. *hint* The 
> spec should be quoted *hint*
>
Spec does not prevent that, but we need to see IMHO what all cases are 
practically possible and current platforms support it.
Is there any platform which supports that ? I can add code for the 
combinations but how I will test it.
> [...]

>
>> diff --git a/xen/include/acpi/actbl2.h b/xen/include/acpi/actbl2.h
>> index 42beac4..f180ea5 100644
>> --- a/xen/include/acpi/actbl2.h
>> +++ b/xen/include/acpi/actbl2.h
>> @@ -591,7 +591,8 @@ enum acpi_iort_node_type {
>>      ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
>>      ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
>>      ACPI_IORT_NODE_SMMU = 0x03,
>> -    ACPI_IORT_NODE_SMMU_V3 = 0x04
>> +    ACPI_IORT_NODE_SMMU_V3 = 0x04,
>> +    ACPI_IORT_NODE_RESERVED = 0xff
>
> This is likely a call to a separate patch.
>
ok.
>>  };
>>
>>  struct acpi_iort_id_mapping {
>> diff --git a/xen/include/asm-arm/acpi.h b/xen/include/asm-arm/acpi.h
>> index 9f954d3..1cc0167 100644
>> --- a/xen/include/asm-arm/acpi.h
>> +++ b/xen/include/asm-arm/acpi.h
>> @@ -36,6 +36,7 @@ typedef enum {
>>      TBL_FADT,
>>      TBL_MADT,
>>      TBL_STAO,
>> +    TBL_IORT,
>>      TBL_XSDT,
>>      TBL_RSDP,
>>      TBL_EFIT,
>
> Cheers,
>
Julien Grall June 9, 2017, 9:23 a.m. UTC | #3
On 09/06/2017 08:13, Manish Jaggi wrote:
> On 6/8/2017 6:39 PM, Julien Grall wrote:
>> Hi Manish,
>>
> Hi Julien,

Hello,

>> On 08/06/17 13:38, Manish Jaggi wrote:
>>>
>>
>> Spurious line.
>>
>>> This patch disables the smmu node in IORT table for hardware domain.
>>> Also patches the output_base of pci_rc id_array with output_base of
>>> smmu node id_array.
>>
>> I would have appreciated a bit more description in the commit message
>> to explain your logic.
>>
> I will add it.
>
>>>
>>> Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
>>> ---
>>>  xen/arch/arm/domain_build.c | 142
>>> +++++++++++++++++++++++++++++++++++++++++++-
>>
>> domain_build.c is starting to be really big. I think it is time to
>> move some acpi bits outside domain_build.c.
>>
> You are right, I also thought that
> How about 3 files
> domain_build.c
> acpi_domain_build.c
> dt_domain_build.c

If you want to split the current code, then fine. But it is not strictly 
mandatory for this code. What I want is adding new code in separate 
files. But in this case they should be named:

domain_build.c
acpi/domain_build.c
dt/domain_build.c

This would keep the ACPI and DT firmware code separated and not 
polluting the arch/arm.

>>>  xen/include/acpi/actbl2.h   |   3 +-
>>>  xen/include/asm-arm/acpi.h  |   1 +
>>>  3 files changed, 144 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>> index d6d6c94..9f41d0e 100644
>>> --- a/xen/arch/arm/domain_build.c
>>> +++ b/xen/arch/arm/domain_build.c
>>> @@ -32,6 +32,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
>>>  int dom0_11_mapping = 1;
>>>
>>>  static u64 __initdata dom0_mem;
>>> +static u8 *iort_base_ptr;
>>
>> Looking at the code, I don't see any reason to have this global.
> If you look a bit closer this is used at multiple places
> see fixup_pcirc_node, hide_smmu_iort.

My point stands... you could have passed iort_base_ptr as an extra 
parameter of the functions. Or even use kinfo.

Anyway, at the moment I don't see any reason to have this global variable.

>>
>>>
>>>  static void __init parse_dom0_mem(const char *s)
>>>  {
>>> @@ -1336,6 +1337,96 @@ static int prepare_dtb(struct domain *d, struct
>>> kernel_info *kinfo)
>>>  #ifdef CONFIG_ACPI
>>>  #define ACPI_DOM0_FDT_MIN_SIZE 4096
>>>
>>> +static void patch_output_ref(struct acpi_iort_id_mapping *pci_idmap,
>>> +                      struct acpi_iort_node *smmu_node)
>>> +{
>>> +    struct acpi_iort_id_mapping *idmap = NULL;
>>> +    int i;
>>
>> Newline.
> Sure.
>>
>>> +    for (i=0; i < smmu_node->mapping_count; i++) {
>>
>> Please respect Xen coding style... I expect you to fix *all* the place
>> in the next version.
>>
>> Also, there is a latent lack of comments within the patch to explain
>> the logic.
>>
> I will add detail comments.
>>> +        if(!idmap)
>>> +            idmap = (struct acpi_iort_id_mapping*)((u8*)smmu_node
>>> +                                          + smmu_node->mapping_offset);
>>> +        else
>>> +            idmap++;
>>> +
>>> +        if (pci_idmap->output_base == idmap->input_base) {
>>> +            pci_idmap->output_base = idmap->output_base;
>>> +            pci_idmap->output_reference = idmap->output_reference;
>>
>> As I pointed out on the previous thread, you assume that one PCI ID
>> mapping will end up to be translated to one Device ID mapping and not
>> split across multiple one. For instance:
>>
> The  assumption is based on the ACPI tables on two platforms ThunderX
> and ThunderX2.
> While the spec does not deny it but would there be a use case as such
> where a PCI node id array would split the
> range into the same smmu.

May I remind you that the goal of Xen is to run on *all* the current and 
future platforms. If the spec says it is allowed, then we should do it 
unless there is a strong reason not to do it.

>
>> RC A
>>  // doesn't use SMMU 0 so just outputs DeviceIDs to ITS GROUP 0
>>  // Input ID --> Output reference: Output ID
>> 0x0000-0xffff --> ITS GROUP 0 : 0x0000->0xffff
>>
> This is not relevant as this code wont touch RC A.

Can you avoid to dismiss any example that don't fit your solution? This 
is not helpful.

Describing the RC is relevant in my example to show a case that your 
solution will not handle.

>> SMMU 0
>> // Note that range of StreamIDs that map to DeviceIDs excludes
>> // the NIC 0 DeviceID as it does not generate MSIs
>>  // Input ID --> Output reference: Output ID
>> 0x0000-0x01ff --> ITS GROUP 0 : 0x10000->0x101ff
>> 0x0200-0xffff --> ITS GROUP 0 : 0x20000->0x207ff
>>
> It can be from 2 different RC's and not from same RC.

It is not my point in this example. My point is same RC with split 
DeviceID mapping.

>> // SMMU 0 Control interrupt is MSI based
>>  // Input ID --> Output reference: Output ID
>> N/A --> ITS GROUP 0 : 0x200001
>>
>> I still don't see anything in the spec preventing that. And I would
>> like clarification from your side before going forward. *hint* The
>> spec should be quoted *hint*
>>
> Spec does not prevent that, but we need to see IMHO what all cases are
> practically possible and current platforms support it.

See above.

> Is there any platform which supports that ? I can add code for the
> combinations but how I will test it.

The only thing I can tell you is the spec allows it and your suggestion 
would have to be fully rewritten if someone decide to not follow your 
assumptions.

On the previous thread "xen/arm: Hiding SMMUs from Dom0 when using ACPI 
on Xen", I made 2 suggestions which, I believe, is spec-proof:

1) Resolve all the RID (or platform device ID) to a DeviceID one by one 
and generating the a new IORT for DOM0 with that
2) Generating new DeviceID mapping for each RID mapping

Solution 1 would be the easiest to do and could be tested on any 
platform as the algo would be based on the IORT parsing.

So I don't see why we should have a limiting solution at the moment.

Regards,
Manish Jaggi June 9, 2017, 10:02 a.m. UTC | #4
HI Julien,

On 6/9/2017 2:53 PM, Julien Grall wrote:
>
>
> On 09/06/2017 08:13, Manish Jaggi wrote:
>> On 6/8/2017 6:39 PM, Julien Grall wrote:
>>> Hi Manish,
>>>
>> Hi Julien,
>
> Hello,
>
>>> On 08/06/17 13:38, Manish Jaggi wrote:
>>>>
>>>
>>> Spurious line.
>>>
>>>> This patch disables the smmu node in IORT table for hardware domain.
>>>> Also patches the output_base of pci_rc id_array with output_base of
>>>> smmu node id_array.
>>>
>>> I would have appreciated a bit more description in the commit message
>>> to explain your logic.
>>>
>> I will add it.
>>
>>>>
>>>> Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
>>>> ---
>>>>  xen/arch/arm/domain_build.c | 142
>>>> +++++++++++++++++++++++++++++++++++++++++++-
>>>
>>> domain_build.c is starting to be really big. I think it is time to
>>> move some acpi bits outside domain_build.c.
>>>
>> You are right, I also thought that
>> How about 3 files
>> domain_build.c
>> acpi_domain_build.c
>> dt_domain_build.c
>
> If you want to split the current code, then fine. But it is not 
> strictly mandatory for this code. What I want is adding new code in 
> separate files. But in this case they should be named:
>
> domain_build.c
> acpi/domain_build.c
> dt/domain_build.c
>
> This would keep the ACPI and DT firmware code separated and not 
> polluting the arch/arm.
I will follow this structure.
>
>>>>  xen/include/acpi/actbl2.h   |   3 +-
>>>>  xen/include/asm-arm/acpi.h  |   1 +
>>>>  3 files changed, 144 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>>> index d6d6c94..9f41d0e 100644
>>>> --- a/xen/arch/arm/domain_build.c
>>>> +++ b/xen/arch/arm/domain_build.c
>>>> @@ -32,6 +32,7 @@ integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
>>>>  int dom0_11_mapping = 1;
>>>>
>>>>  static u64 __initdata dom0_mem;
>>>> +static u8 *iort_base_ptr;
>>>
>>> Looking at the code, I don't see any reason to have this global.
>> If you look a bit closer this is used at multiple places
>> see fixup_pcirc_node, hide_smmu_iort.
>
> My point stands... you could have passed iort_base_ptr as an extra 
> parameter of the functions. Or even use kinfo.
>
> Anyway, at the moment I don't see any reason to have this global 
> variable.
>
ok, I will pass it as a parameter.
>>>
>>>>
>>>>  static void __init parse_dom0_mem(const char *s)
>>>>  {
>>>> @@ -1336,6 +1337,96 @@ static int prepare_dtb(struct domain *d, struct
>>>> kernel_info *kinfo)
>>>>  #ifdef CONFIG_ACPI
>>>>  #define ACPI_DOM0_FDT_MIN_SIZE 4096
>>>>
>>>> +static void patch_output_ref(struct acpi_iort_id_mapping *pci_idmap,
>>>> +                      struct acpi_iort_node *smmu_node)
>>>> +{
>>>> +    struct acpi_iort_id_mapping *idmap = NULL;
>>>> +    int i;
>>>
>>> Newline.
>> Sure.
>>>
>>>> +    for (i=0; i < smmu_node->mapping_count; i++) {
>>>
>>> Please respect Xen coding style... I expect you to fix *all* the place
>>> in the next version.
>>>
>>> Also, there is a latent lack of comments within the patch to explain
>>> the logic.
>>>
>> I will add detail comments.
>>>> +        if(!idmap)
>>>> +            idmap = (struct acpi_iort_id_mapping*)((u8*)smmu_node
>>>> +                                          + 
>>>> smmu_node->mapping_offset);
>>>> +        else
>>>> +            idmap++;
>>>> +
>>>> +        if (pci_idmap->output_base == idmap->input_base) {
>>>> +            pci_idmap->output_base = idmap->output_base;
>>>> +            pci_idmap->output_reference = idmap->output_reference;
>>>
>>> As I pointed out on the previous thread, you assume that one PCI ID
>>> mapping will end up to be translated to one Device ID mapping and not
>>> split across multiple one. For instance:
>>>
>> The  assumption is based on the ACPI tables on two platforms ThunderX
>> and ThunderX2.
>> While the spec does not deny it but would there be a use case as such
>> where a PCI node id array would split the
>> range into the same smmu.
>
> May I remind you that the goal of Xen is to run on *all* the current 
> and future platforms. If the spec says it is allowed, then we should 
> do it unless there is a strong reason not to do it.
>
>>
>>> RC A
>>>  // doesn't use SMMU 0 so just outputs DeviceIDs to ITS GROUP 0
>>>  // Input ID --> Output reference: Output ID
>>> 0x0000-0xffff --> ITS GROUP 0 : 0x0000->0xffff
>>>
>> This is not relevant as this code wont touch RC A.
>
> Can you avoid to dismiss any example that don't fit your solution? 
> This is not helpful.
Sure. I will add more description in that case.
>
> Describing the RC is relevant in my example to show a case that your 
> solution will not handle.
I will add my rationale here. Hiding smmu from IORT table would require 
setting device ID in the pci_rc id_array for RID and output reference as 
ITS group.
For the RC idarray elements which don't have an output reference as smmu 
but a ITS group, there is no need to touch them.
Based on this rationale I said this is not relevant.
>
>>> SMMU 0
>>> // Note that range of StreamIDs that map to DeviceIDs excludes
>>> // the NIC 0 DeviceID as it does not generate MSIs
>>>  // Input ID --> Output reference: Output ID
>>> 0x0000-0x01ff --> ITS GROUP 0 : 0x10000->0x101ff
>>> 0x0200-0xffff --> ITS GROUP 0 : 0x20000->0x207ff
>>>
>> It can be from 2 different RC's and not from same RC.
>
> It is not my point in this example. My point is same RC with split 
> DeviceID mapping.
ok, I will add that as well.
The current code parses all entries in pci_rc id_array and patches 
output reference and output_base.
The only assumption was on Number of ids in pci_rc id_array element and 
the matching smmu id_array element be same.
I can remove the assumption, will that be ok?

So,One ID Mapping element (Input_base, num_ids, out_base) can translate 
into two or more id_array entries of smmu node.

>
>>> // SMMU 0 Control interrupt is MSI based
>>>  // Input ID --> Output reference: Output ID
>>> N/A --> ITS GROUP 0 : 0x200001
>>>
>>> I still don't see anything in the spec preventing that. And I would
>>> like clarification from your side before going forward. *hint* The
>>> spec should be quoted *hint*
>>>
>> Spec does not prevent that, but we need to see IMHO what all cases are
>> practically possible and current platforms support it.
>
> See above.
>
>> Is there any platform which supports that ? I can add code for the
>> combinations but how I will test it.
>
> The only thing I can tell you is the spec allows it and your 
> suggestion would have to be fully rewritten if someone decide to not 
> follow your assumptions.
>
See above
> On the previous thread "xen/arm: Hiding SMMUs from Dom0 when using 
> ACPI on Xen", I made 2 suggestions which, I believe, is spec-proof:
>
> 1) Resolve all the RID (or platform device ID) to a DeviceID one by 
> one and generating the a new IORT for DOM0 with that
> 2) Generating new DeviceID mapping for each RID mapping
>
> Solution 1 would be the easiest to do and could be tested on any 
> platform as the algo would be based on the IORT parsing.
>
> So I don't see why we should have a limiting solution at the moment.
>
> Regards,
>
Julien Grall June 9, 2017, 10:43 a.m. UTC | #5
On 09/06/17 11:02, Manish Jaggi wrote:
>>
>>>> SMMU 0
>>>> // Note that range of StreamIDs that map to DeviceIDs excludes
>>>> // the NIC 0 DeviceID as it does not generate MSIs
>>>>  // Input ID --> Output reference: Output ID
>>>> 0x0000-0x01ff --> ITS GROUP 0 : 0x10000->0x101ff
>>>> 0x0200-0xffff --> ITS GROUP 0 : 0x20000->0x207ff
>>>>
>>> It can be from 2 different RC's and not from same RC.
>>
>> It is not my point in this example. My point is same RC with split
>> DeviceID mapping.
> ok, I will add that as well.
> The current code parses all entries in pci_rc id_array and patches
> output reference and output_base.
> The only assumption was on Number of ids in pci_rc id_array element and
> the matching smmu id_array element be same.
> I can remove the assumption, will that be ok?

The only thing I care is that you don't have *any* assumption of how the 
IORT is designed.

>
> So,One ID Mapping element (Input_base, num_ids, out_base) can translate
> into two or more id_array entries of smmu node.

Which is basically solution 1) below... Anyway, I will wait the next 
version hoping that you will handle properly the IORT generation.

Cheers,
diff mbox

Patch

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index d6d6c94..9f41d0e 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -32,6 +32,7 @@  integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
  int dom0_11_mapping = 1;

  static u64 __initdata dom0_mem;
+static u8 *iort_base_ptr;

  static void __init parse_dom0_mem(const char *s)
  {
@@ -1336,6 +1337,96 @@  static int prepare_dtb(struct domain *d, struct 
kernel_info *kinfo)
  #ifdef CONFIG_ACPI
  #define ACPI_DOM0_FDT_MIN_SIZE 4096

+static void patch_output_ref(struct acpi_iort_id_mapping *pci_idmap,
+                      struct acpi_iort_node *smmu_node)
+{
+    struct acpi_iort_id_mapping *idmap = NULL;
+    int i;
+    for (i=0; i < smmu_node->mapping_count; i++) {
+        if(!idmap)
+            idmap = (struct acpi_iort_id_mapping*)((u8*)smmu_node
+                                          + smmu_node->mapping_offset);
+        else
+            idmap++;
+
+        if (pci_idmap->output_base == idmap->input_base) {
+            pci_idmap->output_base = idmap->output_base;
+            pci_idmap->output_reference = idmap->output_reference;
+        }
+    }
+}
+
+static void fixup_pcirc_node(struct acpi_iort_node *node)
+{
+    struct acpi_iort_id_mapping *idmap = NULL;
+    struct acpi_iort_node *onode;
+    int i=0;
+
+    for (i=0; i < node->mapping_count; i++) {
+        if(!idmap)
+            idmap = (struct acpi_iort_id_mapping*)((u8*)node +
+                                          + node->mapping_offset);
+        else
+            idmap++;
+
+        onode = (struct acpi_iort_node*)(iort_base_ptr +
+ idmap->output_reference);
+        switch (onode->type)
+    {
+    case ACPI_IORT_NODE_ITS_GROUP:
+            continue;
+    case ACPI_IORT_NODE_SMMU:
+    case ACPI_IORT_NODE_SMMU_V3:
+         patch_output_ref(idmap, onode);
+        break;
+        }
+    }
+}
+
+static int hide_smmu_iort(void)
+{
+    u32 i;
+    u32 node_offset = 0;
+    struct acpi_table_iort *iort_table;
+    struct acpi_iort_node *node = NULL;
+
+    iort_table = (struct acpi_table_iort *)iort_base_ptr;
+
+    for (i=0; i < iort_table->node_count; i++) {
+        if (!node){
+            node = (struct acpi_iort_node *)(iort_base_ptr +
+                                 iort_table->node_offset);
+            node_offset =  iort_table->node_offset;
+        } else {
+            node = (struct acpi_iort_node *)(iort_base_ptr +
+                                 node_offset);
+        }
+
+        node_offset +=  node->length;
+        if (node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX)
+            fixup_pcirc_node(node);
+    }
+
+    node_offset = 0;
+    node = NULL;
+    for (i=0; i < iort_table->node_count; i++) {
+        if (!node){
+            node = (struct acpi_iort_node *)(iort_base_ptr +
+                                 iort_table->node_offset);
+            node_offset =  iort_table->node_offset;
+        } else {
+            node = (struct acpi_iort_node *)(iort_base_ptr +
+                                 node_offset);
+        }
+        node_offset +=  node->length;
+        if ((node->type == ACPI_IORT_NODE_SMMU) ||
+                 (node->type == ACPI_IORT_NODE_SMMU_V3))
+            node->type = ACPI_IORT_NODE_RESERVED;
+    }
+
+    return 0;
+}
+
  static int acpi_iomem_deny_access(struct domain *d)
  {
      acpi_status status;
@@ -1348,7 +1439,12 @@  static int acpi_iomem_deny_access(struct domain *d)
      if ( rc )
          return rc;

-    /* TODO: Deny MMIO access for SMMU, GIC ITS */
+    /* Hide SMMU from IORT */
+    rc = hide_smmu_iort();
+    if (rc)
+        return rc;
+
+    /* Deny MMIO access for GIC ITS */
      status = acpi_get_table(ACPI_SIG_SPCR, 0,
                              (struct acpi_table_header **)&spcr);

@@ -1646,6 +1742,8 @@  static int acpi_create_xsdt(struct domain *d, 
struct membank tbl_add[])
                             ACPI_SIG_FADT, tbl_add[TBL_FADT].start);
      acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
                             ACPI_SIG_MADT, tbl_add[TBL_MADT].start);
+    acpi_xsdt_modify_entry(xsdt->table_offset_entry, entry_count,
+                           ACPI_SIG_IORT, tbl_add[TBL_IORT].start);
      xsdt->table_offset_entry[entry_count] = tbl_add[TBL_STAO].start;

      xsdt->header.length = table_size;
@@ -1794,11 +1892,23 @@  static int estimate_acpi_efi_size(struct domain 
*d, struct kernel_info *kinfo)
  {
      size_t efi_size, acpi_size, madt_size;
      u64 addr;
+    acpi_status status;
      struct acpi_table_rsdp *rsdp_tbl;
      struct acpi_table_header *table;
+    struct acpi_table_header *iort_table;

      efi_size = estimate_efi_size(kinfo->mem.nr_banks);

+    status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table);
+
+    if ( ACPI_FAILURE(status) )
+    {
+        const char *msg = acpi_format_exception(status);
+
+        printk("Failed to get IORT table, %s\n", msg);
+        return -EINVAL;
+    }
+
      acpi_size = ROUNDUP(sizeof(struct acpi_table_fadt), 8);
      acpi_size += ROUNDUP(sizeof(struct acpi_table_stao), 8);

@@ -1839,6 +1949,8 @@  static int estimate_acpi_efi_size(struct domain 
*d, struct kernel_info *kinfo)
      acpi_size += ROUNDUP(table->length + sizeof(u64), 8);
      acpi_os_unmap_memory(table, sizeof(struct acpi_table_header));

+    /* Add size of iort */
+    acpi_size += iort_table->length;
      acpi_size += ROUNDUP(sizeof(struct acpi_table_rsdp), 8);
      d->arch.efi_acpi_len = PAGE_ALIGN(ROUNDUP(efi_size, 8)
                                        + ROUNDUP(acpi_size, 8));
@@ -1846,6 +1958,30 @@  static int estimate_acpi_efi_size(struct domain 
*d, struct kernel_info *kinfo)
      return 0;
  }

+static int acpi_create_iort(struct domain *d, struct membank tbl_add[])
+{
+    struct acpi_table_header *table;
+    acpi_status status;
+
+    status = acpi_get_table(ACPI_SIG_IORT, 0,
+                            (struct acpi_table_header **)&table);
+    if ( ACPI_FAILURE(status) )
+    {
+        printk("Failed to get IORT table\n");
+        return -EINVAL;
+    }
+
+    iort_base_ptr = d->arch.efi_acpi_table
+               + acpi_get_table_offset(tbl_add, TBL_IORT);
+    ACPI_MEMCPY(iort_base_ptr, table, table->length);
+
+    tbl_add[TBL_IORT].start = d->arch.efi_acpi_gpa
+                              + acpi_get_table_offset(tbl_add, TBL_IORT);
+    tbl_add[TBL_IORT].size = table->length;
+
+    return 0;
+}
+
  static int prepare_acpi(struct domain *d, struct kernel_info *kinfo)
  {
      int rc = 0;
@@ -1889,6 +2025,10 @@  static int prepare_acpi(struct domain *d, struct 
kernel_info *kinfo)
      if ( rc != 0 )
          return rc;

+    rc = acpi_create_iort(d, tbl_add);
+    if ( rc != 0 )
+        return rc;
+
      rc = acpi_create_xsdt(d, tbl_add);
      if ( rc != 0 )
          return rc;
diff --git a/xen/include/acpi/actbl2.h b/xen/include/acpi/actbl2.h
index 42beac4..f180ea5 100644
--- a/xen/include/acpi/actbl2.h
+++ b/xen/include/acpi/actbl2.h
@@ -591,7 +591,8 @@  enum acpi_iort_node_type {
      ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
      ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
      ACPI_IORT_NODE_SMMU = 0x03,
-    ACPI_IORT_NODE_SMMU_V3 = 0x04
+    ACPI_IORT_NODE_SMMU_V3 = 0x04,
+    ACPI_IORT_NODE_RESERVED = 0xff
  };

  struct acpi_iort_id_mapping {
diff --git a/xen/include/asm-arm/acpi.h b/xen/include/asm-arm/acpi.h
index 9f954d3..1cc0167 100644
--- a/xen/include/asm-arm/acpi.h
+++ b/xen/include/asm-arm/acpi.h
@@ -36,6 +36,7 @@  typedef enum {
      TBL_FADT,
      TBL_MADT,
      TBL_STAO,
+    TBL_IORT,
      TBL_XSDT,
      TBL_RSDP,
      TBL_EFIT,