diff mbox

[v3,03/09] iommu/ipmmu-vmsa: Enable multi context support

Message ID 148897091345.16106.2935423334300744039.sendpatchset@little-apple (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show

Commit Message

Magnus Damm March 8, 2017, 11:01 a.m. UTC
From: Magnus Damm <damm+renesas@opensource.se>

Add support for up to 8 contexts. Each context is mapped to one
domain. One domain is assigned one or more slave devices. Contexts
are allocated dynamically and slave devices are grouped together
based on which IPMMU device they are connected to. This makes slave
devices tied to the same IPMMU device share the same IOVA space.

Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
---

 Changes since V2:
 - Updated patch description to reflect code included in:
   [PATCH v7 00/07] iommu/ipmmu-vmsa: IPMMU multi-arch update V7

 Changes since V1:
 - Support up to 8 contexts instead of 4
 - Use feature flag and runtime handling
 - Default to single context

 drivers/iommu/ipmmu-vmsa.c |   38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

Comments

Robin Murphy March 8, 2017, 12:21 p.m. UTC | #1
On 08/03/17 11:01, Magnus Damm wrote:
> From: Magnus Damm <damm+renesas@opensource.se>
> 
> Add support for up to 8 contexts. Each context is mapped to one
> domain. One domain is assigned one or more slave devices. Contexts
> are allocated dynamically and slave devices are grouped together
> based on which IPMMU device they are connected to. This makes slave
> devices tied to the same IPMMU device share the same IOVA space.
> 
> Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
> ---
> 
>  Changes since V2:
>  - Updated patch description to reflect code included in:
>    [PATCH v7 00/07] iommu/ipmmu-vmsa: IPMMU multi-arch update V7
> 
>  Changes since V1:
>  - Support up to 8 contexts instead of 4
>  - Use feature flag and runtime handling
>  - Default to single context
> 
>  drivers/iommu/ipmmu-vmsa.c |   38 ++++++++++++++++++++++++++++++--------
>  1 file changed, 30 insertions(+), 8 deletions(-)
> 
> --- 0012/drivers/iommu/ipmmu-vmsa.c
> +++ work/drivers/iommu/ipmmu-vmsa.c	2017-03-08 17:59:19.900607110 +0900
> @@ -30,11 +30,12 @@
>  
>  #include "io-pgtable.h"
>  
> -#define IPMMU_CTX_MAX 1
> +#define IPMMU_CTX_MAX 8
>  
>  struct ipmmu_features {
>  	bool use_ns_alias_offset;
>  	bool has_cache_leaf_nodes;
> +	bool has_eight_ctx;

Wouldn't it be more sensible to just encode a number of contexts
directly, if it isn't reported by the hardware itself? I'm just
imagining future hardware generations... :P

bool also_has_another_eight_ctx_on_top_of_that;
bool wait_no_this_is_the_one_where_ctx_15_isnt_usable;

>  };
>  
>  struct ipmmu_vmsa_device {
> @@ -44,6 +45,7 @@ struct ipmmu_vmsa_device {
>  	const struct ipmmu_features *features;
>  	bool is_leaf;
>  	unsigned int num_utlbs;
> +	unsigned int num_ctx;
>  	spinlock_t lock;			/* Protects ctx and domains[] */
>  	DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
>  	struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
> @@ -376,11 +378,12 @@ static int ipmmu_domain_allocate_context
>  
>  	spin_lock_irqsave(&mmu->lock, flags);
>  
> -	ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
> -	if (ret != IPMMU_CTX_MAX) {
> +	ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
> +	if (ret != mmu->num_ctx) {
>  		mmu->domains[ret] = domain;
>  		set_bit(ret, mmu->ctx);

Using test_and_set_bit() in a loop would avoid having to take a lock here.

> -	}
> +	} else
> +		ret = -EBUSY;
>  
>  	spin_unlock_irqrestore(&mmu->lock, flags);
>  
> @@ -425,9 +428,9 @@ static int ipmmu_domain_init_context(str
>  	 * Find an unused context.
>  	 */
>  	ret = ipmmu_domain_allocate_context(domain->root, domain);
> -	if (ret == IPMMU_CTX_MAX) {
> +	if (ret < 0) {
>  		free_io_pgtable_ops(domain->iop);
> -		return -EBUSY;
> +		return ret;
>  	}
>  
>  	domain->context_id = ret;
> @@ -562,7 +565,7 @@ static irqreturn_t ipmmu_irq(int irq, vo
>  	/*
>  	 * Check interrupts for all active contexts.
>  	 */
> -	for (i = 0; i < IPMMU_CTX_MAX; i++) {
> +	for (i = 0; i < mmu->num_ctx; i++) {
>  		if (!mmu->domains[i])
>  			continue;
>  		if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
> @@ -632,6 +635,13 @@ static int ipmmu_attach_device(struct io
>  		domain->mmu = mmu;
>  		domain->root = root;
>  		ret = ipmmu_domain_init_context(domain);
> +		if (ret < 0) {
> +			dev_err(dev, "Unable to initialize IPMMU context\n");
> +			domain->mmu = NULL;
> +		} else {
> +			dev_info(dev, "Using IPMMU context %u\n",
> +				 domain->context_id);
> +		}
>  	} else if (domain->mmu != mmu) {
>  		/*
>  		 * Something is wrong, we can't attach two devices using
> @@ -1047,13 +1057,14 @@ static void ipmmu_device_reset(struct ip
>  	unsigned int i;
>  
>  	/* Disable all contexts. */
> -	for (i = 0; i < 4; ++i)
> +	for (i = 0; i < mmu->num_ctx; ++i)
>  		ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
>  }
>  
>  static const struct ipmmu_features ipmmu_features_default = {
>  	.use_ns_alias_offset = true,
>  	.has_cache_leaf_nodes = false,
> +	.has_eight_ctx = false,
>  };
>  
>  static const struct of_device_id ipmmu_of_ids[] = {
> @@ -1112,6 +1123,17 @@ static int ipmmu_probe(struct platform_d
>  	if (mmu->features->use_ns_alias_offset)
>  		mmu->base += IM_NS_ALIAS_OFFSET;
>  
> +	/*
> +	 * The number of contexts varies with generation and instance.
> +	 * Newer SoCs get a total of 8 contexts enabled, older ones just one.
> +	 */
> +	if (mmu->features->has_eight_ctx)
> +		mmu->num_ctx = 8;
> +	else
> +		mmu->num_ctx = 1;
> +
> +	WARN_ON(mmu->num_ctx > IPMMU_CTX_MAX);

The likelihood of that happening doesn't appear to warrant a runtime
check. Especially one which probably isn't even generated because it's
trivially resolvable to "if (false)..." at compile time.

Robin.

> +
>  	irq = platform_get_irq(pdev, 0);
>  
>  	/*
>
Magnus Damm March 9, 2017, 4:16 a.m. UTC | #2
Hi Robin,

Thanks for your feedback!

On Wed, Mar 8, 2017 at 9:21 PM, Robin Murphy <robin.murphy@arm.com> wrote:
> On 08/03/17 11:01, Magnus Damm wrote:
>> From: Magnus Damm <damm+renesas@opensource.se>
>>
>> Add support for up to 8 contexts. Each context is mapped to one
>> domain. One domain is assigned one or more slave devices. Contexts
>> are allocated dynamically and slave devices are grouped together
>> based on which IPMMU device they are connected to. This makes slave
>> devices tied to the same IPMMU device share the same IOVA space.
>>
>> Signed-off-by: Magnus Damm <damm+renesas@opensource.se>
>> ---
>>
>>  Changes since V2:
>>  - Updated patch description to reflect code included in:
>>    [PATCH v7 00/07] iommu/ipmmu-vmsa: IPMMU multi-arch update V7
>>
>>  Changes since V1:
>>  - Support up to 8 contexts instead of 4
>>  - Use feature flag and runtime handling
>>  - Default to single context
>>
>>  drivers/iommu/ipmmu-vmsa.c |   38 ++++++++++++++++++++++++++++++--------
>>  1 file changed, 30 insertions(+), 8 deletions(-)
>>
>> --- 0012/drivers/iommu/ipmmu-vmsa.c
>> +++ work/drivers/iommu/ipmmu-vmsa.c   2017-03-08 17:59:19.900607110 +0900
>> @@ -30,11 +30,12 @@
>>
>>  #include "io-pgtable.h"
>>
>> -#define IPMMU_CTX_MAX 1
>> +#define IPMMU_CTX_MAX 8
>>
>>  struct ipmmu_features {
>>       bool use_ns_alias_offset;
>>       bool has_cache_leaf_nodes;
>> +     bool has_eight_ctx;
>
> Wouldn't it be more sensible to just encode a number of contexts
> directly, if it isn't reported by the hardware itself? I'm just
> imagining future hardware generations... :P
>
> bool also_has_another_eight_ctx_on_top_of_that;
> bool wait_no_this_is_the_one_where_ctx_15_isnt_usable;

=)

Sure, I agree with you!

Please note that this is currently a mix of software and hardware
policy. On R-Car Gen2 (ARM32) the legacy code only uses a single
context for now but 4 contexts are supported by hardware according to
the data sheet. The remaining 3 contexts are untested at this point.
For R-Car Gen3 (ARM64) the hardware supports 8 contexts and this patch
enables all of them.

>>  };
>>
>>  struct ipmmu_vmsa_device {
>> @@ -44,6 +45,7 @@ struct ipmmu_vmsa_device {
>>       const struct ipmmu_features *features;
>>       bool is_leaf;
>>       unsigned int num_utlbs;
>> +     unsigned int num_ctx;
>>       spinlock_t lock;                        /* Protects ctx and domains[] */
>>       DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
>>       struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
>> @@ -376,11 +378,12 @@ static int ipmmu_domain_allocate_context
>>
>>       spin_lock_irqsave(&mmu->lock, flags);
>>
>> -     ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
>> -     if (ret != IPMMU_CTX_MAX) {
>> +     ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
>> +     if (ret != mmu->num_ctx) {
>>               mmu->domains[ret] = domain;
>>               set_bit(ret, mmu->ctx);
>
> Using test_and_set_bit() in a loop would avoid having to take a lock here.

So you mean that in case of test_and_set_bit() returns 1 then we try
find_first_zero_bit() again?

This is not really a performance sensitive part of the driver, so I'm
currently optimizing for code readability. I'm of course all for
dropping the lock, but I have a hard time figuring out how your
suggestion could result in semi-readable code. Any pointers? =)

>> @@ -1112,6 +1123,17 @@ static int ipmmu_probe(struct platform_d
>>       if (mmu->features->use_ns_alias_offset)
>>               mmu->base += IM_NS_ALIAS_OFFSET;
>>
>> +     /*
>> +      * The number of contexts varies with generation and instance.
>> +      * Newer SoCs get a total of 8 contexts enabled, older ones just one.
>> +      */
>> +     if (mmu->features->has_eight_ctx)
>> +             mmu->num_ctx = 8;
>> +     else
>> +             mmu->num_ctx = 1;
>> +
>> +     WARN_ON(mmu->num_ctx > IPMMU_CTX_MAX);
>
> The likelihood of that happening doesn't appear to warrant a runtime
> check. Especially one which probably isn't even generated because it's
> trivially resolvable to "if (false)..." at compile time.

Sure, I agree. Will drop.

Thanks,

/ magnus
diff mbox

Patch

--- 0012/drivers/iommu/ipmmu-vmsa.c
+++ work/drivers/iommu/ipmmu-vmsa.c	2017-03-08 17:59:19.900607110 +0900
@@ -30,11 +30,12 @@ 
 
 #include "io-pgtable.h"
 
-#define IPMMU_CTX_MAX 1
+#define IPMMU_CTX_MAX 8
 
 struct ipmmu_features {
 	bool use_ns_alias_offset;
 	bool has_cache_leaf_nodes;
+	bool has_eight_ctx;
 };
 
 struct ipmmu_vmsa_device {
@@ -44,6 +45,7 @@  struct ipmmu_vmsa_device {
 	const struct ipmmu_features *features;
 	bool is_leaf;
 	unsigned int num_utlbs;
+	unsigned int num_ctx;
 	spinlock_t lock;			/* Protects ctx and domains[] */
 	DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
 	struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
@@ -376,11 +378,12 @@  static int ipmmu_domain_allocate_context
 
 	spin_lock_irqsave(&mmu->lock, flags);
 
-	ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
-	if (ret != IPMMU_CTX_MAX) {
+	ret = find_first_zero_bit(mmu->ctx, mmu->num_ctx);
+	if (ret != mmu->num_ctx) {
 		mmu->domains[ret] = domain;
 		set_bit(ret, mmu->ctx);
-	}
+	} else
+		ret = -EBUSY;
 
 	spin_unlock_irqrestore(&mmu->lock, flags);
 
@@ -425,9 +428,9 @@  static int ipmmu_domain_init_context(str
 	 * Find an unused context.
 	 */
 	ret = ipmmu_domain_allocate_context(domain->root, domain);
-	if (ret == IPMMU_CTX_MAX) {
+	if (ret < 0) {
 		free_io_pgtable_ops(domain->iop);
-		return -EBUSY;
+		return ret;
 	}
 
 	domain->context_id = ret;
@@ -562,7 +565,7 @@  static irqreturn_t ipmmu_irq(int irq, vo
 	/*
 	 * Check interrupts for all active contexts.
 	 */
-	for (i = 0; i < IPMMU_CTX_MAX; i++) {
+	for (i = 0; i < mmu->num_ctx; i++) {
 		if (!mmu->domains[i])
 			continue;
 		if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
@@ -632,6 +635,13 @@  static int ipmmu_attach_device(struct io
 		domain->mmu = mmu;
 		domain->root = root;
 		ret = ipmmu_domain_init_context(domain);
+		if (ret < 0) {
+			dev_err(dev, "Unable to initialize IPMMU context\n");
+			domain->mmu = NULL;
+		} else {
+			dev_info(dev, "Using IPMMU context %u\n",
+				 domain->context_id);
+		}
 	} else if (domain->mmu != mmu) {
 		/*
 		 * Something is wrong, we can't attach two devices using
@@ -1047,13 +1057,14 @@  static void ipmmu_device_reset(struct ip
 	unsigned int i;
 
 	/* Disable all contexts. */
-	for (i = 0; i < 4; ++i)
+	for (i = 0; i < mmu->num_ctx; ++i)
 		ipmmu_write(mmu, i * IM_CTX_SIZE + IMCTR, 0);
 }
 
 static const struct ipmmu_features ipmmu_features_default = {
 	.use_ns_alias_offset = true,
 	.has_cache_leaf_nodes = false,
+	.has_eight_ctx = false,
 };
 
 static const struct of_device_id ipmmu_of_ids[] = {
@@ -1112,6 +1123,17 @@  static int ipmmu_probe(struct platform_d
 	if (mmu->features->use_ns_alias_offset)
 		mmu->base += IM_NS_ALIAS_OFFSET;
 
+	/*
+	 * The number of contexts varies with generation and instance.
+	 * Newer SoCs get a total of 8 contexts enabled, older ones just one.
+	 */
+	if (mmu->features->has_eight_ctx)
+		mmu->num_ctx = 8;
+	else
+		mmu->num_ctx = 1;
+
+	WARN_ON(mmu->num_ctx > IPMMU_CTX_MAX);
+
 	irq = platform_get_irq(pdev, 0);
 
 	/*