diff mbox

[kernel,v5,4/6] vfio/spapr: Postpone default window creation

Message ID 1478867537-27893-5-git-send-email-aik@ozlabs.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy Nov. 11, 2016, 12:32 p.m. UTC
As mentioned in the previous patch, we are going to allow the userspace
to configure container in one memory context and pass container fd to
another so we are postponing memory allocations accounted against
the locked memory limit. The previous patch took care of it_userspace.

At the moment we create the default DMA window when the first group is
attached to a container; this is done for the userspace which is not
DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
pre-registration - such client expects the default DMA window to exist.

This postpones the default DMA window allocation till first map/unmap
request happens.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 51 deletions(-)

Comments

David Gibson Nov. 22, 2016, 2:50 a.m. UTC | #1
On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote:
> As mentioned in the previous patch, we are going to allow the userspace
> to configure container in one memory context and pass container fd to
> another so we are postponing memory allocations accounted against
> the locked memory limit. The previous patch took care of it_userspace.
> 
> At the moment we create the default DMA window when the first group is
> attached to a container; this is done for the userspace which is not
> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
> pre-registration - such client expects the default DMA window to exist.
> 
> This postpones the default DMA window allocation till first map/unmap
> request happens.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
>  1 file changed, 47 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 442baac..1c02498 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -97,6 +97,7 @@ struct tce_container {
>  	struct mutex lock;
>  	bool enabled;
>  	bool v2;
> +	bool def_window_pending;
>  	unsigned long locked_pages;
>  	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
>  	struct list_head group_list;
> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container,
>  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
>  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
>  
> -	if (!ret && container->v2) {
> -		ret = tce_iommu_userspace_view_alloc(*ptbl);
> -		if (ret)
> -			(*ptbl)->it_ops->free(*ptbl);
> -	}

Does this stuff for the user view belong in the previous patch?

> -
> -	if (ret)
> -		decrement_locked_vm(table_size >> PAGE_SHIFT);
> -
>  	return ret;
>  }
>  
> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container,
>  	return 0;
>  }
>  
> +static long tce_iommu_create_default_window(struct tce_container *container)
> +{
> +	long ret;
> +	__u64 start_addr = 0;
> +	struct tce_iommu_group *tcegrp;
> +	struct iommu_table_group *table_group;
> +
> +	if (!tce_groups_attached(container))
> +		return -ENODEV;
> +
> +	tcegrp = list_first_entry(&container->group_list,
> +			struct tce_iommu_group, next);
> +	table_group = iommu_group_get_iommudata(tcegrp->grp);
> +	if (!table_group)
> +		return -ENODEV;
> +
> +	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
> +			table_group->tce32_size, 1, &start_addr);
> +	WARN_ON_ONCE(!ret && start_addr);
> +
> +	return ret;
> +}
> +
>  static long tce_iommu_ioctl(void *iommu_data,
>  				 unsigned int cmd, unsigned long arg)
>  {
> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>  				VFIO_DMA_MAP_FLAG_WRITE))
>  			return -EINVAL;
>  
> +		if (container->def_window_pending) {
> +			ret = tce_iommu_create_default_window(container);
> +			if (ret)
> +				return ret;
> +			container->def_window_pending = false;

Would it make sense to clear (and maybe test) def_window_pending
within create_default_window()?

> +		}
> +
>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>  		if (num < 0)
>  			return -ENXIO;
> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>  		if (param.flags)
>  			return -EINVAL;
>  
> +		if (container->def_window_pending) {
> +			ret = tce_iommu_create_default_window(container);
> +			if (ret)
> +				return ret;
> +			container->def_window_pending = false;
> +		}
> +
>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>  		if (num < 0)
>  			return -ENXIO;
> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data,
>  
>  		mutex_lock(&container->lock);
>  
> +		container->def_window_pending = false;

Uh.. why is it cleared here, without calling
tce_iommu_create_default_window() AFAICT?

>  		ret = tce_iommu_create_window(container, create.page_shift,
>  				create.window_size, create.levels,
>  				&create.start_addr);
> @@ -1030,6 +1061,12 @@ static long tce_iommu_ioctl(void *iommu_data,
>  		if (remove.flags)
>  			return -EINVAL;
>  
> +		if (container->def_window_pending && !remove.start_addr) {
> +			container->def_window_pending = false;
> +			return 0;
> +		}
> +		container->def_window_pending = false;
> +
>  		mutex_lock(&container->lock);
>  
>  		ret = tce_iommu_remove_window(container, remove.start_addr);
> @@ -1109,9 +1146,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
>  static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>  		struct iommu_table_group *table_group)
>  {
> -	long i, ret = 0;
> -	struct iommu_table *tbl = NULL;
> -
>  	if (!table_group->ops->create_table || !table_group->ops->set_window ||
>  			!table_group->ops->release_ownership) {
>  		WARN_ON_ONCE(1);
> @@ -1120,47 +1154,9 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>  
>  	table_group->ops->take_ownership(table_group);
>  
> -	/*
> -	 * If it the first group attached, check if there is
> -	 * a default DMA window and create one if none as
> -	 * the userspace expects it to exist.
> -	 */
> -	if (!tce_groups_attached(container) && !container->tables[0]) {
> -		ret = tce_iommu_create_table(container,
> -				table_group,
> -				0, /* window number */
> -				IOMMU_PAGE_SHIFT_4K,
> -				table_group->tce32_size,
> -				1, /* default levels */
> -				&tbl);
> -		if (ret)
> -			goto release_exit;
> -		else
> -			container->tables[0] = tbl;
> -	}
> -
> -	/* Set all windows to the new group */
> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
> -		tbl = container->tables[i];
> -
> -		if (!tbl)
> -			continue;
> -
> -		/* Set the default window to a new group */
> -		ret = table_group->ops->set_window(table_group, i, tbl);

Uh... nothing in the new code seems to replace these set_window (and
unset_window) calls.  What's up with that?

> -		if (ret)
> -			goto release_exit;
> -	}
> +	container->def_window_pending = true;
>  
>  	return 0;
> -
> -release_exit:
> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
> -		table_group->ops->unset_window(table_group, i);
> -
> -	table_group->ops->release_ownership(table_group);
> -
> -	return ret;
>  }
>  
>  static int tce_iommu_attach_group(void *iommu_data,
Alexey Kardashevskiy Nov. 22, 2016, 7:29 a.m. UTC | #2
On 22/11/16 13:50, David Gibson wrote:
> On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote:
>> As mentioned in the previous patch, we are going to allow the userspace
>> to configure container in one memory context and pass container fd to
>> another so we are postponing memory allocations accounted against
>> the locked memory limit. The previous patch took care of it_userspace.
>>
>> At the moment we create the default DMA window when the first group is
>> attached to a container; this is done for the userspace which is not
>> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
>> pre-registration - such client expects the default DMA window to exist.
>>
>> This postpones the default DMA window allocation till first map/unmap
>> request happens.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>  drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
>>  1 file changed, 47 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
>> index 442baac..1c02498 100644
>> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
>> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
>> @@ -97,6 +97,7 @@ struct tce_container {
>>  	struct mutex lock;
>>  	bool enabled;
>>  	bool v2;
>> +	bool def_window_pending;
>>  	unsigned long locked_pages;
>>  	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
>>  	struct list_head group_list;
>> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container,
>>  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
>>  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
>>  
>> -	if (!ret && container->v2) {
>> -		ret = tce_iommu_userspace_view_alloc(*ptbl);
>> -		if (ret)
>> -			(*ptbl)->it_ops->free(*ptbl);
>> -	}
> 
> Does this stuff for the user view belong in the previous patch?

Yes it does, my mistake, will fix.


> 
>> -
>> -	if (ret)
>> -		decrement_locked_vm(table_size >> PAGE_SHIFT);
>> -
>>  	return ret;
>>  }
>>  
>> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container,
>>  	return 0;
>>  }
>>  
>> +static long tce_iommu_create_default_window(struct tce_container *container)
>> +{
>> +	long ret;
>> +	__u64 start_addr = 0;
>> +	struct tce_iommu_group *tcegrp;
>> +	struct iommu_table_group *table_group;
>> +
>> +	if (!tce_groups_attached(container))
>> +		return -ENODEV;
>> +
>> +	tcegrp = list_first_entry(&container->group_list,
>> +			struct tce_iommu_group, next);
>> +	table_group = iommu_group_get_iommudata(tcegrp->grp);
>> +	if (!table_group)
>> +		return -ENODEV;
>> +
>> +	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
>> +			table_group->tce32_size, 1, &start_addr);
>> +	WARN_ON_ONCE(!ret && start_addr);
>> +
>> +	return ret;
>> +}
>> +
>>  static long tce_iommu_ioctl(void *iommu_data,
>>  				 unsigned int cmd, unsigned long arg)
>>  {
>> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>>  				VFIO_DMA_MAP_FLAG_WRITE))
>>  			return -EINVAL;
>>  
>> +		if (container->def_window_pending) {
>> +			ret = tce_iommu_create_default_window(container);
>> +			if (ret)
>> +				return ret;
>> +			container->def_window_pending = false;
> 
> Would it make sense to clear (and maybe test) def_window_pending
> within create_default_window()?

Dunno, matter of taste I suppose. I'll move it there.


> 
>> +		}
>> +
>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>>  		if (num < 0)
>>  			return -ENXIO;
>> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>>  		if (param.flags)
>>  			return -EINVAL;
>>  
>> +		if (container->def_window_pending) {
>> +			ret = tce_iommu_create_default_window(container);
>> +			if (ret)
>> +				return ret;
>> +			container->def_window_pending = false;
>> +		}
>> +
>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>>  		if (num < 0)
>>  			return -ENXIO;
>> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data,
>>  
>>  		mutex_lock(&container->lock);
>>  
>> +		container->def_window_pending = false;
> 
> Uh.. why is it cleared here, without calling
> tce_iommu_create_default_window() AFAICT?


It is a branch which creates new window, if we do not have a default
window, then it will be created as the result of this ioctl(), if there is
a default window, then the flag should be false already.




> 
>>  		ret = tce_iommu_create_window(container, create.page_shift,
>>  				create.window_size, create.levels,
>>  				&create.start_addr);
>> @@ -1030,6 +1061,12 @@ static long tce_iommu_ioctl(void *iommu_data,
>>  		if (remove.flags)
>>  			return -EINVAL;
>>  
>> +		if (container->def_window_pending && !remove.start_addr) {
>> +			container->def_window_pending = false;
>> +			return 0;
>> +		}
>> +		container->def_window_pending = false;
>> +
>>  		mutex_lock(&container->lock);
>>  
>>  		ret = tce_iommu_remove_window(container, remove.start_addr);
>> @@ -1109,9 +1146,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
>>  static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>>  		struct iommu_table_group *table_group)
>>  {
>> -	long i, ret = 0;
>> -	struct iommu_table *tbl = NULL;
>> -
>>  	if (!table_group->ops->create_table || !table_group->ops->set_window ||
>>  			!table_group->ops->release_ownership) {
>>  		WARN_ON_ONCE(1);
>> @@ -1120,47 +1154,9 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>>  
>>  	table_group->ops->take_ownership(table_group);
>>  
>> -	/*
>> -	 * If it the first group attached, check if there is
>> -	 * a default DMA window and create one if none as
>> -	 * the userspace expects it to exist.
>> -	 */
>> -	if (!tce_groups_attached(container) && !container->tables[0]) {
>> -		ret = tce_iommu_create_table(container,
>> -				table_group,
>> -				0, /* window number */
>> -				IOMMU_PAGE_SHIFT_4K,
>> -				table_group->tce32_size,
>> -				1, /* default levels */
>> -				&tbl);
>> -		if (ret)
>> -			goto release_exit;
>> -		else
>> -			container->tables[0] = tbl;
>> -	}
>> -
>> -	/* Set all windows to the new group */
>> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
>> -		tbl = container->tables[i];
>> -
>> -		if (!tbl)
>> -			continue;
>> -
>> -		/* Set the default window to a new group */
>> -		ret = table_group->ops->set_window(table_group, i, tbl);
> 
> Uh... nothing in the new code seems to replace these set_window (and
> unset_window) calls.  What's up with that?


tce_iommu_create_table() + set_window() is replaced with
tce_iommu_create_default_window() which calls tce_iommu_create_window()
which calls tce_iommu_create_table() + set_window().

I'll split this patch into 2 patches, one with the change I just explained
and one to postpone the default window creation.


> 
>> -		if (ret)
>> -			goto release_exit;
>> -	}
>> +	container->def_window_pending = true;
>>  
>>  	return 0;
>> -
>> -release_exit:
>> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
>> -		table_group->ops->unset_window(table_group, i);
>> -
>> -	table_group->ops->release_ownership(table_group);
>> -
>> -	return ret;
>>  }
>>  
>>  static int tce_iommu_attach_group(void *iommu_data,
>
David Gibson Nov. 23, 2016, 1:35 a.m. UTC | #3
On Tue, Nov 22, 2016 at 06:29:39PM +1100, Alexey Kardashevskiy wrote:
> On 22/11/16 13:50, David Gibson wrote:
> > On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote:
> >> As mentioned in the previous patch, we are going to allow the userspace
> >> to configure container in one memory context and pass container fd to
> >> another so we are postponing memory allocations accounted against
> >> the locked memory limit. The previous patch took care of it_userspace.
> >>
> >> At the moment we create the default DMA window when the first group is
> >> attached to a container; this is done for the userspace which is not
> >> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
> >> pre-registration - such client expects the default DMA window to exist.
> >>
> >> This postpones the default DMA window allocation till first map/unmap
> >> request happens.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >> ---
> >>  drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
> >>  1 file changed, 47 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> >> index 442baac..1c02498 100644
> >> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> >> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> >> @@ -97,6 +97,7 @@ struct tce_container {
> >>  	struct mutex lock;
> >>  	bool enabled;
> >>  	bool v2;
> >> +	bool def_window_pending;
> >>  	unsigned long locked_pages;
> >>  	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> >>  	struct list_head group_list;
> >> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container,
> >>  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
> >>  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
> >>  
> >> -	if (!ret && container->v2) {
> >> -		ret = tce_iommu_userspace_view_alloc(*ptbl);
> >> -		if (ret)
> >> -			(*ptbl)->it_ops->free(*ptbl);
> >> -	}
> > 
> > Does this stuff for the user view belong in the previous patch?
> 
> Yes it does, my mistake, will fix.
> 
> 
> > 
> >> -
> >> -	if (ret)
> >> -		decrement_locked_vm(table_size >> PAGE_SHIFT);
> >> -
> >>  	return ret;
> >>  }
> >>  
> >> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container,
> >>  	return 0;
> >>  }
> >>  
> >> +static long tce_iommu_create_default_window(struct tce_container *container)
> >> +{
> >> +	long ret;
> >> +	__u64 start_addr = 0;
> >> +	struct tce_iommu_group *tcegrp;
> >> +	struct iommu_table_group *table_group;
> >> +
> >> +	if (!tce_groups_attached(container))
> >> +		return -ENODEV;
> >> +
> >> +	tcegrp = list_first_entry(&container->group_list,
> >> +			struct tce_iommu_group, next);
> >> +	table_group = iommu_group_get_iommudata(tcegrp->grp);
> >> +	if (!table_group)
> >> +		return -ENODEV;
> >> +
> >> +	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
> >> +			table_group->tce32_size, 1, &start_addr);
> >> +	WARN_ON_ONCE(!ret && start_addr);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >>  static long tce_iommu_ioctl(void *iommu_data,
> >>  				 unsigned int cmd, unsigned long arg)
> >>  {
> >> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>  				VFIO_DMA_MAP_FLAG_WRITE))
> >>  			return -EINVAL;
> >>  
> >> +		if (container->def_window_pending) {
> >> +			ret = tce_iommu_create_default_window(container);
> >> +			if (ret)
> >> +				return ret;
> >> +			container->def_window_pending = false;
> > 
> > Would it make sense to clear (and maybe test) def_window_pending
> > within create_default_window()?
> 
> Dunno, matter of taste I suppose. I'll move it there.
> 
> 
> > 
> >> +		}
> >> +
> >>  		num = tce_iommu_find_table(container, param.iova, &tbl);
> >>  		if (num < 0)
> >>  			return -ENXIO;
> >> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>  		if (param.flags)
> >>  			return -EINVAL;
> >>  
> >> +		if (container->def_window_pending) {
> >> +			ret = tce_iommu_create_default_window(container);
> >> +			if (ret)
> >> +				return ret;
> >> +			container->def_window_pending = false;
> >> +		}
> >> +
> >>  		num = tce_iommu_find_table(container, param.iova, &tbl);
> >>  		if (num < 0)
> >>  			return -ENXIO;
> >> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>  
> >>  		mutex_lock(&container->lock);
> >>  
> >> +		container->def_window_pending = false;
> > 
> > Uh.. why is it cleared here, without calling
> > tce_iommu_create_default_window() AFAICT?
> 
> 
> It is a branch which creates new window, if we do not have a default
> window, then it will be created as the result of this ioctl(), if there is
> a default window, then the flag should be false already.

Um.. it will create *a* window, but not necessarily the default one.
Consider this scenario:

1. Container is opened
2. A group is attached
3. Userspace, expecting the default window to be in place, creates a
   second window
4. Mapping starts

Won't the above code mean that we create what userspace expected to be
the second window as the first window replacing the default one
instead?

> 
> 
> 
> 
> > 
> >>  		ret = tce_iommu_create_window(container, create.page_shift,
> >>  				create.window_size, create.levels,
> >>  				&create.start_addr);
> >> @@ -1030,6 +1061,12 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>  		if (remove.flags)
> >>  			return -EINVAL;
> >>  
> >> +		if (container->def_window_pending && !remove.start_addr) {
> >> +			container->def_window_pending = false;
> >> +			return 0;
> >> +		}
> >> +		container->def_window_pending = false;
> >> +
> >>  		mutex_lock(&container->lock);
> >>  
> >>  		ret = tce_iommu_remove_window(container, remove.start_addr);
> >> @@ -1109,9 +1146,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
> >>  static long tce_iommu_take_ownership_ddw(struct tce_container *container,
> >>  		struct iommu_table_group *table_group)
> >>  {
> >> -	long i, ret = 0;
> >> -	struct iommu_table *tbl = NULL;
> >> -
> >>  	if (!table_group->ops->create_table || !table_group->ops->set_window ||
> >>  			!table_group->ops->release_ownership) {
> >>  		WARN_ON_ONCE(1);
> >> @@ -1120,47 +1154,9 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
> >>  
> >>  	table_group->ops->take_ownership(table_group);
> >>  
> >> -	/*
> >> -	 * If it the first group attached, check if there is
> >> -	 * a default DMA window and create one if none as
> >> -	 * the userspace expects it to exist.
> >> -	 */
> >> -	if (!tce_groups_attached(container) && !container->tables[0]) {
> >> -		ret = tce_iommu_create_table(container,
> >> -				table_group,
> >> -				0, /* window number */
> >> -				IOMMU_PAGE_SHIFT_4K,
> >> -				table_group->tce32_size,
> >> -				1, /* default levels */
> >> -				&tbl);
> >> -		if (ret)
> >> -			goto release_exit;
> >> -		else
> >> -			container->tables[0] = tbl;
> >> -	}
> >> -
> >> -	/* Set all windows to the new group */
> >> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
> >> -		tbl = container->tables[i];
> >> -
> >> -		if (!tbl)
> >> -			continue;
> >> -
> >> -		/* Set the default window to a new group */
> >> -		ret = table_group->ops->set_window(table_group, i, tbl);
> > 
> > Uh... nothing in the new code seems to replace these set_window (and
> > unset_window) calls.  What's up with that?
> 
> 
> tce_iommu_create_table() + set_window() is replaced with
> tce_iommu_create_default_window() which calls tce_iommu_create_window()
> which calls tce_iommu_create_table() + set_window().
> 
> I'll split this patch into 2 patches, one with the change I just explained
> and one to postpone the default window creation.

Ah, right, I see.


> 
> 
> > 
> >> -		if (ret)
> >> -			goto release_exit;
> >> -	}
> >> +	container->def_window_pending = true;
> >>  
> >>  	return 0;
> >> -
> >> -release_exit:
> >> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
> >> -		table_group->ops->unset_window(table_group, i);
> >> -
> >> -	table_group->ops->release_ownership(table_group);
> >> -
> >> -	return ret;
> >>  }
> >>  
> >>  static int tce_iommu_attach_group(void *iommu_data,
> > 
> 
>
Alexey Kardashevskiy Nov. 23, 2016, 5:06 a.m. UTC | #4
On 23/11/16 12:35, David Gibson wrote:
> On Tue, Nov 22, 2016 at 06:29:39PM +1100, Alexey Kardashevskiy wrote:
>> On 22/11/16 13:50, David Gibson wrote:
>>> On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote:
>>>> As mentioned in the previous patch, we are going to allow the userspace
>>>> to configure container in one memory context and pass container fd to
>>>> another so we are postponing memory allocations accounted against
>>>> the locked memory limit. The previous patch took care of it_userspace.
>>>>
>>>> At the moment we create the default DMA window when the first group is
>>>> attached to a container; this is done for the userspace which is not
>>>> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
>>>> pre-registration - such client expects the default DMA window to exist.
>>>>
>>>> This postpones the default DMA window allocation till first map/unmap
>>>> request happens.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> ---
>>>>  drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
>>>>  1 file changed, 47 insertions(+), 51 deletions(-)
>>>>
>>>> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
>>>> index 442baac..1c02498 100644
>>>> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
>>>> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
>>>> @@ -97,6 +97,7 @@ struct tce_container {
>>>>  	struct mutex lock;
>>>>  	bool enabled;
>>>>  	bool v2;
>>>> +	bool def_window_pending;
>>>>  	unsigned long locked_pages;
>>>>  	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
>>>>  	struct list_head group_list;
>>>> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container,
>>>>  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
>>>>  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
>>>>  
>>>> -	if (!ret && container->v2) {
>>>> -		ret = tce_iommu_userspace_view_alloc(*ptbl);
>>>> -		if (ret)
>>>> -			(*ptbl)->it_ops->free(*ptbl);
>>>> -	}
>>>
>>> Does this stuff for the user view belong in the previous patch?
>>
>> Yes it does, my mistake, will fix.
>>
>>
>>>
>>>> -
>>>> -	if (ret)
>>>> -		decrement_locked_vm(table_size >> PAGE_SHIFT);
>>>> -
>>>>  	return ret;
>>>>  }
>>>>  
>>>> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container,
>>>>  	return 0;
>>>>  }
>>>>  
>>>> +static long tce_iommu_create_default_window(struct tce_container *container)
>>>> +{
>>>> +	long ret;
>>>> +	__u64 start_addr = 0;
>>>> +	struct tce_iommu_group *tcegrp;
>>>> +	struct iommu_table_group *table_group;
>>>> +
>>>> +	if (!tce_groups_attached(container))
>>>> +		return -ENODEV;
>>>> +
>>>> +	tcegrp = list_first_entry(&container->group_list,
>>>> +			struct tce_iommu_group, next);
>>>> +	table_group = iommu_group_get_iommudata(tcegrp->grp);
>>>> +	if (!table_group)
>>>> +		return -ENODEV;
>>>> +
>>>> +	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
>>>> +			table_group->tce32_size, 1, &start_addr);
>>>> +	WARN_ON_ONCE(!ret && start_addr);
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>>  static long tce_iommu_ioctl(void *iommu_data,
>>>>  				 unsigned int cmd, unsigned long arg)
>>>>  {
>>>> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>>>>  				VFIO_DMA_MAP_FLAG_WRITE))
>>>>  			return -EINVAL;
>>>>  
>>>> +		if (container->def_window_pending) {
>>>> +			ret = tce_iommu_create_default_window(container);
>>>> +			if (ret)
>>>> +				return ret;
>>>> +			container->def_window_pending = false;
>>>
>>> Would it make sense to clear (and maybe test) def_window_pending
>>> within create_default_window()?
>>
>> Dunno, matter of taste I suppose. I'll move it there.
>>
>>
>>>
>>>> +		}
>>>> +
>>>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>>>>  		if (num < 0)
>>>>  			return -ENXIO;
>>>> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data,
>>>>  		if (param.flags)
>>>>  			return -EINVAL;
>>>>  
>>>> +		if (container->def_window_pending) {
>>>> +			ret = tce_iommu_create_default_window(container);
>>>> +			if (ret)
>>>> +				return ret;
>>>> +			container->def_window_pending = false;
>>>> +		}
>>>> +
>>>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
>>>>  		if (num < 0)
>>>>  			return -ENXIO;
>>>> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data,
>>>>  
>>>>  		mutex_lock(&container->lock);
>>>>  
>>>> +		container->def_window_pending = false;
>>>
>>> Uh.. why is it cleared here, without calling
>>> tce_iommu_create_default_window() AFAICT?
>>
>>
>> It is a branch which creates new window, if we do not have a default
>> window, then it will be created as the result of this ioctl(), if there is
>> a default window, then the flag should be false already.
> 
> Um.. it will create *a* window, but not necessarily the default one.
> Consider this scenario:
> 
> 1. Container is opened
> 2. A group is attached
> 3. Userspace, expecting the default window to be in place, creates a
>    second window
> 4. Mapping starts
> 
> Won't the above code mean that we create what userspace expected to be
> the second window as the first window replacing the default one
> instead?

Uff. Yes, this is correct.

This is a bug in my initial design - I should not have created a "default"
window in the first place - it does not make sense at this level; and the
userspace (i.e. QEMU) removes all windows on reset anyway.

The only way QEMU will use this default window is when it is at 318f67ce1
"vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)" - memory
preregistration is there, but ddw is not yet but it is 2 patches later
where vfio_connect_container() unconditionally removes the default window.
Which is unlikely scenario.

Taking into account small number of DDW users (basically - QEMU v2.7.0),
does it still make sense to keep this default window code? The only
immediate problem I see here is QEMU will receive error from
vfio_spapr_remove_window() in vfio_connect_container() as there will be no
default window but I could workaround this.



> 
>>
>>
>>
>>
>>>
>>>>  		ret = tce_iommu_create_window(container, create.page_shift,
>>>>  				create.window_size, create.levels,
>>>>  				&create.start_addr);
>>>> @@ -1030,6 +1061,12 @@ static long tce_iommu_ioctl(void *iommu_data,
>>>>  		if (remove.flags)
>>>>  			return -EINVAL;
>>>>  
>>>> +		if (container->def_window_pending && !remove.start_addr) {
>>>> +			container->def_window_pending = false;
>>>> +			return 0;
>>>> +		}
>>>> +		container->def_window_pending = false;
>>>> +
>>>>  		mutex_lock(&container->lock);
>>>>  
>>>>  		ret = tce_iommu_remove_window(container, remove.start_addr);
>>>> @@ -1109,9 +1146,6 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
>>>>  static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>>>>  		struct iommu_table_group *table_group)
>>>>  {
>>>> -	long i, ret = 0;
>>>> -	struct iommu_table *tbl = NULL;
>>>> -
>>>>  	if (!table_group->ops->create_table || !table_group->ops->set_window ||
>>>>  			!table_group->ops->release_ownership) {
>>>>  		WARN_ON_ONCE(1);
>>>> @@ -1120,47 +1154,9 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
>>>>  
>>>>  	table_group->ops->take_ownership(table_group);
>>>>  
>>>> -	/*
>>>> -	 * If it the first group attached, check if there is
>>>> -	 * a default DMA window and create one if none as
>>>> -	 * the userspace expects it to exist.
>>>> -	 */
>>>> -	if (!tce_groups_attached(container) && !container->tables[0]) {
>>>> -		ret = tce_iommu_create_table(container,
>>>> -				table_group,
>>>> -				0, /* window number */
>>>> -				IOMMU_PAGE_SHIFT_4K,
>>>> -				table_group->tce32_size,
>>>> -				1, /* default levels */
>>>> -				&tbl);
>>>> -		if (ret)
>>>> -			goto release_exit;
>>>> -		else
>>>> -			container->tables[0] = tbl;
>>>> -	}
>>>> -
>>>> -	/* Set all windows to the new group */
>>>> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
>>>> -		tbl = container->tables[i];
>>>> -
>>>> -		if (!tbl)
>>>> -			continue;
>>>> -
>>>> -		/* Set the default window to a new group */
>>>> -		ret = table_group->ops->set_window(table_group, i, tbl);
>>>
>>> Uh... nothing in the new code seems to replace these set_window (and
>>> unset_window) calls.  What's up with that?
>>
>>
>> tce_iommu_create_table() + set_window() is replaced with
>> tce_iommu_create_default_window() which calls tce_iommu_create_window()
>> which calls tce_iommu_create_table() + set_window().
>>
>> I'll split this patch into 2 patches, one with the change I just explained
>> and one to postpone the default window creation.
> 
> Ah, right, I see.
> 
> 
>>
>>
>>>
>>>> -		if (ret)
>>>> -			goto release_exit;
>>>> -	}
>>>> +	container->def_window_pending = true;
>>>>  
>>>>  	return 0;
>>>> -
>>>> -release_exit:
>>>> -	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
>>>> -		table_group->ops->unset_window(table_group, i);
>>>> -
>>>> -	table_group->ops->release_ownership(table_group);
>>>> -
>>>> -	return ret;
>>>>  }
>>>>  
>>>>  static int tce_iommu_attach_group(void *iommu_data,
>>>
>>
>>
> 
> 
> 
>
David Gibson Nov. 24, 2016, 4:08 a.m. UTC | #5
On Wed, Nov 23, 2016 at 04:06:30PM +1100, Alexey Kardashevskiy wrote:
> On 23/11/16 12:35, David Gibson wrote:
> > On Tue, Nov 22, 2016 at 06:29:39PM +1100, Alexey Kardashevskiy wrote:
> >> On 22/11/16 13:50, David Gibson wrote:
> >>> On Fri, Nov 11, 2016 at 11:32:15PM +1100, Alexey Kardashevskiy wrote:
> >>>> As mentioned in the previous patch, we are going to allow the userspace
> >>>> to configure container in one memory context and pass container fd to
> >>>> another so we are postponing memory allocations accounted against
> >>>> the locked memory limit. The previous patch took care of it_userspace.
> >>>>
> >>>> At the moment we create the default DMA window when the first group is
> >>>> attached to a container; this is done for the userspace which is not
> >>>> DDW-aware but familiar with the SPAPR TCE IOMMU v2 in the part of memory
> >>>> pre-registration - such client expects the default DMA window to exist.
> >>>>
> >>>> This postpones the default DMA window allocation till first map/unmap
> >>>> request happens.
> >>>>
> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>> ---
> >>>>  drivers/vfio/vfio_iommu_spapr_tce.c | 98 ++++++++++++++++++-------------------
> >>>>  1 file changed, 47 insertions(+), 51 deletions(-)
> >>>>
> >>>> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
> >>>> index 442baac..1c02498 100644
> >>>> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> >>>> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> >>>> @@ -97,6 +97,7 @@ struct tce_container {
> >>>>  	struct mutex lock;
> >>>>  	bool enabled;
> >>>>  	bool v2;
> >>>> +	bool def_window_pending;
> >>>>  	unsigned long locked_pages;
> >>>>  	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> >>>>  	struct list_head group_list;
> >>>> @@ -594,15 +595,6 @@ static long tce_iommu_create_table(struct tce_container *container,
> >>>>  	WARN_ON(!ret && !(*ptbl)->it_ops->free);
> >>>>  	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
> >>>>  
> >>>> -	if (!ret && container->v2) {
> >>>> -		ret = tce_iommu_userspace_view_alloc(*ptbl);
> >>>> -		if (ret)
> >>>> -			(*ptbl)->it_ops->free(*ptbl);
> >>>> -	}
> >>>
> >>> Does this stuff for the user view belong in the previous patch?
> >>
> >> Yes it does, my mistake, will fix.
> >>
> >>
> >>>
> >>>> -
> >>>> -	if (ret)
> >>>> -		decrement_locked_vm(table_size >> PAGE_SHIFT);
> >>>> -
> >>>>  	return ret;
> >>>>  }
> >>>>  
> >>>> @@ -719,6 +711,29 @@ static long tce_iommu_remove_window(struct tce_container *container,
> >>>>  	return 0;
> >>>>  }
> >>>>  
> >>>> +static long tce_iommu_create_default_window(struct tce_container *container)
> >>>> +{
> >>>> +	long ret;
> >>>> +	__u64 start_addr = 0;
> >>>> +	struct tce_iommu_group *tcegrp;
> >>>> +	struct iommu_table_group *table_group;
> >>>> +
> >>>> +	if (!tce_groups_attached(container))
> >>>> +		return -ENODEV;
> >>>> +
> >>>> +	tcegrp = list_first_entry(&container->group_list,
> >>>> +			struct tce_iommu_group, next);
> >>>> +	table_group = iommu_group_get_iommudata(tcegrp->grp);
> >>>> +	if (!table_group)
> >>>> +		return -ENODEV;
> >>>> +
> >>>> +	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
> >>>> +			table_group->tce32_size, 1, &start_addr);
> >>>> +	WARN_ON_ONCE(!ret && start_addr);
> >>>> +
> >>>> +	return ret;
> >>>> +}
> >>>> +
> >>>>  static long tce_iommu_ioctl(void *iommu_data,
> >>>>  				 unsigned int cmd, unsigned long arg)
> >>>>  {
> >>>> @@ -809,6 +824,13 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>>>  				VFIO_DMA_MAP_FLAG_WRITE))
> >>>>  			return -EINVAL;
> >>>>  
> >>>> +		if (container->def_window_pending) {
> >>>> +			ret = tce_iommu_create_default_window(container);
> >>>> +			if (ret)
> >>>> +				return ret;
> >>>> +			container->def_window_pending = false;
> >>>
> >>> Would it make sense to clear (and maybe test) def_window_pending
> >>> within create_default_window()?
> >>
> >> Dunno, matter of taste I suppose. I'll move it there.
> >>
> >>
> >>>
> >>>> +		}
> >>>> +
> >>>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
> >>>>  		if (num < 0)
> >>>>  			return -ENXIO;
> >>>> @@ -872,6 +894,13 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>>>  		if (param.flags)
> >>>>  			return -EINVAL;
> >>>>  
> >>>> +		if (container->def_window_pending) {
> >>>> +			ret = tce_iommu_create_default_window(container);
> >>>> +			if (ret)
> >>>> +				return ret;
> >>>> +			container->def_window_pending = false;
> >>>> +		}
> >>>> +
> >>>>  		num = tce_iommu_find_table(container, param.iova, &tbl);
> >>>>  		if (num < 0)
> >>>>  			return -ENXIO;
> >>>> @@ -998,6 +1027,8 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>>>  
> >>>>  		mutex_lock(&container->lock);
> >>>>  
> >>>> +		container->def_window_pending = false;
> >>>
> >>> Uh.. why is it cleared here, without calling
> >>> tce_iommu_create_default_window() AFAICT?
> >>
> >>
> >> It is a branch which creates new window, if we do not have a default
> >> window, then it will be created as the result of this ioctl(), if there is
> >> a default window, then the flag should be false already.
> > 
> > Um.. it will create *a* window, but not necessarily the default one.
> > Consider this scenario:
> > 
> > 1. Container is opened
> > 2. A group is attached
> > 3. Userspace, expecting the default window to be in place, creates a
> >    second window
> > 4. Mapping starts
> > 
> > Won't the above code mean that we create what userspace expected to be
> > the second window as the first window replacing the default one
> > instead?
> 
> Uff. Yes, this is correct.
> 
> This is a bug in my initial design - I should not have created a "default"
> window in the first place - it does not make sense at this level; and the
> userspace (i.e. QEMU) removes all windows on reset anyway.
> 
> The only way QEMU will use this default window is when it is at 318f67ce1
> "vfio: spapr: Add DMA memory preregistering (SPAPR IOMMU v2)" - memory
> preregistration is there, but ddw is not yet but it is 2 patches later
> where vfio_connect_container() unconditionally removes the default window.
> Which is unlikely scenario.
> 
> Taking into account small number of DDW users (basically - QEMU v2.7.0),
> does it still make sense to keep this default window code? The only
> immediate problem I see here is QEMU will receive error from
> vfio_spapr_remove_window() in vfio_connect_container() as there will be no
> default window but I could workaround this.

Hmm.. I dislike changing the semantics now.

Can't you just
	if (def_window_pending)
		create_default_window()

Before completing the CREATE_WINDOW ioctl().  That should handle it,
no?
diff mbox

Patch

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 442baac..1c02498 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -97,6 +97,7 @@  struct tce_container {
 	struct mutex lock;
 	bool enabled;
 	bool v2;
+	bool def_window_pending;
 	unsigned long locked_pages;
 	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 	struct list_head group_list;
@@ -594,15 +595,6 @@  static long tce_iommu_create_table(struct tce_container *container,
 	WARN_ON(!ret && !(*ptbl)->it_ops->free);
 	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
 
-	if (!ret && container->v2) {
-		ret = tce_iommu_userspace_view_alloc(*ptbl);
-		if (ret)
-			(*ptbl)->it_ops->free(*ptbl);
-	}
-
-	if (ret)
-		decrement_locked_vm(table_size >> PAGE_SHIFT);
-
 	return ret;
 }
 
@@ -719,6 +711,29 @@  static long tce_iommu_remove_window(struct tce_container *container,
 	return 0;
 }
 
+static long tce_iommu_create_default_window(struct tce_container *container)
+{
+	long ret;
+	__u64 start_addr = 0;
+	struct tce_iommu_group *tcegrp;
+	struct iommu_table_group *table_group;
+
+	if (!tce_groups_attached(container))
+		return -ENODEV;
+
+	tcegrp = list_first_entry(&container->group_list,
+			struct tce_iommu_group, next);
+	table_group = iommu_group_get_iommudata(tcegrp->grp);
+	if (!table_group)
+		return -ENODEV;
+
+	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
+			table_group->tce32_size, 1, &start_addr);
+	WARN_ON_ONCE(!ret && start_addr);
+
+	return ret;
+}
+
 static long tce_iommu_ioctl(void *iommu_data,
 				 unsigned int cmd, unsigned long arg)
 {
@@ -809,6 +824,13 @@  static long tce_iommu_ioctl(void *iommu_data,
 				VFIO_DMA_MAP_FLAG_WRITE))
 			return -EINVAL;
 
+		if (container->def_window_pending) {
+			ret = tce_iommu_create_default_window(container);
+			if (ret)
+				return ret;
+			container->def_window_pending = false;
+		}
+
 		num = tce_iommu_find_table(container, param.iova, &tbl);
 		if (num < 0)
 			return -ENXIO;
@@ -872,6 +894,13 @@  static long tce_iommu_ioctl(void *iommu_data,
 		if (param.flags)
 			return -EINVAL;
 
+		if (container->def_window_pending) {
+			ret = tce_iommu_create_default_window(container);
+			if (ret)
+				return ret;
+			container->def_window_pending = false;
+		}
+
 		num = tce_iommu_find_table(container, param.iova, &tbl);
 		if (num < 0)
 			return -ENXIO;
@@ -998,6 +1027,8 @@  static long tce_iommu_ioctl(void *iommu_data,
 
 		mutex_lock(&container->lock);
 
+		container->def_window_pending = false;
+
 		ret = tce_iommu_create_window(container, create.page_shift,
 				create.window_size, create.levels,
 				&create.start_addr);
@@ -1030,6 +1061,12 @@  static long tce_iommu_ioctl(void *iommu_data,
 		if (remove.flags)
 			return -EINVAL;
 
+		if (container->def_window_pending && !remove.start_addr) {
+			container->def_window_pending = false;
+			return 0;
+		}
+		container->def_window_pending = false;
+
 		mutex_lock(&container->lock);
 
 		ret = tce_iommu_remove_window(container, remove.start_addr);
@@ -1109,9 +1146,6 @@  static void tce_iommu_release_ownership_ddw(struct tce_container *container,
 static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 		struct iommu_table_group *table_group)
 {
-	long i, ret = 0;
-	struct iommu_table *tbl = NULL;
-
 	if (!table_group->ops->create_table || !table_group->ops->set_window ||
 			!table_group->ops->release_ownership) {
 		WARN_ON_ONCE(1);
@@ -1120,47 +1154,9 @@  static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 
 	table_group->ops->take_ownership(table_group);
 
-	/*
-	 * If it the first group attached, check if there is
-	 * a default DMA window and create one if none as
-	 * the userspace expects it to exist.
-	 */
-	if (!tce_groups_attached(container) && !container->tables[0]) {
-		ret = tce_iommu_create_table(container,
-				table_group,
-				0, /* window number */
-				IOMMU_PAGE_SHIFT_4K,
-				table_group->tce32_size,
-				1, /* default levels */
-				&tbl);
-		if (ret)
-			goto release_exit;
-		else
-			container->tables[0] = tbl;
-	}
-
-	/* Set all windows to the new group */
-	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-		tbl = container->tables[i];
-
-		if (!tbl)
-			continue;
-
-		/* Set the default window to a new group */
-		ret = table_group->ops->set_window(table_group, i, tbl);
-		if (ret)
-			goto release_exit;
-	}
+	container->def_window_pending = true;
 
 	return 0;
-
-release_exit:
-	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
-		table_group->ops->unset_window(table_group, i);
-
-	table_group->ops->release_ownership(table_group);
-
-	return ret;
 }
 
 static int tce_iommu_attach_group(void *iommu_data,