diff mbox

[Xen-devel,RFC,1/3] xen/balloon: Allow allocating DMA buffers

Message ID 20180517082604.14828-2-andr2000@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Oleksandr Andrushchenko May 17, 2018, 8:26 a.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
 drivers/xen/balloon.c     | 214 +++++++++++++++++++++++++++++++-------
 drivers/xen/xen-balloon.c |   2 +
 include/xen/balloon.h     |  11 +-
 3 files changed, 188 insertions(+), 39 deletions(-)

Comments

Boris Ostrovsky May 18, 2018, 10:04 p.m. UTC | #1
On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>


A commit message would be useful.


>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
>  	for (i = 0; i < nr_pages; i++) {
> -		page = alloc_page(gfp);
> -		if (page == NULL) {
> -			nr_pages = i;
> -			state = BP_EAGAIN;
> -			break;
> +		if (ext_pages) {
> +			page = ext_pages[i];
> +		} else {
> +			page = alloc_page(gfp);
> +			if (page == NULL) {
> +				nr_pages = i;
> +				state = BP_EAGAIN;
> +				break;
> +			}
>  		}
>  		scrub_page(page);
>  		list_add(&page->lru, &pages);
> @@ -529,7 +565,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  	i = 0;
>  	list_for_each_entry_safe(page, tmp, &pages, lru) {
>  		/* XENMEM_decrease_reservation requires a GFN */
> -		frame_list[i++] = xen_page_to_gfn(page);
> +		frames[i++] = xen_page_to_gfn(page);
>  
>  #ifdef CONFIG_XEN_HAVE_PVMMU
>  		/*
> @@ -552,18 +588,22 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>  #endif
>  		list_del(&page->lru);
>  
> -		balloon_append(page);
> +		if (!ext_pages)
> +			balloon_append(page);


So what you are proposing is not really ballooning. You are just
piggybacking on existing interfaces, aren't you?

-boris
Oleksandr Andrushchenko May 21, 2018, 5:40 a.m. UTC | #2
On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
> A commit message would be useful.
Sure, v1 will have it
>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>>   	for (i = 0; i < nr_pages; i++) {
>> -		page = alloc_page(gfp);
>> -		if (page == NULL) {
>> -			nr_pages = i;
>> -			state = BP_EAGAIN;
>> -			break;
>> +		if (ext_pages) {
>> +			page = ext_pages[i];
>> +		} else {
>> +			page = alloc_page(gfp);
>> +			if (page == NULL) {
>> +				nr_pages = i;
>> +				state = BP_EAGAIN;
>> +				break;
>> +			}
>>   		}
>>   		scrub_page(page);
>>   		list_add(&page->lru, &pages);
>> @@ -529,7 +565,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   	i = 0;
>>   	list_for_each_entry_safe(page, tmp, &pages, lru) {
>>   		/* XENMEM_decrease_reservation requires a GFN */
>> -		frame_list[i++] = xen_page_to_gfn(page);
>> +		frames[i++] = xen_page_to_gfn(page);
>>   
>>   #ifdef CONFIG_XEN_HAVE_PVMMU
>>   		/*
>> @@ -552,18 +588,22 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>   #endif
>>   		list_del(&page->lru);
>>   
>> -		balloon_append(page);
>> +		if (!ext_pages)
>> +			balloon_append(page);
>
> So what you are proposing is not really ballooning. You are just
> piggybacking on existing interfaces, aren't you?
Sort of. Basically I need to call {increase|decrease}_reservation without
actually allocating ballooned pages.
Do you think I can simply EXPORT_SYMBOL for {increase|decrease}_reservation?
Any other suggestion?
> -boris
>
>
Thank you,
Oleksandr
Boris Ostrovsky May 21, 2018, 4:35 p.m. UTC | #3
On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> A commit message would be useful.
> Sure, v1 will have it
>>
>>> Signed-off-by: Oleksandr Andrushchenko
>>> <oleksandr_andrushchenko@epam.com>
>>>
>>>       for (i = 0; i < nr_pages; i++) {
>>> -        page = alloc_page(gfp);
>>> -        if (page == NULL) {
>>> -            nr_pages = i;
>>> -            state = BP_EAGAIN;
>>> -            break;
>>> +        if (ext_pages) {
>>> +            page = ext_pages[i];
>>> +        } else {
>>> +            page = alloc_page(gfp);
>>> +            if (page == NULL) {
>>> +                nr_pages = i;
>>> +                state = BP_EAGAIN;
>>> +                break;
>>> +            }
>>>           }
>>>           scrub_page(page);
>>>           list_add(&page->lru, &pages);
>>> @@ -529,7 +565,7 @@ static enum bp_state
>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>       i = 0;
>>>       list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>           /* XENMEM_decrease_reservation requires a GFN */
>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>> +        frames[i++] = xen_page_to_gfn(page);
>>>     #ifdef CONFIG_XEN_HAVE_PVMMU
>>>           /*
>>> @@ -552,18 +588,22 @@ static enum bp_state
>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>   #endif
>>>           list_del(&page->lru);
>>>   -        balloon_append(page);
>>> +        if (!ext_pages)
>>> +            balloon_append(page);
>>
>> So what you are proposing is not really ballooning. You are just
>> piggybacking on existing interfaces, aren't you?
> Sort of. Basically I need to {increase|decrease}_reservation, not
> actually
> allocating ballooned pages.
> Do you think I can simply EXPORT_SYMBOL for
> {increase|decrease}_reservation?
> Any other suggestion?


I am actually wondering how much of that code you end up reusing. You
pretty much create new code paths in both routines and common code ends
up being essentially the hypercall. So the question is --- would it make
sense to do all of this separately from the balloon driver?


-boris
Oleksandr Andrushchenko May 21, 2018, 5:32 p.m. UTC | #4
On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>> A commit message would be useful.
>> Sure, v1 will have it
>>>> Signed-off-by: Oleksandr Andrushchenko
>>>> <oleksandr_andrushchenko@epam.com>
>>>>
>>>>        for (i = 0; i < nr_pages; i++) {
>>>> -        page = alloc_page(gfp);
>>>> -        if (page == NULL) {
>>>> -            nr_pages = i;
>>>> -            state = BP_EAGAIN;
>>>> -            break;
>>>> +        if (ext_pages) {
>>>> +            page = ext_pages[i];
>>>> +        } else {
>>>> +            page = alloc_page(gfp);
>>>> +            if (page == NULL) {
>>>> +                nr_pages = i;
>>>> +                state = BP_EAGAIN;
>>>> +                break;
>>>> +            }
>>>>            }
>>>>            scrub_page(page);
>>>>            list_add(&page->lru, &pages);
>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>        i = 0;
>>>>        list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>            /* XENMEM_decrease_reservation requires a GFN */
>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>      #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>            /*
>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>    #endif
>>>>            list_del(&page->lru);
>>>>    -        balloon_append(page);
>>>> +        if (!ext_pages)
>>>> +            balloon_append(page);
>>> So what you are proposing is not really ballooning. You are just
>>> piggybacking on existing interfaces, aren't you?
>> Sort of. Basically I need to {increase|decrease}_reservation, not
>> actually
>> allocating ballooned pages.
>> Do you think I can simply EXPORT_SYMBOL for
>> {increase|decrease}_reservation?
>> Any other suggestion?
>
> I am actually wondering how much of that code you end up reusing. You
> pretty much create new code paths in both routines and common code ends
> up being essentially the hypercall.
Well, I hoped that it would be easier to maintain if I modify existing code
to support both use-cases, but I am also ok to create new routines if this
seems to be reasonable - please let me know
>   So the question is --- would it make
> sense to do all of this separately from the balloon driver?
This can be done, but which driver will host this code then? If we move from
the balloon driver, then this could go to either gntdev or grant-table.
What's your preference?
>
> -boris
Thank you,
Oleksandr
Boris Ostrovsky May 21, 2018, 6:53 p.m. UTC | #5
On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>> A commit message would be useful.
>>> Sure, v1 will have it
>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>
>>>>>        for (i = 0; i < nr_pages; i++) {
>>>>> -        page = alloc_page(gfp);
>>>>> -        if (page == NULL) {
>>>>> -            nr_pages = i;
>>>>> -            state = BP_EAGAIN;
>>>>> -            break;
>>>>> +        if (ext_pages) {
>>>>> +            page = ext_pages[i];
>>>>> +        } else {
>>>>> +            page = alloc_page(gfp);
>>>>> +            if (page == NULL) {
>>>>> +                nr_pages = i;
>>>>> +                state = BP_EAGAIN;
>>>>> +                break;
>>>>> +            }
>>>>>            }
>>>>>            scrub_page(page);
>>>>>            list_add(&page->lru, &pages);
>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>        i = 0;
>>>>>        list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>            /* XENMEM_decrease_reservation requires a GFN */
>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>      #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>            /*
>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>    #endif
>>>>>            list_del(&page->lru);
>>>>>    -        balloon_append(page);
>>>>> +        if (!ext_pages)
>>>>> +            balloon_append(page);
>>>> So what you are proposing is not really ballooning. You are just
>>>> piggybacking on existing interfaces, aren't you?
>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>> actually
>>> allocating ballooned pages.
>>> Do you think I can simply EXPORT_SYMBOL for
>>> {increase|decrease}_reservation?
>>> Any other suggestion?
>>
>> I am actually wondering how much of that code you end up reusing. You
>> pretty much create new code paths in both routines and common code ends
>> up being essentially the hypercall.
> Well, I hoped that it would be easier to maintain if I modify existing
> code
> to support both use-cases, but I am also ok to create new routines if
> this
> seems to be reasonable - please let me know
>>   So the question is --- would it make
>> sense to do all of this separately from the balloon driver?
> This can be done, but which driver will host this code then? If we
> move from
> the balloon driver, then this could go to either gntdev or grant-table.
> What's your preference?

A separate module?

Is there any use for this feature outside of your zero-copy DRM driver?

-boris
Oleksandr Andrushchenko May 21, 2018, 7:13 p.m. UTC | #6
On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>> A commit message would be useful.
>>>> Sure, v1 will have it
>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>
>>>>>>         for (i = 0; i < nr_pages; i++) {
>>>>>> -        page = alloc_page(gfp);
>>>>>> -        if (page == NULL) {
>>>>>> -            nr_pages = i;
>>>>>> -            state = BP_EAGAIN;
>>>>>> -            break;
>>>>>> +        if (ext_pages) {
>>>>>> +            page = ext_pages[i];
>>>>>> +        } else {
>>>>>> +            page = alloc_page(gfp);
>>>>>> +            if (page == NULL) {
>>>>>> +                nr_pages = i;
>>>>>> +                state = BP_EAGAIN;
>>>>>> +                break;
>>>>>> +            }
>>>>>>             }
>>>>>>             scrub_page(page);
>>>>>>             list_add(&page->lru, &pages);
>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>         i = 0;
>>>>>>         list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>             /* XENMEM_decrease_reservation requires a GFN */
>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>       #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>             /*
>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>     #endif
>>>>>>             list_del(&page->lru);
>>>>>>     -        balloon_append(page);
>>>>>> +        if (!ext_pages)
>>>>>> +            balloon_append(page);
>>>>> So what you are proposing is not really ballooning. You are just
>>>>> piggybacking on existing interfaces, aren't you?
>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>> actually
>>>> allocating ballooned pages.
>>>> Do you think I can simply EXPORT_SYMBOL for
>>>> {increase|decrease}_reservation?
>>>> Any other suggestion?
>>> I am actually wondering how much of that code you end up reusing. You
>>> pretty much create new code paths in both routines and common code ends
>>> up being essentially the hypercall.
>> Well, I hoped that it would be easier to maintain if I modify existing
>> code
>> to support both use-cases, but I am also ok to create new routines if
>> this
>> seems to be reasonable - please let me know
>>>    So the question is --- would it make
>>> sense to do all of this separately from the balloon driver?
>> This can be done, but which driver will host this code then? If we
>> move from
>> the balloon driver, then this could go to either gntdev or grant-table.
>> What's your preference?
> A separate module?

> Is there any use for this feature outside of your zero-copy DRM driver?
Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.

At the time I tried to upstream the zcopy driver it was discussed and
decided [1] that
it would be better if I removed all DRM-specific code and moved it to the Xen
drivers.
Thus, this RFC.

But it can also be implemented as a dedicated Xen dma-buf driver which 
will have all the
code from this RFC + a bit more (char/misc device handling at least).
This will also require a dedicated user-space library, just like 
libxengnttab.so
for gntdev (now I have all new IOCTLs covered there).

If the idea of a dedicated Xen dma-buf driver seems more attractive, we
can work toward this solution. BTW, I do support this idea, but was not
sure whether the Xen community would accept yet another driver which duplicates
quite a bit of code
from the existing gntdev/balloon/grant-table. And now, after this RFC, I
hope that all the pros
and cons of both a dedicated driver and a gntdev/balloon/grant-table
extension are
clearly visible and we can make a decision.

>
> -boris
Thank you,
Oleksandr
[1] https://lists.freedesktop.org/archives/dri-devel/2018-April/173163.html
Boris Ostrovsky May 21, 2018, 8:36 p.m. UTC | #7
On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>> A commit message would be useful.
>>>>> Sure, v1 will have it
>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>
>>>>>>>         for (i = 0; i < nr_pages; i++) {
>>>>>>> -        page = alloc_page(gfp);
>>>>>>> -        if (page == NULL) {
>>>>>>> -            nr_pages = i;
>>>>>>> -            state = BP_EAGAIN;
>>>>>>> -            break;
>>>>>>> +        if (ext_pages) {
>>>>>>> +            page = ext_pages[i];
>>>>>>> +        } else {
>>>>>>> +            page = alloc_page(gfp);
>>>>>>> +            if (page == NULL) {
>>>>>>> +                nr_pages = i;
>>>>>>> +                state = BP_EAGAIN;
>>>>>>> +                break;
>>>>>>> +            }
>>>>>>>             }
>>>>>>>             scrub_page(page);
>>>>>>>             list_add(&page->lru, &pages);
>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>         i = 0;
>>>>>>>         list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>             /* XENMEM_decrease_reservation requires a GFN */
>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>       #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>             /*
>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>     #endif
>>>>>>>             list_del(&page->lru);
>>>>>>>     -        balloon_append(page);
>>>>>>> +        if (!ext_pages)
>>>>>>> +            balloon_append(page);
>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>> piggybacking on existing interfaces, aren't you?
>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>> actually
>>>>> allocating ballooned pages.
>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>> {increase|decrease}_reservation?
>>>>> Any other suggestion?
>>>> I am actually wondering how much of that code you end up reusing. You
>>>> pretty much create new code paths in both routines and common code
>>>> ends
>>>> up being essentially the hypercall.
>>> Well, I hoped that it would be easier to maintain if I modify existing
>>> code
>>> to support both use-cases, but I am also ok to create new routines if
>>> this
>>> seems to be reasonable - please let me know
>>>>    So the question is --- would it make
>>>> sense to do all of this separately from the balloon driver?
>>> This can be done, but which driver will host this code then? If we
>>> move from
>>> the balloon driver, then this could go to either gntdev or grant-table.
>>> What's your preference?
>> A separate module?
>
>> Is there any use for this feature outside of your zero-copy DRM driver?
> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>
> At the time I tried to upstream zcopy driver it was discussed and
> decided that
> it would be better if I remove all DRM specific code and move it to
> Xen drivers.
> Thus, this RFC.
>
> But it can also be implemented as a dedicated Xen dma-buf driver which
> will have all the
> code from this RFC + a bit more (char/misc device handling at least).
> This will also require a dedicated user-space library, just like
> libxengnttab.so
> for gntdev (now I have all new IOCTLs covered there).
>
> If the idea of a dedicated Xen dma-buf driver seems to be more
> attractive we
> can work toward this solution. BTW, I do support this idea, but was not
> sure if Xen community accepts yet another driver which duplicates
> quite some code
> of the existing gntdev/balloon/grant-table. And now after this RFC I
> hope that all cons
> and pros of both dedicated driver and gntdev/balloon/grant-table
> extension are
> clearly seen and we can make a decision.


IIRC the objection to a separate module was raised in the context of the
gntdev discussion, because (among other things) people didn't want to have
yet another file in /dev/xen/

Here we are talking about (a new) balloon-like module which doesn't
create any new user-visible interfaces. And as for duplicating code ---
as I said, I am not convinced there is much duplication.

I might even argue that we should add a new config option for this module.


-boris

>
>>
>> -boris
> Thank you,
> Oleksandr
> [1]
> https://lists.freedesktop.org/archives/dri-devel/2018-April/173163.html
Oleksandr Andrushchenko May 22, 2018, 5:55 a.m. UTC | #8
On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>>> A commit message would be useful.
>>>>>> Sure, v1 will have it
>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>
>>>>>>>>          for (i = 0; i < nr_pages; i++) {
>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>> -        if (page == NULL) {
>>>>>>>> -            nr_pages = i;
>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>> -            break;
>>>>>>>> +        if (ext_pages) {
>>>>>>>> +            page = ext_pages[i];
>>>>>>>> +        } else {
>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>> +            if (page == NULL) {
>>>>>>>> +                nr_pages = i;
>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>> +                break;
>>>>>>>> +            }
>>>>>>>>              }
>>>>>>>>              scrub_page(page);
>>>>>>>>              list_add(&page->lru, &pages);
>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>          i = 0;
>>>>>>>>          list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>              /* XENMEM_decrease_reservation requires a GFN */
>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>        #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>              /*
>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>      #endif
>>>>>>>>              list_del(&page->lru);
>>>>>>>>      -        balloon_append(page);
>>>>>>>> +        if (!ext_pages)
>>>>>>>> +            balloon_append(page);
>>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>>> actually
>>>>>> allocating ballooned pages.
>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>> {increase|decrease}_reservation?
>>>>>> Any other suggestion?
>>>>> I am actually wondering how much of that code you end up reusing. You
>>>>> pretty much create new code paths in both routines and common code
>>>>> ends
>>>>> up being essentially the hypercall.
>>>> Well, I hoped that it would be easier to maintain if I modify existing
>>>> code
>>>> to support both use-cases, but I am also ok to create new routines if
>>>> this
>>>> seems to be reasonable - please let me know
>>>>>     So the question is --- would it make
>>>>> sense to do all of this separately from the balloon driver?
>>>> This can be done, but which driver will host this code then? If we
>>>> move from
>>>> the balloon driver, then this could go to either gntdev or grant-table.
>>>> What's your preference?
>>> A separate module?
>>> Is there any use for this feature outside of your zero-copy DRM driver?
>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>
>> At the time I tried to upstream zcopy driver it was discussed and
>> decided that
>> it would be better if I remove all DRM specific code and move it to
>> Xen drivers.
>> Thus, this RFC.
>>
>> But it can also be implemented as a dedicated Xen dma-buf driver which
>> will have all the
>> code from this RFC + a bit more (char/misc device handling at least).
>> This will also require a dedicated user-space library, just like
>> libxengnttab.so
>> for gntdev (now I have all new IOCTLs covered there).
>>
>> If the idea of a dedicated Xen dma-buf driver seems to be more
>> attractive we
>> can work toward this solution. BTW, I do support this idea, but was not
>> sure if Xen community accepts yet another driver which duplicates
>> quite some code
>> of the existing gntdev/balloon/grant-table. And now after this RFC I
>> hope that all cons
>> and pros of both dedicated driver and gntdev/balloon/grant-table
>> extension are
>> clearly seen and we can make a decision.
>
> IIRC the objection for a separate module was in the context of gntdev
> was discussion, because (among other things) people didn't want to have
> yet another file in /dev/xen/
>
> Here we are talking about (a new) balloon-like module which doesn't
> create any new user-visible interfaces. And as for duplicating code ---
> as I said, I am not convinced there is much of duplication.
>
> I might even argue that we should add a new config option for this module.
I am not quite sure I am fully following you here: so, you suggest
that we have balloon.c unchanged, but instead create a new
module (namely a file under the same folder as balloon.c, e.g.
dma-buf-reservation.c) and move those {increase|decrease}_reservation
routines (specific to dma-buf) to that new file? And make it selectable
via Kconfig? If so, then how about the changes to grant-table and gntdev?
Those will look inconsistent then.

If you suggest a new kernel driver module:
IMO, there is nothing bad if we create a dedicated kernel module
(driver) for Xen dma-buf handling selectable under Kconfig option.
Yes, this will create yet another device under /dev/xen,
but most people will never see it if we set Kconfig to default to "n".
And then we'll need user-space support for that, so Xen tools will
be extended with libxendmabuf.so or so.
This way all Xen dma-buf support can be localized at one place which
might be easier to maintain. What is more, it could be totally transparent
to most of us, as the Kconfig option won't be set by default (both kernel
and Xen).

Thank you,
Oleksandr
>
> -boris
>
>>> -boris
>> Thank you,
>> Oleksandr
>> [1]
>> https://lists.freedesktop.org/archives/dri-devel/2018-April/173163.html
Boris Ostrovsky May 22, 2018, 2:33 p.m. UTC | #9
On 05/22/2018 01:55 AM, Oleksandr Andrushchenko wrote:
> On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
>> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>>>> A commit message would be useful.
>>>>>>> Sure, v1 will have it
>>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>
>>>>>>>>>          for (i = 0; i < nr_pages; i++) {
>>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>>> -        if (page == NULL) {
>>>>>>>>> -            nr_pages = i;
>>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>>> -            break;
>>>>>>>>> +        if (ext_pages) {
>>>>>>>>> +            page = ext_pages[i];
>>>>>>>>> +        } else {
>>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>>> +            if (page == NULL) {
>>>>>>>>> +                nr_pages = i;
>>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>>> +                break;
>>>>>>>>> +            }
>>>>>>>>>              }
>>>>>>>>>              scrub_page(page);
>>>>>>>>>              list_add(&page->lru, &pages);
>>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>          i = 0;
>>>>>>>>>          list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>>              /* XENMEM_decrease_reservation requires a GFN */
>>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>>        #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>>              /*
>>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>      #endif
>>>>>>>>>              list_del(&page->lru);
>>>>>>>>>      -        balloon_append(page);
>>>>>>>>> +        if (!ext_pages)
>>>>>>>>> +            balloon_append(page);
>>>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>>>> actually
>>>>>>> allocating ballooned pages.
>>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>>> {increase|decrease}_reservation?
>>>>>>> Any other suggestion?
>>>>>> I am actually wondering how much of that code you end up reusing.
>>>>>> You
>>>>>> pretty much create new code paths in both routines and common code
>>>>>> ends
>>>>>> up being essentially the hypercall.
>>>>> Well, I hoped that it would be easier to maintain if I modify
>>>>> existing
>>>>> code
>>>>> to support both use-cases, but I am also ok to create new routines if
>>>>> this
>>>>> seems to be reasonable - please let me know
>>>>>>     So the question is --- would it make
>>>>>> sense to do all of this separately from the balloon driver?
>>>>> This can be done, but which driver will host this code then? If we
>>>>> move from
>>>>> the balloon driver, then this could go to either gntdev or
>>>>> grant-table.
>>>>> What's your preference?
>>>> A separate module?
>>>> Is there any use for this feature outside of your zero-copy DRM
>>>> driver?
>>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>>
>>> At the time I tried to upstream zcopy driver it was discussed and
>>> decided that
>>> it would be better if I remove all DRM specific code and move it to
>>> Xen drivers.
>>> Thus, this RFC.
>>>
>>> But it can also be implemented as a dedicated Xen dma-buf driver which
>>> will have all the
>>> code from this RFC + a bit more (char/misc device handling at least).
>>> This will also require a dedicated user-space library, just like
>>> libxengnttab.so
>>> for gntdev (now I have all new IOCTLs covered there).
>>>
>>> If the idea of a dedicated Xen dma-buf driver seems to be more
>>> attractive we
>>> can work toward this solution. BTW, I do support this idea, but was not
>>> sure if Xen community accepts yet another driver which duplicates
>>> quite some code
>>> of the existing gntdev/balloon/grant-table. And now after this RFC I
>>> hope that all cons
>>> and pros of both dedicated driver and gntdev/balloon/grant-table
>>> extension are
>>> clearly seen and we can make a decision.
>>
>> IIRC the objection for a separate module was in the context of gntdev
>> was discussion, because (among other things) people didn't want to have
>> yet another file in /dev/xen/
>>
>> Here we are talking about (a new) balloon-like module which doesn't
>> create any new user-visible interfaces. And as for duplicating code ---
>> as I said, I am not convinced there is much of duplication.
>>
>> I might even argue that we should add a new config option for this
>> module.
> I am not quite sure I am fully following you here: so, you suggest
> that we have balloon.c unchanged, but instead create a new
> module (namely a file under the same folder as balloon.c, e.g.
> dma-buf-reservation.c) and move those {increase|decrease}_reservation
> routines (specific to dma-buf) to that new file? And make it selectable
> via Kconfig? If so, then how about the changes to grant-table and gntdev?
> Those will look inconsistent then.

Inconsistent with what? The changes to grant code will also be under the
new config option.


>
> If you suggest a new kernel driver module:
> IMO, there is nothing bad if we create a dedicated kernel module
> (driver) for Xen dma-buf handling selectable under Kconfig option.
> Yes, this will create a yet another device under /dev/xen,
> but most people will never see it if we set Kconfig to default to "n".
> And then we'll need user-space support for that, so Xen tools will
> be extended with libxendmabuf.so or so.
> This way all Xen dma-buf support can be localized at one place which
> might be easier to maintain. What is more it could be totally transparent
> to most of us as Kconfig option won't be set by default (both kernel
> and Xen).


The downside is that we will end up having another device for doing
things that are not that different from what we are already doing with
existing gnttab device. Or are they?

-boris
Oleksandr Andrushchenko May 22, 2018, 3 p.m. UTC | #10
On 05/22/2018 05:33 PM, Boris Ostrovsky wrote:
> On 05/22/2018 01:55 AM, Oleksandr Andrushchenko wrote:
>> On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
>>> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>>>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>>>>> A commit message would be useful.
>>>>>>>> Sure, v1 will have it
>>>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>
>>>>>>>>>>           for (i = 0; i < nr_pages; i++) {
>>>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>>>> -        if (page == NULL) {
>>>>>>>>>> -            nr_pages = i;
>>>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>>>> -            break;
>>>>>>>>>> +        if (ext_pages) {
>>>>>>>>>> +            page = ext_pages[i];
>>>>>>>>>> +        } else {
>>>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>>>> +            if (page == NULL) {
>>>>>>>>>> +                nr_pages = i;
>>>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>>>> +                break;
>>>>>>>>>> +            }
>>>>>>>>>>               }
>>>>>>>>>>               scrub_page(page);
>>>>>>>>>>               list_add(&page->lru, &pages);
>>>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>           i = 0;
>>>>>>>>>>           list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>>>               /* XENMEM_decrease_reservation requires a GFN */
>>>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>>>         #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>>>               /*
>>>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>       #endif
>>>>>>>>>>               list_del(&page->lru);
>>>>>>>>>>       -        balloon_append(page);
>>>>>>>>>> +        if (!ext_pages)
>>>>>>>>>> +            balloon_append(page);
>>>>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>>>>> actually
>>>>>>>> allocating ballooned pages.
>>>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>>>> {increase|decrease}_reservation?
>>>>>>>> Any other suggestion?
>>>>>>> I am actually wondering how much of that code you end up reusing.
>>>>>>> You
>>>>>>> pretty much create new code paths in both routines and common code
>>>>>>> ends
>>>>>>> up being essentially the hypercall.
>>>>>> Well, I hoped that it would be easier to maintain if I modify
>>>>>> existing
>>>>>> code
>>>>>> to support both use-cases, but I am also ok to create new routines if
>>>>>> this
>>>>>> seems to be reasonable - please let me know
>>>>>>>      So the question is --- would it make
>>>>>>> sense to do all of this separately from the balloon driver?
>>>>>> This can be done, but which driver will host this code then? If we
>>>>>> move from
>>>>>> the balloon driver, then this could go to either gntdev or
>>>>>> grant-table.
>>>>>> What's your preference?
>>>>> A separate module?
>>>>> Is there any use for this feature outside of your zero-copy DRM
>>>>> driver?
>>>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>>>
>>>> At the time I tried to upstream zcopy driver it was discussed and
>>>> decided that
>>>> it would be better if I remove all DRM specific code and move it to
>>>> Xen drivers.
>>>> Thus, this RFC.
>>>>
>>>> But it can also be implemented as a dedicated Xen dma-buf driver which
>>>> will have all the
>>>> code from this RFC + a bit more (char/misc device handling at least).
>>>> This will also require a dedicated user-space library, just like
>>>> libxengnttab.so
>>>> for gntdev (now I have all new IOCTLs covered there).
>>>>
>>>> If the idea of a dedicated Xen dma-buf driver seems to be more
>>>> attractive we
>>>> can work toward this solution. BTW, I do support this idea, but was not
>>>> sure if Xen community accepts yet another driver which duplicates
>>>> quite some code
>>>> of the existing gntdev/balloon/grant-table. And now after this RFC I
>>>> hope that all cons
>>>> and pros of both dedicated driver and gntdev/balloon/grant-table
>>>> extension are
>>>> clearly seen and we can make a decision.
>>> IIRC the objection for a separate module was in the context of gntdev
>>> was discussion, because (among other things) people didn't want to have
>>> yet another file in /dev/xen/
>>>
>>> Here we are talking about (a new) balloon-like module which doesn't
>>> create any new user-visible interfaces. And as for duplicating code ---
>>> as I said, I am not convinced there is much of duplication.
>>>
>>> I might even argue that we should add a new config option for this
>>> module.
>> I am not quite sure I am fully following you here: so, you suggest
>> that we have balloon.c unchanged, but instead create a new
>> module (namely a file under the same folder as balloon.c, e.g.
>> dma-buf-reservation.c) and move those {increase|decrease}_reservation
>> routines (specific to dma-buf) to that new file? And make it selectable
>> via Kconfig? If so, then how about the changes to grant-table and gntdev?
>> Those will look inconsistent then.
> Inconsistent with what? The changes to grant code will also be under the
> new config option.
Ah, ok.

Option 1. We will have Kconfig option which will cover dma-buf
changes in balloon, grant-table and gntdev. And for that we will
create dedicated routines in balloon and grant-table (copy of
the existing ones, but modified to fit dma-buf use-case) and
those under something like "#if CONFIG_XEN_DMABUF"?
This is relatively easy to do for balloon/grant-table, but not that
easy for gntdev: there still seems to be lots of code which can be reused,
so I'll have to put lots of "#if CONFIG_XEN_DMABUF" there. Even more, I
change the interfaces of the existing gntdev routines, which won't look
cute with #if's, IMO.

Option 2. Try moving the dma-buf related changes from balloon and
grant-table to a new file. Then gntdev's Kconfig concerns from above
will still be there, but the balloon/grant-table functionality will be
localized in a new module.

Am I still missing your point here?

>
>> If you suggest a new kernel driver module:
>> IMO, there is nothing bad if we create a dedicated kernel module
>> (driver) for Xen dma-buf handling selectable under Kconfig option.
>> Yes, this will create a yet another device under /dev/xen,
>> but most people will never see it if we set Kconfig to default to "n".
>> And then we'll need user-space support for that, so Xen tools will
>> be extended with libxendmabuf.so or so.
>> This way all Xen dma-buf support can be localized at one place which
>> might be easier to maintain. What is more it could be totally transparent
>> to most of us as Kconfig option won't be set by default (both kernel
>> and Xen).
>
> The downside is that we will end up having another device for doing
> things that are not that different from what we are already doing with
> existing gnttab device. Or are they?
Agreed, but a Kconfig option, IMO, won't make it look nice because
of the gntdev changes and code reuse.
> -boris
Thank you,
Oleksandr
Boris Ostrovsky May 22, 2018, 6:02 p.m. UTC | #11
On 05/22/2018 11:00 AM, Oleksandr Andrushchenko wrote:
> On 05/22/2018 05:33 PM, Boris Ostrovsky wrote:
>> On 05/22/2018 01:55 AM, Oleksandr Andrushchenko wrote:
>>> On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
>>>> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>>>>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>>>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>>> From: Oleksandr Andrushchenko
>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>> A commit message would be useful.
>>>>>>>>> Sure, v1 will have it
>>>>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>>
>>>>>>>>>>>           for (i = 0; i < nr_pages; i++) {
>>>>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>>>>> -        if (page == NULL) {
>>>>>>>>>>> -            nr_pages = i;
>>>>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>>>>> -            break;
>>>>>>>>>>> +        if (ext_pages) {
>>>>>>>>>>> +            page = ext_pages[i];
>>>>>>>>>>> +        } else {
>>>>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>>>>> +            if (page == NULL) {
>>>>>>>>>>> +                nr_pages = i;
>>>>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>>>>> +                break;
>>>>>>>>>>> +            }
>>>>>>>>>>>               }
>>>>>>>>>>>               scrub_page(page);
>>>>>>>>>>>               list_add(&page->lru, &pages);
>>>>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>           i = 0;
>>>>>>>>>>>           list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>>>>               /* XENMEM_decrease_reservation requires a GFN */
>>>>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>>>>         #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>>>>               /*
>>>>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>       #endif
>>>>>>>>>>>               list_del(&page->lru);
>>>>>>>>>>>       -        balloon_append(page);
>>>>>>>>>>> +        if (!ext_pages)
>>>>>>>>>>> +            balloon_append(page);
>>>>>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>>>>>> actually
>>>>>>>>> allocating ballooned pages.
>>>>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>>>>> {increase|decrease}_reservation?
>>>>>>>>> Any other suggestion?
>>>>>>>> I am actually wondering how much of that code you end up reusing.
>>>>>>>> You
>>>>>>>> pretty much create new code paths in both routines and common code
>>>>>>>> ends
>>>>>>>> up being essentially the hypercall.
>>>>>>> Well, I hoped that it would be easier to maintain if I modify
>>>>>>> existing
>>>>>>> code
>>>>>>> to support both use-cases, but I am also ok to create new
>>>>>>> routines if
>>>>>>> this
>>>>>>> seems to be reasonable - please let me know
>>>>>>>>      So the question is --- would it make
>>>>>>>> sense to do all of this separately from the balloon driver?
>>>>>>> This can be done, but which driver will host this code then? If we
>>>>>>> move from
>>>>>>> the balloon driver, then this could go to either gntdev or
>>>>>>> grant-table.
>>>>>>> What's your preference?
>>>>>> A separate module?
>>>>>> Is there any use for this feature outside of your zero-copy DRM
>>>>>> driver?
>>>>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>>>>
>>>>> At the time I tried to upstream zcopy driver it was discussed and
>>>>> decided that
>>>>> it would be better if I remove all DRM specific code and move it to
>>>>> Xen drivers.
>>>>> Thus, this RFC.
>>>>>
>>>>> But it can also be implemented as a dedicated Xen dma-buf driver
>>>>> which
>>>>> will have all the
>>>>> code from this RFC + a bit more (char/misc device handling at least).
>>>>> This will also require a dedicated user-space library, just like
>>>>> libxengnttab.so
>>>>> for gntdev (now I have all new IOCTLs covered there).
>>>>>
>>>>> If the idea of a dedicated Xen dma-buf driver seems to be more
>>>>> attractive we
>>>>> can work toward this solution. BTW, I do support this idea, but
>>>>> was not
>>>>> sure if Xen community accepts yet another driver which duplicates
>>>>> quite some code
>>>>> of the existing gntdev/balloon/grant-table. And now after this RFC I
>>>>> hope that all cons
>>>>> and pros of both dedicated driver and gntdev/balloon/grant-table
>>>>> extension are
>>>>> clearly seen and we can make a decision.
>>>> IIRC the objection for a separate module was in the context of gntdev
>>>> was discussion, because (among other things) people didn't want to
>>>> have
>>>> yet another file in /dev/xen/
>>>>
>>>> Here we are talking about (a new) balloon-like module which doesn't
>>>> create any new user-visible interfaces. And as for duplicating code
>>>> ---
>>>> as I said, I am not convinced there is much of duplication.
>>>>
>>>> I might even argue that we should add a new config option for this
>>>> module.
>>> I am not quite sure I am fully following you here: so, you suggest
>>> that we have balloon.c unchanged, but instead create a new
>>> module (namely a file under the same folder as balloon.c, e.g.
>>> dma-buf-reservation.c) and move those {increase|decrease}_reservation
>>> routines (specific to dma-buf) to that new file? And make it selectable
>>> via Kconfig? If so, then how about the changes to grant-table and
>>> gntdev?
>>> Those will look inconsistent then.
>> Inconsistent with what? The changes to grant code will also be under the
>> new config option.
> Ah, ok.
>
> Option 1. We will have Kconfig option which will cover dma-buf
> changes in balloon, 

I really don't think your changes to the balloon driver belong there. They
have nothing to do with ballooning.

> grant-table and gntdev. And for that we will
> create dedicated routines in balloon and grant-table (copy of
> the existing ones, but modified to fit dma-buf use-case) and
> those under something like "#if CONFIG_XEN_DMABUF"?
> This is relatively easy to do for balloon/grant-table, but not that
> easy for gntdev: there still seems to be lots of code which can be
> reused,
> so I'll have to put lots of "#if CONFIG_XEN_DMABUF" there. Even more,
> I change
> interfaces of the existing gntdev routines which won't look cute with
> #if's, IMO.
>
> Option 2. Try moving dma-buf related changes from balloon and
> grant-table to a new file. Then gntdev's Kconfig concerns from above
> will still
> be there, but balloon/grant-table functionality will be localized in a
> new module.

I don't see a problem with leaving your code (from patch 2) where it is
now, in grant table. It's a small change and it seems to me a single
#ifdef/#endif would cover it, even if you factor out common code there
as we've discussed. To my eye it logically belongs there. Just like your
gntdev changes belong to gntdev file. (Presumably, because I haven't
actually looked at them ;-))

So my suggestion is
- separate module for your changes in balloon.c
- keep grant-table changes, with config option
- keep gntdev changes, with config option. (but when you get to post
actual patches I would appreciate if you could split this into a series
of logical changes and not post one giant patch).


-boris


>
> I am still missing your point here?
>
>>
>>> If you suggest a new kernel driver module:
>>> IMO, there is nothing bad if we create a dedicated kernel module
>>> (driver) for Xen dma-buf handling selectable under Kconfig option.
>>> Yes, this will create a yet another device under /dev/xen,
>>> but most people will never see it if we set Kconfig to default to "n".
>>> And then we'll need user-space support for that, so Xen tools will
>>> be extended with libxendmabuf.so or so.
>>> This way all Xen dma-buf support can be localized at one place which
>>> might be easier to maintain. What is more it could be totally
>>> transparent
>>> to most of us as Kconfig option won't be set by default (both kernel
>>> and Xen).
>>
>> The downside is that we will end up having another device for doing
>> things that are not that different from what we are already doing with
>> existing gnttab device. Or are they?
> Agree, but Kconfig option, IMO, won't make it look nice because
> of gntdev changes and code reuse.
>> -boris
> Thank you,
> Oleksandr
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xenproject.org
> https://lists.xenproject.org/mailman/listinfo/xen-devel
Oleksandr Andrushchenko May 22, 2018, 6:27 p.m. UTC | #12
On 05/22/2018 09:02 PM, Boris Ostrovsky wrote:
> On 05/22/2018 11:00 AM, Oleksandr Andrushchenko wrote:
>> On 05/22/2018 05:33 PM, Boris Ostrovsky wrote:
>>> On 05/22/2018 01:55 AM, Oleksandr Andrushchenko wrote:
>>>> On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
>>>>> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>>>>>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>>>>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>>>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>>>> From: Oleksandr Andrushchenko
>>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>> A commit message would be useful.
>>>>>>>>>> Sure, v1 will have it
>>>>>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>>>
>>>>>>>>>>>>            for (i = 0; i < nr_pages; i++) {
>>>>>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>>>>>> -        if (page == NULL) {
>>>>>>>>>>>> -            nr_pages = i;
>>>>>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>>>>>> -            break;
>>>>>>>>>>>> +        if (ext_pages) {
>>>>>>>>>>>> +            page = ext_pages[i];
>>>>>>>>>>>> +        } else {
>>>>>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>>>>>> +            if (page == NULL) {
>>>>>>>>>>>> +                nr_pages = i;
>>>>>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>>>>>> +                break;
>>>>>>>>>>>> +            }
>>>>>>>>>>>>                }
>>>>>>>>>>>>                scrub_page(page);
>>>>>>>>>>>>                list_add(&page->lru, &pages);
>>>>>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>>            i = 0;
>>>>>>>>>>>>            list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>>>>>                /* XENMEM_decrease_reservation requires a GFN */
>>>>>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>>>>>          #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>>>>>                /*
>>>>>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>>        #endif
>>>>>>>>>>>>                list_del(&page->lru);
>>>>>>>>>>>>        -        balloon_append(page);
>>>>>>>>>>>> +        if (!ext_pages)
>>>>>>>>>>>> +            balloon_append(page);
>>>>>>>>>>> So what you are proposing is not really ballooning. You are just
>>>>>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>>>>>> Sort of. Basically I need to {increase|decrease}_reservation, not
>>>>>>>>>> actually
>>>>>>>>>> allocating ballooned pages.
>>>>>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>>>>>> {increase|decrease}_reservation?
>>>>>>>>>> Any other suggestion?
>>>>>>>>> I am actually wondering how much of that code you end up reusing.
>>>>>>>>> You
>>>>>>>>> pretty much create new code paths in both routines and common code
>>>>>>>>> ends
>>>>>>>>> up being essentially the hypercall.
>>>>>>>> Well, I hoped that it would be easier to maintain if I modify
>>>>>>>> existing
>>>>>>>> code
>>>>>>>> to support both use-cases, but I am also ok to create new
>>>>>>>> routines if
>>>>>>>> this
>>>>>>>> seems to be reasonable - please let me know
>>>>>>>>>       So the question is --- would it make
>>>>>>>>> sense to do all of this separately from the balloon driver?
>>>>>>>> This can be done, but which driver will host this code then? If we
>>>>>>>> move from
>>>>>>>> the balloon driver, then this could go to either gntdev or
>>>>>>>> grant-table.
>>>>>>>> What's your preference?
>>>>>>> A separate module?
>>>>>>> Is there any use for this feature outside of your zero-copy DRM
>>>>>>> driver?
>>>>>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>>>>>
>>>>>> At the time I tried to upstream zcopy driver it was discussed and
>>>>>> decided that
>>>>>> it would be better if I remove all DRM specific code and move it to
>>>>>> Xen drivers.
>>>>>> Thus, this RFC.
>>>>>>
>>>>>> But it can also be implemented as a dedicated Xen dma-buf driver
>>>>>> which
>>>>>> will have all the
>>>>>> code from this RFC + a bit more (char/misc device handling at least).
>>>>>> This will also require a dedicated user-space library, just like
>>>>>> libxengnttab.so
>>>>>> for gntdev (now I have all new IOCTLs covered there).
>>>>>>
>>>>>> If the idea of a dedicated Xen dma-buf driver seems to be more
>>>>>> attractive we
>>>>>> can work toward this solution. BTW, I do support this idea, but
>>>>>> was not
>>>>>> sure if Xen community accepts yet another driver which duplicates
>>>>>> quite some code
>>>>>> of the existing gntdev/balloon/grant-table. And now after this RFC I
>>>>>> hope that all cons
>>>>>> and pros of both dedicated driver and gntdev/balloon/grant-table
>>>>>> extension are
>>>>>> clearly seen and we can make a decision.
>>>>> IIRC the objection for a separate module was in the context of gntdev
>>>>> was discussion, because (among other things) people didn't want to
>>>>> have
>>>>> yet another file in /dev/xen/
>>>>>
>>>>> Here we are talking about (a new) balloon-like module which doesn't
>>>>> create any new user-visible interfaces. And as for duplicating code
>>>>> ---
>>>>> as I said, I am not convinced there is much of duplication.
>>>>>
>>>>> I might even argue that we should add a new config option for this
>>>>> module.
>>>> I am not quite sure I am fully following you here: so, you suggest
>>>> that we have balloon.c unchanged, but instead create a new
>>>> module (namely a file under the same folder as balloon.c, e.g.
>>>> dma-buf-reservation.c) and move those {increase|decrease}_reservation
>>>> routines (specific to dma-buf) to that new file? And make it selectable
>>>> via Kconfig? If so, then how about the changes to grant-table and
>>>> gntdev?
>>>> Those will look inconsistent then.
>>> Inconsistent with what? The changes to grant code will also be under the
>>> new config option.
>> Ah, ok.
>>
>> Option 1. We will have Kconfig option which will cover dma-buf
>> changes in balloon,
> I really don't think your changes to balloon driver belong there. The
> have nothing to do with ballooning,
>
>> grant-table and gntdev. And for that we will
>> create dedicated routines in balloon and grant-table (copy of
>> the existing ones, but modified to fit dma-buf use-case) and
>> those under something like "#if CONFIG_XEN_DMABUF"?
>> This is relatively easy to do for balloon/grant-table, but not that
>> easy for gntdev: there still seems to be lots of code which can be
>> reused,
>> so I'll have to put lots of "#if CONFIG_XEN_DMABUF" there. Even more,
>> I change
>> interfaces of the existing gntdev routines which won't look cute with
>> #if's, IMO.
>>
>> Option 2. Try moving dma-buf related changes from balloon and
>> grant-table to a new file. Then gntdev's Kconfig concerns from above
>> will still
>> be there, but balloon/grant-table functionality will be localized in a
>> new module.
> I don't see a problem with leaving your code (from patch 2) where it is
> now, in grant table. It's a small change and it seems to me a single
> #ifdef/#endif would cover it, even if you factor out common code there
> as we've discussed. To my eye it logically belongs there. Just like your
> gntdev changes belong to gntdev file. (Presumably, because I haven't
> actually looked at them ;-))
>
> So my suggestion is
> - separate module for your changes in balloon.c
Ok, so, basically, the changes I need from the balloon driver are
{increase|decrease}_reservation and DMAable memory allocations, so
I'll move that into a separate file: what could be the name for such a file?

> - keep grant-table changes, with config option
Can we consider moving ex-balloon code into grant-table?

> - keep gntdev changes, with config option.
I'll try to see what happens to gntdev with Kconfig option wrt function 
prototype
changes. I also have to check if UAPI of gntdev can also support 
CONFIG_XXX ifdefs
w/o problems - do you by chance know if #if CONFIG_ is ok for UAPI files?
Or I can leave UAPI as is and ifdef in .ioctl callback.
>   (but when you get to post
> actual patches I would appreciate if you could split this into a series
> of logical changes and not post one giant patch).
Of course, as this is at RFC stage the idea was to roll out all the 
changes at once, so
everyone has the full picture and doesn't need to collect changes from a set 
of patches.
>
> -boris
>
Thank you,
Oleksandr
>> I am still missing your point here?
>>
>>>> If you suggest a new kernel driver module:
>>>> IMO, there is nothing bad if we create a dedicated kernel module
>>>> (driver) for Xen dma-buf handling selectable under Kconfig option.
>>>> Yes, this will create a yet another device under /dev/xen,
>>>> but most people will never see it if we set Kconfig to default to "n".
>>>> And then we'll need user-space support for that, so Xen tools will
>>>> be extended with libxendmabuf.so or so.
>>>> This way all Xen dma-buf support can be localized at one place which
>>>> might be easier to maintain. What is more it could be totally
>>>> transparent
>>>> to most of us as Kconfig option won't be set by default (both kernel
>>>> and Xen).
>>> The downside is that we will end up having another device for doing
>>> things that are not that different from what we are already doing with
>>> existing gnttab device. Or are they?
>> Agree, but Kconfig option, IMO, won't make it look nice because
>> of gntdev changes and code reuse.
>>> -boris
>> Thank you,
>> Oleksandr
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xenproject.org
>> https://lists.xenproject.org/mailman/listinfo/xen-devel
Boris Ostrovsky May 22, 2018, 7:09 p.m. UTC | #13
On 05/22/2018 02:27 PM, Oleksandr Andrushchenko wrote:
> On 05/22/2018 09:02 PM, Boris Ostrovsky wrote:
>> On 05/22/2018 11:00 AM, Oleksandr Andrushchenko wrote:
>>> On 05/22/2018 05:33 PM, Boris Ostrovsky wrote:
>>>> On 05/22/2018 01:55 AM, Oleksandr Andrushchenko wrote:
>>>>> On 05/21/2018 11:36 PM, Boris Ostrovsky wrote:
>>>>>> On 05/21/2018 03:13 PM, Oleksandr Andrushchenko wrote:
>>>>>>> On 05/21/2018 09:53 PM, Boris Ostrovsky wrote:
>>>>>>>> On 05/21/2018 01:32 PM, Oleksandr Andrushchenko wrote:
>>>>>>>>> On 05/21/2018 07:35 PM, Boris Ostrovsky wrote:
>>>>>>>>>> On 05/21/2018 01:40 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>>> On 05/19/2018 01:04 AM, Boris Ostrovsky wrote:
>>>>>>>>>>>> On 05/17/2018 04:26 AM, Oleksandr Andrushchenko wrote:
>>>>>>>>>>>>> From: Oleksandr Andrushchenko
>>>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>>> A commit message would be useful.
>>>>>>>>>>> Sure, v1 will have it
>>>>>>>>>>>>> Signed-off-by: Oleksandr Andrushchenko
>>>>>>>>>>>>> <oleksandr_andrushchenko@epam.com>
>>>>>>>>>>>>>
>>>>>>>>>>>>>            for (i = 0; i < nr_pages; i++) {
>>>>>>>>>>>>> -        page = alloc_page(gfp);
>>>>>>>>>>>>> -        if (page == NULL) {
>>>>>>>>>>>>> -            nr_pages = i;
>>>>>>>>>>>>> -            state = BP_EAGAIN;
>>>>>>>>>>>>> -            break;
>>>>>>>>>>>>> +        if (ext_pages) {
>>>>>>>>>>>>> +            page = ext_pages[i];
>>>>>>>>>>>>> +        } else {
>>>>>>>>>>>>> +            page = alloc_page(gfp);
>>>>>>>>>>>>> +            if (page == NULL) {
>>>>>>>>>>>>> +                nr_pages = i;
>>>>>>>>>>>>> +                state = BP_EAGAIN;
>>>>>>>>>>>>> +                break;
>>>>>>>>>>>>> +            }
>>>>>>>>>>>>>                }
>>>>>>>>>>>>>                scrub_page(page);
>>>>>>>>>>>>>                list_add(&page->lru, &pages);
>>>>>>>>>>>>> @@ -529,7 +565,7 @@ static enum bp_state
>>>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>>>            i = 0;
>>>>>>>>>>>>>            list_for_each_entry_safe(page, tmp, &pages, lru) {
>>>>>>>>>>>>>                /* XENMEM_decrease_reservation requires a
>>>>>>>>>>>>> GFN */
>>>>>>>>>>>>> -        frame_list[i++] = xen_page_to_gfn(page);
>>>>>>>>>>>>> +        frames[i++] = xen_page_to_gfn(page);
>>>>>>>>>>>>>          #ifdef CONFIG_XEN_HAVE_PVMMU
>>>>>>>>>>>>>                /*
>>>>>>>>>>>>> @@ -552,18 +588,22 @@ static enum bp_state
>>>>>>>>>>>>> decrease_reservation(unsigned long nr_pages, gfp_t gfp)
>>>>>>>>>>>>>        #endif
>>>>>>>>>>>>>                list_del(&page->lru);
>>>>>>>>>>>>>        -        balloon_append(page);
>>>>>>>>>>>>> +        if (!ext_pages)
>>>>>>>>>>>>> +            balloon_append(page);
>>>>>>>>>>>> So what you are proposing is not really ballooning. You are
>>>>>>>>>>>> just
>>>>>>>>>>>> piggybacking on existing interfaces, aren't you?
>>>>>>>>>>> Sort of. Basically I need to
>>>>>>>>>>> {increase|decrease}_reservation, not
>>>>>>>>>>> actually
>>>>>>>>>>> allocating ballooned pages.
>>>>>>>>>>> Do you think I can simply EXPORT_SYMBOL for
>>>>>>>>>>> {increase|decrease}_reservation?
>>>>>>>>>>> Any other suggestion?
>>>>>>>>>> I am actually wondering how much of that code you end up
>>>>>>>>>> reusing.
>>>>>>>>>> You
>>>>>>>>>> pretty much create new code paths in both routines and common
>>>>>>>>>> code
>>>>>>>>>> ends
>>>>>>>>>> up being essentially the hypercall.
>>>>>>>>> Well, I hoped that it would be easier to maintain if I modify
>>>>>>>>> existing
>>>>>>>>> code
>>>>>>>>> to support both use-cases, but I am also ok to create new
>>>>>>>>> routines if
>>>>>>>>> this
>>>>>>>>> seems to be reasonable - please let me know
>>>>>>>>>>       So the question is --- would it make
>>>>>>>>>> sense to do all of this separately from the balloon driver?
>>>>>>>>> This can be done, but which driver will host this code then?
>>>>>>>>> If we
>>>>>>>>> move from
>>>>>>>>> the balloon driver, then this could go to either gntdev or
>>>>>>>>> grant-table.
>>>>>>>>> What's your preference?
>>>>>>>> A separate module?
>>>>>>>> Is there any use for this feature outside of your zero-copy DRM
>>>>>>>> driver?
>>>>>>> Intel's hyper dma-buf (Dongwon/Matt CC'ed), V4L/GPU at least.
>>>>>>>
>>>>>>> At the time I tried to upstream zcopy driver it was discussed and
>>>>>>> decided that
>>>>>>> it would be better if I remove all DRM specific code and move it to
>>>>>>> Xen drivers.
>>>>>>> Thus, this RFC.
>>>>>>>
>>>>>>> But it can also be implemented as a dedicated Xen dma-buf driver
>>>>>>> which
>>>>>>> will have all the
>>>>>>> code from this RFC + a bit more (char/misc device handling at
>>>>>>> least).
>>>>>>> This will also require a dedicated user-space library, just like
>>>>>>> libxengnttab.so
>>>>>>> for gntdev (now I have all new IOCTLs covered there).
>>>>>>>
>>>>>>> If the idea of a dedicated Xen dma-buf driver seems to be more
>>>>>>> attractive we
>>>>>>> can work toward this solution. BTW, I do support this idea, but
>>>>>>> was not
>>>>>>> sure if Xen community accepts yet another driver which duplicates
>>>>>>> quite some code
>>>>>>> of the existing gntdev/balloon/grant-table. And now after this
>>>>>>> RFC I
>>>>>>> hope that all cons
>>>>>>> and pros of both dedicated driver and gntdev/balloon/grant-table
>>>>>>> extension are
>>>>>>> clearly seen and we can make a decision.
>>>>>> IIRC the objection to a separate module was in the context of the
>>>>>> gntdev
>>>>>> discussion, because (among other things) people didn't want to
>>>>>> have
>>>>>> yet another file in /dev/xen/
>>>>>>
>>>>>> Here we are talking about (a new) balloon-like module which doesn't
>>>>>> create any new user-visible interfaces. And as for duplicating code
>>>>>> ---
>>>>>> as I said, I am not convinced there is much of duplication.
>>>>>>
>>>>>> I might even argue that we should add a new config option for this
>>>>>> module.
>>>>> I am not quite sure I am fully following you here: so, you suggest
>>>>> that we have balloon.c unchanged, but instead create a new
>>>>> module (namely a file under the same folder as balloon.c, e.g.
>>>>> dma-buf-reservation.c) and move those {increase|decrease}_reservation
>>>>> routines (specific to dma-buf) to that new file? And make it
>>>>> selectable
>>>>> via Kconfig? If so, then how about the changes to grant-table and
>>>>> gntdev?
>>>>> Those will look inconsistent then.
>>>> Inconsistent with what? The changes to grant code will also be
>>>> under the
>>>> new config option.
>>> Ah, ok.
>>>
>>> Option 1. We will have Kconfig option which will cover dma-buf
>>> changes in balloon,
>> I really don't think your changes to balloon driver belong there. They
>> have nothing to do with ballooning,
>>
>>> grant-table and gntdev. And for that we will
>>> create dedicated routines in balloon and grant-table (copy of
>>> the existing ones, but modified to fit dma-buf use-case) and
>>> those under something like "#if CONFIG_XEN_DMABUF"?
>>> This is relatively easy to do for balloon/grant-table, but not that
>>> easy for gntdev: there still seems to be lots of code which can be
>>> reused,
>>> so I'll have to put lots of "#if CONFIG_XEN_DMABUF" there. Even more,
>>> I change
>>> interfaces of the existing gntdev routines which won't look cute with
>>> #if's, IMO.
>>>
>>> Option 2. Try moving dma-buf related changes from balloon and
>>> grant-table to a new file. Then gntdev's Kconfig concerns from above
>>> will still
>>> be there, but balloon/grant-table functionality will be localized in a
>>> new module.
>> I don't see a problem with leaving your code (from patch 2) where it is
>> now, in grant table. It's a small change and it seems to me a single
>> #ifdef/#endif would cover it, even if you factor out common code there
>> as we've discussed. To my eye it logically belongs there. Just like your
>> gntdev changes belong to gntdev file. (Presumably, because I haven't
>> actually looked at them ;-))
>>
>> So my suggestion is
>> - separate module for your changes in balloon.c
> Ok, so, basically, the changes I need from the balloon driver are
> {increase|decrease}_reservation and DMAable memory allocations, so
> I'll move that into a separate file: what could be the name for such a
> file?


Naming would be your job ;-)


>
>> - keep grant-table changes, with config option
> Can we consider moving ex-balloon code into grant-table?

On second thought --- yes, if the code is compact enough, which I
think it is, you should be able to keep it there.


>
>> - keep gntdev changes, with config option.
> I'll try to see what happens to gntdev with Kconfig option wrt
> function prototype
> changes. I also have to check if UAPI of gntdev can also support
> CONFIG_XXX ifdefs
> w/o problems - do you by chance know if #if CONFIG_ is ok for UAPI files?


I would think not, but:

ostr@workbase> git grep "#ifdef CONFIG_" include/uapi/
include/uapi/asm-generic/mman-common.h:#ifdef
CONFIG_MMAP_ALLOW_UNINITIALIZED
include/uapi/linux/atmdev.h:#ifdef CONFIG_COMPAT
include/uapi/linux/elfcore.h:#ifdef CONFIG_BINFMT_ELF_FDPIC
include/uapi/linux/eventpoll.h:#ifdef CONFIG_PM_SLEEP
include/uapi/linux/fb.h:#ifdef CONFIG_FB_BACKLIGHT
include/uapi/linux/flat.h:#ifdef CONFIG_BINFMT_SHARED_FLAT
include/uapi/linux/hw_breakpoint.h:#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
ostr@workbase>


-boris


> Or I can leave UAPI as is and ifdef in .ioctl callback.
>>   (but when you get to post
>> actual patches I would appreciate if you could split this into a series
>> of logical changes and not post one giant patch).
> Of course, as this is at RFC stage the idea was to roll out all the
> changes at once, so
> everyone has the full picture and doesn't need to collect changes from
> a set of patches.
>>
>> -boris
>>
> Thank you,
> Oleksandr
>>> I am still missing your point here?
>>>
>>>>> If you suggest a new kernel driver module:
>>>>> IMO, there is nothing bad if we create a dedicated kernel module
>>>>> (driver) for Xen dma-buf handling selectable under Kconfig option.
>>>>> Yes, this will create a yet another device under /dev/xen,
>>>>> but most people will never see it if we set Kconfig to default to
>>>>> "n".
>>>>> And then we'll need user-space support for that, so Xen tools will
>>>>> be extended with libxendmabuf.so or so.
>>>>> This way all Xen dma-buf support can be localized at one place which
>>>>> might be easier to maintain. What is more it could be totally
>>>>> transparent
>>>>> to most of us as Kconfig option won't be set by default (both kernel
>>>>> and Xen).
>>>> The downside is that we will end up having another device for doing
>>>> things that are not that different from what we are already doing with
>>>> existing gnttab device. Or are they?
>>> Agree, but Kconfig option, IMO, won't make it look nice because
>>> of gntdev changes and code reuse.
>>>> -boris
>>> Thank you,
>>> Oleksandr
>>>
>>> _______________________________________________
>>> Xen-devel mailing list
>>> Xen-devel@lists.xenproject.org
>>> https://lists.xenproject.org/mailman/listinfo/xen-devel
>
diff mbox

Patch

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index e4db19e88ab1..e3a145aa9f29 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -415,8 +415,10 @@  static bool balloon_is_inflated(void)
 	return balloon_stats.balloon_low || balloon_stats.balloon_high;
 }
 
-static enum bp_state increase_reservation(unsigned long nr_pages)
+static enum bp_state increase_reservation(unsigned long nr_pages,
+					  struct page **ext_pages)
 {
+	enum bp_state ret = BP_DONE;
 	int rc;
 	unsigned long i;
 	struct page   *page;
@@ -425,32 +427,49 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 		.extent_order = EXTENT_ORDER,
 		.domid        = DOMID_SELF
 	};
+	xen_pfn_t *frames;
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	if (nr_pages > ARRAY_SIZE(frame_list)) {
+		frames = kcalloc(nr_pages, sizeof(xen_pfn_t), GFP_KERNEL);
+		if (!frames)
+			return BP_ECANCELED;
+	} else {
+		frames = frame_list;
+	}
 
-	page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
-	for (i = 0; i < nr_pages; i++) {
-		if (!page) {
-			nr_pages = i;
-			break;
-		}
+	/* XENMEM_populate_physmap requires a PFN based on Xen
+	 * granularity.
+	 */
+	if (ext_pages) {
+		for (i = 0; i < nr_pages; i++)
+			frames[i] = page_to_xen_pfn(ext_pages[i]);
+	} else {
+		page = list_first_entry_or_null(&ballooned_pages,
+						struct page, lru);
+		for (i = 0; i < nr_pages; i++) {
+			if (!page) {
+				nr_pages = i;
+				break;
+			}
 
-		/* XENMEM_populate_physmap requires a PFN based on Xen
-		 * granularity.
-		 */
-		frame_list[i] = page_to_xen_pfn(page);
-		page = balloon_next_page(page);
+			frames[i] = page_to_xen_pfn(page);
+			page = balloon_next_page(page);
+		}
 	}
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
+	set_xen_guest_handle(reservation.extent_start, frames);
 	reservation.nr_extents = nr_pages;
 	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-	if (rc <= 0)
-		return BP_EAGAIN;
+	if (rc <= 0) {
+		ret = BP_EAGAIN;
+		goto out;
+	}
 
 	for (i = 0; i < rc; i++) {
-		page = balloon_retrieve(false);
+		if (ext_pages)
+			page = ext_pages[i];
+		else
+			page = balloon_retrieve(false);
 		BUG_ON(page == NULL);
 
 #ifdef CONFIG_XEN_HAVE_PVMMU
@@ -463,14 +482,14 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 			unsigned long pfn = page_to_pfn(page);
 
-			set_phys_to_machine(pfn, frame_list[i]);
+			set_phys_to_machine(pfn, frames[i]);
 
 			/* Link back into the page tables if not highmem. */
 			if (!PageHighMem(page)) {
 				int ret;
 				ret = HYPERVISOR_update_va_mapping(
 						(unsigned long)__va(pfn << PAGE_SHIFT),
-						mfn_pte(frame_list[i], PAGE_KERNEL),
+						mfn_pte(frames[i], PAGE_KERNEL),
 						0);
 				BUG_ON(ret);
 			}
@@ -478,15 +497,22 @@  static enum bp_state increase_reservation(unsigned long nr_pages)
 #endif
 
 		/* Relinquish the page back to the allocator. */
-		__free_reserved_page(page);
+		if (!ext_pages)
+			__free_reserved_page(page);
 	}
 
-	balloon_stats.current_pages += rc;
+	if (!ext_pages)
+		balloon_stats.current_pages += rc;
 
-	return BP_DONE;
+out:
+	if (frames != frame_list)
+		kfree(frames);
+
+	return ret;
 }
 
-static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
+static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp,
+					  struct page **ext_pages)
 {
 	enum bp_state state = BP_DONE;
 	unsigned long i;
@@ -498,16 +524,26 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 		.domid        = DOMID_SELF
 	};
 	LIST_HEAD(pages);
+	xen_pfn_t *frames;
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	if (nr_pages > ARRAY_SIZE(frame_list)) {
+		frames = kcalloc(nr_pages, sizeof(xen_pfn_t), GFP_KERNEL);
+		if (!frames)
+			return BP_ECANCELED;
+	} else {
+		frames = frame_list;
+	}
 
 	for (i = 0; i < nr_pages; i++) {
-		page = alloc_page(gfp);
-		if (page == NULL) {
-			nr_pages = i;
-			state = BP_EAGAIN;
-			break;
+		if (ext_pages) {
+			page = ext_pages[i];
+		} else {
+			page = alloc_page(gfp);
+			if (page == NULL) {
+				nr_pages = i;
+				state = BP_EAGAIN;
+				break;
+			}
 		}
 		scrub_page(page);
 		list_add(&page->lru, &pages);
@@ -529,7 +565,7 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	i = 0;
 	list_for_each_entry_safe(page, tmp, &pages, lru) {
 		/* XENMEM_decrease_reservation requires a GFN */
-		frame_list[i++] = xen_page_to_gfn(page);
+		frames[i++] = xen_page_to_gfn(page);
 
 #ifdef CONFIG_XEN_HAVE_PVMMU
 		/*
@@ -552,18 +588,22 @@  static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 #endif
 		list_del(&page->lru);
 
-		balloon_append(page);
+		if (!ext_pages)
+			balloon_append(page);
 	}
 
 	flush_tlb_all();
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
+	set_xen_guest_handle(reservation.extent_start, frames);
 	reservation.nr_extents   = nr_pages;
 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
 	BUG_ON(ret != nr_pages);
 
-	balloon_stats.current_pages -= nr_pages;
+	if (!ext_pages)
+		balloon_stats.current_pages -= nr_pages;
 
+	if (frames != frame_list)
+		kfree(frames);
 	return state;
 }
 
@@ -586,13 +626,13 @@  static void balloon_process(struct work_struct *work)
 
 		if (credit > 0) {
 			if (balloon_is_inflated())
-				state = increase_reservation(credit);
+				state = increase_reservation(credit, NULL);
 			else
 				state = reserve_additional_memory();
 		}
 
 		if (credit < 0)
-			state = decrease_reservation(-credit, GFP_BALLOON);
+			state = decrease_reservation(-credit, GFP_BALLOON, NULL);
 
 		state = update_schedule(state);
 
@@ -631,7 +671,7 @@  static int add_ballooned_pages(int nr_pages)
 		}
 	}
 
-	st = decrease_reservation(nr_pages, GFP_USER);
+	st = decrease_reservation(nr_pages, GFP_USER, NULL);
 	if (st != BP_DONE)
 		return -ENOMEM;
 
@@ -710,6 +750,102 @@  void free_xenballooned_pages(int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
+int alloc_dma_xenballooned_pages(struct device *dev, bool coherent,
+				 int nr_pages, struct page **pages,
+				 void **vaddr, dma_addr_t *dev_bus_addr)
+{
+	enum bp_state state;
+	unsigned long pfn, start_pfn;
+	int i, ret;
+
+	mutex_lock(&balloon_mutex);
+
+	balloon_stats.dma_pages += nr_pages;
+
+	if (coherent)
+		*vaddr = dma_alloc_coherent(dev, nr_pages << PAGE_SHIFT,
+					    dev_bus_addr,
+					    GFP_KERNEL | __GFP_NOWARN);
+
+	else
+		*vaddr = dma_alloc_wc(dev, nr_pages << PAGE_SHIFT,
+				      dev_bus_addr,
+				      GFP_KERNEL | __GFP_NOWARN);
+	if (!*vaddr) {
+		pr_err("Failed to allocate DMA buffer of size %d\n",
+		       nr_pages << PAGE_SHIFT);
+		mutex_unlock(&balloon_mutex);
+		return -ENOMEM;
+	}
+
+	start_pfn = __phys_to_pfn(*dev_bus_addr);
+	for (pfn = start_pfn, i = 0; pfn < start_pfn + nr_pages; pfn++, i++)
+		pages[i] = pfn_to_page(pfn);
+
+	state = decrease_reservation(nr_pages, GFP_KERNEL, pages);
+	if (state != BP_DONE) {
+		pr_err("Failed to decrease reservation for DMA buffer\n");
+		ret = -ENOMEM;
+		goto out_undo;
+	}
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+
+		/*
+		 * We don't support PV MMU when Linux and Xen is using
+		 * different page granularity.
+		 */
+		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
+
+		ret = xen_alloc_p2m_entry(page_to_pfn(page));
+		if (ret < 0)
+			goto out_undo;
+	}
+#endif
+	mutex_unlock(&balloon_mutex);
+	return 0;
+
+out_undo:
+	mutex_unlock(&balloon_mutex);
+	free_dma_xenballooned_pages(dev, coherent, nr_pages, pages,
+				    *vaddr, *dev_bus_addr);
+	return ret;
+}
+EXPORT_SYMBOL(alloc_dma_xenballooned_pages);
+
+void free_dma_xenballooned_pages(struct device *dev, bool coherent,
+				 int nr_pages, struct page **pages,
+				 void *vaddr, dma_addr_t dev_bus_addr)
+{
+	enum bp_state state;
+
+	mutex_lock(&balloon_mutex);
+
+	balloon_stats.dma_pages -= nr_pages;
+
+	state = increase_reservation(nr_pages, pages);
+	if (state != BP_DONE) {
+		pr_err("Failed to increase reservation for DMA buffer\n");
+		goto out;
+	}
+
+	if (vaddr) {
+		if (coherent)
+			dma_free_coherent(dev, nr_pages << PAGE_SHIFT,
+					  vaddr, dev_bus_addr);
+		else
+			dma_free_wc(dev, nr_pages << PAGE_SHIFT,
+				    vaddr, dev_bus_addr);
+	}
+
+out:
+	mutex_unlock(&balloon_mutex);
+}
+EXPORT_SYMBOL(free_dma_xenballooned_pages);
+
+
 static void __init balloon_add_region(unsigned long start_pfn,
 				      unsigned long pages)
 {
@@ -756,6 +892,8 @@  static int __init balloon_init(void)
 	balloon_stats.retry_count = 1;
 	balloon_stats.max_retry_count = RETRY_UNLIMITED;
 
+	balloon_stats.dma_pages = 0;
+
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
 	set_online_page_callback(&xen_online_page);
 	register_memory_notifier(&xen_memory_nb);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 79865b8901ba..62b8c1e4422b 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -123,6 +123,7 @@  subsys_initcall(balloon_init);
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
 BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
+BALLOON_SHOW(dma_kb, "%lu\n", PAGES2KB(balloon_stats.dma_pages));
 
 static DEVICE_ULONG_ATTR(schedule_delay, 0444, balloon_stats.schedule_delay);
 static DEVICE_ULONG_ATTR(max_schedule_delay, 0644, balloon_stats.max_schedule_delay);
@@ -205,6 +206,7 @@  static struct attribute *balloon_info_attrs[] = {
 	&dev_attr_current_kb.attr,
 	&dev_attr_low_kb.attr,
 	&dev_attr_high_kb.attr,
+	&dev_attr_dma_kb.attr,
 	NULL
 };
 
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index d1767dfb0d95..eb917aa911e6 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -17,16 +17,25 @@  struct balloon_stats {
 	unsigned long max_schedule_delay;
 	unsigned long retry_count;
 	unsigned long max_retry_count;
+	unsigned long dma_pages;
 };
 
 extern struct balloon_stats balloon_stats;
 
+struct device;
+
 void balloon_set_new_target(unsigned long target);
 
 int alloc_xenballooned_pages(int nr_pages, struct page **pages);
 void free_xenballooned_pages(int nr_pages, struct page **pages);
 
-struct device;
+int alloc_dma_xenballooned_pages(struct device *dev, bool coherent,
+				 int nr_pages, struct page **pages,
+				 void **vaddr, dma_addr_t *dev_bus_addr);
+void free_dma_xenballooned_pages(struct device *dev, bool coherent,
+				 int nr_pages, struct page **pages,
+				 void *vaddr, dma_addr_t dev_bus_addr);
+
 #ifdef CONFIG_XEN_SELFBALLOONING
 extern int register_xen_selfballooning(struct device *dev);
 #else