Message ID | Yo0LwmVUDSBZb44K@itl-email (mailing list archive)
---|---
State | Superseded
Series | xen/gntdev: Avoid blocking in unmap_grant_pages()
On 24.05.22 18:45, Demi Marie Obenour wrote:
> unmap_grant_pages() currently waits for the pages to no longer be used.
> In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
> deadlock against i915: i915 was waiting for gntdev's MMU notifier to
> finish, while gntdev was waiting for i915 to free its pages. I also
> believe this is responsible for various deadlocks I have experienced in
> the past.
>
> Avoid these problems by making unmap_grant_pages async. This requires
> making it return void, as any errors will not be available when the
> function returns. Fortunately, the only use of the return value is a
> WARN_ON(). Replace this with WARN_ON()s where errors are detected.

Not all callers of unmap_grant_pages() are issuing a WARN_ON(). Are you
sure that this change can't result in a flood of WARN()s?

Please note that you are modifying the semantics in case of an unmap
operation returning an error. Previously there were no further unmaps
done in this case, while now you basically continue unmapping even
after hitting an error. This seems to be fine, but is worth mentioning
in the commit message.

> Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
> Cc: stable@vger.kernel.org
> Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> ---
>  drivers/xen/gntdev-common.h |  4 ++
>  drivers/xen/gntdev.c        | 82 ++++++++++++++++++-------------
>  2 files changed, 45 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
> index 20d7d059dadb..a6e2805ea2ce 100644
> --- a/drivers/xen/gntdev-common.h
> +++ b/drivers/xen/gntdev-common.h
> @@ -16,6 +16,7 @@
>  #include <linux/mmu_notifier.h>
>  #include <linux/types.h>
>  #include <xen/interface/event_channel.h>
> +#include <xen/grant_table.h>
>
>  struct gntdev_dmabuf_priv;
>
> @@ -73,6 +74,9 @@ struct gntdev_grant_map {
>          /* Needed to avoid allocation in gnttab_dma_free_pages(). */
>          xen_pfn_t *frames;
>  #endif
> +
> +        /* Needed to avoid allocation in __unmap_grant_pages */
> +        struct gntab_unmap_queue_data unmap_data;
>  };
>
>  struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 59ffea800079..670d800e4a89 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -62,8 +62,8 @@ MODULE_PARM_DESC(limit,
>
>  static int use_ptemod;
>
> -static int unmap_grant_pages(struct gntdev_grant_map *map,
> -                             int offset, int pages);
> +static void unmap_grant_pages(struct gntdev_grant_map *map,
> +                              int offset, int pages);
>
>  static struct miscdevice gntdev_miscdev;
>
> @@ -349,61 +349,65 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
>          return err;
>  }
>
> -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> -                               int pages)
> +static void __unmap_grant_pages_done(int result,
> +                                     struct gntab_unmap_queue_data *data)
>  {
> -        int i, err = 0;
> -        struct gntab_unmap_queue_data unmap_data;
> -
> -        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> -                int pgno = (map->notify.addr >> PAGE_SHIFT);
> -                if (pgno >= offset && pgno < offset + pages) {
> -                        /* No need for kmap, pages are in lowmem */
> -                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
> -                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
> -                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> -                }
> -        }
> -
> -        unmap_data.unmap_ops = map->unmap_ops + offset;
> -        unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
> -        unmap_data.pages = map->pages + offset;
> -        unmap_data.count = pages;
> -
> -        err = gnttab_unmap_refs_sync(&unmap_data);
> -        if (err)
> -                return err;
> +        int i;

Mind using unsigned int instead?

> +        struct gntdev_grant_map *map = data->data;
> +        int offset = data->unmap_ops - map->unmap_ops;
>
> -        for (i = 0; i < pages; i++) {
> -                if (map->unmap_ops[offset+i].status)
> -                        err = -EINVAL;
> +        for (i = 0; i < data->count; i++) {
> +                WARN_ON(map->unmap_ops[offset+i].status);
>                  pr_debug("unmap handle=%d st=%d\n",
>                          map->unmap_ops[offset+i].handle,
>                          map->unmap_ops[offset+i].status);
>                  map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
>                  if (use_ptemod) {
> -                        if (map->kunmap_ops[offset+i].status)
> -                                err = -EINVAL;
> +                        WARN_ON(map->kunmap_ops[offset+i].status);
>                          pr_debug("kunmap handle=%u st=%d\n",
>                                  map->kunmap_ops[offset+i].handle,
>                                  map->kunmap_ops[offset+i].status);
>                          map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
>                  }
>          }
> -        return err;
>  }
>
> -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> -                             int pages)
> +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> +                                int pages)
> +{
> +        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> +                int pgno = (map->notify.addr >> PAGE_SHIFT);
> +
> +                if (pgno >= offset && pgno < offset + pages) {
> +                        /* No need for kmap, pages are in lowmem */
> +                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
> +
> +                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
> +                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> +                }
> +        }
> +
> +        map->unmap_data.unmap_ops = map->unmap_ops + offset;
> +        map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
> +        map->unmap_data.pages = map->pages + offset;
> +        map->unmap_data.count = pages;
> +        map->unmap_data.done = __unmap_grant_pages_done;
> +        map->unmap_data.data = map;

I think you need to call refcount_inc(&map->users) here and do the
related gntdev_put_map() call in __unmap_grant_pages_done(). Otherwise
you risk having the map freed before the async operation has finished.

> +
> +        gnttab_unmap_refs_async(&map->unmap_data);
> +}
> +
> +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> +                              int pages)
>  {
> -        int range, err = 0;
> +        int range;
>
>          pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
>
>          /* It is possible the requested range will have a "hole" where we
>           * already unmapped some of the grants. Only unmap valid ranges.
>           */
> -        while (pages && !err) {
> +        while (pages) {
>                  while (pages &&
>                         map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
>                          offset++;
> @@ -416,12 +420,10 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
>                                  break;
>                          range++;
>                  }
> -                err = __unmap_grant_pages(map, offset, range);
> +                __unmap_grant_pages(map, offset, range);
>                  offset += range;
>                  pages -= range;
>          }
> -
> -        return err;
>  }
>
>  /* ------------------------------------------------------------------ */
>
> @@ -473,7 +475,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
>          struct gntdev_grant_map *map =
>                  container_of(mn, struct gntdev_grant_map, notifier);
>          unsigned long mstart, mend;
> -        int err;
>
>          if (!mmu_notifier_range_blockable(range))
>                  return false;
> @@ -494,10 +495,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
>                          map->index, map->count,
>                          map->vma->vm_start, map->vma->vm_end,
>                          range->start, range->end, mstart, mend);
> -        err = unmap_grant_pages(map,
> +        unmap_grant_pages(map,
>                                  (mstart - map->vma->vm_start) >> PAGE_SHIFT,
>                                  (mend - mstart) >> PAGE_SHIFT);
> -        WARN_ON(err);
>
>          return true;
>  }


Juergen
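
To make the lifetime concern above concrete, here is a minimal sketch of
the change Juergen is suggesting, expressed as a delta on top of the
posted patch. This is an illustration, not the posted v2; in particular,
passing NULL as the priv argument to gntdev_put_map() assumes the map is
already off its owner's list by the time an unmap is queued.

--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ in __unmap_grant_pages() @@
         map->unmap_data.done = __unmap_grant_pages_done;
         map->unmap_data.data = map;
+        /* Sketch: hold a reference until the async unmap has completed. */
+        refcount_inc(&map->users);

         gnttab_unmap_refs_async(&map->unmap_data);
@@ in __unmap_grant_pages_done() @@
                         map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
                 }
         }
+        /* Sketch: drop the reference taken when the unmap was queued;
+         * the NULL priv is an assumption, see the note above. */
+        gntdev_put_map(NULL /* already removed */, map);
 }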
On Wed, May 25, 2022 at 11:17:39AM +0200, Juergen Gross wrote:
> On 24.05.22 18:45, Demi Marie Obenour wrote:
> > unmap_grant_pages() currently waits for the pages to no longer be used.
> > In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
> > deadlock against i915: i915 was waiting for gntdev's MMU notifier to
> > finish, while gntdev was waiting for i915 to free its pages. I also
> > believe this is responsible for various deadlocks I have experienced in
> > the past.
> >
> > Avoid these problems by making unmap_grant_pages async. This requires
> > making it return void, as any errors will not be available when the
> > function returns. Fortunately, the only use of the return value is a
> > WARN_ON(). Replace this with WARN_ON()s where errors are detected.
>
> Not all callers of unmap_grant_pages() are issuing a WARN_ON(). Are you
> sure that this change can't result in a flood of WARN()s?

It probably can, but for a different reason. Since the actual unmap is
now asynchronous, the handle is set to INVALID_GRANT_HANDLE too late to
prevent double unmaps. I will fix this by using a separate bool array
in v2. Another option is to (ab)use the padding bits in the hypercall
struct, but that seems too fragile.

> Please note that you are modifying the semantics in case of an unmap
> operation returning an error. Previously there were no further unmaps
> done in this case, while now you basically continue unmapping even
> after hitting an error. This seems to be fine, but is worth mentioning
> in the commit message.

Will fix in v2, thanks.

> > Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> > ---
> >  drivers/xen/gntdev-common.h |  4 ++
> >  drivers/xen/gntdev.c        | 82 ++++++++++++++++++-------------
> >  2 files changed, 45 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
> > index 20d7d059dadb..a6e2805ea2ce 100644
> > --- a/drivers/xen/gntdev-common.h
> > +++ b/drivers/xen/gntdev-common.h
> > @@ -16,6 +16,7 @@
> >  #include <linux/mmu_notifier.h>
> >  #include <linux/types.h>
> >  #include <xen/interface/event_channel.h>
> > +#include <xen/grant_table.h>
> >
> >  struct gntdev_dmabuf_priv;
> >
> > @@ -73,6 +74,9 @@ struct gntdev_grant_map {
> >          /* Needed to avoid allocation in gnttab_dma_free_pages(). */
> >          xen_pfn_t *frames;
> >  #endif
> > +
> > +        /* Needed to avoid allocation in __unmap_grant_pages */
> > +        struct gntab_unmap_queue_data unmap_data;
> >  };
> >
> >  struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
> > diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> > index 59ffea800079..670d800e4a89 100644
> > --- a/drivers/xen/gntdev.c
> > +++ b/drivers/xen/gntdev.c
> > @@ -62,8 +62,8 @@ MODULE_PARM_DESC(limit,
> >
> >  static int use_ptemod;
> >
> > -static int unmap_grant_pages(struct gntdev_grant_map *map,
> > -                             int offset, int pages);
> > +static void unmap_grant_pages(struct gntdev_grant_map *map,
> > +                              int offset, int pages);
> >
> >  static struct miscdevice gntdev_miscdev;
> >
> > @@ -349,61 +349,65 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
> >          return err;
> >  }
> >
> > -static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> > -                               int pages)
> > +static void __unmap_grant_pages_done(int result,
> > +                                     struct gntab_unmap_queue_data *data)
> >  {
> > -        int i, err = 0;
> > -        struct gntab_unmap_queue_data unmap_data;
> > -
> > -        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> > -                int pgno = (map->notify.addr >> PAGE_SHIFT);
> > -                if (pgno >= offset && pgno < offset + pages) {
> > -                        /* No need for kmap, pages are in lowmem */
> > -                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
> > -                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
> > -                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> > -                }
> > -        }
> > -
> > -        unmap_data.unmap_ops = map->unmap_ops + offset;
> > -        unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
> > -        unmap_data.pages = map->pages + offset;
> > -        unmap_data.count = pages;
> > -
> > -        err = gnttab_unmap_refs_sync(&unmap_data);
> > -        if (err)
> > -                return err;
> > +        int i;
>
> Mind using unsigned int instead?

Not at all. I used int because that is what the original code used,
but an unsigned type is definitely better.

> > +        struct gntdev_grant_map *map = data->data;
> > +        int offset = data->unmap_ops - map->unmap_ops;
> >
> > -        for (i = 0; i < pages; i++) {
> > -                if (map->unmap_ops[offset+i].status)
> > -                        err = -EINVAL;
> > +        for (i = 0; i < data->count; i++) {
> > +                WARN_ON(map->unmap_ops[offset+i].status);
> >                  pr_debug("unmap handle=%d st=%d\n",
> >                          map->unmap_ops[offset+i].handle,
> >                          map->unmap_ops[offset+i].status);
> >                  map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
> >                  if (use_ptemod) {
> > -                        if (map->kunmap_ops[offset+i].status)
> > -                                err = -EINVAL;
> > +                        WARN_ON(map->kunmap_ops[offset+i].status);
> >                          pr_debug("kunmap handle=%u st=%d\n",
> >                                  map->kunmap_ops[offset+i].handle,
> >                                  map->kunmap_ops[offset+i].status);
> >                          map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
> >                  }
> >          }
> > -        return err;
> >  }
> >
> > -static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> > -                             int pages)
> > +static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> > +                                int pages)
> > +{
> > +        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
> > +                int pgno = (map->notify.addr >> PAGE_SHIFT);
> > +
> > +                if (pgno >= offset && pgno < offset + pages) {
> > +                        /* No need for kmap, pages are in lowmem */
> > +                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
> > +
> > +                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
> > +                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
> > +                }
> > +        }
> > +
> > +        map->unmap_data.unmap_ops = map->unmap_ops + offset;
> > +        map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
> > +        map->unmap_data.pages = map->pages + offset;
> > +        map->unmap_data.count = pages;
> > +        map->unmap_data.done = __unmap_grant_pages_done;
> > +        map->unmap_data.data = map;
>
> I think you need to call refcount_inc(&map->users) here and do the
> related gntdev_put_map() call in __unmap_grant_pages_done(). Otherwise
> you risk having the map freed before the async operation has finished.

Whoops, good catch. Will fix in v2.

> > +
> > +        gnttab_unmap_refs_async(&map->unmap_data);
> > +}
> > +
> > +static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> > +                              int pages)
> >  {
> > -        int range, err = 0;
> > +        int range;
> >
> >          pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
> >
> >          /* It is possible the requested range will have a "hole" where we
> >           * already unmapped some of the grants. Only unmap valid ranges.
> >           */
> > -        while (pages && !err) {
> > +        while (pages) {
> >                  while (pages &&
> >                         map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
> >                          offset++;
> > @@ -416,12 +420,10 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
> >                                  break;
> >                          range++;
> >                  }
> > -                err = __unmap_grant_pages(map, offset, range);
> > +                __unmap_grant_pages(map, offset, range);
> >                  offset += range;
> >                  pages -= range;
> >          }
> > -
> > -        return err;
> >  }
> >
> >  /* ------------------------------------------------------------------ */
> >
> > @@ -473,7 +475,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
> >          struct gntdev_grant_map *map =
> >                  container_of(mn, struct gntdev_grant_map, notifier);
> >          unsigned long mstart, mend;
> > -        int err;
> >
> >          if (!mmu_notifier_range_blockable(range))
> >                  return false;
> > @@ -494,10 +495,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
> >                          map->index, map->count,
> >                          map->vma->vm_start, map->vma->vm_end,
> >                          range->start, range->end, mstart, mend);
> > -        err = unmap_grant_pages(map,
> > +        unmap_grant_pages(map,
> >                                  (mstart - map->vma->vm_start) >> PAGE_SHIFT,
> >                                  (mend - mstart) >> PAGE_SHIFT);
> > -        WARN_ON(err);
> >
> >          return true;
> >  }
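
A rough sketch of the "separate bool array" approach Demi describes for
v2. The names here (being_removed in particular) are illustrative
assumptions, not code from a posted patch; the point is that an
in-flight flag has to replace the handle check, because the handle now
stays valid until the completion callback runs.

struct gntdev_grant_map {
        /* ... existing fields ... */
        /* Illustrative: one flag per page, allocated together with the
         * other per-page arrays, set when an unmap for it is queued. */
        bool *being_removed;
};

static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
                              int pages)
{
        int i;

        for (i = offset; i < offset + pages; i++) {
                /* Skip pages whose unmap is already in flight; checking
                 * map->unmap_ops[i].handle is no longer enough, since it
                 * is only reset in __unmap_grant_pages_done(). */
                if (map->being_removed[i])
                        continue;
                map->being_removed[i] = true;
                /* ... build a contiguous range and queue it for async
                 * unmap, as in the posted loop ... */
        }
}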
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 20d7d059dadb..a6e2805ea2ce 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -16,6 +16,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/types.h>
 #include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
 
 struct gntdev_dmabuf_priv;
 
@@ -73,6 +74,9 @@ struct gntdev_grant_map {
         /* Needed to avoid allocation in gnttab_dma_free_pages(). */
         xen_pfn_t *frames;
 #endif
+
+        /* Needed to avoid allocation in __unmap_grant_pages */
+        struct gntab_unmap_queue_data unmap_data;
 };
 
 struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 59ffea800079..670d800e4a89 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -62,8 +62,8 @@ MODULE_PARM_DESC(limit,
 
 static int use_ptemod;
 
-static int unmap_grant_pages(struct gntdev_grant_map *map,
-                             int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+                              int offset, int pages);
 
 static struct miscdevice gntdev_miscdev;
 
@@ -349,61 +349,65 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
         return err;
 }
 
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
-                               int pages)
+static void __unmap_grant_pages_done(int result,
+                                     struct gntab_unmap_queue_data *data)
 {
-        int i, err = 0;
-        struct gntab_unmap_queue_data unmap_data;
-
-        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
-                int pgno = (map->notify.addr >> PAGE_SHIFT);
-                if (pgno >= offset && pgno < offset + pages) {
-                        /* No need for kmap, pages are in lowmem */
-                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
-                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
-                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
-                }
-        }
-
-        unmap_data.unmap_ops = map->unmap_ops + offset;
-        unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
-        unmap_data.pages = map->pages + offset;
-        unmap_data.count = pages;
-
-        err = gnttab_unmap_refs_sync(&unmap_data);
-        if (err)
-                return err;
+        int i;
+        struct gntdev_grant_map *map = data->data;
+        int offset = data->unmap_ops - map->unmap_ops;
 
-        for (i = 0; i < pages; i++) {
-                if (map->unmap_ops[offset+i].status)
-                        err = -EINVAL;
+        for (i = 0; i < data->count; i++) {
+                WARN_ON(map->unmap_ops[offset+i].status);
                 pr_debug("unmap handle=%d st=%d\n",
                         map->unmap_ops[offset+i].handle,
                         map->unmap_ops[offset+i].status);
                 map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
                 if (use_ptemod) {
-                        if (map->kunmap_ops[offset+i].status)
-                                err = -EINVAL;
+                        WARN_ON(map->kunmap_ops[offset+i].status);
                         pr_debug("kunmap handle=%u st=%d\n",
                                 map->kunmap_ops[offset+i].handle,
                                 map->kunmap_ops[offset+i].status);
                         map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
                 }
         }
-        return err;
 }
 
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
-                             int pages)
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+                                int pages)
+{
+        if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+                int pgno = (map->notify.addr >> PAGE_SHIFT);
+
+                if (pgno >= offset && pgno < offset + pages) {
+                        /* No need for kmap, pages are in lowmem */
+                        uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
+                        tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+                        map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+                }
+        }
+
+        map->unmap_data.unmap_ops = map->unmap_ops + offset;
+        map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+        map->unmap_data.pages = map->pages + offset;
+        map->unmap_data.count = pages;
+        map->unmap_data.done = __unmap_grant_pages_done;
+        map->unmap_data.data = map;
+
+        gnttab_unmap_refs_async(&map->unmap_data);
+}
+
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+                              int pages)
 {
-        int range, err = 0;
+        int range;
 
         pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 
         /* It is possible the requested range will have a "hole" where we
          * already unmapped some of the grants. Only unmap valid ranges.
          */
-        while (pages && !err) {
+        while (pages) {
                 while (pages &&
                        map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
                         offset++;
@@ -416,12 +420,10 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
                                 break;
                         range++;
                 }
-                err = __unmap_grant_pages(map, offset, range);
+                __unmap_grant_pages(map, offset, range);
                 offset += range;
                 pages -= range;
         }
-
-        return err;
 }
 
 /* ------------------------------------------------------------------ */
 
@@ -473,7 +475,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
         struct gntdev_grant_map *map =
                 container_of(mn, struct gntdev_grant_map, notifier);
         unsigned long mstart, mend;
-        int err;
 
         if (!mmu_notifier_range_blockable(range))
                 return false;
@@ -494,10 +495,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
                         map->index, map->count,
                         map->vma->vm_start, map->vma->vm_end,
                         range->start, range->end, mstart, mend);
-        err = unmap_grant_pages(map,
+        unmap_grant_pages(map,
                                 (mstart - map->vma->vm_start) >> PAGE_SHIFT,
                                 (mend - mstart) >> PAGE_SHIFT);
-        WARN_ON(err);
 
         return true;
 }
unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.

Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(). Replace this with WARN_ON()s where errors are detected.

Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
---
 drivers/xen/gntdev-common.h |  4 ++
 drivers/xen/gntdev.c        | 82 ++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 41 deletions(-)
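
For context on why the patch can simply stop waiting:
gnttab_unmap_refs_async() retries from a delayed work item instead of
blocking the caller. The sketch below paraphrases the idea behind
drivers/xen/grant-table.c rather than reproducing the exact upstream
code; the helper name and the 5 ms retry delay are assumptions for
illustration.

/* Simplified sketch of the async-unmap retry loop. */
static void gnttab_unmap_refs_async_sketch(struct gntab_unmap_queue_data *item)
{
        unsigned int pc;

        for (pc = 0; pc < item->count; pc++) {
                if (page_count(item->pages[pc]) > 1) {
                        /* A page is still in use: re-queue ourselves
                         * from a workqueue rather than blocking. */
                        schedule_delayed_work(&item->gnttab_work,
                                              msecs_to_jiffies(5));
                        return;
                }
        }

        /* All pages are idle: perform the real unmap, then hand the
         * result to the caller's completion callback (here,
         * __unmap_grant_pages_done() via item->done). */
        item->done(gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
                                     item->pages, item->count), item);
}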