Message ID | 20220311223028.1865-1-beaub@linux.microsoft.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | user_events: Use alloc_pages instead of kzalloc for register pages | expand |
On Fri, 11 Mar 2022 14:30:28 -0800 Beau Belgrave <beaub@linux.microsoft.com> wrote: > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c > index 2b5e9fdb63a0..59c900789757 100644 > --- a/kernel/trace/trace_events_user.c > +++ b/kernel/trace/trace_events_user.c > @@ -1587,16 +1587,20 @@ static void set_page_reservations(bool set) > static int __init trace_events_user_init(void) > { > int ret; > + struct page *register_pages; The int ret should come last. > > /* Zero all bits beside 0 (which is reserved for failures) */ > bitmap_zero(page_bitmap, MAX_EVENTS); > set_bit(0, page_bitmap); > > - register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL); > + register_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, > + get_order(MAX_EVENTS)); > > - if (!register_page_data) > + if (!register_pages) > return -ENOMEM; > > + register_page_data = page_address(register_pages); > + > set_page_reservations(true); > > ret = create_user_tracefs(); > @@ -1604,7 +1608,7 @@ static int __init trace_events_user_init(void) > if (ret) { > pr_warn("user_events could not register with tracefs\n"); > set_page_reservations(false); > - kfree(register_page_data); > + __free_pages(register_pages, get_order(MAX_EVENTS)); > return ret; > } > I tried it slightly differently. Why waste bits if MAX_EVENTS is greater than the order. That is, make MAX_EVENTS depend on the order, not the other way around. -- Steve diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 4febc1d6ae72..6941d0794347 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -30,9 +30,10 @@ /* * Limits how many trace_event calls user processes can create: - * Must be multiple of PAGE_SIZE. + * Must be a power of two of PAGE_SIZE. */ -#define MAX_PAGES 1 +#define MAX_PAGE_ORDER 0 +#define MAX_PAGES (1 << MAX_PAGE_ORDER) #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE) /* Limit how long of an event name plus args within the subsystem. 
*/ @@ -1606,41 +1607,25 @@ static int create_user_tracefs(void) return -ENODEV; } -static void set_page_reservations(bool set) -{ - int page; - - for (page = 0; page < MAX_PAGES; ++page) { - void *addr = register_page_data + (PAGE_SIZE * page); - - if (set) - SetPageReserved(virt_to_page(addr)); - else - ClearPageReserved(virt_to_page(addr)); - } -} - static int __init trace_events_user_init(void) { + struct page pages; int ret; /* Zero all bits beside 0 (which is reserved for failures) */ bitmap_zero(page_bitmap, MAX_EVENTS); set_bit(0, page_bitmap); - register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL); - - if (!register_page_data) + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER); + if (!pages) return -ENOMEM; - - set_page_reservations(true); + register_page_data = page_address(pages); ret = create_user_tracefs(); if (ret) { pr_warn("user_events could not register with tracefs\n"); - set_page_reservations(false); - kfree(register_page_data); + free_page((unsigned long)register_page_data); return ret; }
On Fri, 11 Mar 2022 18:33:43 -0500 Steven Rostedt <rostedt@goodmis.org> wrote: > I tried it slightly differently. Why waist bits if MAX_EVENTS is greater > than the order. That is, make MAX_EVENTS depend on the order, not the other > way around. Here's a version that keeps the reserving part as well as some bug fixes (I didn't even compile the previous version ;-) -- Steve diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 4febc1d6ae72..e10ad057e797 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -30,9 +30,10 @@ /* * Limits how many trace_event calls user processes can create: - * Must be multiple of PAGE_SIZE. + * Must be a power of two of PAGE_SIZE. */ -#define MAX_PAGES 1 +#define MAX_PAGE_ORDER 0 +#define MAX_PAGES (1 << MAX_PAGE_ORDER) #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE) /* Limit how long of an event name plus args within the subsystem. */ @@ -1622,16 +1623,17 @@ static void set_page_reservations(bool set) static int __init trace_events_user_init(void) { + struct page *pages; int ret; /* Zero all bits beside 0 (which is reserved for failures) */ bitmap_zero(page_bitmap, MAX_EVENTS); set_bit(0, page_bitmap); - register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL); - - if (!register_page_data) + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER); + if (!pages) return -ENOMEM; + register_page_data = page_address(pages); set_page_reservations(true); @@ -1640,7 +1642,7 @@ static int __init trace_events_user_init(void) if (ret) { pr_warn("user_events could not register with tracefs\n"); set_page_reservations(false); - kfree(register_page_data); + __free_pages(pages, MAX_PAGE_ORDER); return ret; }
On Fri, Mar 11, 2022 at 06:44:40PM -0500, Steven Rostedt wrote: > On Fri, 11 Mar 2022 18:33:43 -0500 > Steven Rostedt <rostedt@goodmis.org> wrote: > > > I tried it slightly differently. Why waist bits if MAX_EVENTS is greater > > than the order. That is, make MAX_EVENTS depend on the order, not the other > > way around. > > Here's a version that keeps the reserving part as well as some bug fixes (I > didn't even compile the previous version ;-) > > -- Steve > > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c > index 4febc1d6ae72..e10ad057e797 100644 > --- a/kernel/trace/trace_events_user.c > +++ b/kernel/trace/trace_events_user.c > @@ -30,9 +30,10 @@ > > /* > * Limits how many trace_event calls user processes can create: > - * Must be multiple of PAGE_SIZE. > + * Must be a power of two of PAGE_SIZE. > */ > -#define MAX_PAGES 1 > +#define MAX_PAGE_ORDER 0 > +#define MAX_PAGES (1 << MAX_PAGE_ORDER) > #define MAX_EVENTS (MAX_PAGES * PAGE_SIZE) > > /* Limit how long of an event name plus args within the subsystem. 
*/ > @@ -1622,16 +1623,17 @@ static void set_page_reservations(bool set) > > static int __init trace_events_user_init(void) > { > + struct page *pages; > int ret; > > /* Zero all bits beside 0 (which is reserved for failures) */ > bitmap_zero(page_bitmap, MAX_EVENTS); > set_bit(0, page_bitmap); > > - register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL); > - > - if (!register_page_data) > + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER); > + if (!pages) > return -ENOMEM; > + register_page_data = page_address(pages); > > set_page_reservations(true); > > @@ -1640,7 +1642,7 @@ static int __init trace_events_user_init(void) > if (ret) { > pr_warn("user_events could not register with tracefs\n"); > set_page_reservations(false); > - kfree(register_page_data); > + __free_pages(pages, MAX_PAGE_ORDER); > return ret; > } > This looks good to me, I agree having the max events aligning to page order makes more sense going forward. Thanks, -Beau
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 2b5e9fdb63a0..59c900789757 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1587,16 +1587,20 @@ static void set_page_reservations(bool set) static int __init trace_events_user_init(void) { int ret; + struct page *register_pages; /* Zero all bits beside 0 (which is reserved for failures) */ bitmap_zero(page_bitmap, MAX_EVENTS); set_bit(0, page_bitmap); - register_page_data = kzalloc(MAX_EVENTS, GFP_KERNEL); + register_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(MAX_EVENTS)); - if (!register_page_data) + if (!register_pages) return -ENOMEM; + register_page_data = page_address(register_pages); + set_page_reservations(true); ret = create_user_tracefs(); @@ -1604,7 +1608,7 @@ static int __init trace_events_user_init(void) if (ret) { pr_warn("user_events could not register with tracefs\n"); set_page_reservations(false); - kfree(register_page_data); + __free_pages(register_pages, get_order(MAX_EVENTS)); return ret; }
kzalloc virtual addresses do not work with SetPageReserved, use the actual page virtual addresses instead via alloc_pages. The issue is reported when booting with user_events and DEBUG_VM_PGFLAGS=y. Link: https://lore.kernel.org/linux-trace-devel/CADYN=9+xY5Vku3Ws5E9S60SM5dCFfeGeRBkmDFbcxX0ZMoFing@mail.gmail.com/#R Reported-by: Anders Roxell <anders.roxell@linaro.org> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com> --- kernel/trace/trace_events_user.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) base-commit: 864ea0e10cc90416a01b46f0d47a6f26dc020820