Message ID | 20180327203904.GA1151@beast (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, 27 Mar 2018 13:39:04 -0700 Kees Cook <keescook@chromium.org> wrote: > On the quest to remove all VLAs from the kernel[1] this adjusts several > cases where allocation is made after an array of structures that points > back into the allocation. The allocations are changed to perform explicit > calculations instead of using a Variable Length Array in a structure. > Additionally, this lets Clang compile this code now, since Clang does not > support VLAIS[2]. > > [1] https://lkml.org/lkml/2018/3/7/621 > [2] https://lkml.org/lkml/2013/9/23/500 > > ... > > I not sure the best way to test this. Kconfig implies I need special hardware? Yeah, I was wondering about that. It's a tricky-looking patch. Boaz, are you able to give it a spin? Thanks.
On Tue, Mar 27, 2018 at 1:39 PM Kees Cook <keescook@chromium.org> wrote: > On the quest to remove all VLAs from the kernel[1] this adjusts several > cases where allocation is made after an array of structures that points > back into the allocation. The allocations are changed to perform explicit > calculations instead of using a Variable Length Array in a structure. > Additionally, this lets Clang compile this code now, since Clang does not > support VLAIS[2]. > [1] https://lkml.org/lkml/2018/3/7/621 > [2] https://lkml.org/lkml/2013/9/23/500 > Signed-off-by: Kees Cook <keescook@chromium.org> > --- > I not sure the best way to test this. Kconfig implies I need special hardware? > --- > fs/exofs/ore.c | 84 +++++++++++++++++++++++++++++++---------------------- > fs/exofs/ore_raid.c | 73 ++++++++++++++++++++++++++++++++++------------ > fs/exofs/super.c | 23 +++++++-------- > 3 files changed, 114 insertions(+), 66 deletions(-) > diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c > index 3c6a9c156b7a..cfa862ea19d2 100644 > --- a/fs/exofs/ore.c > +++ b/fs/exofs/ore.c > @@ -146,68 +146,82 @@ int _ore_get_io_state(struct ore_layout *layout, > struct ore_io_state **pios) > { > struct ore_io_state *ios; > - struct page **pages; > - struct osd_sg_entry *sgilist; > + size_t size_ios, size_extra, size_total; > + void *ios_extra; > + > + /* > + * The desired layout looks like this, with the extra_allocation > + * items pointed at from fields within ios or per_dev: > + > struct __alloc_all_io_state { > struct ore_io_state ios; > struct ore_per_dev_state per_dev[numdevs]; > union { > struct osd_sg_entry sglist[sgs_per_dev * numdevs]; > struct page *pages[num_par_pages]; > - }; > - } *_aios; > - > - if (likely(sizeof(*_aios) <= PAGE_SIZE)) { > - _aios = kzalloc(sizeof(*_aios), GFP_KERNEL); > - if (unlikely(!_aios)) { > - ORE_DBGMSG("Failed kzalloc bytes=%zd\n", > - sizeof(*_aios)); > + } extra_allocation; > + } whole_allocation; > + > + */ > + > + /* This should never happen, so abort early if it ever does. */ > + if (sgs_per_dev && num_par_pages) { > + ORE_DBGMSG("Tried to use both pages and sglist\n"); > + *pios = NULL; > + return -EINVAL; > + } > + > + if (numdevs > (INT_MAX - sizeof(*ios)) / > + sizeof(struct ore_per_dev_state)) > + return -ENOMEM; > + size_ios = sizeof(*ios) + sizeof(struct ore_per_dev_state) * numdevs; Can all these invariant checks that return -ENOMEM be grouped together, as it seems we have ... > + > + if (sgs_per_dev * numdevs > INT_MAX / sizeof(struct osd_sg_entry)) > + return -ENOMEM; > + if (num_par_pages > INT_MAX / sizeof(struct page *)) > + return -ENOMEM; ...a whole bunch of single conditions with the same body, can be combined with one: if (a) return d; if (b) return d; if (c) return d; -> if (a || b || c) return d; > + size_extra = max(sizeof(struct osd_sg_entry) * (sgs_per_dev * numdevs), Do you need parens around the sub-expression `(sgs_per_dev * numdevs)`? > + sizeof(struct page *) * num_par_pages); > + > + size_total = size_ios + size_extra; > + > + if (likely(size_total <= PAGE_SIZE)) { > + ios = kzalloc(size_total, GFP_KERNEL); > + if (unlikely(!ios)) { > + ORE_DBGMSG("Failed kzalloc bytes=%zd\n", size_total); > *pios = NULL; > return -ENOMEM; > } > - pages = num_par_pages ? _aios->pages : NULL; > - sgilist = sgs_per_dev ? _aios->sglist : NULL; > - ios = &_aios->ios; > + ios_extra = (char *)ios + size_ios; > } else { > - struct __alloc_small_io_state { > - struct ore_io_state ios; > - struct ore_per_dev_state per_dev[numdevs]; > - } *_aio_small; > - union __extra_part { > - struct osd_sg_entry sglist[sgs_per_dev * numdevs]; > - struct page *pages[num_par_pages]; > - } *extra_part; > - > - _aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL); > - if (unlikely(!_aio_small)) { > + ios = kzalloc(size_ios, GFP_KERNEL); > + if (unlikely(!ios)) { > ORE_DBGMSG("Failed alloc first part bytes=%zd\n", > - sizeof(*_aio_small)); > + size_ios); > *pios = NULL; > return -ENOMEM; > } > - extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL); > - if (unlikely(!extra_part)) { > + ios_extra = kzalloc(size_extra, GFP_KERNEL); > + if (unlikely(!ios_extra)) { > ORE_DBGMSG("Failed alloc second part bytes=%zd\n", > - sizeof(*extra_part)); > - kfree(_aio_small); > + size_extra); > + kfree(ios); > *pios = NULL; > return -ENOMEM; > } > - pages = num_par_pages ? extra_part->pages : NULL; > - sgilist = sgs_per_dev ? extra_part->sglist : NULL; > /* In this case the per_dev[0].sgilist holds the pointer to > * be freed > */ > - ios = &_aio_small->ios; > ios->extra_part_alloc = true; > } > - if (pages) { > - ios->parity_pages = pages; > + if (num_par_pages) { > + ios->parity_pages = ios_extra; > ios->max_par_pages = num_par_pages; > } > - if (sgilist) { > + if (sgs_per_dev) { > + struct osd_sg_entry *sgilist = ios_extra; > unsigned d; > for (d = 0; d < numdevs; ++d) { > diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c > index 27cbdb697649..659129d5e9f7 100644 > --- a/fs/exofs/ore_raid.c > +++ b/fs/exofs/ore_raid.c > @@ -71,6 +71,11 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, > { > struct __stripe_pages_2d *sp2d; > unsigned data_devs = group_width - parity; > + > + /* > + * Desired allocation layout is, though when larger than PAGE_SIZE, > + * each struct __alloc_1p_arrays is separately allocated: > + > struct _alloc_all_bytes { > struct __alloc_stripe_pages_2d { > struct __stripe_pages_2d sp2d; > @@ -82,55 +87,85 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, > char page_is_read[data_devs]; > } __a1pa[pages_in_unit]; > } *_aab; > + > struct __alloc_1p_arrays *__a1pa; > struct __alloc_1p_arrays *__a1pa_end; > - const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); > + > + */ > + > + char *__a1pa; > + char *__a1pa_end; > + Just my notes, since this block could use another eye for review: > + const size_t sizeof_stripe_pages_2d = > + sizeof(struct __stripe_pages_2d) + > + sizeof(struct __1_page_stripe) * pages_in_unit; Ok, so this replaces sizeof(_aab->__asp2d). > + const size_t sizeof__a1pa = > + ALIGN(sizeof(struct page *) * (2 * group_width) + data_devs, > + sizeof(void *)); And this replaces sizeof(_aab->__a1pa[0]); > + const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit; This replaces sizeof(_aab->__a1pa[pages_in_unit]); > + const size_t alloc_total = sizeof_stripe_pages_2d + > + sizeof__a1pa_arrays; Replaces sizeof(*_aab); This is the trickiest part of this patch IMO, I think it needs careful review, but looks correct to me. > + > unsigned num_a1pa, alloc_size, i; > /* FIXME: check these numbers in ore_verify_layout */ > - BUG_ON(sizeof(_aab->__asp2d) > PAGE_SIZE); > + BUG_ON(sizeof_stripe_pages_2d > PAGE_SIZE); > BUG_ON(sizeof__a1pa > PAGE_SIZE); > - if (sizeof(*_aab) > PAGE_SIZE) { > - num_a1pa = (PAGE_SIZE - sizeof(_aab->__asp2d)) / sizeof__a1pa; > - alloc_size = sizeof(_aab->__asp2d) + sizeof__a1pa * num_a1pa; > + /* > + * If alloc_total would be larger than PAGE_SIZE, only allocate > + * as many a1pa items as would fill the rest of the page, instead > + * of the full pages_in_unit count. > + */ > + if (alloc_total > PAGE_SIZE) { > + num_a1pa = (PAGE_SIZE - sizeof_stripe_pages_2d) / sizeof__a1pa; > + alloc_size = sizeof_stripe_pages_2d + sizeof__a1pa * num_a1pa; > } else { > num_a1pa = pages_in_unit; > - alloc_size = sizeof(*_aab); > + alloc_size = alloc_total; > } > - _aab = kzalloc(alloc_size, GFP_KERNEL); > - if (unlikely(!_aab)) { > + *psp2d = sp2d = kzalloc(alloc_size, GFP_KERNEL); > + if (unlikely(!sp2d)) { > ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size); > return -ENOMEM; > } > + /* From here Just call _sp2d_free */ > - sp2d = &_aab->__asp2d.sp2d; > - *psp2d = sp2d; /* From here Just call _sp2d_free */ > - > - __a1pa = _aab->__a1pa; > - __a1pa_end = __a1pa + num_a1pa; > + /* Find start of a1pa area. */ > + __a1pa = (char *)sp2d + sizeof_stripe_pages_2d; > + /* Find end of the _allocated_ a1pa area. */ > + __a1pa_end = __a1pa + alloc_size; > + /* Allocate additionally needed a1pa items in PAGE_SIZE chunks. */ > for (i = 0; i < pages_in_unit; ++i) { > if (unlikely(__a1pa >= __a1pa_end)) { > num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, > pages_in_unit - i); > + alloc_size = sizeof__a1pa * num_a1pa; > - __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL); > + __a1pa = kzalloc(alloc_size, GFP_KERNEL); > if (unlikely(!__a1pa)) { > ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", > num_a1pa); > return -ENOMEM; > } > - __a1pa_end = __a1pa + num_a1pa; > + __a1pa_end = __a1pa + alloc_size; > /* First *pages is marked for kfree of the buffer */ > sp2d->_1p_stripes[i].alloc = true; > } > - sp2d->_1p_stripes[i].pages = __a1pa->pages; > - sp2d->_1p_stripes[i].scribble = __a1pa->scribble ; > - sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; > - ++__a1pa; > + /* > + * Attach all _lp_stripes pointers to the allocation for > + * it which was either part of the original PAGE_SIZE > + * allocation or the subsequent allocation in this loop. > + */ > + sp2d->_1p_stripes[i].pages = (void *)__a1pa; > + sp2d->_1p_stripes[i].scribble = > + sp2d->_1p_stripes[i].pages + group_width; > + sp2d->_1p_stripes[i].page_is_read = > + (char *)(sp2d->_1p_stripes[i].scribble + group_width); Can you DRY up `sp2d->_1p_stripes[i]`? ex. struct _1p_stripes* stripe; for (i = 0; i < pages_in_unit; ... ... stripe = &sp2d->_1p_stripes[i]; stripe->pages = (void*)__a1pa; stripe->scribble = stripe->pages + group_width; stripe->page_is_read = (char*)stripe->scribble + group_width; > + __a1pa += sizeof__a1pa; > } > sp2d->parity = parity; > diff --git a/fs/exofs/super.c b/fs/exofs/super.c > index 179cd5c2f52a..f3c29e9326f1 100644 > --- a/fs/exofs/super.c > +++ b/fs/exofs/super.c > @@ -549,27 +549,26 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, > static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, > struct exofs_dev **peds) > { > - struct __alloc_ore_devs_and_exofs_devs { > - /* Twice bigger table: See exofs_init_comps() and comment at > - * exofs_read_lookup_dev_table() > - */ > - struct ore_dev *oreds[numdevs * 2 - 1]; > - struct exofs_dev eds[numdevs]; > - } *aoded; > + /* Twice bigger table: See exofs_init_comps() and comment at > + * exofs_read_lookup_dev_table() > + */ > + size_t numores = numdevs * 2 - 1; const size_t > struct exofs_dev *eds; > unsigned i; > - aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); > - if (unlikely(!aoded)) { > + sbi->oc.ods = kzalloc(numores * sizeof(struct ore_dev *) + > + numdevs * sizeof(struct exofs_dev), GFP_KERNEL); > + if (unlikely(!sbi->oc.ods)) { > EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", > numdevs); > return -ENOMEM; > } > - sbi->oc.ods = aoded->oreds; > - *peds = eds = aoded->eds; > + /* Start of allocated struct exofs_dev entries */ > + *peds = eds = (void *)sbi->oc.ods[numores]; > + /* Initialize pointers into struct exofs_dev */ > for (i = 0; i < numdevs; ++i) > - aoded->oreds[i] = &eds[i].ored; > + sbi->oc.ods[i] = &eds[i].ored; > return 0; > } > -- > 2.7.4 > -- > Kees Cook > Pixel Security The sizeof calculations replacing the VLAIS I pointed out are pretty tricky to follow, but I feel pretty confident about this patch. With the changes I recommend, feel free to add my Reviewed-by tag. It would be good to get additional review and testing from the maintainer. Thanks for taking the time to tackle this. -- Thanks, ~Nick Desaulniers
On Thu, Mar 29, 2018 at 2:32 PM, Nick Desaulniers <ndesaulniers@google.com> wrote: > On Tue, Mar 27, 2018 at 1:39 PM Kees Cook <keescook@chromium.org> wrote: >>[...] >> + if (numdevs > (INT_MAX - sizeof(*ios)) / >> + sizeof(struct ore_per_dev_state)) >> + return -ENOMEM; >> + size_ios = sizeof(*ios) + sizeof(struct ore_per_dev_state) * > numdevs; > > Can all these invariant checks that return -ENOMEM be grouped together, as > it seems we have ... > >> + >> + if (sgs_per_dev * numdevs > INT_MAX / sizeof(struct osd_sg_entry)) >> + return -ENOMEM; >> + if (num_par_pages > INT_MAX / sizeof(struct page *)) >> + return -ENOMEM; > > ...a whole bunch of single conditions with the same body, can be combined > with one: > > if (a) > return d; > if (b) > return d; > if (c) > return d; > > -> > > if (a || b || c) > return d; I find that harder to read, so I let the compiler do the grouping for me. As each "if" maps to a portion of the assignment that follows it, I like it how it is. If there's agreement that they should be grouped, that's fine by me, of course. :) >> + size_extra = max(sizeof(struct osd_sg_entry) * (sgs_per_dev * > numdevs), > > Do you need parens around the sub-expression `(sgs_per_dev * numdevs)`? No, max() correctly protects those. >> diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c >> index 27cbdb697649..659129d5e9f7 100644 >> --- a/fs/exofs/ore_raid.c >> +++ b/fs/exofs/ore_raid.c >> @@ -71,6 +71,11 @@ static int _sp2d_alloc(unsigned pages_in_unit, > unsigned group_width, >> { >> struct __stripe_pages_2d *sp2d; >> unsigned data_devs = group_width - parity; >> + >> + /* >> + * Desired allocation layout is, though when larger than > PAGE_SIZE, >> + * each struct __alloc_1p_arrays is separately allocated: >> + >> struct _alloc_all_bytes { >> struct __alloc_stripe_pages_2d { >> struct __stripe_pages_2d sp2d; >> @@ -82,55 +87,85 @@ static int _sp2d_alloc(unsigned pages_in_unit, > unsigned group_width, >> char page_is_read[data_devs]; >> } __a1pa[pages_in_unit]; >> } *_aab; >> + >> struct __alloc_1p_arrays *__a1pa; >> struct __alloc_1p_arrays *__a1pa_end; >> - const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); >> + >> + */ >> + >> + char *__a1pa; >> + char *__a1pa_end; >> + > > Just my notes, since this block could use another eye for review: Tell me about it. :P These could may be named better, but I'm not familiar with the naming conventions of this code... > >> + const size_t sizeof_stripe_pages_2d = >> + sizeof(struct __stripe_pages_2d) + >> + sizeof(struct __1_page_stripe) * pages_in_unit; > > Ok, so this replaces sizeof(_aab->__asp2d). > >> + const size_t sizeof__a1pa = >> + ALIGN(sizeof(struct page *) * (2 * group_width) + > data_devs, >> + sizeof(void *)); > > And this replaces sizeof(_aab->__a1pa[0]); > >> + const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit; > > This replaces sizeof(_aab->__a1pa[pages_in_unit]); Technically what you've written is still a single element of the array (same as _aab->__a1pa[0] above). This replaces sizeof(_aab->__a1pa) (the entire array size in bytes). > >> + const size_t alloc_total = sizeof_stripe_pages_2d + >> + sizeof__a1pa_arrays; > > Replaces sizeof(*_aab); > > This is the trickiest part of this patch IMO, I think it needs careful > review, but looks correct to me. Thanks! That's why I added a bunch of comments. It was not obvious to me what was happening. >> - sp2d->_1p_stripes[i].pages = __a1pa->pages; >> - sp2d->_1p_stripes[i].scribble = __a1pa->scribble ; >> - sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; >> - ++__a1pa; >> + /* >> + * Attach all _lp_stripes pointers to the allocation for >> + * it which was either part of the original PAGE_SIZE >> + * allocation or the subsequent allocation in this loop. >> + */ >> + sp2d->_1p_stripes[i].pages = (void *)__a1pa; >> + sp2d->_1p_stripes[i].scribble = >> + sp2d->_1p_stripes[i].pages + group_width; >> + sp2d->_1p_stripes[i].page_is_read = >> + (char *)(sp2d->_1p_stripes[i].scribble + > group_width); > > Can you DRY up `sp2d->_1p_stripes[i]`? ex. > > struct _1p_stripes* stripe; > > for (i = 0; i < pages_in_unit; ... > ... > stripe = &sp2d->_1p_stripes[i]; > stripe->pages = (void*)__a1pa; > stripe->scribble = stripe->pages + group_width; > stripe->page_is_read = (char*)stripe->scribble + group_width; Yeah, that could make it more readable. > >> + __a1pa += sizeof__a1pa; >> } > >> sp2d->parity = parity; >> diff --git a/fs/exofs/super.c b/fs/exofs/super.c >> index 179cd5c2f52a..f3c29e9326f1 100644 >> --- a/fs/exofs/super.c >> +++ b/fs/exofs/super.c >> @@ -549,27 +549,26 @@ static int exofs_devs_2_odi(struct > exofs_dt_device_info *dt_dev, >> static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, >> struct exofs_dev **peds) >> { >> - struct __alloc_ore_devs_and_exofs_devs { >> - /* Twice bigger table: See exofs_init_comps() and comment > at >> - * exofs_read_lookup_dev_table() >> - */ >> - struct ore_dev *oreds[numdevs * 2 - 1]; >> - struct exofs_dev eds[numdevs]; >> - } *aoded; >> + /* Twice bigger table: See exofs_init_comps() and comment at >> + * exofs_read_lookup_dev_table() >> + */ >> + size_t numores = numdevs * 2 - 1; > > const size_t Good call. > The sizeof calculations replacing the VLAIS I pointed out are pretty tricky > to follow, but I feel pretty confident about this patch. With the changes > I recommend, feel free to add my Reviewed-by tag. It would be good to get > additional review and testing from the maintainer. Yes, agreed. I'll send a v2 with your suggestions. > Thanks for taking the time to tackle this. Thanks for the review! -Kees
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 3c6a9c156b7a..cfa862ea19d2 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -146,68 +146,82 @@ int _ore_get_io_state(struct ore_layout *layout, struct ore_io_state **pios) { struct ore_io_state *ios; - struct page **pages; - struct osd_sg_entry *sgilist; + size_t size_ios, size_extra, size_total; + void *ios_extra; + + /* + * The desired layout looks like this, with the extra_allocation + * items pointed at from fields within ios or per_dev: + struct __alloc_all_io_state { struct ore_io_state ios; struct ore_per_dev_state per_dev[numdevs]; union { struct osd_sg_entry sglist[sgs_per_dev * numdevs]; struct page *pages[num_par_pages]; - }; - } *_aios; - - if (likely(sizeof(*_aios) <= PAGE_SIZE)) { - _aios = kzalloc(sizeof(*_aios), GFP_KERNEL); - if (unlikely(!_aios)) { - ORE_DBGMSG("Failed kzalloc bytes=%zd\n", - sizeof(*_aios)); + } extra_allocation; + } whole_allocation; + + */ + + /* This should never happen, so abort early if it ever does. */ + if (sgs_per_dev && num_par_pages) { + ORE_DBGMSG("Tried to use both pages and sglist\n"); + *pios = NULL; + return -EINVAL; + } + + if (numdevs > (INT_MAX - sizeof(*ios)) / + sizeof(struct ore_per_dev_state)) + return -ENOMEM; + size_ios = sizeof(*ios) + sizeof(struct ore_per_dev_state) * numdevs; + + if (sgs_per_dev * numdevs > INT_MAX / sizeof(struct osd_sg_entry)) + return -ENOMEM; + if (num_par_pages > INT_MAX / sizeof(struct page *)) + return -ENOMEM; + size_extra = max(sizeof(struct osd_sg_entry) * (sgs_per_dev * numdevs), + sizeof(struct page *) * num_par_pages); + + size_total = size_ios + size_extra; + + if (likely(size_total <= PAGE_SIZE)) { + ios = kzalloc(size_total, GFP_KERNEL); + if (unlikely(!ios)) { + ORE_DBGMSG("Failed kzalloc bytes=%zd\n", size_total); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? _aios->pages : NULL; - sgilist = sgs_per_dev ? _aios->sglist : NULL; - ios = &_aios->ios; + ios_extra = (char *)ios + size_ios; } else { - struct __alloc_small_io_state { - struct ore_io_state ios; - struct ore_per_dev_state per_dev[numdevs]; - } *_aio_small; - union __extra_part { - struct osd_sg_entry sglist[sgs_per_dev * numdevs]; - struct page *pages[num_par_pages]; - } *extra_part; - - _aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL); - if (unlikely(!_aio_small)) { + ios = kzalloc(size_ios, GFP_KERNEL); + if (unlikely(!ios)) { ORE_DBGMSG("Failed alloc first part bytes=%zd\n", - sizeof(*_aio_small)); + size_ios); *pios = NULL; return -ENOMEM; } - extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL); - if (unlikely(!extra_part)) { + ios_extra = kzalloc(size_extra, GFP_KERNEL); + if (unlikely(!ios_extra)) { ORE_DBGMSG("Failed alloc second part bytes=%zd\n", - sizeof(*extra_part)); - kfree(_aio_small); + size_extra); + kfree(ios); *pios = NULL; return -ENOMEM; } - pages = num_par_pages ? extra_part->pages : NULL; - sgilist = sgs_per_dev ? extra_part->sglist : NULL; /* In this case the per_dev[0].sgilist holds the pointer to * be freed */ - ios = &_aio_small->ios; ios->extra_part_alloc = true; } - if (pages) { - ios->parity_pages = pages; + if (num_par_pages) { + ios->parity_pages = ios_extra; ios->max_par_pages = num_par_pages; } - if (sgilist) { + if (sgs_per_dev) { + struct osd_sg_entry *sgilist = ios_extra; unsigned d; for (d = 0; d < numdevs; ++d) { diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 27cbdb697649..659129d5e9f7 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -71,6 +71,11 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, { struct __stripe_pages_2d *sp2d; unsigned data_devs = group_width - parity; + + /* + * Desired allocation layout is, though when larger than PAGE_SIZE, + * each struct __alloc_1p_arrays is separately allocated: + struct _alloc_all_bytes { struct __alloc_stripe_pages_2d { struct __stripe_pages_2d sp2d; @@ -82,55 +87,85 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width, char page_is_read[data_devs]; } __a1pa[pages_in_unit]; } *_aab; + struct __alloc_1p_arrays *__a1pa; struct __alloc_1p_arrays *__a1pa_end; - const unsigned sizeof__a1pa = sizeof(_aab->__a1pa[0]); + + */ + + char *__a1pa; + char *__a1pa_end; + + const size_t sizeof_stripe_pages_2d = + sizeof(struct __stripe_pages_2d) + + sizeof(struct __1_page_stripe) * pages_in_unit; + const size_t sizeof__a1pa = + ALIGN(sizeof(struct page *) * (2 * group_width) + data_devs, + sizeof(void *)); + const size_t sizeof__a1pa_arrays = sizeof__a1pa * pages_in_unit; + const size_t alloc_total = sizeof_stripe_pages_2d + + sizeof__a1pa_arrays; + unsigned num_a1pa, alloc_size, i; /* FIXME: check these numbers in ore_verify_layout */ - BUG_ON(sizeof(_aab->__asp2d) > PAGE_SIZE); + BUG_ON(sizeof_stripe_pages_2d > PAGE_SIZE); BUG_ON(sizeof__a1pa > PAGE_SIZE); - if (sizeof(*_aab) > PAGE_SIZE) { - num_a1pa = (PAGE_SIZE - sizeof(_aab->__asp2d)) / sizeof__a1pa; - alloc_size = sizeof(_aab->__asp2d) + sizeof__a1pa * num_a1pa; + /* + * If alloc_total would be larger than PAGE_SIZE, only allocate + * as many a1pa items as would fill the rest of the page, instead + * of the full pages_in_unit count. + */ + if (alloc_total > PAGE_SIZE) { + num_a1pa = (PAGE_SIZE - sizeof_stripe_pages_2d) / sizeof__a1pa; + alloc_size = sizeof_stripe_pages_2d + sizeof__a1pa * num_a1pa; } else { num_a1pa = pages_in_unit; - alloc_size = sizeof(*_aab); + alloc_size = alloc_total; } - _aab = kzalloc(alloc_size, GFP_KERNEL); - if (unlikely(!_aab)) { + *psp2d = sp2d = kzalloc(alloc_size, GFP_KERNEL); + if (unlikely(!sp2d)) { ORE_DBGMSG("!! Failed to alloc sp2d size=%d\n", alloc_size); return -ENOMEM; } + /* From here Just call _sp2d_free */ - sp2d = &_aab->__asp2d.sp2d; - *psp2d = sp2d; /* From here Just call _sp2d_free */ - - __a1pa = _aab->__a1pa; - __a1pa_end = __a1pa + num_a1pa; + /* Find start of a1pa area. */ + __a1pa = (char *)sp2d + sizeof_stripe_pages_2d; + /* Find end of the _allocated_ a1pa area. */ + __a1pa_end = __a1pa + alloc_size; + /* Allocate additionally needed a1pa items in PAGE_SIZE chunks. */ for (i = 0; i < pages_in_unit; ++i) { if (unlikely(__a1pa >= __a1pa_end)) { num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, pages_in_unit - i); + alloc_size = sizeof__a1pa * num_a1pa; - __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL); + __a1pa = kzalloc(alloc_size, GFP_KERNEL); if (unlikely(!__a1pa)) { ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", num_a1pa); return -ENOMEM; } - __a1pa_end = __a1pa + num_a1pa; + __a1pa_end = __a1pa + alloc_size; /* First *pages is marked for kfree of the buffer */ sp2d->_1p_stripes[i].alloc = true; } - sp2d->_1p_stripes[i].pages = __a1pa->pages; - sp2d->_1p_stripes[i].scribble = __a1pa->scribble ; - sp2d->_1p_stripes[i].page_is_read = __a1pa->page_is_read; - ++__a1pa; + /* + * Attach all _lp_stripes pointers to the allocation for + * it which was either part of the original PAGE_SIZE + * allocation or the subsequent allocation in this loop. + */ + sp2d->_1p_stripes[i].pages = (void *)__a1pa; + sp2d->_1p_stripes[i].scribble = + sp2d->_1p_stripes[i].pages + group_width; + sp2d->_1p_stripes[i].page_is_read = + (char *)(sp2d->_1p_stripes[i].scribble + group_width); + __a1pa += sizeof__a1pa; } sp2d->parity = parity; diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 179cd5c2f52a..f3c29e9326f1 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -549,27 +549,26 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, struct exofs_dev **peds) { - struct __alloc_ore_devs_and_exofs_devs { - /* Twice bigger table: See exofs_init_comps() and comment at - * exofs_read_lookup_dev_table() - */ - struct ore_dev *oreds[numdevs * 2 - 1]; - struct exofs_dev eds[numdevs]; - } *aoded; + /* Twice bigger table: See exofs_init_comps() and comment at + * exofs_read_lookup_dev_table() + */ + size_t numores = numdevs * 2 - 1; struct exofs_dev *eds; unsigned i; - aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); - if (unlikely(!aoded)) { + sbi->oc.ods = kzalloc(numores * sizeof(struct ore_dev *) + + numdevs * sizeof(struct exofs_dev), GFP_KERNEL); + if (unlikely(!sbi->oc.ods)) { EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", numdevs); return -ENOMEM; } - sbi->oc.ods = aoded->oreds; - *peds = eds = aoded->eds; + /* Start of allocated struct exofs_dev entries */ + *peds = eds = (void *)sbi->oc.ods[numores]; + /* Initialize pointers into struct exofs_dev */ for (i = 0; i < numdevs; ++i) - aoded->oreds[i] = &eds[i].ored; + sbi->oc.ods[i] = &eds[i].ored; return 0; }
On the quest to remove all VLAs from the kernel[1] this adjusts several cases where allocation is made after an array of structures that points back into the allocation. The allocations are changed to perform explicit calculations instead of using a Variable Length Array in a structure. Additionally, this lets Clang compile this code now, since Clang does not support VLAIS[2]. [1] https://lkml.org/lkml/2018/3/7/621 [2] https://lkml.org/lkml/2013/9/23/500 Signed-off-by: Kees Cook <keescook@chromium.org> --- I not sure the best way to test this. Kconfig implies I need special hardware? --- fs/exofs/ore.c | 84 +++++++++++++++++++++++++++++++---------------------- fs/exofs/ore_raid.c | 73 ++++++++++++++++++++++++++++++++++------------ fs/exofs/super.c | 23 +++++++-------- 3 files changed, 114 insertions(+), 66 deletions(-)