Message ID | 20180413212427.18261-1-labbott@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
G'day Laura, On 14/04/2018 05:24, Laura Abbott wrote: > The new challenge is to remove VLAs from the kernel > (see https://lkml.org/lkml/2018/3/7/621) to eventually > turn on -Wvla. > > Using a kmalloc array is the easy way to fix this but kmalloc is still > more expensive than stack allocation. Introduce a fast path with a > fixed size stack array to cover most chip with gpios below some fixed > amount. The slow path dynamically allocates an array to cover those > chips with a large number of gpios. > > Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> > Signed-off-by: Lukas Wunner <lukas@wunner.de> > Signed-off-by: Laura Abbott <labbott@redhat.com> > --- > v5: Dropped some outdated comments and extra whitespace. Switched to > ARCH_NR_GPIOS per suggestion of Linus Walleij. > --- > drivers/gpio/gpiolib.c | 76 +++++++++++++++++++++++++++++++++---------- > drivers/gpio/gpiolib.h | 2 +- > include/linux/gpio/consumer.h | 10 +++--- > 3 files changed, 66 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c > index d66de67ef307..79ec7a29b684 100644 > --- a/drivers/gpio/gpiolib.c > +++ b/drivers/gpio/gpiolib.c > @@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = { > .name = "gpio", > }; > > +/* > + * Number of GPIOs to use for the fast path in set array > + */ > +#define FASTPATH_NGPIO ARCH_NR_GPIOS > + > /* gpio_lock prevents conflicts during gpio_desc[] table updates. > * While any GPIO is requested, its gpio_chip is not removable; > * each GPIO's "requested" flag serves as a lock and refcount. > @@ -399,12 +404,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, > vals[i] = !!ghd.values[i]; > > /* Reuse the array setting function */ > - gpiod_set_array_value_complex(false, > + return gpiod_set_array_value_complex(false, > true, > lh->numdescs, > lh->descs, > vals); > - return 0; > } > return -EINVAL; > } > @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, > goto err_free_descs; > } > > + if (chip->ngpio > FASTPATH_NGPIO) > + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", > + chip->ngpio, FASTPATH_NGPIO); > + > gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL); > if (!gdev->label) { > status = -ENOMEM; > @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, > > while (i < array_size) { > struct gpio_chip *chip = desc_array[i]->gdev->chip; > - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; > - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; > + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; > + unsigned long *mask, *bits; > int first, j, ret; > > + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { > + memset(fastpath, 0, sizeof(fastpath)); > + mask = fastpath; > + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); Previously it looks like just mask was zeroed. So could this just be: memset(mask, 0, BITS_TO_LONGS(chip->ngpio)); I'm guessing it's not a huge additional overhead as it is, but it's more in line with what was there. > + } else { > + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), > + sizeof(*mask), > + can_sleep ? GFP_KERNEL : GFP_ATOMIC); > + if (!mask) > + return -ENOMEM; > + bits = mask + BITS_TO_LONGS(chip->ngpio); > + } > + > if (!can_sleep) > WARN_ON(chip->can_sleep); > > /* collect all inputs belonging to the same chip */ > first = i; > - memset(mask, 0, sizeof(mask)); > do { > const struct gpio_desc *desc = desc_array[i]; > int hwgpio = gpio_chip_hwgpio(desc); > @@ -2682,8 +2702,11 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, > (desc_array[i]->gdev->chip == chip)); > > ret = gpio_chip_get_multiple(chip, mask, bits); > - if (ret) > + if (ret) { > + if (mask != fastpath) > + kfree(mask); > return ret; > + } > > for (j = first; j < i; j++) { > const struct gpio_desc *desc = desc_array[j]; > @@ -2695,6 +2718,9 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, > value_array[j] = value; > trace_gpio_value(desc_to_gpio(desc), 1, value); > } > + > + if (mask != fastpath) > + kfree(mask); > } > return 0; > } > @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, > } > } > > -void gpiod_set_array_value_complex(bool raw, bool can_sleep, > +int gpiod_set_array_value_complex(bool raw, bool can_sleep, > unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array) > @@ -2887,14 +2913,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, > > while (i < array_size) { > struct gpio_chip *chip = desc_array[i]->gdev->chip; > - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; > - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; > + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; > + unsigned long *mask, *bits; > int count = 0; > > + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { > + memset(fastpath, 0, sizeof(fastpath)); > + mask = fastpath; > + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); > + } else { > + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), > + sizeof(*mask), > + can_sleep ? GFP_KERNEL : GFP_ATOMIC); > + if (!mask) > + return -ENOMEM; > + bits = mask + BITS_TO_LONGS(chip->ngpio); > + } > + > if (!can_sleep) > WARN_ON(chip->can_sleep); > > - memset(mask, 0, sizeof(mask)); > do { > struct gpio_desc *desc = desc_array[i]; > int hwgpio = gpio_chip_hwgpio(desc); > @@ -2925,7 +2963,11 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, > /* push collected bits to outputs */ > if (count != 0) > gpio_chip_set_multiple(chip, mask, bits); > + > + if (mask != fastpath) > + kfree(mask); > } > + return 0; > } > > /** > @@ -3000,13 +3042,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value); > * This function should be called from contexts where we cannot sleep, and will > * complain if the GPIO chip functions potentially sleep. > */ > -void gpiod_set_raw_array_value(unsigned int array_size, > +int gpiod_set_raw_array_value(unsigned int array_size, > struct gpio_desc **desc_array, int *value_array) > { > if (!desc_array) > - return; > - gpiod_set_array_value_complex(true, false, array_size, desc_array, > - value_array); > + return -EINVAL; > + return gpiod_set_array_value_complex(true, false, array_size, > + desc_array, value_array); > } > EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); > > @@ -3326,14 +3368,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); > * > * This function is to be called from contexts that can sleep. > */ > -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, > +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array) > { > might_sleep_if(extra_checks); > if (!desc_array) > - return; > - gpiod_set_array_value_complex(true, true, array_size, desc_array, > + return -EINVAL; > + return gpiod_set_array_value_complex(true, true, array_size, desc_array, > value_array); > } > EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); > diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h > index b17ec6795c81..b64813e3876e 100644 > --- a/drivers/gpio/gpiolib.h > +++ b/drivers/gpio/gpiolib.h > @@ -188,7 +188,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, > unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > -void gpiod_set_array_value_complex(bool raw, bool can_sleep, > +int gpiod_set_array_value_complex(bool raw, bool can_sleep, > unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h > index dbd065963296..243112c7fa7d 100644 > --- a/include/linux/gpio/consumer.h > +++ b/include/linux/gpio/consumer.h > @@ -116,7 +116,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > void gpiod_set_raw_value(struct gpio_desc *desc, int value); > -void gpiod_set_raw_array_value(unsigned int array_size, > +int gpiod_set_raw_array_value(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > > @@ -134,7 +134,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); > -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, > +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array); > > @@ -369,12 +369,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) > /* GPIO can never have been requested */ > WARN_ON(1); > } > -static inline void gpiod_set_raw_array_value(unsigned int array_size, > +static inline int gpiod_set_raw_array_value(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array) > { > /* GPIO can never have been requested */ > WARN_ON(1); > + return 0; > } > > static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) > @@ -423,12 +424,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, > /* GPIO can never have been requested */ > WARN_ON(1); > } > -static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, > +static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, > struct gpio_desc **desc_array, > int *value_array) > { > /* GPIO can never have been requested */ > WARN_ON(1); > + return 0; > } > > static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) >
G'day Laura, One more comment. On 16/04/2018 12:41, Phil Reid wrote: > G'day Laura, > > On 14/04/2018 05:24, Laura Abbott wrote: >> The new challenge is to remove VLAs from the kernel >> (see https://lkml.org/lkml/2018/3/7/621) to eventually >> turn on -Wvla. >> >> Using a kmalloc array is the easy way to fix this but kmalloc is still >> more expensive than stack allocation. Introduce a fast path with a >> fixed size stack array to cover most chip with gpios below some fixed >> amount. The slow path dynamically allocates an array to cover those >> chips with a large number of gpios. >> >> Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> >> Signed-off-by: Lukas Wunner <lukas@wunner.de> >> Signed-off-by: Laura Abbott <labbott@redhat.com> >> --- >> v5: Dropped some outdated comments and extra whitespace. Switched to >> ARCH_NR_GPIOS per suggestion of Linus Walleij. >> --- >> drivers/gpio/gpiolib.c | 76 +++++++++++++++++++++++++++++++++---------- >> drivers/gpio/gpiolib.h | 2 +- >> include/linux/gpio/consumer.h | 10 +++--- >> 3 files changed, 66 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c >> index d66de67ef307..79ec7a29b684 100644 >> --- a/drivers/gpio/gpiolib.c >> +++ b/drivers/gpio/gpiolib.c >> @@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = { >> .name = "gpio", >> }; >> +/* >> + * Number of GPIOs to use for the fast path in set array >> + */ >> +#define FASTPATH_NGPIO ARCH_NR_GPIOS Also wouldn't this mean that fast path will never be triggered now... >> + >> /* gpio_lock prevents conflicts during gpio_desc[] table updates. >> * While any GPIO is requested, its gpio_chip is not removable; >> * each GPIO's "requested" flag serves as a lock and refcount. >> @@ -399,12 +404,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, >> vals[i] = !!ghd.values[i]; >> /* Reuse the array setting function */ >> - gpiod_set_array_value_complex(false, >> + return gpiod_set_array_value_complex(false, >> true, >> lh->numdescs, >> lh->descs, >> vals); >> - return 0; >> } >> return -EINVAL; >> } >> @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, >> goto err_free_descs; >> } >> + if (chip->ngpio > FASTPATH_NGPIO) >> + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", >> + chip->ngpio, FASTPATH_NGPIO); >> + >> gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL); >> if (!gdev->label) { >> status = -ENOMEM; >> @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >> while (i < array_size) { >> struct gpio_chip *chip = desc_array[i]->gdev->chip; >> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >> + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; >> + unsigned long *mask, *bits; >> int first, j, ret; >> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >> + memset(fastpath, 0, sizeof(fastpath)); >> + mask = fastpath; >> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); > Previously it looks like just mask was zeroed. > So could this just be: > memset(mask, 0, BITS_TO_LONGS(chip->ngpio)); > > I'm guessing it's not a huge additional overhead as it is, but it's more in line with what was there. > > >> + } else { >> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >> + sizeof(*mask), >> + can_sleep ? GFP_KERNEL : GFP_ATOMIC); >> + if (!mask) >> + return -ENOMEM; >> + bits = mask + BITS_TO_LONGS(chip->ngpio); >> + } >> + >> if (!can_sleep) >> WARN_ON(chip->can_sleep); >> /* collect all inputs belonging to the same chip */ >> first = i; >> - memset(mask, 0, sizeof(mask)); >> do { >> const struct gpio_desc *desc = desc_array[i]; >> int hwgpio = gpio_chip_hwgpio(desc); >> @@ -2682,8 +2702,11 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >> (desc_array[i]->gdev->chip == chip)); >> ret = gpio_chip_get_multiple(chip, mask, bits); >> - if (ret) >> + if (ret) { >> + if (mask != fastpath) >> + kfree(mask); >> return ret; >> + } >> for (j = first; j < i; j++) { >> const struct gpio_desc *desc = desc_array[j]; >> @@ -2695,6 +2718,9 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >> value_array[j] = value; >> trace_gpio_value(desc_to_gpio(desc), 1, value); >> } >> + >> + if (mask != fastpath) >> + kfree(mask); >> } >> return 0; >> } >> @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, >> } >> } >> -void gpiod_set_array_value_complex(bool raw, bool can_sleep, >> +int gpiod_set_array_value_complex(bool raw, bool can_sleep, >> unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array) >> @@ -2887,14 +2913,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, >> while (i < array_size) { >> struct gpio_chip *chip = desc_array[i]->gdev->chip; >> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >> + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; >> + unsigned long *mask, *bits; >> int count = 0; >> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >> + memset(fastpath, 0, sizeof(fastpath)); >> + mask = fastpath; >> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >> + } else { >> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >> + sizeof(*mask), >> + can_sleep ? GFP_KERNEL : GFP_ATOMIC); >> + if (!mask) >> + return -ENOMEM; >> + bits = mask + BITS_TO_LONGS(chip->ngpio); >> + } >> + >> if (!can_sleep) >> WARN_ON(chip->can_sleep); >> - memset(mask, 0, sizeof(mask)); >> do { >> struct gpio_desc *desc = desc_array[i]; >> int hwgpio = gpio_chip_hwgpio(desc); >> @@ -2925,7 +2963,11 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, >> /* push collected bits to outputs */ >> if (count != 0) >> gpio_chip_set_multiple(chip, mask, bits); >> + >> + if (mask != fastpath) >> + kfree(mask); >> } >> + return 0; >> } >> /** >> @@ -3000,13 +3042,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value); >> * This function should be called from contexts where we cannot sleep, and will >> * complain if the GPIO chip functions potentially sleep. >> */ >> -void gpiod_set_raw_array_value(unsigned int array_size, >> +int gpiod_set_raw_array_value(unsigned int array_size, >> struct gpio_desc **desc_array, int *value_array) >> { >> if (!desc_array) >> - return; >> - gpiod_set_array_value_complex(true, false, array_size, desc_array, >> - value_array); >> + return -EINVAL; >> + return gpiod_set_array_value_complex(true, false, array_size, >> + desc_array, value_array); >> } >> EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); >> @@ -3326,14 +3368,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); >> * >> * This function is to be called from contexts that can sleep. >> */ >> -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array) >> { >> might_sleep_if(extra_checks); >> if (!desc_array) >> - return; >> - gpiod_set_array_value_complex(true, true, array_size, desc_array, >> + return -EINVAL; >> + return gpiod_set_array_value_complex(true, true, array_size, desc_array, >> value_array); >> } >> EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); >> diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h >> index b17ec6795c81..b64813e3876e 100644 >> --- a/drivers/gpio/gpiolib.h >> +++ b/drivers/gpio/gpiolib.h >> @@ -188,7 +188,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >> unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> -void gpiod_set_array_value_complex(bool raw, bool can_sleep, >> +int gpiod_set_array_value_complex(bool raw, bool can_sleep, >> unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h >> index dbd065963296..243112c7fa7d 100644 >> --- a/include/linux/gpio/consumer.h >> +++ b/include/linux/gpio/consumer.h >> @@ -116,7 +116,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> void gpiod_set_raw_value(struct gpio_desc *desc, int value); >> -void gpiod_set_raw_array_value(unsigned int array_size, >> +int gpiod_set_raw_array_value(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> @@ -134,7 +134,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); >> -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array); >> @@ -369,12 +369,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) >> /* GPIO can never have been requested */ >> WARN_ON(1); >> } >> -static inline void gpiod_set_raw_array_value(unsigned int array_size, >> +static inline int gpiod_set_raw_array_value(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array) >> { >> /* GPIO can never have been requested */ >> WARN_ON(1); >> + return 0; >> } >> static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) >> @@ -423,12 +424,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, >> /* GPIO can never have been requested */ >> WARN_ON(1); >> } >> -static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> +static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >> struct gpio_desc **desc_array, >> int *value_array) >> { >> /* GPIO can never have been requested */ >> WARN_ON(1); >> + return 0; >> } >> static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) >> > >
On 16/04/2018 13:19, Phil Reid wrote: > G'day Laura, > > One more comment. > On 16/04/2018 12:41, Phil Reid wrote: >> G'day Laura, >> >> On 14/04/2018 05:24, Laura Abbott wrote: >>> The new challenge is to remove VLAs from the kernel >>> (see https://lkml.org/lkml/2018/3/7/621) to eventually >>> turn on -Wvla. >>> >>> Using a kmalloc array is the easy way to fix this but kmalloc is still >>> more expensive than stack allocation. Introduce a fast path with a >>> fixed size stack array to cover most chip with gpios below some fixed >>> amount. The slow path dynamically allocates an array to cover those >>> chips with a large number of gpios. >>> >>> Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> >>> Signed-off-by: Lukas Wunner <lukas@wunner.de> >>> Signed-off-by: Laura Abbott <labbott@redhat.com> >>> --- >>> v5: Dropped some outdated comments and extra whitespace. Switched to >>> ARCH_NR_GPIOS per suggestion of Linus Walleij. >>> --- >>> drivers/gpio/gpiolib.c | 76 +++++++++++++++++++++++++++++++++---------- >>> drivers/gpio/gpiolib.h | 2 +- >>> include/linux/gpio/consumer.h | 10 +++--- >>> 3 files changed, 66 insertions(+), 22 deletions(-) >>> >>> diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c >>> index d66de67ef307..79ec7a29b684 100644 >>> --- a/drivers/gpio/gpiolib.c >>> +++ b/drivers/gpio/gpiolib.c >>> @@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = { >>> .name = "gpio", >>> }; >>> +/* >>> + * Number of GPIOs to use for the fast path in set array >>> + */ >>> +#define FASTPATH_NGPIO ARCH_NR_GPIOS > > Also wouldn't this mean that fast path will never be triggered now... Just to be clearer. That this will always be true. (chip->ngpio <= FASTPATH_NGPIO) > > >>> + >>> /* gpio_lock prevents conflicts during gpio_desc[] table updates. >>> * While any GPIO is requested, its gpio_chip is not removable; >>> * each GPIO's "requested" flag serves as a lock and refcount. >>> @@ -399,12 +404,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, >>> vals[i] = !!ghd.values[i]; >>> /* Reuse the array setting function */ >>> - gpiod_set_array_value_complex(false, >>> + return gpiod_set_array_value_complex(false, >>> true, >>> lh->numdescs, >>> lh->descs, >>> vals); >>> - return 0; >>> } >>> return -EINVAL; >>> } >>> @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, >>> goto err_free_descs; >>> } >>> + if (chip->ngpio > FASTPATH_NGPIO) >>> + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", >>> + chip->ngpio, FASTPATH_NGPIO); >>> + >>> gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL); >>> if (!gdev->label) { >>> status = -ENOMEM; >>> @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >>> while (i < array_size) { >>> struct gpio_chip *chip = desc_array[i]->gdev->chip; >>> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >>> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >>> + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; >>> + unsigned long *mask, *bits; >>> int first, j, ret; >>> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >>> + memset(fastpath, 0, sizeof(fastpath)); >>> + mask = fastpath; >>> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >> Previously it looks like just mask was zeroed. >> So could this just be: >> memset(mask, 0, BITS_TO_LONGS(chip->ngpio)); >> >> I'm guessing it's not a huge additional overhead as it is, but it's more in line with what was there. >> >> >>> + } else { >>> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >>> + sizeof(*mask), >>> + can_sleep ? GFP_KERNEL : GFP_ATOMIC); >>> + if (!mask) >>> + return -ENOMEM; >>> + bits = mask + BITS_TO_LONGS(chip->ngpio); >>> + } >>> + >>> if (!can_sleep) >>> WARN_ON(chip->can_sleep); >>> /* collect all inputs belonging to the same chip */ >>> first = i; >>> - memset(mask, 0, sizeof(mask)); >>> do { >>> const struct gpio_desc *desc = desc_array[i]; >>> int hwgpio = gpio_chip_hwgpio(desc); >>> @@ -2682,8 +2702,11 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >>> (desc_array[i]->gdev->chip == chip)); >>> ret = gpio_chip_get_multiple(chip, mask, bits); >>> - if (ret) >>> + if (ret) { >>> + if (mask != fastpath) >>> + kfree(mask); >>> return ret; >>> + } >>> for (j = first; j < i; j++) { >>> const struct gpio_desc *desc = desc_array[j]; >>> @@ -2695,6 +2718,9 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >>> value_array[j] = value; >>> trace_gpio_value(desc_to_gpio(desc), 1, value); >>> } >>> + >>> + if (mask != fastpath) >>> + kfree(mask); >>> } >>> return 0; >>> } >>> @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, >>> } >>> } >>> -void gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> +int gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array) >>> @@ -2887,14 +2913,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> while (i < array_size) { >>> struct gpio_chip *chip = desc_array[i]->gdev->chip; >>> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >>> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >>> + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; >>> + unsigned long *mask, *bits; >>> int count = 0; >>> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >>> + memset(fastpath, 0, sizeof(fastpath)); >>> + mask = fastpath; >>> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >>> + } else { >>> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >>> + sizeof(*mask), >>> + can_sleep ? GFP_KERNEL : GFP_ATOMIC); >>> + if (!mask) >>> + return -ENOMEM; >>> + bits = mask + BITS_TO_LONGS(chip->ngpio); >>> + } >>> + >>> if (!can_sleep) >>> WARN_ON(chip->can_sleep); >>> - memset(mask, 0, sizeof(mask)); >>> do { >>> struct gpio_desc *desc = desc_array[i]; >>> int hwgpio = gpio_chip_hwgpio(desc); >>> @@ -2925,7 +2963,11 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> /* push collected bits to outputs */ >>> if (count != 0) >>> gpio_chip_set_multiple(chip, mask, bits); >>> + >>> + if (mask != fastpath) >>> + kfree(mask); >>> } >>> + return 0; >>> } >>> /** >>> @@ -3000,13 +3042,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value); >>> * This function should be called from contexts where we cannot sleep, and will >>> * complain if the GPIO chip functions potentially sleep. >>> */ >>> -void gpiod_set_raw_array_value(unsigned int array_size, >>> +int gpiod_set_raw_array_value(unsigned int array_size, >>> struct gpio_desc **desc_array, int *value_array) >>> { >>> if (!desc_array) >>> - return; >>> - gpiod_set_array_value_complex(true, false, array_size, desc_array, >>> - value_array); >>> + return -EINVAL; >>> + return gpiod_set_array_value_complex(true, false, array_size, >>> + desc_array, value_array); >>> } >>> EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); >>> @@ -3326,14 +3368,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); >>> * >>> * This function is to be called from contexts that can sleep. >>> */ >>> -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array) >>> { >>> might_sleep_if(extra_checks); >>> if (!desc_array) >>> - return; >>> - gpiod_set_array_value_complex(true, true, array_size, desc_array, >>> + return -EINVAL; >>> + return gpiod_set_array_value_complex(true, true, array_size, desc_array, >>> value_array); >>> } >>> EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); >>> diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h >>> index b17ec6795c81..b64813e3876e 100644 >>> --- a/drivers/gpio/gpiolib.h >>> +++ b/drivers/gpio/gpiolib.h >>> @@ -188,7 +188,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >>> unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> -void gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> +int gpiod_set_array_value_complex(bool raw, bool can_sleep, >>> unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h >>> index dbd065963296..243112c7fa7d 100644 >>> --- a/include/linux/gpio/consumer.h >>> +++ b/include/linux/gpio/consumer.h >>> @@ -116,7 +116,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> void gpiod_set_raw_value(struct gpio_desc *desc, int value); >>> -void gpiod_set_raw_array_value(unsigned int array_size, >>> +int gpiod_set_raw_array_value(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> @@ -134,7 +134,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); >>> -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array); >>> @@ -369,12 +369,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) >>> /* GPIO can never have been requested */ >>> WARN_ON(1); >>> } >>> -static inline void gpiod_set_raw_array_value(unsigned int array_size, >>> +static inline int gpiod_set_raw_array_value(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array) >>> { >>> /* GPIO can never have been requested */ >>> WARN_ON(1); >>> + return 0; >>> } >>> static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) >>> @@ -423,12 +424,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, >>> /* GPIO can never have been requested */ >>> WARN_ON(1); >>> } >>> -static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> +static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, >>> struct gpio_desc **desc_array, >>> int *value_array) >>> { >>> /* GPIO can never have been requested */ >>> WARN_ON(1); >>> + return 0; >>> } >>> static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) >>> >> >> > >
Hi Laura, Thanks for your patch! On Fri, Apr 13, 2018 at 11:24 PM, Laura Abbott <labbott@redhat.com> wrote: > The new challenge is to remove VLAs from the kernel > (see https://lkml.org/lkml/2018/3/7/621) to eventually > turn on -Wvla. > > Using a kmalloc array is the easy way to fix this but kmalloc is still > more expensive than stack allocation. Introduce a fast path with a > fixed size stack array to cover most chip with gpios below some fixed > amount. The slow path dynamically allocates an array to cover those > chips with a large number of gpios. Blindly replacing VLAs by allocated arrays is IMHO not such a good solution. On the largest systems, NR_GPIOS is 2048, so that makes 2 arrays of 256 bytes. That's an uppper limit, and assumes they are all on the same gpiochip, which they probably aren't. Still, 2 x 256 bytes is a lot, so I agree it should be fixed. So, wouldn't it make more sense to not allocate memory, but just process the request in chunks (say, at most 128 gpios per chunk, i.e. 2 x 16 bytes)? The code already caters for handling chunks due to not all gpios belonging to the same gpiochip. That will probably also be faster than allocating memory, which is the main idea behind this API. > Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> > Signed-off-by: Lukas Wunner <lukas@wunner.de> > Signed-off-by: Laura Abbott <labbott@redhat.com> > --- a/drivers/gpio/gpiolib.c > +++ b/drivers/gpio/gpiolib.c > @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, > goto err_free_descs; > } > > + if (chip->ngpio > FASTPATH_NGPIO) > + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", > + chip->ngpio, FASTPATH_NGPIO); FWIW, can this warning be triggered from userspace? > @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, > > while (i < array_size) { > struct gpio_chip *chip = desc_array[i]->gdev->chip; > - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; > - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; Hence just use a fixed size here... > + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; > + unsigned long *mask, *bits; > int first, j, ret; > > + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { > + memset(fastpath, 0, sizeof(fastpath)); > + mask = fastpath; > + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); > + } else { > + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), > + sizeof(*mask), > + can_sleep ? GFP_KERNEL : GFP_ATOMIC); > + if (!mask) > + return -ENOMEM; > + bits = mask + BITS_TO_LONGS(chip->ngpio); > + } > + > if (!can_sleep) > WARN_ON(chip->can_sleep); > > /* collect all inputs belonging to the same chip */ > first = i; > - memset(mask, 0, sizeof(mask)); > do { > const struct gpio_desc *desc = desc_array[i]; > int hwgpio = gpio_chip_hwgpio(desc); Out-of-context, the code does: | __set_bit(hwgpio, mask); | i++; | } while ((i < array_size) && ... and change this limit to "(i < min(array_size, first + ARRAY_SIZE(mask) * BITS_PER_BYTE))" | (desc_array[i]->gdev->chip == chip)); ... and you're done? > @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, > } > } > > -void gpiod_set_array_value_complex(bool raw, bool can_sleep, > +int gpiod_set_array_value_complex(bool raw, bool can_sleep, Same here. Gr{oetje,eeting}s, Geert
On 04/20/2018 02:02 AM, Geert Uytterhoeven wrote: > Hi Laura, > > Thanks for your patch! > > On Fri, Apr 13, 2018 at 11:24 PM, Laura Abbott <labbott@redhat.com> wrote: >> The new challenge is to remove VLAs from the kernel >> (see https://lkml.org/lkml/2018/3/7/621) to eventually >> turn on -Wvla. >> >> Using a kmalloc array is the easy way to fix this but kmalloc is still >> more expensive than stack allocation. Introduce a fast path with a >> fixed size stack array to cover most chip with gpios below some fixed >> amount. The slow path dynamically allocates an array to cover those >> chips with a large number of gpios. > > Blindly replacing VLAs by allocated arrays is IMHO not such a good solution. > On the largest systems, NR_GPIOS is 2048, so that makes 2 arrays of 256 > bytes. That's an uppper limit, and assumes they are all on the same gpiochip, > which they probably aren't. > > Still, 2 x 256 bytes is a lot, so I agree it should be fixed. > > So, wouldn't it make more sense to not allocate memory, but just process > the request in chunks (say, at most 128 gpios per chunk, i.e. 2 x > 16 bytes)? The code already caters for handling chunks due to not all gpios > belonging to the same gpiochip. That will probably also be faster than > allocating memory, which is the main idea behind this API. > >> Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> >> Signed-off-by: Lukas Wunner <lukas@wunner.de> >> Signed-off-by: Laura Abbott <labbott@redhat.com> > >> --- a/drivers/gpio/gpiolib.c >> +++ b/drivers/gpio/gpiolib.c > >> @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, >> goto err_free_descs; >> } >> >> + if (chip->ngpio > FASTPATH_NGPIO) >> + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", >> + chip->ngpio, FASTPATH_NGPIO); > > FWIW, can this warning be triggered from userspace? > >> @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, >> >> while (i < array_size) { >> struct gpio_chip *chip = desc_array[i]->gdev->chip; >> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; > > Hence just use a fixed size here... > >> + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; >> + unsigned long *mask, *bits; >> int first, j, ret; >> >> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >> + memset(fastpath, 0, sizeof(fastpath)); >> + mask = fastpath; >> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >> + } else { >> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >> + sizeof(*mask), >> + can_sleep ? GFP_KERNEL : GFP_ATOMIC); >> + if (!mask) >> + return -ENOMEM; >> + bits = mask + BITS_TO_LONGS(chip->ngpio); >> + } >> + >> if (!can_sleep) >> WARN_ON(chip->can_sleep); >> >> /* collect all inputs belonging to the same chip */ >> first = i; >> - memset(mask, 0, sizeof(mask)); >> do { >> const struct gpio_desc *desc = desc_array[i]; >> int hwgpio = gpio_chip_hwgpio(desc); > > Out-of-context, the code does: > > | __set_bit(hwgpio, mask); > | i++; > | } while ((i < array_size) && > > ... and change this limit to "(i < min(array_size, first + > ARRAY_SIZE(mask) * BITS_PER_BYTE))" > > | (desc_array[i]->gdev->chip == chip)); > > ... and you're done? > I don't think this approach will work since gpio_chip_{get,set}_multiple expect to be working on arrays for the entire chip. There doesn't seem to be a nice way to work on a subset of GPIOs without defeating the point of the multiple API. is 2*256 = 512 bytes really too much stack space? I guess we could switch to a Kconfig to allow for better bounds. Thanks, Laura >> @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, >> } >> } >> >> -void gpiod_set_array_value_complex(bool raw, bool can_sleep, >> +int gpiod_set_array_value_complex(bool raw, bool can_sleep, > > Same here. > > Gr{oetje,eeting}s, > > Geert >
Hi Laura, On Tue, May 15, 2018 at 12:49 AM, Laura Abbott <labbott@redhat.com> wrote: > On 04/20/2018 02:02 AM, Geert Uytterhoeven wrote: >> On Fri, Apr 13, 2018 at 11:24 PM, Laura Abbott <labbott@redhat.com> wrote: >>> The new challenge is to remove VLAs from the kernel >>> (see https://lkml.org/lkml/2018/3/7/621) to eventually >>> turn on -Wvla. >>> >>> Using a kmalloc array is the easy way to fix this but kmalloc is still >>> more expensive than stack allocation. Introduce a fast path with a >>> fixed size stack array to cover most chip with gpios below some fixed >>> amount. The slow path dynamically allocates an array to cover those >>> chips with a large number of gpios. >> >> >> Blindly replacing VLAs by allocated arrays is IMHO not such a good >> solution. >> On the largest systems, NR_GPIOS is 2048, so that makes 2 arrays of 256 >> bytes. That's an uppper limit, and assumes they are all on the same >> gpiochip, >> which they probably aren't. >> >> Still, 2 x 256 bytes is a lot, so I agree it should be fixed. >> >> So, wouldn't it make more sense to not allocate memory, but just process >> the request in chunks (say, at most 128 gpios per chunk, i.e. 2 x >> 16 bytes)? The code already caters for handling chunks due to not all >> gpios >> belonging to the same gpiochip. That will probably also be faster than >> allocating memory, which is the main idea behind this API. >> >>> Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> >>> Signed-off-by: Lukas Wunner <lukas@wunner.de> >>> Signed-off-by: Laura Abbott <labbott@redhat.com> >> >> >>> --- a/drivers/gpio/gpiolib.c >>> +++ b/drivers/gpio/gpiolib.c >> >> >>> @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip >>> *chip, void *data, >>> goto err_free_descs; >>> } >>> >>> + if (chip->ngpio > FASTPATH_NGPIO) >>> + chip_warn(chip, "line cnt %d is greater than fast path >>> cnt %d\n", >>> + chip->ngpio, FASTPATH_NGPIO); >> >> >> FWIW, can this warning be triggered from userspace? >> >>> @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool >>> can_sleep, >>> >>> while (i < array_size) { >>> struct gpio_chip *chip = desc_array[i]->gdev->chip; >>> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >>> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >> >> >> Hence just use a fixed size here... >> >>> + unsigned long fastpath[2 * >>> BITS_TO_LONGS(FASTPATH_NGPIO)]; >>> + unsigned long *mask, *bits; >>> int first, j, ret; >>> >>> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >>> + memset(fastpath, 0, sizeof(fastpath)); >>> + mask = fastpath; >>> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >>> + } else { >>> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >>> + sizeof(*mask), >>> + can_sleep ? GFP_KERNEL : >>> GFP_ATOMIC); >>> + if (!mask) >>> + return -ENOMEM; >>> + bits = mask + BITS_TO_LONGS(chip->ngpio); >>> + } >>> + >>> if (!can_sleep) >>> WARN_ON(chip->can_sleep); >>> >>> /* collect all inputs belonging to the same chip */ >>> first = i; >>> - memset(mask, 0, sizeof(mask)); >>> do { >>> const struct gpio_desc *desc = desc_array[i]; >>> int hwgpio = gpio_chip_hwgpio(desc); >> >> >> Out-of-context, the code does: >> >> | __set_bit(hwgpio, mask); >> | i++; >> | } while ((i < array_size) && >> >> ... and change this limit to "(i < min(array_size, first + >> ARRAY_SIZE(mask) * BITS_PER_BYTE))" >> >> | (desc_array[i]->gdev->chip == chip)); >> >> ... and you're done? >> > I don't think this approach will work since gpio_chip_{get,set}_multiple > expect to be working on arrays for the entire chip. There doesn't seem > to be a nice way to work on a subset of GPIOs without defeating the point > of the multiple API. You're right, sorry for missing that. > is 2*256 = 512 bytes really too much stack space? I guess we could > switch to a Kconfig to allow for better bounds. That's a good question. As long as a VLA is used, it's probably fine, as chip->ngpio is quite small. If you would change it to an array that can accommodate NR_GPIOS bits, you have to start caring about recursion (e.g. gpio-74x164 driven from spi-gpio, where I can extend the chain to increase the level of recursion arbitrarily). Gr{oetje,eeting}s, Geert
On 15/05/2018 15:10, Geert Uytterhoeven wrote: > Hi Laura, > > On Tue, May 15, 2018 at 12:49 AM, Laura Abbott <labbott@redhat.com> wrote: >> On 04/20/2018 02:02 AM, Geert Uytterhoeven wrote: >>> On Fri, Apr 13, 2018 at 11:24 PM, Laura Abbott <labbott@redhat.com> wrote: >>>> The new challenge is to remove VLAs from the kernel >>>> (see https://lkml.org/lkml/2018/3/7/621) to eventually >>>> turn on -Wvla. >>>> >>>> Using a kmalloc array is the easy way to fix this but kmalloc is still >>>> more expensive than stack allocation. Introduce a fast path with a >>>> fixed size stack array to cover most chip with gpios below some fixed >>>> amount. The slow path dynamically allocates an array to cover those >>>> chips with a large number of gpios. >>> >>> >>> Blindly replacing VLAs by allocated arrays is IMHO not such a good >>> solution. >>> On the largest systems, NR_GPIOS is 2048, so that makes 2 arrays of 256 >>> bytes. That's an uppper limit, and assumes they are all on the same >>> gpiochip, >>> which they probably aren't. >>> >>> Still, 2 x 256 bytes is a lot, so I agree it should be fixed. >>> >>> So, wouldn't it make more sense to not allocate memory, but just process >>> the request in chunks (say, at most 128 gpios per chunk, i.e. 2 x >>> 16 bytes)? The code already caters for handling chunks due to not all >>> gpios >>> belonging to the same gpiochip. That will probably also be faster than >>> allocating memory, which is the main idea behind this API. >>> >>>> Reviewed-and-tested-by: Lukas Wunner <lukas@wunner.de> >>>> Signed-off-by: Lukas Wunner <lukas@wunner.de> >>>> Signed-off-by: Laura Abbott <labbott@redhat.com> >>> >>> >>>> --- a/drivers/gpio/gpiolib.c >>>> +++ b/drivers/gpio/gpiolib.c >>> >>> >>>> @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip >>>> *chip, void *data, >>>> goto err_free_descs; >>>> } >>>> >>>> + if (chip->ngpio > FASTPATH_NGPIO) >>>> + chip_warn(chip, "line cnt %d is greater than fast path >>>> cnt %d\n", >>>> + chip->ngpio, FASTPATH_NGPIO); >>> >>> >>> FWIW, can this warning be triggered from userspace? >>> >>>> @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool >>>> can_sleep, >>>> >>>> while (i < array_size) { >>>> struct gpio_chip *chip = desc_array[i]->gdev->chip; >>>> - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; >>>> - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; >>> >>> >>> Hence just use a fixed size here... >>> >>>> + unsigned long fastpath[2 * >>>> BITS_TO_LONGS(FASTPATH_NGPIO)]; >>>> + unsigned long *mask, *bits; >>>> int first, j, ret; >>>> >>>> + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { >>>> + memset(fastpath, 0, sizeof(fastpath)); >>>> + mask = fastpath; >>>> + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); >>>> + } else { >>>> + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), >>>> + sizeof(*mask), >>>> + can_sleep ? GFP_KERNEL : >>>> GFP_ATOMIC); >>>> + if (!mask) >>>> + return -ENOMEM; >>>> + bits = mask + BITS_TO_LONGS(chip->ngpio); >>>> + } >>>> + >>>> if (!can_sleep) >>>> WARN_ON(chip->can_sleep); >>>> >>>> /* collect all inputs belonging to the same chip */ >>>> first = i; >>>> - memset(mask, 0, sizeof(mask)); >>>> do { >>>> const struct gpio_desc *desc = desc_array[i]; >>>> int hwgpio = gpio_chip_hwgpio(desc); >>> >>> >>> Out-of-context, the code does: >>> >>> | __set_bit(hwgpio, mask); >>> | i++; >>> | } while ((i < array_size) && >>> >>> ... and change this limit to "(i < min(array_size, first + >>> ARRAY_SIZE(mask) * BITS_PER_BYTE))" >>> >>> | (desc_array[i]->gdev->chip == chip)); >>> >>> ... and you're done? >>> >> I don't think this approach will work since gpio_chip_{get,set}_multiple >> expect to be working on arrays for the entire chip. There doesn't seem >> to be a nice way to work on a subset of GPIOs without defeating the point >> of the multiple API. > > You're right, sorry for missing that. > >> is 2*256 = 512 bytes really too much stack space? I guess we could >> switch to a Kconfig to allow for better bounds. > > That's a good question. > > As long as a VLA is used, it's probably fine, as chip->ngpio is quite small. > If you would change it to an array that can accommodate NR_GPIOS bits, you > have to start caring about recursion (e.g. gpio-74x164 driven from spi-gpio, > where I can extend the chain to increase the level of recursion arbitrarily). > I think a config option for FASTPATH_NGPIO is preferable. As I've mentioned ARCH_NR_GPIOS is much greater than any chip->ngpio on my platform. It's at least one order of magnitude, almost 2.
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d66de67ef307..79ec7a29b684 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -61,6 +61,11 @@ static struct bus_type gpio_bus_type = { .name = "gpio", }; +/* + * Number of GPIOs to use for the fast path in set array + */ +#define FASTPATH_NGPIO ARCH_NR_GPIOS + /* gpio_lock prevents conflicts during gpio_desc[] table updates. * While any GPIO is requested, its gpio_chip is not removable; * each GPIO's "requested" flag serves as a lock and refcount. @@ -399,12 +404,11 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, vals[i] = !!ghd.values[i]; /* Reuse the array setting function */ - gpiod_set_array_value_complex(false, + return gpiod_set_array_value_complex(false, true, lh->numdescs, lh->descs, vals); - return 0; } return -EINVAL; } @@ -1192,6 +1196,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, goto err_free_descs; } + if (chip->ngpio > FASTPATH_NGPIO) + chip_warn(chip, "line cnt %d is greater than fast path cnt %d\n", + chip->ngpio, FASTPATH_NGPIO); + gdev->label = kstrdup_const(chip->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { status = -ENOMEM; @@ -2662,16 +2670,28 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, while (i < array_size) { struct gpio_chip *chip = desc_array[i]->gdev->chip; - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; + unsigned long *mask, *bits; int first, j, ret; + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { + memset(fastpath, 0, sizeof(fastpath)); + mask = fastpath; + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); + } else { + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), + sizeof(*mask), + can_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!mask) + return -ENOMEM; + bits = mask + BITS_TO_LONGS(chip->ngpio); + } + if (!can_sleep) WARN_ON(chip->can_sleep); /* collect all inputs belonging to the same chip */ first = i; - memset(mask, 0, sizeof(mask)); do { const struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); @@ -2682,8 +2702,11 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, (desc_array[i]->gdev->chip == chip)); ret = gpio_chip_get_multiple(chip, mask, bits); - if (ret) + if (ret) { + if (mask != fastpath) + kfree(mask); return ret; + } for (j = first; j < i; j++) { const struct gpio_desc *desc = desc_array[j]; @@ -2695,6 +2718,9 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, value_array[j] = value; trace_gpio_value(desc_to_gpio(desc), 1, value); } + + if (mask != fastpath) + kfree(mask); } return 0; } @@ -2878,7 +2904,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip, } } -void gpiod_set_array_value_complex(bool raw, bool can_sleep, +int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, int *value_array) @@ -2887,14 +2913,26 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, while (i < array_size) { struct gpio_chip *chip = desc_array[i]->gdev->chip; - unsigned long mask[BITS_TO_LONGS(chip->ngpio)]; - unsigned long bits[BITS_TO_LONGS(chip->ngpio)]; + unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; + unsigned long *mask, *bits; int count = 0; + if (likely(chip->ngpio <= FASTPATH_NGPIO)) { + memset(fastpath, 0, sizeof(fastpath)); + mask = fastpath; + bits = fastpath + BITS_TO_LONGS(FASTPATH_NGPIO); + } else { + mask = kcalloc(2 * BITS_TO_LONGS(chip->ngpio), + sizeof(*mask), + can_sleep ? GFP_KERNEL : GFP_ATOMIC); + if (!mask) + return -ENOMEM; + bits = mask + BITS_TO_LONGS(chip->ngpio); + } + if (!can_sleep) WARN_ON(chip->can_sleep); - memset(mask, 0, sizeof(mask)); do { struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); @@ -2925,7 +2963,11 @@ void gpiod_set_array_value_complex(bool raw, bool can_sleep, /* push collected bits to outputs */ if (count != 0) gpio_chip_set_multiple(chip, mask, bits); + + if (mask != fastpath) + kfree(mask); } + return 0; } /** @@ -3000,13 +3042,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value); * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ -void gpiod_set_raw_array_value(unsigned int array_size, +int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { if (!desc_array) - return; - gpiod_set_array_value_complex(true, false, array_size, desc_array, - value_array); + return -EINVAL; + return gpiod_set_array_value_complex(true, false, array_size, + desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); @@ -3326,14 +3368,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); * * This function is to be called from contexts that can sleep. */ -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { might_sleep_if(extra_checks); if (!desc_array) - return; - gpiod_set_array_value_complex(true, true, array_size, desc_array, + return -EINVAL; + return gpiod_set_array_value_complex(true, true, array_size, desc_array, value_array); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index b17ec6795c81..b64813e3876e 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -188,7 +188,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, int *value_array); -void gpiod_set_array_value_complex(bool raw, bool can_sleep, +int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, int *value_array); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index dbd065963296..243112c7fa7d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -116,7 +116,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); void gpiod_set_raw_value(struct gpio_desc *desc, int value); -void gpiod_set_raw_array_value(unsigned int array_size, +int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); @@ -134,7 +134,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); -void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); @@ -369,12 +369,13 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array_value(unsigned int array_size, +static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) @@ -423,12 +424,13 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, /* GPIO can never have been requested */ WARN_ON(1); } -static inline void gpiod_set_raw_array_value_cansleep(unsigned int array_size, +static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array) { /* GPIO can never have been requested */ WARN_ON(1); + return 0; } static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)