Message ID | 20200624043341.33364-2-drosen@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Prepare for upcoming Casefolding/Encryption patches | expand |
Daniel Rosenberg <drosen@google.com> writes: > This adds a case insensitive hash function to allow taking the hash > without needing to allocate a casefolded copy of the string. > > Signed-off-by: Daniel Rosenberg <drosen@google.com> > --- > fs/unicode/utf8-core.c | 23 ++++++++++++++++++++++- > include/linux/unicode.h | 3 +++ > 2 files changed, 25 insertions(+), 1 deletion(-) > > diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c > index 2a878b739115d..90656b9980720 100644 > --- a/fs/unicode/utf8-core.c > +++ b/fs/unicode/utf8-core.c > @@ -6,6 +6,7 @@ > #include <linux/parser.h> > #include <linux/errno.h> > #include <linux/unicode.h> > +#include <linux/stringhash.h> > > #include "utf8n.h" > > @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, > } > return -EINVAL; > } > - > EXPORT_SYMBOL(utf8_casefold); > > +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, > + struct qstr *str) > +{ > + const struct utf8data *data = utf8nfdicf(um->version); > + struct utf8cursor cur; > + int c; > + unsigned long hash = init_name_hash(salt); > + > + if (utf8ncursor(&cur, data, str->name, str->len) < 0) > + return -EINVAL; > + > + while ((c = utf8byte(&cur))) { > + if (c < 0) > + return c; Return -EINVAL here to match other unicode functions, since utf8byte will return -1 on a binary blob, which doesn't make sense for this. Other than that, looks good to me. Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.com> > + hash = partial_name_hash((unsigned char)c, hash); > + } > + str->hash = end_name_hash(hash); > + return 0; > +} > +EXPORT_SYMBOL(utf8_casefold_hash); > + > int utf8_normalize(const struct unicode_map *um, const struct qstr *str, > unsigned char *dest, size_t dlen) > { > diff --git a/include/linux/unicode.h b/include/linux/unicode.h > index 990aa97d80496..74484d44c7554 100644 > --- a/include/linux/unicode.h > +++ b/include/linux/unicode.h > @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, > int utf8_casefold(const struct unicode_map *um, const struct qstr *str, > unsigned char *dest, size_t dlen); > > +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, > + struct qstr *str); > + > struct unicode_map *utf8_load(const char *version); > void utf8_unload(struct unicode_map *um);
On Tue, Jun 23, 2020 at 09:33:38PM -0700, Daniel Rosenberg wrote: > This adds a case insensitive hash function to allow taking the hash > without needing to allocate a casefolded copy of the string. It would be helpful to add a few more details in this commit message. Somewhat along the lines of: ->d_hash() for casefolding currently allocates memory, it needs to use GFP_ATOMIC due to ->d_hash() being called in rcu-walk mode, this is unreliable and inefficient, and this patch allows solving that problem by removing the need to allocate memory. - Eric
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 2a878b739115d..90656b9980720 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include <linux/parser.h> #include <linux/errno.h> #include <linux/unicode.h> +#include <linux/stringhash.h> #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d80496..74484d44c7554 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um);
This adds a case insensitive hash function to allow taking the hash without needing to allocate a casefolded copy of the string. Signed-off-by: Daniel Rosenberg <drosen@google.com> --- fs/unicode/utf8-core.c | 23 ++++++++++++++++++++++- include/linux/unicode.h | 3 +++ 2 files changed, 25 insertions(+), 1 deletion(-)