From patchwork Mon Feb 8 19:48:11 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Peter Jones X-Patchwork-Id: 8254351 Return-Path: X-Original-To: patchwork-linux-fsdevel@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 2755B9F3CD for ; Mon, 8 Feb 2016 19:48:27 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 51B41203A5 for ; Mon, 8 Feb 2016 19:48:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 69D84203B8 for ; Mon, 8 Feb 2016 19:48:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753292AbcBHTsX (ORCPT ); Mon, 8 Feb 2016 14:48:23 -0500 Received: from mx1.redhat.com ([209.132.183.28]:48431 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752912AbcBHTsV (ORCPT ); Mon, 8 Feb 2016 14:48:21 -0500 Received: from int-mx13.intmail.prod.int.phx2.redhat.com (int-mx13.intmail.prod.int.phx2.redhat.com [10.5.11.26]) by mx1.redhat.com (Postfix) with ESMTPS id E3E6AC0AAB3C; Mon, 8 Feb 2016 19:48:20 +0000 (UTC) Received: from random.internal.datastacks.com (ovpn-113-54.phx2.redhat.com [10.3.113.54]) by int-mx13.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id u18JmJIw007373; Mon, 8 Feb 2016 14:48:20 -0500 From: Peter Jones To: Matt Fleming Cc: linux-efi@vger.kernel.org, linux-fsdevel@vger.kernel.org, Peter Jones Subject: [PATCH 1/5] Add ucs2 -> utf8 helper functions Date: Mon, 8 Feb 2016 14:48:11 -0500 Message-Id: <1454960895-3473-2-git-send-email-pjones@redhat.com> In-Reply-To: <1454960895-3473-1-git-send-email-pjones@redhat.com> References: <20160204234211.GI2586@codeblueprint.co.uk> <1454960895-3473-1-git-send-email-pjones@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.26 Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Spam-Status: No, score=-7.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This adds ucs2_utf8size(), which tells us how big our ucs2 string is in bytes, and ucs2_as_utf8, which translates from ucs2 to utf8.. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett --- include/linux/ucs2_string.h | 4 +++ lib/ucs2_string.c | 62 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cbb20af..bb679b4 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); +unsigned long ucs2_utf8size(const ucs2_char_t *src); +unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, + unsigned long maxlength); + #endif /* _LINUX_UCS2_STRING_H_ */ diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef..17dd74e 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) } } EXPORT_SYMBOL(ucs2_strncmp); + +unsigned long +ucs2_utf8size(const ucs2_char_t *src) +{ + unsigned long i; + unsigned long j = 0; + + for (i = 0; i < ucs2_strlen(src); i++) { + u16 c = src[i]; + + if (c > 0x800) + j += 3; + else if (c > 0x80) + j += 2; + else + j += 1; + } + + return j; +} +EXPORT_SYMBOL(ucs2_utf8size); + +/* + * copy at most maxlength bytes of whole utf8 characters to dest from the + * ucs2 string src. + * + * The return value is the number of characters copied, not including the + * final NUL character. + */ +unsigned long +ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) +{ + unsigned int i; + unsigned long j = 0; + unsigned long limit = ucs2_strnlen(src, maxlength); + + for (i = 0; maxlength && i < limit; i++) { + u16 c = src[i]; + + if (c > 0x800) { + if (maxlength < 3) + break; + maxlength -= 3; + dest[j++] = 0xe0 | (c & 0xf000) >> 12; + dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x003f); + } else if (c > 0x80) { + if (maxlength < 2) + break; + maxlength -= 2; + dest[j++] = 0xc0 | (c & 0xfe0) >> 5; + dest[j++] = 0x80 | (c & 0x01f); + } else { + maxlength -= 1; + dest[j++] = c & 0x7f; + } + } + if (maxlength) + dest[j] = '\0'; + return j; +} +EXPORT_SYMBOL(ucs2_as_utf8);