diff mbox

[2/4] lib: add crc64 calculation routines

Message ID 20180716165507.23100-3-colyli@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Coly Li July 16, 2018, 4:55 p.m. UTC
This patch adds the re-write crc64 calculation routines for Linux kernel.
The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
by CRC paper of Dr. Ross N. Williams
(see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
implementations.

All the changes work in this way,
- When Linux kernel is built, host program lib/gen_crc64table.c will be
  compiled to lib/gen_crc64table and executed.
- The output of gen_crc64table execution is an array called as lookup
  table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
  numbers, this talbe is dumped into header file lib/crc64table.h.
- Then the header file is included by lib/crc64.c for normal 64bit crc
  calculation.
- Function declaration of the crc64 calculation routines is placed in
  include/linux/crc64.h

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
---
 include/linux/crc64.h | 15 +++++++++
 lib/.gitignore        |  2 ++
 lib/Makefile          | 11 +++++++
 lib/crc64.c           | 71 +++++++++++++++++++++++++++++++++++++++
 lib/gen_crc64table.c  | 77 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 176 insertions(+)
 create mode 100644 include/linux/crc64.h
 create mode 100644 lib/crc64.c
 create mode 100644 lib/gen_crc64table.c

Comments

Randy Dunlap July 16, 2018, 5:57 p.m. UTC | #1
On 07/16/2018 09:55 AM, Coly Li wrote:
> 
> diff --git a/lib/crc64.c b/lib/crc64.c
> new file mode 100644
> index 000000000000..03f078303bd3
> --- /dev/null
> +++ b/lib/crc64.c
> @@ -0,0 +1,71 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Normal 64bit CRC calculation.
> + *
> + * This is a basic crc64 implementation following ECMA-182 specification,
> + * which can be found from,
> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
> + *
> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
> + * algorithm, here the CRC64 code is also inspired by the table-driven
> + * algorithm and detail example from this paper. This paper can be found
> + * from,
> + * http://www.ross.net/crc/download/crc_v3.txt
> + *
> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
> + * calculation, which is generated by gen_crc64table.c in kernel build
> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
> + * as well, which is defined as,
> + *
> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> + * x^7 + x^4 + x + 1
> + *
> + * Copyright 2018 SUSE Linux.
> + *   Author: Coly Li <colyli@suse.de>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <uapi/linux/types.h>
> +#include "crc64table.h"
> +
> +MODULE_DESCRIPTION("CRC64 calculations");
> +MODULE_LICENSE("GPL");
> +
> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
> +{
> +	size_t i, t;
> +
> +	const unsigned char *p = _p;
> +
> +	for (i = 0; i < len; i++) {
> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
> +		crc = crc64table_le[t] ^ (crc << 8);
> +	}
> +
> +	return crc;
> +}
> +EXPORT_SYMBOL_GPL(crc64_le_update);
> +
> +__le64 crc64_le(const void *p, size_t len)
> +{
> +	__le64 crc = 0x0000000000000000ULL;

Hi,
What's wrong with just using 0ULL ?

thanks.

> +
> +	crc = crc64_le_update(crc, p, len);
> +
> +	return crc;
> +}
kernel test robot July 17, 2018, 1:27 a.m. UTC | #2
Hi Coly,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc5 next-20180716]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Coly-Li/add-crc64-calculation-as-kernel-library/20180717-025930
config: i386-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> lib/gen_crc64table.c:21:10: fatal error: ../usr/include/asm/byteorder.h: No such file or directory
    #include "../usr/include/asm/byteorder.h"
             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   compilation terminated.

vim +21 lib/gen_crc64table.c

  > 21	#include "../usr/include/asm/byteorder.h"
    22	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Coly Li July 17, 2018, 3:19 a.m. UTC | #3
On 2018/7/17 1:57 AM, Randy Dunlap wrote:
> On 07/16/2018 09:55 AM, Coly Li wrote:
>>
>> diff --git a/lib/crc64.c b/lib/crc64.c
>> new file mode 100644
>> index 000000000000..03f078303bd3
>> --- /dev/null
>> +++ b/lib/crc64.c
>> @@ -0,0 +1,71 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Normal 64bit CRC calculation.
>> + *
>> + * This is a basic crc64 implementation following ECMA-182 specification,
>> + * which can be found from,
>> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
>> + *
>> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
>> + * algorithm, here the CRC64 code is also inspired by the table-driven
>> + * algorithm and detail example from this paper. This paper can be found
>> + * from,
>> + * http://www.ross.net/crc/download/crc_v3.txt
>> + *
>> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
>> + * calculation, which is generated by gen_crc64table.c in kernel build
>> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
>> + * as well, which is defined as,
>> + *
>> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
>> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
>> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
>> + * x^7 + x^4 + x + 1
>> + *
>> + * Copyright 2018 SUSE Linux.
>> + *   Author: Coly Li <colyli@suse.de>
>> + *
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <uapi/linux/types.h>
>> +#include "crc64table.h"
>> +
>> +MODULE_DESCRIPTION("CRC64 calculations");
>> +MODULE_LICENSE("GPL");
>> +
>> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
>> +{
>> +	size_t i, t;
>> +
>> +	const unsigned char *p = _p;
>> +
>> +	for (i = 0; i < len; i++) {
>> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
>> +		crc = crc64table_le[t] ^ (crc << 8);
>> +	}
>> +
>> +	return crc;
>> +}
>> +EXPORT_SYMBOL_GPL(crc64_le_update);
>> +
>> +__le64 crc64_le(const void *p, size_t len)
>> +{
>> +	__le64 crc = 0x0000000000000000ULL;
> 

Hi Randy,

> Hi,
> What's wrong with just using 0ULL ?

In v2 series it will be 0x0ULL :-)

Thanks.

Coly Li
Eric Biggers July 17, 2018, 3:34 a.m. UTC | #4
Hi Coly,

On Tue, Jul 17, 2018 at 12:55:05AM +0800, Coly Li wrote:
> This patch adds the re-write crc64 calculation routines for Linux kernel.
> The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
> by CRC paper of Dr. Ross N. Williams
> (see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
> implementations.
> 
> All the changes work in this way,
> - When Linux kernel is built, host program lib/gen_crc64table.c will be
>   compiled to lib/gen_crc64table and executed.
> - The output of gen_crc64table execution is an array called as lookup
>   table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
>   numbers, this talbe is dumped into header file lib/crc64table.h.
> - Then the header file is included by lib/crc64.c for normal 64bit crc
>   calculation.
> - Function declaration of the crc64 calculation routines is placed in
>   include/linux/crc64.h
> 
[...]
> diff --git a/lib/crc64.c b/lib/crc64.c
> new file mode 100644
> index 000000000000..03f078303bd3
> --- /dev/null
> +++ b/lib/crc64.c
> @@ -0,0 +1,71 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Normal 64bit CRC calculation.
> + *
> + * This is a basic crc64 implementation following ECMA-182 specification,
> + * which can be found from,
> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
> + *
> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
> + * algorithm, here the CRC64 code is also inspired by the table-driven
> + * algorithm and detail example from this paper. This paper can be found
> + * from,
> + * http://www.ross.net/crc/download/crc_v3.txt
> + *
> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
> + * calculation, which is generated by gen_crc64table.c in kernel build
> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
> + * as well, which is defined as,
> + *
> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> + * x^7 + x^4 + x + 1
> + *
> + * Copyright 2018 SUSE Linux.
> + *   Author: Coly Li <colyli@suse.de>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <uapi/linux/types.h>
> +#include "crc64table.h"
> +
> +MODULE_DESCRIPTION("CRC64 calculations");
> +MODULE_LICENSE("GPL");
> +
> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
> +{
> +	size_t i, t;
> +
> +	const unsigned char *p = _p;
> +
> +	for (i = 0; i < len; i++) {
> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
> +		crc = crc64table_le[t] ^ (crc << 8);
> +	}
> +
> +	return crc;
> +}
> +EXPORT_SYMBOL_GPL(crc64_le_update);
> +
> +__le64 crc64_le(const void *p, size_t len)
> +{
> +	__le64 crc = 0x0000000000000000ULL;
> +
> +	crc = crc64_le_update(crc, p, len);
> +
> +	return crc;
> +}
> +EXPORT_SYMBOL_GPL(crc64_le);
> +
> +/* For checksum calculation in drivers/md/bcache/ */
> +__le64 crc64_le_bch(const void *p, size_t len)
> +{
> +	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
> +
> +	crc = crc64_le_update(crc, p, len);
> +
> +	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
> +}
> +EXPORT_SYMBOL_GPL(crc64_le_bch);

Using __le64 here makes no sense, because that type indicates the endianness of
the *bytes*, whereas with CRC's "little endian" and "big endian" refer to the
order in which the *bits* are mapped to the polynomial coefficients.

Also as you can see for lib/crc32.c you really only need to provide a function

	u64 __pure crc64_le(u64 crc, unsigned char const *p, size_t len);

and the callers can invert at the beginning and/or end if needed.

Also your function names make it sound like inverting the bits is the exception
or not recommended, since you called the function which does the inversions
"crc32_le_bch()" so it sounds like a bcache-specific hack, while the one that
doesn't do the inversions is simply called "crc32_le()".  But actually it's
normally recommended to do CRC's with the inversions, so that leading and
trailing zeroes affect the resulting CRC.

> diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
> new file mode 100644
> index 000000000000..5f292f287498
> --- /dev/null
> +++ b/lib/gen_crc64table.c
> @@ -0,0 +1,77 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Generate lookup table for the talbe-driven CRC64 calculation.
> + *
> + * gen_crc64table is executed in kernel build time and generates
> + * lib/crc64table.h. This header is included by lib/crc64.c for
> + * the table-driver CRC64 calculation.
> + *
> + * See lib/crc64.c for more information about which specification
> + * and polynomical arithmetic that gen_crc64table.c follows to
> + * generate the lookup table.
> + *
> + * Copyright 2018 SUSE Linux.
> + *   Author: Coly Li <colyli@suse.de>
> + *
> + */
> +
> +#include <inttypes.h>
> +#include <linux/swab.h>
> +#include <stdio.h>
> +#include "../usr/include/asm/byteorder.h"
> +
> +#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL

Okay, that's actually the ECMA-182 polynomial in "big endian" form (highest
order bit is the coefficient of x^63, lowest order bit is the coefficient of
x^0), so you're actually doing a "big endian" CRC.  So everything in your patch
series that claims it's a little endian or "le" CRC is incorrect.

> +
> +#ifdef __LITTLE_ENDIAN
> +#  define cpu_to_le64(x) ((__le64)(x))
> +#else
> +#  define cpu_to_le64(x) ((__le64)__swab64(x))
> +#endif
> +
> +static int64_t crc64_table[256] = {0,};
> +
> +static void generate_crc64_table(void)
> +{
> +	uint64_t i, j, c, crc;
> +
> +	for (i = 0; i < 256; i++) {
> +		crc = 0;
> +		c = i << 56;
> +
> +		for (j = 0; j < 8; j++) {
> +			if ((crc ^ c) & 0x8000000000000000ULL)
> +				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
> +			else
> +				crc <<= 1;
> +			c <<= 1;

See here, it's shifting out the most significant bit, which means it's the
coefficient of the x^63 term ("big endian" or "normal" convention), not the x^0
term ("little endian" or "reversed" convention).

Eric
Coly Li July 17, 2018, 6:25 a.m. UTC | #5
On 2018/7/17 11:34 AM, Eric Biggers wrote:
> Hi Coly,
> 
> On Tue, Jul 17, 2018 at 12:55:05AM +0800, Coly Li wrote:
>> This patch adds the re-write crc64 calculation routines for Linux kernel.
>> The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
>> by CRC paper of Dr. Ross N. Williams
>> (see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
>> implementations.
>>
>> All the changes work in this way,
>> - When Linux kernel is built, host program lib/gen_crc64table.c will be
>>   compiled to lib/gen_crc64table and executed.
>> - The output of gen_crc64table execution is an array called as lookup
>>   table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
>>   numbers, this talbe is dumped into header file lib/crc64table.h.
>> - Then the header file is included by lib/crc64.c for normal 64bit crc
>>   calculation.
>> - Function declaration of the crc64 calculation routines is placed in
>>   include/linux/crc64.h
>>
> [...]
>> diff --git a/lib/crc64.c b/lib/crc64.c
>> new file mode 100644
>> index 000000000000..03f078303bd3
>> --- /dev/null
>> +++ b/lib/crc64.c
>> @@ -0,0 +1,71 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Normal 64bit CRC calculation.
>> + *
>> + * This is a basic crc64 implementation following ECMA-182 specification,
>> + * which can be found from,
>> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
>> + *
>> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
>> + * algorithm, here the CRC64 code is also inspired by the table-driven
>> + * algorithm and detail example from this paper. This paper can be found
>> + * from,
>> + * http://www.ross.net/crc/download/crc_v3.txt
>> + *
>> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
>> + * calculation, which is generated by gen_crc64table.c in kernel build
>> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
>> + * as well, which is defined as,
>> + *
>> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
>> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
>> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
>> + * x^7 + x^4 + x + 1
>> + *
>> + * Copyright 2018 SUSE Linux.
>> + *   Author: Coly Li <colyli@suse.de>
>> + *
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <uapi/linux/types.h>
>> +#include "crc64table.h"
>> +
>> +MODULE_DESCRIPTION("CRC64 calculations");
>> +MODULE_LICENSE("GPL");
>> +
>> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
>> +{
>> +	size_t i, t;
>> +
>> +	const unsigned char *p = _p;
>> +
>> +	for (i = 0; i < len; i++) {
>> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
>> +		crc = crc64table_le[t] ^ (crc << 8);
>> +	}
>> +
>> +	return crc;
>> +}
>> +EXPORT_SYMBOL_GPL(crc64_le_update);
>> +
>> +__le64 crc64_le(const void *p, size_t len)
>> +{
>> +	__le64 crc = 0x0000000000000000ULL;
>> +
>> +	crc = crc64_le_update(crc, p, len);
>> +
>> +	return crc;
>> +}
>> +EXPORT_SYMBOL_GPL(crc64_le);
>> +
>> +/* For checksum calculation in drivers/md/bcache/ */
>> +__le64 crc64_le_bch(const void *p, size_t len)
>> +{
>> +	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
>> +
>> +	crc = crc64_le_update(crc, p, len);
>> +
>> +	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
>> +}
>> +EXPORT_SYMBOL_GPL(crc64_le_bch);
> 

Hi Eric,

> Using __le64 here makes no sense, because that type indicates the endianness of
> the *bytes*, whereas with CRC's "little endian" and "big endian" refer to the
> order in which the *bits* are mapped to the polynomial coefficients.
> 
> Also as you can see for lib/crc32.c you really only need to provide a function
> 
> 	u64 __pure crc64_le(u64 crc, unsigned char const *p, size_t len);
> 
> and the callers can invert at the beginning and/or end if needed.

Let me explain why I explicit use __le64 here. When crc64 is used as
on-disk checksum, the input of crc64 calculation should be in a explicit
specific byte order. Currently check sum in bcache code assumes the CPU
is in little endian and just feeds in-memory data into crc64
calculation, then the code does not work on big endian machine like s390x.

To solve such problem, before calculating CRC the in-memory data should
be swapped into a specific byte order (in bcache case it should be
little endian). For data storage or transfer, CRC calculation without
explicit endian is more easy to introduce bugs.

When I declare the type of input and output value as __le64, on big
endian machine, I expect a type mismatch warning if the input memory
buffer is not swapped into little endian. For u64, there is no such type
checking warning.

This is the initial version of lib/crc64.c, people may add their crc64
calculation routines when necessary, e.g. crc64_be() or crc64(). I only
add crc64_le_update() and crc64_le_bch() because bcache code needs them.

Indeed there is no user of crc64_le() for now, but the file is name as
lib/crc64.c, I think there should be a crc64 calculation at least, so I
add crc64_le().

> 
> Also your function names make it sound like inverting the bits is the exception
> or not recommended, since you called the function which does the inversions
> "crc32_le_bch()" so it sounds like a bcache-specific hack, while the one that
> doesn't do the inversions is simply called "crc32_le()".  But actually it's
> normally recommended to do CRC's with the inversions, so that leading and
> trailing zeroes affect the resulting CRC.
> 

I notice this, normally there are two crc routines provided, with and
without inversion. The reason that there is no inversion version is
no-user in Linux kernel. Indeed there is no user of crc64_le() in Linnux
kernel so far. For performance reason, I doubt whether there will be
more user to do 64bit crc in kernel.

I prefer two crc32 calculation for a 64bit value, but meta data checksum
by crc64 calculation is used in bcache for years, the consistency has to
be kept.


>> diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
>> new file mode 100644
>> index 000000000000..5f292f287498
>> --- /dev/null
>> +++ b/lib/gen_crc64table.c
>> @@ -0,0 +1,77 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Generate lookup table for the talbe-driven CRC64 calculation.
>> + *
>> + * gen_crc64table is executed in kernel build time and generates
>> + * lib/crc64table.h. This header is included by lib/crc64.c for
>> + * the table-driver CRC64 calculation.
>> + *
>> + * See lib/crc64.c for more information about which specification
>> + * and polynomical arithmetic that gen_crc64table.c follows to
>> + * generate the lookup table.
>> + *
>> + * Copyright 2018 SUSE Linux.
>> + *   Author: Coly Li <colyli@suse.de>
>> + *
>> + */
>> +
>> +#include <inttypes.h>
>> +#include <linux/swab.h>
>> +#include <stdio.h>
>> +#include "../usr/include/asm/byteorder.h"
>> +
>> +#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
> 
> Okay, that's actually the ECMA-182 polynomial in "big endian" form (highest
> order bit is the coefficient of x^63, lowest order bit is the coefficient of
> x^0), so you're actually doing a "big endian" CRC.  So everything in your patch
> series that claims it's a little endian or "le" CRC is incorrect.
> 
>> +
>> +#ifdef __LITTLE_ENDIAN
>> +#  define cpu_to_le64(x) ((__le64)(x))
>> +#else
>> +#  define cpu_to_le64(x) ((__le64)__swab64(x))
>> +#endif
>> +
>> +static int64_t crc64_table[256] = {0,};
>> +
>> +static void generate_crc64_table(void)
>> +{
>> +	uint64_t i, j, c, crc;
>> +
>> +	for (i = 0; i < 256; i++) {
>> +		crc = 0;
>> +		c = i << 56;
>> +
>> +		for (j = 0; j < 8; j++) {
>> +			if ((crc ^ c) & 0x8000000000000000ULL)
>> +				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
>> +			else
>> +				crc <<= 1;
>> +			c <<= 1;
> 
> See here, it's shifting out the most significant bit, which means it's the
> coefficient of the x^63 term ("big endian" or "normal" convention), not the x^0
> term ("little endian" or "reversed" convention).

I see your point here. I am not expert in coding theory, the knowledge I
have is from wikipedia, ECMA-182 and the document from Dr. Ross
Williams. From ECMA-182 document, I don't see any word with 'big
endian', so I take it as a standard poly and regardless the byte order.

And on wikepedia page
https://en.wikipedia.org/wiki/Cyclic_redundancy_check , CRC-64-ECMA
references the same poly and call "0x42F0E1EBA9EA3693" as normal poly,
which one links to polynomial
	"x^64 + x^62 + x^57 + x^55 + x^54 + ....x^7 + x^4 + x + 1"
if I understand correctly. But from your information, it seems the
polynomial in generate_crc64_table() is x^64 + x^61 ..... Maybe I
misunderstand you, could you please give me more hint ?

Thanks.

Coly Li
Eric Biggers July 17, 2018, 7:13 a.m. UTC | #6
On Tue, Jul 17, 2018 at 02:25:24PM +0800, Coly Li wrote:
> On 2018/7/17 11:34 AM, Eric Biggers wrote:
> > Hi Coly,
> > 
> > On Tue, Jul 17, 2018 at 12:55:05AM +0800, Coly Li wrote:
> >> This patch adds the re-write crc64 calculation routines for Linux kernel.
> >> The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
> >> by CRC paper of Dr. Ross N. Williams
> >> (see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
> >> implementations.
> >>
> >> All the changes work in this way,
> >> - When Linux kernel is built, host program lib/gen_crc64table.c will be
> >>   compiled to lib/gen_crc64table and executed.
> >> - The output of gen_crc64table execution is an array called as lookup
> >>   table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
> >>   numbers, this talbe is dumped into header file lib/crc64table.h.
> >> - Then the header file is included by lib/crc64.c for normal 64bit crc
> >>   calculation.
> >> - Function declaration of the crc64 calculation routines is placed in
> >>   include/linux/crc64.h
> >>
> > [...]
> >> diff --git a/lib/crc64.c b/lib/crc64.c
> >> new file mode 100644
> >> index 000000000000..03f078303bd3
> >> --- /dev/null
> >> +++ b/lib/crc64.c
> >> @@ -0,0 +1,71 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Normal 64bit CRC calculation.
> >> + *
> >> + * This is a basic crc64 implementation following ECMA-182 specification,
> >> + * which can be found from,
> >> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
> >> + *
> >> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
> >> + * algorithm, here the CRC64 code is also inspired by the table-driven
> >> + * algorithm and detail example from this paper. This paper can be found
> >> + * from,
> >> + * http://www.ross.net/crc/download/crc_v3.txt
> >> + *
> >> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
> >> + * calculation, which is generated by gen_crc64table.c in kernel build
> >> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
> >> + * as well, which is defined as,
> >> + *
> >> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> >> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> >> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> >> + * x^7 + x^4 + x + 1
> >> + *
> >> + * Copyright 2018 SUSE Linux.
> >> + *   Author: Coly Li <colyli@suse.de>
> >> + *
> >> + */
> >> +
> >> +#include <linux/module.h>
> >> +#include <uapi/linux/types.h>
> >> +#include "crc64table.h"
> >> +
> >> +MODULE_DESCRIPTION("CRC64 calculations");
> >> +MODULE_LICENSE("GPL");
> >> +
> >> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
> >> +{
> >> +	size_t i, t;
> >> +
> >> +	const unsigned char *p = _p;
> >> +
> >> +	for (i = 0; i < len; i++) {
> >> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
> >> +		crc = crc64table_le[t] ^ (crc << 8);
> >> +	}
> >> +
> >> +	return crc;
> >> +}
> >> +EXPORT_SYMBOL_GPL(crc64_le_update);
> >> +
> >> +__le64 crc64_le(const void *p, size_t len)
> >> +{
> >> +	__le64 crc = 0x0000000000000000ULL;
> >> +
> >> +	crc = crc64_le_update(crc, p, len);
> >> +
> >> +	return crc;
> >> +}
> >> +EXPORT_SYMBOL_GPL(crc64_le);
> >> +
> >> +/* For checksum calculation in drivers/md/bcache/ */
> >> +__le64 crc64_le_bch(const void *p, size_t len)
> >> +{
> >> +	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
> >> +
> >> +	crc = crc64_le_update(crc, p, len);
> >> +
> >> +	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
> >> +}
> >> +EXPORT_SYMBOL_GPL(crc64_le_bch);
> > 
> 
> Hi Eric,
> 
> > Using __le64 here makes no sense, because that type indicates the endianness of
> > the *bytes*, whereas with CRC's "little endian" and "big endian" refer to the
> > order in which the *bits* are mapped to the polynomial coefficients.
> > 
> > Also as you can see for lib/crc32.c you really only need to provide a function
> > 
> > 	u64 __pure crc64_le(u64 crc, unsigned char const *p, size_t len);
> > 
> > and the callers can invert at the beginning and/or end if needed.
> 
> Let me explain why I explicit use __le64 here. When crc64 is used as
> on-disk checksum, the input of crc64 calculation should be in a explicit
> specific byte order. Currently check sum in bcache code assumes the CPU
> is in little endian and just feeds in-memory data into crc64
> calculation, then the code does not work on big endian machine like s390x.
> 
> To solve such problem, before calculating CRC the in-memory data should
> be swapped into a specific byte order (in bcache case it should be
> little endian). For data storage or transfer, CRC calculation without
> explicit endian is more easy to introduce bugs.

No, the implementation never loads multi-byte values, so CPU endianness doesn't
matter for the input.  CPU endianness *does* matter when serializing the final
calculated CRC into a byte array for storing on-disk, so maybe bcache gets that
part wrong, I don't know.  Either way, that has nothing to do with how the
polynomial coefficients (bits) are ordered *within bytes*, which is what the
"_be" and "_le" refer to in the CRC-32 implementation.  Yes, the naming is
unfortunate as it can easily be confused with the usual "bytewise" endianness,
but you need to understand it.

Again, using __le64 makes absolutely no sense.  You're even doing operations
like shifts directly on a "__le64" which sparse will (correctly) complain about.

> 
> When I declare the type of input and output value as __le64, on big
> endian machine, I expect a type mismatch warning if the input memory
> buffer is not swapped into little endian. For u64, there is no such type
> checking warning.
> 
> This is the initial version of lib/crc64.c, people may add their crc64
> calculation routines when necessary, e.g. crc64_be() or crc64(). I only
> add crc64_le_update() and crc64_le_bch() because bcache code needs them.
> 
> Indeed there is no user of crc64_le() for now, but the file is name as
> lib/crc64.c, I think there should be a crc64 calculation at least, so I
> add crc64_le().
> 
> > 
> > Also your function names make it sound like inverting the bits is the exception
> > or not recommended, since you called the function which does the inversions
> > "crc32_le_bch()" so it sounds like a bcache-specific hack, while the one that
> > doesn't do the inversions is simply called "crc32_le()".  But actually it's
> > normally recommended to do CRC's with the inversions, so that leading and
> > trailing zeroes affect the resulting CRC.
> > 
> 
> I notice this, normally there are two crc routines provided, with and
> without inversion. The reason that there is no inversion version is
> no-user in Linux kernel. Indeed there is no user of crc64_le() in Linnux
> kernel so far. For performance reason, I doubt whether there will be
> more user to do 64bit crc in kernel.
> 
> I prefer two crc32 calculation for a 64bit value, but meta data checksum
> by crc64 calculation is used in bcache for years, the consistency has to
> be kept.

Well, your response didn't actually address my points.  But it raises the
question: if there won't be any other users, then why move CRC-64 to lib/ at
all?

> 
> 
> >> diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
> >> new file mode 100644
> >> index 000000000000..5f292f287498
> >> --- /dev/null
> >> +++ b/lib/gen_crc64table.c
> >> @@ -0,0 +1,77 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Generate lookup table for the talbe-driven CRC64 calculation.
> >> + *
> >> + * gen_crc64table is executed in kernel build time and generates
> >> + * lib/crc64table.h. This header is included by lib/crc64.c for
> >> + * the table-driver CRC64 calculation.
> >> + *
> >> + * See lib/crc64.c for more information about which specification
> >> + * and polynomical arithmetic that gen_crc64table.c follows to
> >> + * generate the lookup table.
> >> + *
> >> + * Copyright 2018 SUSE Linux.
> >> + *   Author: Coly Li <colyli@suse.de>
> >> + *
> >> + */
> >> +
> >> +#include <inttypes.h>
> >> +#include <linux/swab.h>
> >> +#include <stdio.h>
> >> +#include "../usr/include/asm/byteorder.h"
> >> +
> >> +#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
> > 
> > Okay, that's actually the ECMA-182 polynomial in "big endian" form (highest
> > order bit is the coefficient of x^63, lowest order bit is the coefficient of
> > x^0), so you're actually doing a "big endian" CRC.  So everything in your patch
> > series that claims it's a little endian or "le" CRC is incorrect.
> > 
> >> +
> >> +#ifdef __LITTLE_ENDIAN
> >> +#  define cpu_to_le64(x) ((__le64)(x))
> >> +#else
> >> +#  define cpu_to_le64(x) ((__le64)__swab64(x))
> >> +#endif
> >> +
> >> +static int64_t crc64_table[256] = {0,};
> >> +
> >> +static void generate_crc64_table(void)
> >> +{
> >> +	uint64_t i, j, c, crc;
> >> +
> >> +	for (i = 0; i < 256; i++) {
> >> +		crc = 0;
> >> +		c = i << 56;
> >> +
> >> +		for (j = 0; j < 8; j++) {
> >> +			if ((crc ^ c) & 0x8000000000000000ULL)
> >> +				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
> >> +			else
> >> +				crc <<= 1;
> >> +			c <<= 1;
> > 
> > See here, it's shifting out the most significant bit, which means it's the
> > coefficient of the x^63 term ("big endian" or "normal" convention), not the x^0
> > term ("little endian" or "reversed" convention).
> 
> I see your point here. I am not expert in coding theory, the knowledge I
> have is from wikipedia, ECMA-182 and the document from Dr. Ross
> Williams. From ECMA-182 document, I don't see any word with 'big
> endian', so I take it as a standard poly and regardless the byte order.
> 
> And on wikepedia page
> https://en.wikipedia.org/wiki/Cyclic_redundancy_check , CRC-64-ECMA
> references the same poly and call "0x42F0E1EBA9EA3693" as normal poly,
> which one links to polynomial
> 	"x^64 + x^62 + x^57 + x^55 + x^54 + ....x^7 + x^4 + x + 1"
> if I understand correctly. But from your information, it seems the
> polynomial in generate_crc64_table() is x^64 + x^61 ..... Maybe I
> misunderstand you, could you please give me more hint ?

As I said, the "normal" convention is the same as "big endian", and the
"reversed" convention is the same as "little endian" (again, meaning "bitwise"
endianness, not the usual "bytewise" endianness).  The polynomial is correct but
you are claiming the polynomial coefficients are mapped to bits in a different
order than they actually are.

- Eric
Coly Li July 17, 2018, 7:34 a.m. UTC | #7
On 2018/7/17 3:13 PM, Eric Biggers wrote:
> On Tue, Jul 17, 2018 at 02:25:24PM +0800, Coly Li wrote:
>> On 2018/7/17 11:34 AM, Eric Biggers wrote:
>>> Hi Coly,
>>>
>>> On Tue, Jul 17, 2018 at 12:55:05AM +0800, Coly Li wrote:
>>>> This patch adds the re-write crc64 calculation routines for Linux kernel.
>>>> The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
>>>> by CRC paper of Dr. Ross N. Williams
>>>> (see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
>>>> implementations.
>>>>
>>>> All the changes work in this way,
>>>> - When Linux kernel is built, host program lib/gen_crc64table.c will be
>>>>   compiled to lib/gen_crc64table and executed.
>>>> - The output of gen_crc64table execution is an array called as lookup
>>>>   table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
>>>>   numbers, this talbe is dumped into header file lib/crc64table.h.
>>>> - Then the header file is included by lib/crc64.c for normal 64bit crc
>>>>   calculation.
>>>> - Function declaration of the crc64 calculation routines is placed in
>>>>   include/linux/crc64.h
>>>>
>>> [...]
>>>> diff --git a/lib/crc64.c b/lib/crc64.c
>>>> new file mode 100644
>>>> index 000000000000..03f078303bd3
>>>> --- /dev/null
>>>> +++ b/lib/crc64.c
>>>> @@ -0,0 +1,71 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Normal 64bit CRC calculation.
>>>> + *
>>>> + * This is a basic crc64 implementation following ECMA-182 specification,
>>>> + * which can be found from,
>>>> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
>>>> + *
>>>> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
>>>> + * algorithm, here the CRC64 code is also inspired by the table-driven
>>>> + * algorithm and detail example from this paper. This paper can be found
>>>> + * from,
>>>> + * http://www.ross.net/crc/download/crc_v3.txt
>>>> + *
>>>> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
>>>> + * calculation, which is generated by gen_crc64table.c in kernel build
>>>> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
>>>> + * as well, which is defined as,
>>>> + *
>>>> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
>>>> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
>>>> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
>>>> + * x^7 + x^4 + x + 1
>>>> + *
>>>> + * Copyright 2018 SUSE Linux.
>>>> + *   Author: Coly Li <colyli@suse.de>
>>>> + *
>>>> + */
>>>> +
>>>> +#include <linux/module.h>
>>>> +#include <uapi/linux/types.h>
>>>> +#include "crc64table.h"
>>>> +
>>>> +MODULE_DESCRIPTION("CRC64 calculations");
>>>> +MODULE_LICENSE("GPL");
>>>> +
>>>> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
>>>> +{
>>>> +	size_t i, t;
>>>> +
>>>> +	const unsigned char *p = _p;
>>>> +
>>>> +	for (i = 0; i < len; i++) {
>>>> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
>>>> +		crc = crc64table_le[t] ^ (crc << 8);
>>>> +	}
>>>> +
>>>> +	return crc;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(crc64_le_update);
>>>> +
>>>> +__le64 crc64_le(const void *p, size_t len)
>>>> +{
>>>> +	__le64 crc = 0x0000000000000000ULL;
>>>> +
>>>> +	crc = crc64_le_update(crc, p, len);
>>>> +
>>>> +	return crc;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(crc64_le);
>>>> +
>>>> +/* For checksum calculation in drivers/md/bcache/ */
>>>> +__le64 crc64_le_bch(const void *p, size_t len)
>>>> +{
>>>> +	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
>>>> +
>>>> +	crc = crc64_le_update(crc, p, len);
>>>> +
>>>> +	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(crc64_le_bch);
>>>
>>
>> Hi Eric,
>>
>>> Using __le64 here makes no sense, because that type indicates the endianness of
>>> the *bytes*, whereas with CRC's "little endian" and "big endian" refer to the
>>> order in which the *bits* are mapped to the polynomial coefficients.
>>>
>>> Also as you can see for lib/crc32.c you really only need to provide a function
>>>
>>> 	u64 __pure crc64_le(u64 crc, unsigned char const *p, size_t len);
>>>
>>> and the callers can invert at the beginning and/or end if needed.
>>
>> Let me explain why I explicit use __le64 here. When crc64 is used as
>> on-disk checksum, the input of crc64 calculation should be in a explicit
>> specific byte order. Currently check sum in bcache code assumes the CPU
>> is in little endian and just feeds in-memory data into crc64
>> calculation, then the code does not work on big endian machine like s390x.
>>
>> To solve such problem, before calculating CRC the in-memory data should
>> be swapped into a specific byte order (in bcache case it should be
>> little endian). For data storage or transfer, CRC calculation without
>> explicit endian is more easy to introduce bugs.
> 
> No, the implementation never loads multi-byte values, so CPU endianness doesn't
> matter for the input.  CPU endianness *does* matter when serializing the final

If the checksum is generated on big endian machine and checked on little
endian machine, non-specific endianness will be problematic.

> calculated CRC into a byte array for storing on-disk, so maybe bcache gets that
> part wrong, I don't know.  Either way, that has nothing to do with how the
> polynomial coefficients (bits) are ordered *within bytes*, which is what the
> "_be" and "_le" refer to in the CRC-32 implementation.  Yes, the naming is
> unfortunate as it can easily be confused with the usual "bytewise" endianness,
> but you need to understand it.
> 

I see, it seems I misunderstand _le and _be in CRC-32 implementation.
OK, I will find a way to fix the naming and data type issues in v3 series.

> Again, using __le64 makes absolutely no sense.  You're even doing operations
> like shifts directly on a "__le64" which sparse will (correctly) complain about.
> 

Sure, you are correct here :-)

>>
>> When I declare the type of input and output value as __le64, on big
>> endian machine, I expect a type mismatch warning if the input memory
>> buffer is not swapped into little endian. For u64, there is no such type
>> checking warning.
>>
>> This is the initial version of lib/crc64.c, people may add their crc64
>> calculation routines when necessary, e.g. crc64_be() or crc64(). I only
>> add crc64_le_update() and crc64_le_bch() because bcache code needs them.
>>
>> Indeed there is no user of crc64_le() for now, but the file is name as
>> lib/crc64.c, I think there should be a crc64 calculation at least, so I
>> add crc64_le().
>>
>>>
>>> Also your function names make it sound like inverting the bits is the exception
>>> or not recommended, since you called the function which does the inversions
>>> "crc32_le_bch()" so it sounds like a bcache-specific hack, while the one that
>>> doesn't do the inversions is simply called "crc32_le()".  But actually it's
>>> normally recommended to do CRC's with the inversions, so that leading and
>>> trailing zeroes affect the resulting CRC.
>>>
>>
>> I notice this, normally there are two crc routines provided, with and
>> without inversion. The reason that there is no inversion version is
>> no-user in Linux kernel. Indeed there is no user of crc64_le() in Linnux
>> kernel so far. For performance reason, I doubt whether there will be
>> more user to do 64bit crc in kernel.
>>
>> I prefer two crc32 calculation for a 64bit value, but meta data checksum
>> by crc64 calculation is used in bcache for years, the consistency has to
>> be kept.
> 
> Well, your response didn't actually address my points.  But it raises the
> question: if there won't be any other users, then why move CRC-64 to lib/ at
> all?
> 

The only motivation I can see is becachefs, which share part of the code
base with bcache, including crc64 calculation.

And before CPU supports build-in instructors for CRC64, I don't see the
reason why people should use 64bit CRC other than 32bit ones.

>>
>>
>>>> diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
>>>> new file mode 100644
>>>> index 000000000000..5f292f287498
>>>> --- /dev/null
>>>> +++ b/lib/gen_crc64table.c
>>>> @@ -0,0 +1,77 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Generate lookup table for the talbe-driven CRC64 calculation.
>>>> + *
>>>> + * gen_crc64table is executed in kernel build time and generates
>>>> + * lib/crc64table.h. This header is included by lib/crc64.c for
>>>> + * the table-driver CRC64 calculation.
>>>> + *
>>>> + * See lib/crc64.c for more information about which specification
>>>> + * and polynomical arithmetic that gen_crc64table.c follows to
>>>> + * generate the lookup table.
>>>> + *
>>>> + * Copyright 2018 SUSE Linux.
>>>> + *   Author: Coly Li <colyli@suse.de>
>>>> + *
>>>> + */
>>>> +
>>>> +#include <inttypes.h>
>>>> +#include <linux/swab.h>
>>>> +#include <stdio.h>
>>>> +#include "../usr/include/asm/byteorder.h"
>>>> +
>>>> +#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
>>>
>>> Okay, that's actually the ECMA-182 polynomial in "big endian" form (highest
>>> order bit is the coefficient of x^63, lowest order bit is the coefficient of
>>> x^0), so you're actually doing a "big endian" CRC.  So everything in your patch
>>> series that claims it's a little endian or "le" CRC is incorrect.
>>>
>>>> +
>>>> +#ifdef __LITTLE_ENDIAN
>>>> +#  define cpu_to_le64(x) ((__le64)(x))
>>>> +#else
>>>> +#  define cpu_to_le64(x) ((__le64)__swab64(x))
>>>> +#endif
>>>> +
>>>> +static int64_t crc64_table[256] = {0,};
>>>> +
>>>> +static void generate_crc64_table(void)
>>>> +{
>>>> +	uint64_t i, j, c, crc;
>>>> +
>>>> +	for (i = 0; i < 256; i++) {
>>>> +		crc = 0;
>>>> +		c = i << 56;
>>>> +
>>>> +		for (j = 0; j < 8; j++) {
>>>> +			if ((crc ^ c) & 0x8000000000000000ULL)
>>>> +				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
>>>> +			else
>>>> +				crc <<= 1;
>>>> +			c <<= 1;
>>>
>>> See here, it's shifting out the most significant bit, which means it's the
>>> coefficient of the x^63 term ("big endian" or "normal" convention), not the x^0
>>> term ("little endian" or "reversed" convention).
>>
>> I see your point here. I am not expert in coding theory, the knowledge I
>> have is from wikipedia, ECMA-182 and the document from Dr. Ross
>> Williams. From ECMA-182 document, I don't see any word with 'big
>> endian', so I take it as a standard poly and regardless the byte order.
>>
>> And on wikepedia page
>> https://en.wikipedia.org/wiki/Cyclic_redundancy_check , CRC-64-ECMA
>> references the same poly and call "0x42F0E1EBA9EA3693" as normal poly,
>> which one links to polynomial
>> 	"x^64 + x^62 + x^57 + x^55 + x^54 + ....x^7 + x^4 + x + 1"
>> if I understand correctly. But from your information, it seems the
>> polynomial in generate_crc64_table() is x^64 + x^61 ..... Maybe I
>> misunderstand you, could you please give me more hint ?
> 
> As I said, the "normal" convention is the same as "big endian", and the
> "reversed" convention is the same as "little endian" (again, meaning "bitwise"
> endianness, not the usual "bytewise" endianness).  The polynomial is correct but
> you are claiming the polynomial coefficients are mapped to bits in a different
> order than they actually are.

Copied, thanks for the hint :-)

Coly Li
Coly Li July 17, 2018, 2:29 p.m. UTC | #8
On 2018/7/17 3:34 PM, Coly Li wrote:
> On 2018/7/17 3:13 PM, Eric Biggers wrote:
>> On Tue, Jul 17, 2018 at 02:25:24PM +0800, Coly Li wrote:
>>> On 2018/7/17 11:34 AM, Eric Biggers wrote:
>>>> Hi Coly,
>>>>
>>>> On Tue, Jul 17, 2018 at 12:55:05AM +0800, Coly Li wrote:
>>>>> This patch adds the re-write crc64 calculation routines for Linux kernel.
>>>>> The CRC64 polynomical arithmetic follows ECMA-182 specification, inspired
>>>>> by CRC paper of Dr. Ross N. Williams
>>>>> (see http://www.ross.net/crc/download/crc_v3.txt) and other public domain
>>>>> implementations.
>>>>>
>>>>> All the changes work in this way,
>>>>> - When Linux kernel is built, host program lib/gen_crc64table.c will be
>>>>>   compiled to lib/gen_crc64table and executed.
>>>>> - The output of gen_crc64table execution is an array called as lookup
>>>>>   table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64bits-long
>>>>>   numbers, this talbe is dumped into header file lib/crc64table.h.
>>>>> - Then the header file is included by lib/crc64.c for normal 64bit crc
>>>>>   calculation.
>>>>> - Function declaration of the crc64 calculation routines is placed in
>>>>>   include/linux/crc64.h
>>>>>
>>>> [...]
>>>>> diff --git a/lib/crc64.c b/lib/crc64.c
>>>>> new file mode 100644
>>>>> index 000000000000..03f078303bd3
>>>>> --- /dev/null
>>>>> +++ b/lib/crc64.c
>>>>> @@ -0,0 +1,71 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>> +/*
>>>>> + * Normal 64bit CRC calculation.
>>>>> + *
>>>>> + * This is a basic crc64 implementation following ECMA-182 specification,
>>>>> + * which can be found from,
>>>>> + * http://www.ecma-international.org/publications/standards/Ecma-182.htm
>>>>> + *
>>>>> + * Dr. Ross N. Williams has a great document to introduce the idea of CRC
>>>>> + * algorithm, here the CRC64 code is also inspired by the table-driven
>>>>> + * algorithm and detail example from this paper. This paper can be found
>>>>> + * from,
>>>>> + * http://www.ross.net/crc/download/crc_v3.txt
>>>>> + *
>>>>> + * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
>>>>> + * calculation, which is generated by gen_crc64table.c in kernel build
>>>>> + * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
>>>>> + * as well, which is defined as,
>>>>> + *
>>>>> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
>>>>> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
>>>>> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
>>>>> + * x^7 + x^4 + x + 1
>>>>> + *
>>>>> + * Copyright 2018 SUSE Linux.
>>>>> + *   Author: Coly Li <colyli@suse.de>
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include <linux/module.h>
>>>>> +#include <uapi/linux/types.h>
>>>>> +#include "crc64table.h"
>>>>> +
>>>>> +MODULE_DESCRIPTION("CRC64 calculations");
>>>>> +MODULE_LICENSE("GPL");
>>>>> +
>>>>> +__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
>>>>> +{
>>>>> +	size_t i, t;
>>>>> +
>>>>> +	const unsigned char *p = _p;
>>>>> +
>>>>> +	for (i = 0; i < len; i++) {
>>>>> +		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
>>>>> +		crc = crc64table_le[t] ^ (crc << 8);
>>>>> +	}
>>>>> +
>>>>> +	return crc;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(crc64_le_update);
>>>>> +
>>>>> +__le64 crc64_le(const void *p, size_t len)
>>>>> +{
>>>>> +	__le64 crc = 0x0000000000000000ULL;
>>>>> +
>>>>> +	crc = crc64_le_update(crc, p, len);
>>>>> +
>>>>> +	return crc;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(crc64_le);
>>>>> +
>>>>> +/* For checksum calculation in drivers/md/bcache/ */
>>>>> +__le64 crc64_le_bch(const void *p, size_t len)
>>>>> +{
>>>>> +	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
>>>>> +
>>>>> +	crc = crc64_le_update(crc, p, len);
>>>>> +
>>>>> +	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(crc64_le_bch);

[snipped]

>>> I see your point here. I am not expert in coding theory, the knowledge I
>>> have is from wikipedia, ECMA-182 and the document from Dr. Ross
>>> Williams. From ECMA-182 document, I don't see any word with 'big
>>> endian', so I take it as a standard poly and regardless the byte order.
>>>
>>> And on wikepedia page
>>> https://en.wikipedia.org/wiki/Cyclic_redundancy_check , CRC-64-ECMA
>>> references the same poly and call "0x42F0E1EBA9EA3693" as normal poly,
>>> which one links to polynomial
>>> 	"x^64 + x^62 + x^57 + x^55 + x^54 + ....x^7 + x^4 + x + 1"
>>> if I understand correctly. But from your information, it seems the
>>> polynomial in generate_crc64_table() is x^64 + x^61 ..... Maybe I
>>> misunderstand you, could you please give me more hint ?
>>
>> As I said, the "normal" convention is the same as "big endian", and the
>> "reversed" convention is the same as "little endian" (again, meaning "bitwise"
>> endianness, not the usual "bytewise" endianness).  The polynomial is correct but
>> you are claiming the polynomial coefficients are mapped to bits in a different
>> order than they actually are.

In v3 series, I remove _le and __le64 from the patch, that means the
calculation is not explicit resitricted to little endian, and the caller
should make sure the data in a proper byte order. The result is, there
is no misleading naming issue. Since so far the only user is bcache, and
potential user is bcachefs, the developers should know how to use
crc64_update() and crc64_bch() properly.

So now the routines are named as crc64(), crc64_update(), crc64_bch(),
and return value is u64.

I hope now the misleading can be avoided.

Thanks.

Coly Li
diff mbox

Patch

diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 000000000000..cbd10a47d861
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,15 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * crc64.h
+ *
+ * See lib/crc64.c for the related specification and polynomical arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+__le64 crc64_le_update(__le64 crc, const void *_p, size_t len);
+__le64 crc64_le(const void *p, size_t len);
+__le64 crc64_le_bch(const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/lib/.gitignore b/lib/.gitignore
index 09aae85418ab..f2a39c9e5485 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -2,5 +2,7 @@ 
 # Generated files
 #
 gen_crc32table
+gen_crc64table
 crc32table.h
+crc64table.h
 oid_registry_data.c
diff --git a/lib/Makefile b/lib/Makefile
index 90dc5520b784..40c215181687 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -102,6 +102,7 @@  obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
 obj-$(CONFIG_CRC_ITU_T)	+= crc-itu-t.o
 obj-$(CONFIG_CRC32)	+= crc32.o
+obj-$(CONFIG_CRC64)     += crc64.o
 obj-$(CONFIG_CRC32_SELFTEST)	+= crc32test.o
 obj-$(CONFIG_CRC4)	+= crc4.o
 obj-$(CONFIG_CRC7)	+= crc7.o
@@ -215,7 +216,9 @@  obj-$(CONFIG_FONT_SUPPORT) += fonts/
 obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
 
 hostprogs-y	:= gen_crc32table
+hostprogs-y	+= gen_crc64table
 clean-files	:= crc32table.h
+clean-files	+= crc64table.h
 
 $(obj)/crc32.o: $(obj)/crc32table.h
 
@@ -225,6 +228,14 @@  quiet_cmd_crc32 = GEN     $@
 $(obj)/crc32table.h: $(obj)/gen_crc32table
 	$(call cmd,crc32)
 
+$(obj)/crc64.o: $(obj)/crc64table.h
+
+quiet_cmd_crc64 = GEN     $@
+      cmd_crc64 = $< > $@
+
+$(obj)/crc64table.h: $(obj)/gen_crc64table
+	$(call cmd,crc64)
+
 #
 # Build a fast OID lookip registry from include/linux/oid_registry.h
 #
diff --git a/lib/crc64.c b/lib/crc64.c
new file mode 100644
index 000000000000..03f078303bd3
--- /dev/null
+++ b/lib/crc64.c
@@ -0,0 +1,71 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table_le[256] is the lookup table of a table-driver 64bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <uapi/linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL");
+
+__le64 crc64_le_update(__le64 crc, const void *_p, size_t len)
+{
+	size_t i, t;
+
+	const unsigned char *p = _p;
+
+	for (i = 0; i < len; i++) {
+		t = ((crc >> 56) ^ (__le64)(*p++)) & 0xFF;
+		crc = crc64table_le[t] ^ (crc << 8);
+	}
+
+	return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_le_update);
+
+__le64 crc64_le(const void *p, size_t len)
+{
+	__le64 crc = 0x0000000000000000ULL;
+
+	crc = crc64_le_update(crc, p, len);
+
+	return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_le);
+
+/* For checksum calculation in drivers/md/bcache/ */
+__le64 crc64_le_bch(const void *p, size_t len)
+{
+	__le64 crc = 0xFFFFFFFFFFFFFFFFULL;
+
+	crc = crc64_le_update(crc, p, len);
+
+	return (crc ^ 0xFFFFFFFFFFFFFFFFULL);
+}
+EXPORT_SYMBOL_GPL(crc64_le_bch);
diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
new file mode 100644
index 000000000000..5f292f287498
--- /dev/null
+++ b/lib/gen_crc64table.c
@@ -0,0 +1,77 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate lookup table for the talbe-driven CRC64 calculation.
+ *
+ * gen_crc64table is executed in kernel build time and generates
+ * lib/crc64table.h. This header is included by lib/crc64.c for
+ * the table-driver CRC64 calculation.
+ *
+ * See lib/crc64.c for more information about which specification
+ * and polynomical arithmetic that gen_crc64table.c follows to
+ * generate the lookup table.
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ *
+ */
+
+#include <inttypes.h>
+#include <linux/swab.h>
+#include <stdio.h>
+#include "../usr/include/asm/byteorder.h"
+
+#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
+
+#ifdef __LITTLE_ENDIAN
+#  define cpu_to_le64(x) ((__le64)(x))
+#else
+#  define cpu_to_le64(x) ((__le64)__swab64(x))
+#endif
+
+static int64_t crc64_table[256] = {0,};
+
+static void generate_crc64_table(void)
+{
+	uint64_t i, j, c, crc;
+
+	for (i = 0; i < 256; i++) {
+		crc = 0;
+		c = i << 56;
+
+		for (j = 0; j < 8; j++) {
+			if ((crc ^ c) & 0x8000000000000000ULL)
+				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
+			else
+				crc <<= 1;
+			c <<= 1;
+		}
+
+		crc64_table[i] = crc;
+	}
+
+}
+
+static void print_crc64le_table(void)
+{
+	int i;
+
+	printf("/* this file is generated - do not edit */\n\n");
+	printf("#include <uapi/linux/types.h>\n");
+	printf("#include <linux/cache.h>\n\n");
+	printf("static const __le64 ____cacheline_aligned crc64table_le[256] = {\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t0x%016" PRIx64 "ULL", cpu_to_le64(crc64_table[i]));
+		if (i & 0x1)
+			printf(",\n");
+		else
+			printf(", ");
+	}
+	printf("};\n");
+}
+
+int main(int argc, char *argv[])
+{
+	generate_crc64_table();
+	print_crc64le_table();
+	return 0;
+}