linux/lib/crc32.c
Ard Biesheuvel 16739efac6 crypto: crc32c - Provide crc32c-arch driver for accelerated library code
crc32c-generic is currently backed by the architecture's CRC-32c library
code, which may offer a variety of implementations depending on the
capabilities of the platform. These are not covered by the crypto
subsystem's fuzz testing capabilities because crc32c-generic is the
reference driver that the fuzzing logic uses as a source of truth.

Fix this by providing a crc32c-arch implementation which is based on the
arch library code if available, and modify crc32c-generic so it is
always based on the generic C implementation. If the arch has no CRC-32c
library code, this change does nothing.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2024-10-28 18:33:10 +08:00

349 lines
9.4 KiB
C

/*
* Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin
* cleaned up code to current version of sparse and added the slicing-by-8
* algorithm to the closely similar existing slicing-by-4 algorithm.
*
* Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
* Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks!
* Code was from the public domain, copyright abandoned. Code was
* subsequently included in the kernel, thus was re-licensed under the
* GNU GPL v2.
*
* Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
* Same crc32 function was used in 5 other places in the kernel.
* I made one version, and deleted the others.
* There are various incantations of crc32(). Some use a seed of 0 or ~0.
* Some xor at the end with ~0. The generic crc32() function takes
* seed as an argument, and doesn't xor at the end. Then individual
* users can do whatever they need.
* drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
* fs/jffs2 uses seed 0, doesn't xor with ~0.
* fs/partitions/efi.c uses seed ~0, xor's with ~0.
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
/* see: Documentation/staging/crc32.rst for a description of algorithms */
#include <linux/crc32.h>
#include <linux/crc32poly.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include "crc32defs.h"
#if CRC_LE_BITS > 8
# define tole(x) ((__force u32) cpu_to_le32(x))
#else
# define tole(x) (x)
#endif
#if CRC_BE_BITS > 8
# define tobe(x) ((__force u32) cpu_to_be32(x))
#else
# define tobe(x) (x)
#endif
#include "crc32table.h"
MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
MODULE_DESCRIPTION("Various CRC32 calculations");
MODULE_LICENSE("GPL");
#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
/* implements slicing-by-4 or slicing-by-8 algorithm */
static inline u32 __pure
crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
{
# ifdef __LITTLE_ENDIAN
# define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8)
# define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \
t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255])
# define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \
t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255])
# else
# define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
# define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \
t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255])
# define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \
t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255])
# endif
const u32 *b;
size_t rem_len;
# ifdef CONFIG_X86
size_t i;
# endif
const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
# if CRC_LE_BITS != 32
const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
# endif
u32 q;
/* Align it */
if (unlikely((long)buf & 3 && len)) {
do {
DO_CRC(*buf++);
} while ((--len) && ((long)buf)&3);
}
# if CRC_LE_BITS == 32
rem_len = len & 3;
len = len >> 2;
# else
rem_len = len & 7;
len = len >> 3;
# endif
b = (const u32 *)buf;
# ifdef CONFIG_X86
--b;
for (i = 0; i < len; i++) {
# else
for (--b; len; --len) {
# endif
q = crc ^ *++b; /* use pre increment for speed */
# if CRC_LE_BITS == 32
crc = DO_CRC4;
# else
crc = DO_CRC8;
q = *++b;
crc ^= DO_CRC4;
# endif
}
len = rem_len;
/* And the last few bytes */
if (len) {
u8 *p = (u8 *)(b + 1) - 1;
# ifdef CONFIG_X86
for (i = 0; i < len; i++)
DO_CRC(*++p); /* use pre increment for speed */
# else
do {
DO_CRC(*++p); /* use pre increment for speed */
} while (--len);
# endif
}
return crc;
#undef DO_CRC
#undef DO_CRC4
#undef DO_CRC8
}
#endif
/**
* crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
* CRC32/CRC32C
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other
* uses, or the previous crc32/crc32c value if computing incrementally.
* @p: pointer to buffer over which CRC32/CRC32C is run
* @len: length of buffer @p
* @tab: little-endian Ethernet table
* @polynomial: CRC32/CRC32c LE polynomial
*/
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
size_t len, const u32 (*tab)[256],
u32 polynomial)
{
#if CRC_LE_BITS == 1
int i;
while (len--) {
crc ^= *p++;
for (i = 0; i < 8; i++)
crc = (crc >> 1) ^ ((crc & 1) ? polynomial : 0);
}
# elif CRC_LE_BITS == 2
while (len--) {
crc ^= *p++;
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
crc = (crc >> 2) ^ tab[0][crc & 3];
}
# elif CRC_LE_BITS == 4
while (len--) {
crc ^= *p++;
crc = (crc >> 4) ^ tab[0][crc & 15];
crc = (crc >> 4) ^ tab[0][crc & 15];
}
# elif CRC_LE_BITS == 8
/* aka Sarwate algorithm */
while (len--) {
crc ^= *p++;
crc = (crc >> 8) ^ tab[0][crc & 255];
}
# else
crc = (__force u32) __cpu_to_le32(crc);
crc = crc32_body(crc, p, len, tab);
crc = __le32_to_cpu((__force __le32)crc);
#endif
return crc;
}
#if CRC_LE_BITS == 1
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
}
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
}
#endif
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);
u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
EXPORT_SYMBOL(crc32_le_base);
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
EXPORT_SYMBOL(__crc32c_le_base);
u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
* represents the highest power of x, and the msbit represents x^0.
*/
static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
{
u32 product = x & 1 ? y : 0;
int i;
for (i = 0; i < 31; i++) {
product = (product >> 1) ^ (product & 1 ? modulus : 0);
x >>= 1;
product ^= x & 1 ? y : 0;
}
return product;
}
/**
* crc32_generic_shift - Append @len 0 bytes to crc, in logarithmic time
* @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
* @len: The number of bytes. @crc is multiplied by x^(8*@len)
* @polynomial: The modulus used to reduce the result to 32 bits.
*
* It's possible to parallelize CRC computations by computing a CRC
* over separate ranges of a buffer, then summing them.
* This shifts the given CRC by 8*len bits (i.e. produces the same effect
* as appending len bytes of zero to the data), in time proportional
* to log(len).
*/
static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
u32 polynomial)
{
u32 power = polynomial; /* CRC of x^32 */
int i;
/* Shift up to 32 bits in the simple linear way */
for (i = 0; i < 8 * (int)(len & 3); i++)
crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
len >>= 2;
if (!len)
return crc;
for (;;) {
/* "power" is x^(2^i), modulo the polynomial */
if (len & 1)
crc = gf2_multiply(crc, power, polynomial);
len >>= 1;
if (!len)
break;
/* Square power, advancing to x^(2^(i+1)) */
power = gf2_multiply(power, power, polynomial);
}
return crc;
}
u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
{
return crc32_generic_shift(crc, len, CRC32_POLY_LE);
}
u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
{
return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
}
EXPORT_SYMBOL(crc32_le_shift);
EXPORT_SYMBOL(__crc32c_le_shift);
/**
* crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
* @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
* other uses, or the previous crc32 value if computing incrementally.
* @p: pointer to buffer over which CRC32 is run
* @len: length of buffer @p
* @tab: big-endian Ethernet table
* @polynomial: CRC32 BE polynomial
*/
static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
size_t len, const u32 (*tab)[256],
u32 polynomial)
{
#if CRC_BE_BITS == 1
int i;
while (len--) {
crc ^= *p++ << 24;
for (i = 0; i < 8; i++)
crc =
(crc << 1) ^ ((crc & 0x80000000) ? polynomial :
0);
}
# elif CRC_BE_BITS == 2
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
crc = (crc << 2) ^ tab[0][crc >> 30];
}
# elif CRC_BE_BITS == 4
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 4) ^ tab[0][crc >> 28];
crc = (crc << 4) ^ tab[0][crc >> 28];
}
# elif CRC_BE_BITS == 8
while (len--) {
crc ^= *p++ << 24;
crc = (crc << 8) ^ tab[0][crc >> 24];
}
# else
crc = (__force u32) __cpu_to_be32(crc);
crc = crc32_body(crc, p, len, tab);
crc = __be32_to_cpu((__force __be32)crc);
# endif
return crc;
}
#if CRC_BE_BITS == 1
u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
}
#else
u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
{
return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
}
#endif
EXPORT_SYMBOL(crc32_be);