1 /*
   2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   3  *
   4  * This code is free software; you can redistribute it and/or modify it
   5  * under the terms of the GNU General Public License version 2 only, as
   6  * published by the Free Software Foundation.  Oracle designates this
   7  * particular file as subject to the "Classpath" exception as provided
   8  * by Oracle in the LICENSE file that accompanied this code.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  */
  24 
  25 /* crc32.c -- compute the CRC-32 of a data stream
  26  * Copyright (C) 1995-2022 Mark Adler
  27  * For conditions of distribution and use, see copyright notice in zlib.h
  28  *
  29  * This interleaved implementation of a CRC makes use of pipelined multiple
  30  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
  31  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
  32  */
  33 
  34 /* @(#) $Id$ */
  35 
  36 /*
  37   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
  38   protection on the static variables used to control the first-use generation
  39   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
  40   first call get_crc_table() to initialize the tables before allowing more than
  41   one thread to use crc32().
  42 
  43   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
  44   produced, so that this one source file can be compiled to an executable.
  45  */
  46 
  47 #ifdef MAKECRCH
  48 #  include <stdio.h>
  49 #  ifndef DYNAMIC_CRC_TABLE
  50 #    define DYNAMIC_CRC_TABLE
  51 #  endif /* !DYNAMIC_CRC_TABLE */
  52 #endif /* MAKECRCH */
  53 
  54 #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
  55 
  56  /*
  57   A CRC of a message is computed on N braids of words in the message, where
  58   each word consists of W bytes (4 or 8). If N is 3, for example, then three
  59   running sparse CRCs are calculated respectively on each braid, at these
  60   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
  61   This is done starting at a word boundary, and continues until as many blocks
  62   of N * W bytes as are available have been processed. The results are combined
  63   into a single CRC at the end. For this code, N must be in the range 1..6 and
  64   W must be 4 or 8. The upper limit on N can be increased if desired by adding
  65   more #if blocks, extending the patterns apparent in the code. In addition,
  66   crc32.h would need to be regenerated, if the maximum N value is increased.
  67 
  68   N and W are chosen empirically by benchmarking the execution time on a given
  69   processor. The choices for N and W below were based on testing on Intel Kaby
  70   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
  71   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
  72   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
  73   They were all tested with either gcc or clang, all using the -O3 optimization
  74   level. Your mileage may vary.
  75  */
  76 
  77 /* Define N */
  78 #ifdef Z_TESTN
  79 #  define N Z_TESTN
  80 #else
  81 #  define N 5
  82 #endif
  83 #if N < 1 || N > 6
  84 #  error N must be in 1..6
  85 #endif
  86 
  87 /*
  88   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
  89   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
  90   that bytes are eight bits.
  91  */
  92 
  93 /*
  94   Define W and the associated z_word_t type. If W is not defined, then a
  95   braided calculation is not used, and the associated tables and code are not
  96   compiled.
  97  */
  98 #ifdef Z_TESTW
  99 #  if Z_TESTW-1 != -1
 100 #    define W Z_TESTW
 101 #  endif
 102 #else
 103 #  ifdef MAKECRCH
 104 #    define W 8         /* required for MAKECRCH */
 105 #  else
 106 #    if defined(__x86_64__) || defined(__aarch64__)
 107 #      define W 8
 108 #    else
 109 #      define W 4
 110 #    endif
 111 #  endif
 112 #endif
 113 #ifdef W
 114 #  if W == 8 && defined(Z_U8)
 115      typedef Z_U8 z_word_t;
 116 #  elif defined(Z_U4)
 117 #    undef W
 118 #    define W 4
 119      typedef Z_U4 z_word_t;
 120 #  else
 121 #    undef W
 122 #  endif
 123 #endif
 124 
 125 /* If available, use the ARM processor CRC32 instruction. */
 126 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
 127 #  define ARMCRC32
 128 #endif
 129 
 130 /* Local functions. */
 131 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
 132 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
 133 
 134 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 135     local z_word_t byte_swap OF((z_word_t word));
 136 #endif
 137 
 138 #if defined(W) && !defined(ARMCRC32)
 139     local z_crc_t crc_word OF((z_word_t data));
 140     local z_word_t crc_word_big OF((z_word_t data));
 141 #endif
 142 
 143 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 144 /*
 145   Swap the bytes in a z_word_t to convert between little and big endian. Any
 146   self-respecting compiler will optimize this to a single machine byte-swap
 147   instruction, if one is available. This assumes that word_t is either 32 bits
 148   or 64 bits.
 149  */
 150 local z_word_t byte_swap(word)
 151     z_word_t word;
 152 {
 153 #  if W == 8
 154     return
 155         (word & 0xff00000000000000) >> 56 |
 156         (word & 0xff000000000000) >> 40 |
 157         (word & 0xff0000000000) >> 24 |
 158         (word & 0xff00000000) >> 8 |
 159         (word & 0xff000000) << 8 |
 160         (word & 0xff0000) << 24 |
 161         (word & 0xff00) << 40 |
 162         (word & 0xff) << 56;
 163 #  else   /* W == 4 */
 164     return
 165         (word & 0xff000000) >> 24 |
 166         (word & 0xff0000) >> 8 |
 167         (word & 0xff00) << 8 |
 168         (word & 0xff) << 24;
 169 #  endif
 170 }
 171 #endif
 172 
 173 /* CRC polynomial. */
 174 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
 175 
 176 #ifdef DYNAMIC_CRC_TABLE
 177 
 178 local z_crc_t FAR crc_table[256];
 179 local z_crc_t FAR x2n_table[32];
 180 local void make_crc_table OF((void));
 181 #ifdef W
 182    local z_word_t FAR crc_big_table[256];
 183    local z_crc_t FAR crc_braid_table[W][256];
 184    local z_word_t FAR crc_braid_big_table[W][256];
 185    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
 186 #endif
 187 #ifdef MAKECRCH
 188    local void write_table OF((FILE *, const z_crc_t FAR *, int));
 189    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
 190    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
 191 #endif /* MAKECRCH */
 192 
 193 /*
 194   Define a once() function depending on the availability of atomics. If this is
 195   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
 196   multiple threads, and if atomics are not available, then get_crc_table() must
 197   be called to initialize the tables and must return before any threads are
 198   allowed to compute or combine CRCs.
 199  */
 200 
 201 /* Definition of once functionality. */
 202 typedef struct once_s once_t;
 203 local void once OF((once_t *, void (*)(void)));
 204 
 205 /* Check for the availability of atomics. */
 206 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
 207     !defined(__STDC_NO_ATOMICS__)
 208 
 209 #include <stdatomic.h>
 210 
 211 /* Structure for once(), which must be initialized with ONCE_INIT. */
 212 struct once_s {
 213     atomic_flag begun;
 214     atomic_int done;
 215 };
 216 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
 217 
 218 /*
 219   Run the provided init() function exactly once, even if multiple threads
 220   invoke once() at the same time. The state must be a once_t initialized with
 221   ONCE_INIT.
 222  */
 223 local void once(state, init)
 224     once_t *state;
 225     void (*init)(void);
 226 {
 227     if (!atomic_load(&state->done)) {
 228         if (atomic_flag_test_and_set(&state->begun))
 229             while (!atomic_load(&state->done))
 230                 ;
 231         else {
 232             init();
 233             atomic_store(&state->done, 1);
 234         }
 235     }
 236 }
 237 
 238 #else   /* no atomics */
 239 
 240 /* Structure for once(), which must be initialized with ONCE_INIT. */
 241 struct once_s {
 242     volatile int begun;
 243     volatile int done;
 244 };
 245 #define ONCE_INIT {0, 0}
 246 
 247 /* Test and set. Alas, not atomic, but tries to minimize the period of
 248    vulnerability. */
 249 local int test_and_set OF((int volatile *));
 250 local int test_and_set(flag)
 251     int volatile *flag;
 252 {
 253     int was;
 254 
 255     was = *flag;
 256     *flag = 1;
 257     return was;
 258 }
 259 
 260 /* Run the provided init() function once. This is not thread-safe. */
 261 local void once(state, init)
 262     once_t *state;
 263     void (*init)(void);
 264 {
 265     if (!state->done) {
 266         if (test_and_set(&state->begun))
 267             while (!state->done)
 268                 ;
 269         else {
 270             init();
 271             state->done = 1;
 272         }
 273     }
 274 }
 275 
 276 #endif
 277 
 278 /* State for once(). */
 279 local once_t made = ONCE_INIT;
 280 
 281 /*
 282   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
 283   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
 284 
 285   Polynomials over GF(2) are represented in binary, one bit per coefficient,
 286   with the lowest powers in the most significant bit. Then adding polynomials
 287   is just exclusive-or, and multiplying a polynomial by x is a right shift by
 288   one. If we call the above polynomial p, and represent a byte as the
 289   polynomial q, also with the lowest power in the most significant bit (so the
 290   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
 291   where a mod b means the remainder after dividing a by b.
 292 
 293   This calculation is done using the shift-register method of multiplying and
 294   taking the remainder. The register is initialized to zero, and for each
 295   incoming bit, x^32 is added mod p to the register if the bit is a one (where
 296   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
 297   (which is shifting right by one and adding x^32 mod p if the bit shifted out
 298   is a one). We start with the highest power (least significant bit) of q and
 299   repeat for all eight bits of q.
 300 
 301   The table is simply the CRC of all possible eight bit values. This is all the
 302   information needed to generate CRCs on data a byte at a time for all
 303   combinations of CRC register values and incoming bytes.
 304  */
 305 
 306 local void make_crc_table()
 307 {
 308     unsigned i, j, n;
 309     z_crc_t p;
 310 
 311     /* initialize the CRC of bytes tables */
 312     for (i = 0; i < 256; i++) {
 313         p = i;
 314         for (j = 0; j < 8; j++)
 315             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
 316         crc_table[i] = p;
 317 #ifdef W
 318         crc_big_table[i] = byte_swap(p);
 319 #endif
 320     }
 321 
 322     /* initialize the x^2^n mod p(x) table */
 323     p = (z_crc_t)1 << 30;         /* x^1 */
 324     x2n_table[0] = p;
 325     for (n = 1; n < 32; n++)
 326         x2n_table[n] = p = multmodp(p, p);
 327 
 328 #ifdef W
 329     /* initialize the braiding tables -- needs x2n_table[] */
 330     braid(crc_braid_table, crc_braid_big_table, N, W);
 331 #endif
 332 
 333 #ifdef MAKECRCH
 334     {
 335         /*
 336           The crc32.h header file contains tables for both 32-bit and 64-bit
 337           z_word_t's, and so requires a 64-bit type be available. In that case,
 338           z_word_t must be defined to be 64-bits. This code then also generates
 339           and writes out the tables for the case that z_word_t is 32 bits.
 340          */
 341 #if !defined(W) || W != 8
 342 #  error Need a 64-bit integer type in order to generate crc32.h.
 343 #endif
 344         FILE *out;
 345         int k, n;
 346         z_crc_t ltl[8][256];
 347         z_word_t big[8][256];
 348 
 349         out = fopen("crc32.h", "w");
 350         if (out == NULL) return;
 351 
 352         /* write out little-endian CRC table to crc32.h */
 353         fprintf(out,
 354             "/* crc32.h -- tables for rapid CRC calculation\n"
 355             " * Generated automatically by crc32.c\n */\n"
 356             "\n"
 357             "local const z_crc_t FAR crc_table[] = {\n"
 358             "    ");
 359         write_table(out, crc_table, 256);
 360         fprintf(out,
 361             "};\n");
 362 
 363         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
 364         fprintf(out,
 365             "\n"
 366             "#ifdef W\n"
 367             "\n"
 368             "#if W == 8\n"
 369             "\n"
 370             "local const z_word_t FAR crc_big_table[] = {\n"
 371             "    ");
 372         write_table64(out, crc_big_table, 256);
 373         fprintf(out,
 374             "};\n");
 375 
 376         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
 377         fprintf(out,
 378             "\n"
 379             "#else /* W == 4 */\n"
 380             "\n"
 381             "local const z_word_t FAR crc_big_table[] = {\n"
 382             "    ");
 383         write_table32hi(out, crc_big_table, 256);
 384         fprintf(out,
 385             "};\n"
 386             "\n"
 387             "#endif\n");
 388 
 389         /* write out braid tables for each value of N */
 390         for (n = 1; n <= 6; n++) {
 391             fprintf(out,
 392             "\n"
 393             "#if N == %d\n", n);
 394 
 395             /* compute braid tables for this N and 64-bit word_t */
 396             braid(ltl, big, n, 8);
 397 
 398             /* write out braid tables for 64-bit z_word_t to crc32.h */
 399             fprintf(out,
 400             "\n"
 401             "#if W == 8\n"
 402             "\n"
 403             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
 404             for (k = 0; k < 8; k++) {
 405                 fprintf(out, "   {");
 406                 write_table(out, ltl[k], 256);
 407                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
 408             }
 409             fprintf(out,
 410             "};\n"
 411             "\n"
 412             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
 413             for (k = 0; k < 8; k++) {
 414                 fprintf(out, "   {");
 415                 write_table64(out, big[k], 256);
 416                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
 417             }
 418             fprintf(out,
 419             "};\n");
 420 
 421             /* compute braid tables for this N and 32-bit word_t */
 422             braid(ltl, big, n, 4);
 423 
 424             /* write out braid tables for 32-bit z_word_t to crc32.h */
 425             fprintf(out,
 426             "\n"
 427             "#else /* W == 4 */\n"
 428             "\n"
 429             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
 430             for (k = 0; k < 4; k++) {
 431                 fprintf(out, "   {");
 432                 write_table(out, ltl[k], 256);
 433                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
 434             }
 435             fprintf(out,
 436             "};\n"
 437             "\n"
 438             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
 439             for (k = 0; k < 4; k++) {
 440                 fprintf(out, "   {");
 441                 write_table32hi(out, big[k], 256);
 442                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
 443             }
 444             fprintf(out,
 445             "};\n"
 446             "\n"
 447             "#endif\n"
 448             "\n"
 449             "#endif\n");
 450         }
 451         fprintf(out,
 452             "\n"
 453             "#endif\n");
 454 
 455         /* write out zeros operator table to crc32.h */
 456         fprintf(out,
 457             "\n"
 458             "local const z_crc_t FAR x2n_table[] = {\n"
 459             "    ");
 460         write_table(out, x2n_table, 32);
 461         fprintf(out,
 462             "};\n");
 463         fclose(out);
 464     }
 465 #endif /* MAKECRCH */
 466 }
 467 
 468 #ifdef MAKECRCH
 469 
 470 /*
 471    Write the 32-bit values in table[0..k-1] to out, five per line in
 472    hexadecimal separated by commas.
 473  */
 474 local void write_table(out, table, k)
 475     FILE *out;
 476     const z_crc_t FAR *table;
 477     int k;
 478 {
 479     int n;
 480 
 481     for (n = 0; n < k; n++)
 482         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 483                 (unsigned long)(table[n]),
 484                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 485 }
 486 
 487 /*
 488    Write the high 32-bits of each value in table[0..k-1] to out, five per line
 489    in hexadecimal separated by commas.
 490  */
 491 local void write_table32hi(out, table, k)
 492 FILE *out;
 493 const z_word_t FAR *table;
 494 int k;
 495 {
 496     int n;
 497 
 498     for (n = 0; n < k; n++)
 499         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 500                 (unsigned long)(table[n] >> 32),
 501                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 502 }
 503 
 504 /*
 505   Write the 64-bit values in table[0..k-1] to out, three per line in
 506   hexadecimal separated by commas. This assumes that if there is a 64-bit
 507   type, then there is also a long long integer type, and it is at least 64
 508   bits. If not, then the type cast and format string can be adjusted
 509   accordingly.
 510  */
 511 local void write_table64(out, table, k)
 512     FILE *out;
 513     const z_word_t FAR *table;
 514     int k;
 515 {
 516     int n;
 517 
 518     for (n = 0; n < k; n++)
 519         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
 520                 (unsigned long long)(table[n]),
 521                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
 522 }
 523 
 524 /* Actually do the deed. */
 525 int main()
 526 {
 527     make_crc_table();
 528     return 0;
 529 }
 530 
 531 #endif /* MAKECRCH */
 532 
 533 #ifdef W
 534 /*
 535   Generate the little and big-endian braid tables for the given n and z_word_t
 536   size w. Each array must have room for w blocks of 256 elements.
 537  */
 538 local void braid(ltl, big, n, w)
 539     z_crc_t ltl[][256];
 540     z_word_t big[][256];
 541     int n;
 542     int w;
 543 {
 544     int k;
 545     z_crc_t i, p, q;
 546     for (k = 0; k < w; k++) {
 547         p = x2nmodp((n * w + 3 - k) << 3, 0);
 548         ltl[k][0] = 0;
 549         big[w - 1 - k][0] = 0;
 550         for (i = 1; i < 256; i++) {
 551             ltl[k][i] = q = multmodp(i << 24, p);
 552             big[w - 1 - k][i] = byte_swap(q);
 553         }
 554     }
 555 }
 556 #endif
 557 
 558 #else /* !DYNAMIC_CRC_TABLE */
 559 /* ========================================================================
 560  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
 561  * of x for combining CRC-32s, all made by make_crc_table().
 562  */
 563 #include "crc32.h"
 564 #endif /* DYNAMIC_CRC_TABLE */
 565 
 566 /* ========================================================================
 567  * Routines used for CRC calculation. Some are also required for the table
 568  * generation above.
 569  */
 570 
 571 /*
 572   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
 573   reflected. For speed, this requires that a not be zero.
 574  */
 575 local z_crc_t multmodp(a, b)
 576     z_crc_t a;
 577     z_crc_t b;
 578 {
 579     z_crc_t m, p;
 580 
 581     m = (z_crc_t)1 << 31;
 582     p = 0;
 583     for (;;) {
 584         if (a & m) {
 585             p ^= b;
 586             if ((a & (m - 1)) == 0)
 587                 break;
 588         }
 589         m >>= 1;
 590         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
 591     }
 592     return p;
 593 }
 594 
 595 /*
 596   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
 597   initialized.
 598  */
 599 local z_crc_t x2nmodp(n, k)
 600     z_off64_t n;
 601     unsigned k;
 602 {
 603     z_crc_t p;
 604 
 605     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
 606     while (n) {
 607         if (n & 1)
 608             p = multmodp(x2n_table[k & 31], p);
 609         n >>= 1;
 610         k++;
 611     }
 612     return p;
 613 }
 614 
 615 /* =========================================================================
 616  * This function can be used by asm versions of crc32(), and to force the
 617  * generation of the CRC tables in a threaded application.
 618  */
 619 const z_crc_t FAR * ZEXPORT get_crc_table()
 620 {
 621 #ifdef DYNAMIC_CRC_TABLE
 622     once(&made, make_crc_table);
 623 #endif /* DYNAMIC_CRC_TABLE */
 624     return (const z_crc_t FAR *)crc_table;
 625 }
 626 
 627 /* =========================================================================
 628  * Use ARM machine instructions if available. This will compute the CRC about
 629  * ten times faster than the braided calculation. This code does not check for
 630  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
 631  * only be defined if the compilation specifies an ARM processor architecture
 632  * that has the instructions. For example, compiling with -march=armv8.1-a or
 633  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
 634  * instructions.
 635  */
 636 #ifdef ARMCRC32
 637 
 638 /*
 639    Constants empirically determined to maximize speed. These values are from
 640    measurements on a Cortex-A57. Your mileage may vary.
 641  */
 642 #define Z_BATCH 3990                /* number of words in a batch */
 643 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
 644 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
 645 
 646 unsigned long ZEXPORT crc32_z(crc, buf, len)
 647     unsigned long crc;
 648     const unsigned char FAR *buf;
 649     z_size_t len;
 650 {
 651     z_crc_t val;
 652     z_word_t crc1, crc2;
 653     const z_word_t *word;
 654     z_word_t val0, val1, val2;
 655     z_size_t last, last2, i;
 656     z_size_t num;
 657 
 658     /* Return initial CRC, if requested. */
 659     if (buf == Z_NULL) return 0;
 660 
 661 #ifdef DYNAMIC_CRC_TABLE
 662     once(&made, make_crc_table);
 663 #endif /* DYNAMIC_CRC_TABLE */
 664 
 665     /* Pre-condition the CRC */
 666     crc = (~crc) & 0xffffffff;
 667 
 668     /* Compute the CRC up to a word boundary. */
 669     while (len && ((z_size_t)buf & 7) != 0) {
 670         len--;
 671         val = *buf++;
 672         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
 673     }
 674 
 675     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
 676     word = (z_word_t const *)buf;
 677     num = len >> 3;
 678     len &= 7;
 679 
 680     /* Do three interleaved CRCs to realize the throughput of one crc32x
 681        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
 682        three CRCs are combined into a single CRC after each set of batches. */
 683     while (num >= 3 * Z_BATCH) {
 684         crc1 = 0;
 685         crc2 = 0;
 686         for (i = 0; i < Z_BATCH; i++) {
 687             val0 = word[i];
 688             val1 = word[i + Z_BATCH];
 689             val2 = word[i + 2 * Z_BATCH];
 690             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
 691             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
 692             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
 693         }
 694         word += 3 * Z_BATCH;
 695         num -= 3 * Z_BATCH;
 696         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
 697         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
 698     }
 699 
 700     /* Do one last smaller batch with the remaining words, if there are enough
 701        to pay for the combination of CRCs. */
 702     last = num / 3;
 703     if (last >= Z_BATCH_MIN) {
 704         last2 = last << 1;
 705         crc1 = 0;
 706         crc2 = 0;
 707         for (i = 0; i < last; i++) {
 708             val0 = word[i];
 709             val1 = word[i + last];
 710             val2 = word[i + last2];
 711             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
 712             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
 713             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
 714         }
 715         word += 3 * last;
 716         num -= 3 * last;
 717         val = x2nmodp(last, 6);
 718         crc = multmodp(val, crc) ^ crc1;
 719         crc = multmodp(val, crc) ^ crc2;
 720     }
 721 
 722     /* Compute the CRC on any remaining words. */
 723     for (i = 0; i < num; i++) {
 724         val0 = word[i];
 725         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
 726     }
 727     word += num;
 728 
 729     /* Complete the CRC on any remaining bytes. */
 730     buf = (const unsigned char FAR *)word;
 731     while (len) {
 732         len--;
 733         val = *buf++;
 734         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
 735     }
 736 
 737     /* Return the CRC, post-conditioned. */
 738     return crc ^ 0xffffffff;
 739 }
 740 
 741 #else
 742 
 743 #ifdef W
 744 
 745 /*
 746   Return the CRC of the W bytes in the word_t data, taking the
 747   least-significant byte of the word as the first byte of data, without any pre
 748   or post conditioning. This is used to combine the CRCs of each braid.
 749  */
 750 local z_crc_t crc_word(data)
 751     z_word_t data;
 752 {
 753     int k;
 754     for (k = 0; k < W; k++)
 755         data = (data >> 8) ^ crc_table[data & 0xff];
 756     return (z_crc_t)data;
 757 }
 758 
 759 local z_word_t crc_word_big(data)
 760     z_word_t data;
 761 {
 762     int k;
 763     for (k = 0; k < W; k++)
 764         data = (data << 8) ^
 765             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
 766     return data;
 767 }
 768 
 769 #endif
 770 
 771 /* ========================================================================= */
 772 unsigned long ZEXPORT crc32_z(crc, buf, len)
 773     unsigned long crc;
 774     const unsigned char FAR *buf;
 775     z_size_t len;
 776 {
 777     /* Return initial CRC, if requested. */
 778     if (buf == Z_NULL) return 0;
 779 
 780 #ifdef DYNAMIC_CRC_TABLE
 781     once(&made, make_crc_table);
 782 #endif /* DYNAMIC_CRC_TABLE */
 783 
 784     /* Pre-condition the CRC */
 785     crc = (~crc) & 0xffffffff;
 786 
 787 #ifdef W
 788 
 789     /* If provided enough bytes, do a braided CRC calculation. */
 790     if (len >= N * W + W - 1) {
 791         z_size_t blks;
 792         z_word_t const *words;
 793         unsigned endian;
 794         int k;
 795 
 796         /* Compute the CRC up to a z_word_t boundary. */
 797         while (len && ((z_size_t)buf & (W - 1)) != 0) {
 798             len--;
 799             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 800         }
 801 
 802         /* Compute the CRC on as many N z_word_t blocks as are available. */
 803         blks = len / (N * W);
 804         len -= blks * N * W;
 805         words = (z_word_t const *)buf;
 806 
 807         /* Do endian check at execution time instead of compile time, since ARM
 808            processors can change the endianess at execution time. If the
 809            compiler knows what the endianess will be, it can optimize out the
 810            check and the unused branch. */
 811         endian = 1;
 812         if (*(unsigned char *)&endian) {
 813             /* Little endian. */
 814 
 815             z_crc_t crc0;
 816             z_word_t word0;
 817 #if N > 1
 818             z_crc_t crc1;
 819             z_word_t word1;
 820 #if N > 2
 821             z_crc_t crc2;
 822             z_word_t word2;
 823 #if N > 3
 824             z_crc_t crc3;
 825             z_word_t word3;
 826 #if N > 4
 827             z_crc_t crc4;
 828             z_word_t word4;
 829 #if N > 5
 830             z_crc_t crc5;
 831             z_word_t word5;
 832 #endif
 833 #endif
 834 #endif
 835 #endif
 836 #endif
 837 
 838             /* Initialize the CRC for each braid. */
 839             crc0 = crc;
 840 #if N > 1
 841             crc1 = 0;
 842 #if N > 2
 843             crc2 = 0;
 844 #if N > 3
 845             crc3 = 0;
 846 #if N > 4
 847             crc4 = 0;
 848 #if N > 5
 849             crc5 = 0;
 850 #endif
 851 #endif
 852 #endif
 853 #endif
 854 #endif
 855 
 856             /*
 857               Process the first blks-1 blocks, computing the CRCs on each braid
 858               independently.
 859              */
 860             while (--blks) {
 861                 /* Load the word for each braid into registers. */
 862                 word0 = crc0 ^ words[0];
 863 #if N > 1
 864                 word1 = crc1 ^ words[1];
 865 #if N > 2
 866                 word2 = crc2 ^ words[2];
 867 #if N > 3
 868                 word3 = crc3 ^ words[3];
 869 #if N > 4
 870                 word4 = crc4 ^ words[4];
 871 #if N > 5
 872                 word5 = crc5 ^ words[5];
 873 #endif
 874 #endif
 875 #endif
 876 #endif
 877 #endif
 878                 words += N;
 879 
 880                 /* Compute and update the CRC for each word. The loop should
 881                    get unrolled. */
 882                 crc0 = crc_braid_table[0][word0 & 0xff];
 883 #if N > 1
 884                 crc1 = crc_braid_table[0][word1 & 0xff];
 885 #if N > 2
 886                 crc2 = crc_braid_table[0][word2 & 0xff];
 887 #if N > 3
 888                 crc3 = crc_braid_table[0][word3 & 0xff];
 889 #if N > 4
 890                 crc4 = crc_braid_table[0][word4 & 0xff];
 891 #if N > 5
 892                 crc5 = crc_braid_table[0][word5 & 0xff];
 893 #endif
 894 #endif
 895 #endif
 896 #endif
 897 #endif
 898                 for (k = 1; k < W; k++) {
 899                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
 900 #if N > 1
 901                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
 902 #if N > 2
 903                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
 904 #if N > 3
 905                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
 906 #if N > 4
 907                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
 908 #if N > 5
 909                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
 910 #endif
 911 #endif
 912 #endif
 913 #endif
 914 #endif
 915                 }
 916             }
 917 
 918             /*
 919               Process the last block, combining the CRCs of the N braids at the
 920               same time.
 921              */
 922             crc = crc_word(crc0 ^ words[0]);
 923 #if N > 1
 924             crc = crc_word(crc1 ^ words[1] ^ crc);
 925 #if N > 2
 926             crc = crc_word(crc2 ^ words[2] ^ crc);
 927 #if N > 3
 928             crc = crc_word(crc3 ^ words[3] ^ crc);
 929 #if N > 4
 930             crc = crc_word(crc4 ^ words[4] ^ crc);
 931 #if N > 5
 932             crc = crc_word(crc5 ^ words[5] ^ crc);
 933 #endif
 934 #endif
 935 #endif
 936 #endif
 937 #endif
 938             words += N;
 939         }
 940         else {
 941             /* Big endian. */
 942 
 943             z_word_t crc0, word0, comb;
 944 #if N > 1
 945             z_word_t crc1, word1;
 946 #if N > 2
 947             z_word_t crc2, word2;
 948 #if N > 3
 949             z_word_t crc3, word3;
 950 #if N > 4
 951             z_word_t crc4, word4;
 952 #if N > 5
 953             z_word_t crc5, word5;
 954 #endif
 955 #endif
 956 #endif
 957 #endif
 958 #endif
 959 
 960             /* Initialize the CRC for each braid. */
 961             crc0 = byte_swap(crc);
 962 #if N > 1
 963             crc1 = 0;
 964 #if N > 2
 965             crc2 = 0;
 966 #if N > 3
 967             crc3 = 0;
 968 #if N > 4
 969             crc4 = 0;
 970 #if N > 5
 971             crc5 = 0;
 972 #endif
 973 #endif
 974 #endif
 975 #endif
 976 #endif
 977 
 978             /*
 979               Process the first blks-1 blocks, computing the CRCs on each braid
 980               independently.
 981              */
 982             while (--blks) {
 983                 /* Load the word for each braid into registers. */
 984                 word0 = crc0 ^ words[0];
 985 #if N > 1
 986                 word1 = crc1 ^ words[1];
 987 #if N > 2
 988                 word2 = crc2 ^ words[2];
 989 #if N > 3
 990                 word3 = crc3 ^ words[3];
 991 #if N > 4
 992                 word4 = crc4 ^ words[4];
 993 #if N > 5
 994                 word5 = crc5 ^ words[5];
 995 #endif
 996 #endif
 997 #endif
 998 #endif
 999 #endif
1000                 words += N;
1001 
1002                 /* Compute and update the CRC for each word. The loop should
1003                    get unrolled. */
1004                 crc0 = crc_braid_big_table[0][word0 & 0xff];
1005 #if N > 1
1006                 crc1 = crc_braid_big_table[0][word1 & 0xff];
1007 #if N > 2
1008                 crc2 = crc_braid_big_table[0][word2 & 0xff];
1009 #if N > 3
1010                 crc3 = crc_braid_big_table[0][word3 & 0xff];
1011 #if N > 4
1012                 crc4 = crc_braid_big_table[0][word4 & 0xff];
1013 #if N > 5
1014                 crc5 = crc_braid_big_table[0][word5 & 0xff];
1015 #endif
1016 #endif
1017 #endif
1018 #endif
1019 #endif
1020                 for (k = 1; k < W; k++) {
1021                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
1022 #if N > 1
1023                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1024 #if N > 2
1025                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1026 #if N > 3
1027                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1028 #if N > 4
1029                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1030 #if N > 5
1031                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1032 #endif
1033 #endif
1034 #endif
1035 #endif
1036 #endif
1037                 }
1038             }
1039 
1040             /*
1041               Process the last block, combining the CRCs of the N braids at the
1042               same time.
1043              */
1044             comb = crc_word_big(crc0 ^ words[0]);
1045 #if N > 1
1046             comb = crc_word_big(crc1 ^ words[1] ^ comb);
1047 #if N > 2
1048             comb = crc_word_big(crc2 ^ words[2] ^ comb);
1049 #if N > 3
1050             comb = crc_word_big(crc3 ^ words[3] ^ comb);
1051 #if N > 4
1052             comb = crc_word_big(crc4 ^ words[4] ^ comb);
1053 #if N > 5
1054             comb = crc_word_big(crc5 ^ words[5] ^ comb);
1055 #endif
1056 #endif
1057 #endif
1058 #endif
1059 #endif
1060             words += N;
1061             crc = byte_swap(comb);
1062         }
1063 
1064         /*
1065           Update the pointer to the remaining bytes to process.
1066          */
1067         buf = (unsigned char const *)words;
1068     }
1069 
1070 #endif /* W */
1071 
1072     /* Complete the computation of the CRC on any remaining bytes. */
1073     while (len >= 8) {
1074         len -= 8;
1075         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1076         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1077         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1078         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1079         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1080         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1081         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1082         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1083     }
1084     while (len) {
1085         len--;
1086         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1087     }
1088 
1089     /* Return the CRC, post-conditioned. */
1090     return crc ^ 0xffffffff;
1091 }
1092 
1093 #endif
1094 
1095 /* ========================================================================= */
1096 unsigned long ZEXPORT crc32(crc, buf, len)
1097     unsigned long crc;
1098     const unsigned char FAR *buf;
1099     uInt len;
1100 {
1101     return crc32_z(crc, buf, len);
1102 }
1103 
1104 /* ========================================================================= */
1105 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1106     uLong crc1;
1107     uLong crc2;
1108     z_off64_t len2;
1109 {
1110 #ifdef DYNAMIC_CRC_TABLE
1111     once(&made, make_crc_table);
1112 #endif /* DYNAMIC_CRC_TABLE */
1113     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1114 }
1115 
1116 /* ========================================================================= */
1117 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1118     uLong crc1;
1119     uLong crc2;
1120     z_off_t len2;
1121 {
1122     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1123 }
1124 
1125 /* ========================================================================= */
1126 uLong ZEXPORT crc32_combine_gen64(len2)
1127     z_off64_t len2;
1128 {
1129 #ifdef DYNAMIC_CRC_TABLE
1130     once(&made, make_crc_table);
1131 #endif /* DYNAMIC_CRC_TABLE */
1132     return x2nmodp(len2, 3);
1133 }
1134 
1135 /* ========================================================================= */
1136 uLong ZEXPORT crc32_combine_gen(len2)
1137     z_off_t len2;
1138 {
1139     return crc32_combine_gen64((z_off64_t)len2);
1140 }
1141 
1142 /* ========================================================================= */
1143 uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1144     uLong crc1;
1145     uLong crc2;
1146     uLong op;
1147 {
1148     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1149 }