< prev index next >

src/java.base/share/native/libzip/zlib/zcrc32.c

Print this page

 110 #    endif
 111 #  endif
 112 #endif
 113 #ifdef W
 114 #  if W == 8 && defined(Z_U8)
 115      typedef Z_U8 z_word_t;
 116 #  elif defined(Z_U4)
 117 #    undef W
 118 #    define W 4
 119      typedef Z_U4 z_word_t;
 120 #  else
 121 #    undef W
 122 #  endif
 123 #endif
 124 
 125 /* If available, use the ARM processor CRC32 instruction. */
 126 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
 127 #  define ARMCRC32
 128 #endif
 129 
 130 /* Local functions. */
 131 local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
 132 local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
 133 
 134 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 135     local z_word_t byte_swap OF((z_word_t word));
 136 #endif
 137 
 138 #if defined(W) && !defined(ARMCRC32)
 139     local z_crc_t crc_word OF((z_word_t data));
 140     local z_word_t crc_word_big OF((z_word_t data));
 141 #endif
 142 
 143 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 144 /*
 145   Swap the bytes in a z_word_t to convert between little and big endian. Any
 146   self-respecting compiler will optimize this to a single machine byte-swap
 147   instruction, if one is available. This assumes that word_t is either 32 bits
 148   or 64 bits.
 149  */
 150 local z_word_t byte_swap(word)
 151     z_word_t word;
 152 {
 153 #  if W == 8
 154     return
 155         (word & 0xff00000000000000) >> 56 |
 156         (word & 0xff000000000000) >> 40 |
 157         (word & 0xff0000000000) >> 24 |
 158         (word & 0xff00000000) >> 8 |
 159         (word & 0xff000000) << 8 |
 160         (word & 0xff0000) << 24 |
 161         (word & 0xff00) << 40 |
 162         (word & 0xff) << 56;
 163 #  else   /* W == 4 */
 164     return
 165         (word & 0xff000000) >> 24 |
 166         (word & 0xff0000) >> 8 |
 167         (word & 0xff00) << 8 |
 168         (word & 0xff) << 24;
 169 #  endif
 170 }
 171 #endif
 172 














 173 /* CRC polynomial. */
 174 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
 175 
 176 #ifdef DYNAMIC_CRC_TABLE



















 177 






















 178 local z_crc_t FAR crc_table[256];
 179 local z_crc_t FAR x2n_table[32];
 180 local void make_crc_table OF((void));
 181 #ifdef W
 182    local z_word_t FAR crc_big_table[256];
 183    local z_crc_t FAR crc_braid_table[W][256];
 184    local z_word_t FAR crc_braid_big_table[W][256];
 185    local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
 186 #endif
 187 #ifdef MAKECRCH
 188    local void write_table OF((FILE *, const z_crc_t FAR *, int));
 189    local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
 190    local void write_table64 OF((FILE *, const z_word_t FAR *, int));
 191 #endif /* MAKECRCH */
 192 
 193 /*
 194   Define a once() function depending on the availability of atomics. If this is
 195   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
 196   multiple threads, and if atomics are not available, then get_crc_table() must
 197   be called to initialize the tables and must return before any threads are
 198   allowed to compute or combine CRCs.
 199  */
 200 
 201 /* Definition of once functionality. */
 202 typedef struct once_s once_t;
 203 local void once OF((once_t *, void (*)(void)));
 204 
 205 /* Check for the availability of atomics. */
 206 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
 207     !defined(__STDC_NO_ATOMICS__)
 208 
 209 #include <stdatomic.h>
 210 
 211 /* Structure for once(), which must be initialized with ONCE_INIT. */
 212 struct once_s {
 213     atomic_flag begun;
 214     atomic_int done;
 215 };
 216 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
 217 
 218 /*
 219   Run the provided init() function exactly once, even if multiple threads
 220   invoke once() at the same time. The state must be a once_t initialized with
 221   ONCE_INIT.
 222  */
 223 local void once(state, init)
 224     once_t *state;
 225     void (*init)(void);
 226 {
 227     if (!atomic_load(&state->done)) {
 228         if (atomic_flag_test_and_set(&state->begun))
 229             while (!atomic_load(&state->done))
 230                 ;
 231         else {
 232             init();
 233             atomic_store(&state->done, 1);
 234         }
 235     }
 236 }
 237 
 238 #else   /* no atomics */
 239 
 240 /* Structure for once(), which must be initialized with ONCE_INIT. */
 241 struct once_s {
 242     volatile int begun;
 243     volatile int done;
 244 };
 245 #define ONCE_INIT {0, 0}
 246 
 247 /* Test and set. Alas, not atomic, but tries to minimize the period of
 248    vulnerability. */
 249 local int test_and_set OF((int volatile *));
 250 local int test_and_set(flag)
 251     int volatile *flag;
 252 {
 253     int was;
 254 
 255     was = *flag;
 256     *flag = 1;
 257     return was;
 258 }
 259 
 260 /* Run the provided init() function once. This is not thread-safe. */
 261 local void once(state, init)
 262     once_t *state;
 263     void (*init)(void);
 264 {
 265     if (!state->done) {
 266         if (test_and_set(&state->begun))
 267             while (!state->done)
 268                 ;
 269         else {
 270             init();
 271             state->done = 1;
 272         }
 273     }
 274 }
 275 
 276 #endif
 277 
 278 /* State for once(). */
 279 local once_t made = ONCE_INIT;
 280 
 281 /*
 282   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
 283   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
 284 

 286   with the lowest powers in the most significant bit. Then adding polynomials
 287   is just exclusive-or, and multiplying a polynomial by x is a right shift by
 288   one. If we call the above polynomial p, and represent a byte as the
 289   polynomial q, also with the lowest power in the most significant bit (so the
 290   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
 291   where a mod b means the remainder after dividing a by b.
 292 
 293   This calculation is done using the shift-register method of multiplying and
 294   taking the remainder. The register is initialized to zero, and for each
 295   incoming bit, x^32 is added mod p to the register if the bit is a one (where
 296   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
 297   (which is shifting right by one and adding x^32 mod p if the bit shifted out
 298   is a one). We start with the highest power (least significant bit) of q and
 299   repeat for all eight bits of q.
 300 
 301   The table is simply the CRC of all possible eight bit values. This is all the
 302   information needed to generate CRCs on data a byte at a time for all
 303   combinations of CRC register values and incoming bytes.
 304  */
 305 
 306 local void make_crc_table()
 307 {
 308     unsigned i, j, n;
 309     z_crc_t p;
 310 
 311     /* initialize the CRC of bytes tables */
 312     for (i = 0; i < 256; i++) {
 313         p = i;
 314         for (j = 0; j < 8; j++)
 315             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
 316         crc_table[i] = p;
 317 #ifdef W
 318         crc_big_table[i] = byte_swap(p);
 319 #endif
 320     }
 321 
 322     /* initialize the x^2^n mod p(x) table */
 323     p = (z_crc_t)1 << 30;         /* x^1 */
 324     x2n_table[0] = p;
 325     for (n = 1; n < 32; n++)
 326         x2n_table[n] = p = multmodp(p, p);
 327 

 454 
 455         /* write out zeros operator table to crc32.h */
 456         fprintf(out,
 457             "\n"
 458             "local const z_crc_t FAR x2n_table[] = {\n"
 459             "    ");
 460         write_table(out, x2n_table, 32);
 461         fprintf(out,
 462             "};\n");
 463         fclose(out);
 464     }
 465 #endif /* MAKECRCH */
 466 }
 467 
 468 #ifdef MAKECRCH
 469 
 470 /*
 471    Write the 32-bit values in table[0..k-1] to out, five per line in
 472    hexadecimal separated by commas.
 473  */
 474 local void write_table(out, table, k)
 475     FILE *out;
 476     const z_crc_t FAR *table;
 477     int k;
 478 {
 479     int n;
 480 
 481     for (n = 0; n < k; n++)
 482         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 483                 (unsigned long)(table[n]),
 484                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 485 }
 486 
 487 /*
 488    Write the high 32-bits of each value in table[0..k-1] to out, five per line
 489    in hexadecimal separated by commas.
 490  */
 491 local void write_table32hi(out, table, k)
 492 FILE *out;
 493 const z_word_t FAR *table;
 494 int k;
 495 {
 496     int n;
 497 
 498     for (n = 0; n < k; n++)
 499         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 500                 (unsigned long)(table[n] >> 32),
 501                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 502 }
 503 
 504 /*
 505   Write the 64-bit values in table[0..k-1] to out, three per line in
 506   hexadecimal separated by commas. This assumes that if there is a 64-bit
 507   type, then there is also a long long integer type, and it is at least 64
 508   bits. If not, then the type cast and format string can be adjusted
 509   accordingly.
 510  */
 511 local void write_table64(out, table, k)
 512     FILE *out;
 513     const z_word_t FAR *table;
 514     int k;
 515 {
 516     int n;
 517 
 518     for (n = 0; n < k; n++)
 519         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
 520                 (unsigned long long)(table[n]),
 521                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
 522 }
 523 
 524 /* Actually do the deed. */
 525 int main()
 526 {
 527     make_crc_table();
 528     return 0;
 529 }
 530 
 531 #endif /* MAKECRCH */
 532 
 533 #ifdef W
 534 /*
 535   Generate the little and big-endian braid tables for the given n and z_word_t
 536   size w. Each array must have room for w blocks of 256 elements.
 537  */
 538 local void braid(ltl, big, n, w)
 539     z_crc_t ltl[][256];
 540     z_word_t big[][256];
 541     int n;
 542     int w;
 543 {
 544     int k;
 545     z_crc_t i, p, q;
 546     for (k = 0; k < w; k++) {
 547         p = x2nmodp((n * w + 3 - k) << 3, 0);
 548         ltl[k][0] = 0;
 549         big[w - 1 - k][0] = 0;
 550         for (i = 1; i < 256; i++) {
 551             ltl[k][i] = q = multmodp(i << 24, p);
 552             big[w - 1 - k][i] = byte_swap(q);
 553         }
 554     }
 555 }
 556 #endif
 557 
 558 #else /* !DYNAMIC_CRC_TABLE */
 559 /* ========================================================================
 560  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
 561  * of x for combining CRC-32s, all made by make_crc_table().
 562  */
 563 #include "crc32.h"
 564 #endif /* DYNAMIC_CRC_TABLE */
 565 
 566 /* ========================================================================
 567  * Routines used for CRC calculation. Some are also required for the table
 568  * generation above.
 569  */
 570 
 571 /*
 572   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
 573   reflected. For speed, this requires that a not be zero.
 574  */
 575 local z_crc_t multmodp(a, b)
 576     z_crc_t a;
 577     z_crc_t b;
 578 {
 579     z_crc_t m, p;
 580 
 581     m = (z_crc_t)1 << 31;
 582     p = 0;
 583     for (;;) {
 584         if (a & m) {
 585             p ^= b;
 586             if ((a & (m - 1)) == 0)
 587                 break;
 588         }
 589         m >>= 1;
 590         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
 591     }
 592     return p;
 593 }
 594 
 595 /*
 596   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
 597   initialized.
 598  */
 599 local z_crc_t x2nmodp(n, k)
 600     z_off64_t n;
 601     unsigned k;
 602 {
 603     z_crc_t p;
 604 
 605     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
 606     while (n) {
 607         if (n & 1)
 608             p = multmodp(x2n_table[k & 31], p);
 609         n >>= 1;
 610         k++;
 611     }
 612     return p;
 613 }
 614 
 615 /* =========================================================================
 616  * This function can be used by asm versions of crc32(), and to force the
 617  * generation of the CRC tables in a threaded application.
 618  */
 619 const z_crc_t FAR * ZEXPORT get_crc_table()
 620 {
 621 #ifdef DYNAMIC_CRC_TABLE
 622     once(&made, make_crc_table);
 623 #endif /* DYNAMIC_CRC_TABLE */
 624     return (const z_crc_t FAR *)crc_table;
 625 }
 626 
 627 /* =========================================================================
 628  * Use ARM machine instructions if available. This will compute the CRC about
 629  * ten times faster than the braided calculation. This code does not check for
 630  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
 631  * only be defined if the compilation specifies an ARM processor architecture
 632  * that has the instructions. For example, compiling with -march=armv8.1-a or
 633  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
 634  * instructions.
 635  */
 636 #ifdef ARMCRC32
 637 
 638 /*
 639    Constants empirically determined to maximize speed. These values are from
 640    measurements on a Cortex-A57. Your mileage may vary.
 641  */
 642 #define Z_BATCH 3990                /* number of words in a batch */
 643 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
 644 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
 645 
 646 unsigned long ZEXPORT crc32_z(crc, buf, len)
 647     unsigned long crc;
 648     const unsigned char FAR *buf;
 649     z_size_t len;
 650 {
 651     z_crc_t val;
 652     z_word_t crc1, crc2;
 653     const z_word_t *word;
 654     z_word_t val0, val1, val2;
 655     z_size_t last, last2, i;
 656     z_size_t num;
 657 
 658     /* Return initial CRC, if requested. */
 659     if (buf == Z_NULL) return 0;
 660 
 661 #ifdef DYNAMIC_CRC_TABLE
 662     once(&made, make_crc_table);
 663 #endif /* DYNAMIC_CRC_TABLE */
 664 
 665     /* Pre-condition the CRC */
 666     crc = (~crc) & 0xffffffff;
 667 
 668     /* Compute the CRC up to a word boundary. */
 669     while (len && ((z_size_t)buf & 7) != 0) {
 670         len--;

 730     buf = (const unsigned char FAR *)word;
 731     while (len) {
 732         len--;
 733         val = *buf++;
 734         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
 735     }
 736 
 737     /* Return the CRC, post-conditioned. */
 738     return crc ^ 0xffffffff;
 739 }
 740 
 741 #else
 742 
 743 #ifdef W
 744 
 745 /*
 746   Return the CRC of the W bytes in the word_t data, taking the
 747   least-significant byte of the word as the first byte of data, without any pre
 748   or post conditioning. This is used to combine the CRCs of each braid.
 749  */
 750 local z_crc_t crc_word(data)
 751     z_word_t data;
 752 {
 753     int k;
 754     for (k = 0; k < W; k++)
 755         data = (data >> 8) ^ crc_table[data & 0xff];
 756     return (z_crc_t)data;
 757 }
 758 
 759 local z_word_t crc_word_big(data)
 760     z_word_t data;
 761 {
 762     int k;
 763     for (k = 0; k < W; k++)
 764         data = (data << 8) ^
 765             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
 766     return data;
 767 }
 768 
 769 #endif
 770 
 771 /* ========================================================================= */
 772 unsigned long ZEXPORT crc32_z(crc, buf, len)
 773     unsigned long crc;
 774     const unsigned char FAR *buf;
 775     z_size_t len;
 776 {
 777     /* Return initial CRC, if requested. */
 778     if (buf == Z_NULL) return 0;
 779 
 780 #ifdef DYNAMIC_CRC_TABLE
 781     once(&made, make_crc_table);
 782 #endif /* DYNAMIC_CRC_TABLE */
 783 
 784     /* Pre-condition the CRC */
 785     crc = (~crc) & 0xffffffff;
 786 
 787 #ifdef W
 788 
 789     /* If provided enough bytes, do a braided CRC calculation. */
 790     if (len >= N * W + W - 1) {
 791         z_size_t blks;
 792         z_word_t const *words;
 793         unsigned endian;
 794         int k;
 795 
 796         /* Compute the CRC up to a z_word_t boundary. */
 797         while (len && ((z_size_t)buf & (W - 1)) != 0) {
 798             len--;
 799             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 800         }
 801 
 802         /* Compute the CRC on as many N z_word_t blocks as are available. */
 803         blks = len / (N * W);
 804         len -= blks * N * W;
 805         words = (z_word_t const *)buf;
 806 
 807         /* Do endian check at execution time instead of compile time, since ARM
 808            processors can change the endianess at execution time. If the
 809            compiler knows what the endianess will be, it can optimize out the
 810            check and the unused branch. */
 811         endian = 1;
 812         if (*(unsigned char *)&endian) {
 813             /* Little endian. */
 814 
 815             z_crc_t crc0;
 816             z_word_t word0;
 817 #if N > 1
 818             z_crc_t crc1;
 819             z_word_t word1;
 820 #if N > 2
 821             z_crc_t crc2;
 822             z_word_t word2;
 823 #if N > 3
 824             z_crc_t crc3;
 825             z_word_t word3;
 826 #if N > 4
 827             z_crc_t crc4;
 828             z_word_t word4;
 829 #if N > 5

1076         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1077         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1078         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1079         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1080         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1081         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1082         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1083     }
1084     while (len) {
1085         len--;
1086         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1087     }
1088 
1089     /* Return the CRC, post-conditioned. */
1090     return crc ^ 0xffffffff;
1091 }
1092 
1093 #endif
1094 
1095 /* ========================================================================= */
1096 unsigned long ZEXPORT crc32(crc, buf, len)
1097     unsigned long crc;
1098     const unsigned char FAR *buf;
1099     uInt len;
1100 {
1101     return crc32_z(crc, buf, len);
1102 }
1103 
1104 /* ========================================================================= */
1105 uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1106     uLong crc1;
1107     uLong crc2;
1108     z_off64_t len2;
1109 {
1110 #ifdef DYNAMIC_CRC_TABLE
1111     once(&made, make_crc_table);
1112 #endif /* DYNAMIC_CRC_TABLE */
1113     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1114 }
1115 
1116 /* ========================================================================= */
1117 uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1118     uLong crc1;
1119     uLong crc2;
1120     z_off_t len2;
1121 {
1122     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1123 }
1124 
1125 /* ========================================================================= */
1126 uLong ZEXPORT crc32_combine_gen64(len2)
1127     z_off64_t len2;
1128 {
1129 #ifdef DYNAMIC_CRC_TABLE
1130     once(&made, make_crc_table);
1131 #endif /* DYNAMIC_CRC_TABLE */
1132     return x2nmodp(len2, 3);
1133 }
1134 
1135 /* ========================================================================= */
1136 uLong ZEXPORT crc32_combine_gen(len2)
1137     z_off_t len2;
1138 {
1139     return crc32_combine_gen64((z_off64_t)len2);
1140 }
1141 
1142 /* ========================================================================= */
1143 uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1144     uLong crc1;
1145     uLong crc2;
1146     uLong op;
1147 {
1148     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1149 }

 110 #    endif
 111 #  endif
 112 #endif
 113 #ifdef W
 114 #  if W == 8 && defined(Z_U8)
 115      typedef Z_U8 z_word_t;
 116 #  elif defined(Z_U4)
 117 #    undef W
 118 #    define W 4
 119      typedef Z_U4 z_word_t;
 120 #  else
 121 #    undef W
 122 #  endif
 123 #endif
 124 
 125 /* If available, use the ARM processor CRC32 instruction. */
 126 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
 127 #  define ARMCRC32
 128 #endif
 129 













 130 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 131 /*
 132   Swap the bytes in a z_word_t to convert between little and big endian. Any
 133   self-respecting compiler will optimize this to a single machine byte-swap
 134   instruction, if one is available. This assumes that word_t is either 32 bits
 135   or 64 bits.
 136  */
 137 local z_word_t byte_swap(z_word_t word) {


 138 #  if W == 8
 139     return
 140         (word & 0xff00000000000000) >> 56 |
 141         (word & 0xff000000000000) >> 40 |
 142         (word & 0xff0000000000) >> 24 |
 143         (word & 0xff00000000) >> 8 |
 144         (word & 0xff000000) << 8 |
 145         (word & 0xff0000) << 24 |
 146         (word & 0xff00) << 40 |
 147         (word & 0xff) << 56;
 148 #  else   /* W == 4 */
 149     return
 150         (word & 0xff000000) >> 24 |
 151         (word & 0xff0000) >> 8 |
 152         (word & 0xff00) << 8 |
 153         (word & 0xff) << 24;
 154 #  endif
 155 }
 156 #endif
 157 
 158 #ifdef DYNAMIC_CRC_TABLE
 159 /* =========================================================================
 160  * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
 161  * below.
 162  */
 163    local z_crc_t FAR x2n_table[32];
 164 #else
 165 /* =========================================================================
 166  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
 167  * of x for combining CRC-32s, all made by make_crc_table().
 168  */
 169 #  include "crc32.h"
 170 #endif
 171 
 172 /* CRC polynomial. */
 173 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
 174 
 175 /*
 176   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
 177   reflected. For speed, this requires that a not be zero.
 178  */
 179 local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
 180     z_crc_t m, p;
 181 
 182     m = (z_crc_t)1 << 31;
 183     p = 0;
 184     for (;;) {
 185         if (a & m) {
 186             p ^= b;
 187             if ((a & (m - 1)) == 0)
 188                 break;
 189         }
 190         m >>= 1;
 191         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
 192     }
 193     return p;
 194 }
 195 
 196 /*
 197   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
 198   initialized.
 199  */
 200 local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
 201     z_crc_t p;
 202 
 203     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
 204     while (n) {
 205         if (n & 1)
 206             p = multmodp(x2n_table[k & 31], p);
 207         n >>= 1;
 208         k++;
 209     }
 210     return p;
 211 }
 212 
 213 #ifdef DYNAMIC_CRC_TABLE
 214 /* =========================================================================
 215  * Build the tables for byte-wise and braided CRC-32 calculations, and a table
 216  * of powers of x for combining CRC-32s.
 217  */
 218 local z_crc_t FAR crc_table[256];


 219 #ifdef W
 220    local z_word_t FAR crc_big_table[256];
 221    local z_crc_t FAR crc_braid_table[W][256];
 222    local z_word_t FAR crc_braid_big_table[W][256];
 223    local void braid(z_crc_t [][256], z_word_t [][256], int, int);
 224 #endif
 225 #ifdef MAKECRCH
 226    local void write_table(FILE *, const z_crc_t FAR *, int);
 227    local void write_table32hi(FILE *, const z_word_t FAR *, int);
 228    local void write_table64(FILE *, const z_word_t FAR *, int);
 229 #endif /* MAKECRCH */
 230 
 231 /*
 232   Define a once() function depending on the availability of atomics. If this is
 233   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
 234   multiple threads, and if atomics are not available, then get_crc_table() must
 235   be called to initialize the tables and must return before any threads are
 236   allowed to compute or combine CRCs.
 237  */
 238 
 239 /* Definition of once functionality. */
 240 typedef struct once_s once_t;

 241 
 242 /* Check for the availability of atomics. */
 243 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
 244     !defined(__STDC_NO_ATOMICS__)
 245 
 246 #include <stdatomic.h>
 247 
 248 /* Structure for once(), which must be initialized with ONCE_INIT. */
 249 struct once_s {
 250     atomic_flag begun;
 251     atomic_int done;
 252 };
 253 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
 254 
 255 /*
 256   Run the provided init() function exactly once, even if multiple threads
 257   invoke once() at the same time. The state must be a once_t initialized with
 258   ONCE_INIT.
 259  */
 260 local void once(once_t *state, void (*init)(void)) {



 261     if (!atomic_load(&state->done)) {
 262         if (atomic_flag_test_and_set(&state->begun))
 263             while (!atomic_load(&state->done))
 264                 ;
 265         else {
 266             init();
 267             atomic_store(&state->done, 1);
 268         }
 269     }
 270 }
 271 
 272 #else   /* no atomics */
 273 
 274 /* Structure for once(), which must be initialized with ONCE_INIT. */
 275 struct once_s {
 276     volatile int begun;
 277     volatile int done;
 278 };
 279 #define ONCE_INIT {0, 0}
 280 
 281 /* Test and set. Alas, not atomic, but tries to minimize the period of
 282    vulnerability. */
 283 local int test_and_set(int volatile *flag) {



 284     int was;
 285 
 286     was = *flag;
 287     *flag = 1;
 288     return was;
 289 }
 290 
 291 /* Run the provided init() function once. This is not thread-safe. */
 292 local void once(once_t *state, void (*init)(void)) {



 293     if (!state->done) {
 294         if (test_and_set(&state->begun))
 295             while (!state->done)
 296                 ;
 297         else {
 298             init();
 299             state->done = 1;
 300         }
 301     }
 302 }
 303 
 304 #endif
 305 
 306 /* State for once(). */
 307 local once_t made = ONCE_INIT;
 308 
 309 /*
 310   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
 311   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
 312 

 314   with the lowest powers in the most significant bit. Then adding polynomials
 315   is just exclusive-or, and multiplying a polynomial by x is a right shift by
 316   one. If we call the above polynomial p, and represent a byte as the
 317   polynomial q, also with the lowest power in the most significant bit (so the
 318   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
 319   where a mod b means the remainder after dividing a by b.
 320 
 321   This calculation is done using the shift-register method of multiplying and
 322   taking the remainder. The register is initialized to zero, and for each
 323   incoming bit, x^32 is added mod p to the register if the bit is a one (where
 324   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
 325   (which is shifting right by one and adding x^32 mod p if the bit shifted out
 326   is a one). We start with the highest power (least significant bit) of q and
 327   repeat for all eight bits of q.
 328 
 329   The table is simply the CRC of all possible eight bit values. This is all the
 330   information needed to generate CRCs on data a byte at a time for all
 331   combinations of CRC register values and incoming bytes.
 332  */
 333 
 334 local void make_crc_table(void) {

 335     unsigned i, j, n;
 336     z_crc_t p;
 337 
 338     /* initialize the CRC of bytes tables */
 339     for (i = 0; i < 256; i++) {
 340         p = i;
 341         for (j = 0; j < 8; j++)
 342             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
 343         crc_table[i] = p;
 344 #ifdef W
 345         crc_big_table[i] = byte_swap(p);
 346 #endif
 347     }
 348 
 349     /* initialize the x^2^n mod p(x) table */
 350     p = (z_crc_t)1 << 30;         /* x^1 */
 351     x2n_table[0] = p;
 352     for (n = 1; n < 32; n++)
 353         x2n_table[n] = p = multmodp(p, p);
 354 

 481 
 482         /* write out zeros operator table to crc32.h */
 483         fprintf(out,
 484             "\n"
 485             "local const z_crc_t FAR x2n_table[] = {\n"
 486             "    ");
 487         write_table(out, x2n_table, 32);
 488         fprintf(out,
 489             "};\n");
 490         fclose(out);
 491     }
 492 #endif /* MAKECRCH */
 493 }
 494 
 495 #ifdef MAKECRCH
 496 
 497 /*
 498    Write the 32-bit values in table[0..k-1] to out, five per line in
 499    hexadecimal separated by commas.
 500  */
 501 local void write_table(FILE *out, const z_crc_t FAR *table, int k) {




 502     int n;
 503 
 504     for (n = 0; n < k; n++)
 505         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 506                 (unsigned long)(table[n]),
 507                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 508 }
 509 
 510 /*
 511    Write the high 32-bits of each value in table[0..k-1] to out, five per line
 512    in hexadecimal separated by commas.
 513  */
 514 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {




 515     int n;
 516 
 517     for (n = 0; n < k; n++)
 518         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
 519                 (unsigned long)(table[n] >> 32),
 520                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 521 }
 522 
 523 /*
 524   Write the 64-bit values in table[0..k-1] to out, three per line in
 525   hexadecimal separated by commas. This assumes that if there is a 64-bit
 526   type, then there is also a long long integer type, and it is at least 64
 527   bits. If not, then the type cast and format string can be adjusted
 528   accordingly.
 529  */
 530 local void write_table64(FILE *out, const z_word_t FAR *table, int k) {




 531     int n;
 532 
 533     for (n = 0; n < k; n++)
 534         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
 535                 (unsigned long long)(table[n]),
 536                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
 537 }
 538 
 539 /* Actually do the deed. */
 540 int main(void) {

 541     make_crc_table();
 542     return 0;
 543 }
 544 
 545 #endif /* MAKECRCH */
 546 
 547 #ifdef W
 548 /*
 549   Generate the little and big-endian braid tables for the given n and z_word_t
 550   size w. Each array must have room for w blocks of 256 elements.
 551  */
 552 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {





 553     int k;
 554     z_crc_t i, p, q;
 555     for (k = 0; k < w; k++) {
 556         p = x2nmodp((n * w + 3 - k) << 3, 0);
 557         ltl[k][0] = 0;
 558         big[w - 1 - k][0] = 0;
 559         for (i = 1; i < 256; i++) {
 560             ltl[k][i] = q = multmodp(i << 24, p);
 561             big[w - 1 - k][i] = byte_swap(q);
 562         }
 563     }
 564 }
 565 #endif
 566 






 567 #endif /* DYNAMIC_CRC_TABLE */
 568 

















































 569 /* =========================================================================
 570  * This function can be used by asm versions of crc32(), and to force the
 571  * generation of the CRC tables in a threaded application.
 572  */
 573 const z_crc_t FAR * ZEXPORT get_crc_table(void) {

 574 #ifdef DYNAMIC_CRC_TABLE
 575     once(&made, make_crc_table);
 576 #endif /* DYNAMIC_CRC_TABLE */
 577     return (const z_crc_t FAR *)crc_table;
 578 }
 579 
 580 /* =========================================================================
 581  * Use ARM machine instructions if available. This will compute the CRC about
 582  * ten times faster than the braided calculation. This code does not check for
 583  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
 584  * only be defined if the compilation specifies an ARM processor architecture
 585  * that has the instructions. For example, compiling with -march=armv8.1-a or
 586  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
 587  * instructions.
 588  */
 589 #ifdef ARMCRC32
 590 
 591 /*
 592    Constants empirically determined to maximize speed. These values are from
 593    measurements on a Cortex-A57. Your mileage may vary.
 594  */
 595 #define Z_BATCH 3990                /* number of words in a batch */
 596 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
 597 #define Z_BATCH_MIN 800             /* fewest words in a final batch */
 598 
 599 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
 600                               z_size_t len) {



 601     z_crc_t val;
 602     z_word_t crc1, crc2;
 603     const z_word_t *word;
 604     z_word_t val0, val1, val2;
 605     z_size_t last, last2, i;
 606     z_size_t num;
 607 
 608     /* Return initial CRC, if requested. */
 609     if (buf == Z_NULL) return 0;
 610 
 611 #ifdef DYNAMIC_CRC_TABLE
 612     once(&made, make_crc_table);
 613 #endif /* DYNAMIC_CRC_TABLE */
 614 
 615     /* Pre-condition the CRC */
 616     crc = (~crc) & 0xffffffff;
 617 
 618     /* Compute the CRC up to a word boundary. */
 619     while (len && ((z_size_t)buf & 7) != 0) {
 620         len--;

 680     buf = (const unsigned char FAR *)word;
 681     while (len) {
 682         len--;
 683         val = *buf++;
 684         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
 685     }
 686 
 687     /* Return the CRC, post-conditioned. */
 688     return crc ^ 0xffffffff;
 689 }
 690 
 691 #else
 692 
 693 #ifdef W
 694 
 695 /*
 696   Return the CRC of the W bytes in the word_t data, taking the
 697   least-significant byte of the word as the first byte of data, without any pre
 698   or post conditioning. This is used to combine the CRCs of each braid.
 699  */
 700 local z_crc_t crc_word(z_word_t data) {


 701     int k;
 702     for (k = 0; k < W; k++)
 703         data = (data >> 8) ^ crc_table[data & 0xff];
 704     return (z_crc_t)data;
 705 }
 706 
 707 local z_word_t crc_word_big(z_word_t data) {


 708     int k;
 709     for (k = 0; k < W; k++)
 710         data = (data << 8) ^
 711             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
 712     return data;
 713 }
 714 
 715 #endif
 716 
 717 /* ========================================================================= */
 718 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
 719                               z_size_t len) {



 720     /* Return initial CRC, if requested. */
 721     if (buf == Z_NULL) return 0;
 722 
 723 #ifdef DYNAMIC_CRC_TABLE
 724     once(&made, make_crc_table);
 725 #endif /* DYNAMIC_CRC_TABLE */
 726 
 727     /* Pre-condition the CRC */
 728     crc = (~crc) & 0xffffffff;
 729 
 730 #ifdef W
 731 
 732     /* If provided enough bytes, do a braided CRC calculation. */
 733     if (len >= N * W + W - 1) {
 734         z_size_t blks;
 735         z_word_t const *words;
 736         unsigned endian;
 737         int k;
 738 
 739         /* Compute the CRC up to a z_word_t boundary. */
 740         while (len && ((z_size_t)buf & (W - 1)) != 0) {
 741             len--;
 742             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
 743         }
 744 
 745         /* Compute the CRC on as many N z_word_t blocks as are available. */
 746         blks = len / (N * W);
 747         len -= blks * N * W;
 748         words = (z_word_t const *)buf;
 749 
 750         /* Do endian check at execution time instead of compile time, since ARM
 751            processors can change the endianness at execution time. If the
 752            compiler knows what the endianness will be, it can optimize out the
 753            check and the unused branch. */
 754         endian = 1;
 755         if (*(unsigned char *)&endian) {
 756             /* Little endian. */
 757 
 758             z_crc_t crc0;
 759             z_word_t word0;
 760 #if N > 1
 761             z_crc_t crc1;
 762             z_word_t word1;
 763 #if N > 2
 764             z_crc_t crc2;
 765             z_word_t word2;
 766 #if N > 3
 767             z_crc_t crc3;
 768             z_word_t word3;
 769 #if N > 4
 770             z_crc_t crc4;
 771             z_word_t word4;
 772 #if N > 5

1019         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1020         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1021         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1022         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1023         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1024         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1025         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1026     }
1027     while (len) {
1028         len--;
1029         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1030     }
1031 
1032     /* Return the CRC, post-conditioned. */
1033     return crc ^ 0xffffffff;
1034 }
1035 
1036 #endif
1037 
1038 /* ========================================================================= */
1039 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1040                             uInt len) {



1041     return crc32_z(crc, buf, len);
1042 }
1043 
1044 /* ========================================================================= */
1045 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {




1046 #ifdef DYNAMIC_CRC_TABLE
1047     once(&made, make_crc_table);
1048 #endif /* DYNAMIC_CRC_TABLE */
1049     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1050 }
1051 
1052 /* ========================================================================= */
1053 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {




1054     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1055 }
1056 
1057 /* ========================================================================= */
1058 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {


1059 #ifdef DYNAMIC_CRC_TABLE
1060     once(&made, make_crc_table);
1061 #endif /* DYNAMIC_CRC_TABLE */
1062     return x2nmodp(len2, 3);
1063 }
1064 
1065 /* ========================================================================= */
1066 uLong ZEXPORT crc32_combine_gen(z_off_t len2) {


1067     return crc32_combine_gen64((z_off64_t)len2);
1068 }
1069 
1070 /* ========================================================================= */
1071 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {




1072     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1073 }
< prev index next >