2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2022 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
/* Define SUPPORT_XXH3 only when XXH_VERSION_NUMBER is at least 800.
 * NOTE(review): the enclosing conditional and the closing directives are not
 * visible in this excerpt. */
33 # if XXH_VERSION_NUMBER >= 800
34 # define SUPPORT_XXH3 1
/* Globals that are defined in other translation units. */
39 extern int whole_file;
40 extern int checksum_seed;
41 extern int protocol_version;
42 extern int proper_seed_order;
43 extern const char *checksum_choice;
/* Bit flags kept in the flags member of a name_num_item.
 *   NNI_BUILTIN - set on the md5 and md4 table entries below; csum_evp_md()
 *                 clears it when it sets NNI_EVP_OK.
 *   NNI_EVP     - csum_evp_md() only looks up an EVP digest for entries that
 *                 carry this bit.
 *   NNI_EVP_OK  - set by csum_evp_md() next to its trial EVP_DigestInit_ex()
 *                 call (presumably on success; the branch is partly hidden). */
45 #define NNI_BUILTIN (1<<0)
46 #define NNI_EVP (1<<1)
47 #define NNI_EVP_OK (1<<2)
/* Table of checksum names backing valid_checksums.
 * Each initializer supplies a CSUM_ number, a set of NNI_ flags, the name
 * string and a trailing NULL (the meaning of the last member is not visible
 * here). xxh64 and xxhash both map to CSUM_XXH64. The sha1, sha256 and sha512
 * rows are compiled in only when the matching digest-length macro is defined.
 * NOTE(review): the conditionals around the xxhash rows, the closing
 * directives and the end of the initializer are missing from this excerpt. */
49 struct name_num_item valid_checksums_items[] = {
51 { CSUM_XXH3_128, 0, "xxh128", NULL },
52 { CSUM_XXH3_64, 0, "xxh3", NULL },
55 { CSUM_XXH64, 0, "xxh64", NULL },
56 { CSUM_XXH64, 0, "xxhash", NULL },
58 { CSUM_MD5, NNI_BUILTIN|NNI_EVP, "md5", NULL },
59 { CSUM_MD4, NNI_BUILTIN|NNI_EVP, "md4", NULL },
60 #ifdef SHA_DIGEST_LENGTH
61 { CSUM_SHA1, NNI_EVP, "sha1", NULL },
63 #ifdef SHA256_DIGEST_LENGTH
64 { CSUM_SHA256, NNI_EVP, "sha256", NULL },
66 #ifdef SHA512_DIGEST_LENGTH
67 { CSUM_SHA512, NNI_EVP, "sha512", NULL },
69 { CSUM_NONE, 0, "none", NULL },
/* Name/number object for the checksum list, labelled checksum and backed by
 * valid_checksums_items. Its negotiated_nni and list members are read later in
 * this file (parse_checksum_choice and init_checksum_choices). */
73 struct name_num_obj valid_checksums = {
74 "checksum", NULL, 0, 0, valid_checksums_items
/* Table of checksum names backing valid_auth_checksums, listed in the order
 * sha512, sha256, sha1, md5, md4. The SHA rows are compiled in only when the
 * matching digest-length macro is defined.
 * NOTE(review): the closing directives and the end of the initializer are
 * missing from this excerpt. */
77 struct name_num_item valid_auth_checksums_items[] = {
78 #ifdef SHA512_DIGEST_LENGTH
79 { CSUM_SHA512, NNI_EVP, "sha512", NULL },
81 #ifdef SHA256_DIGEST_LENGTH
82 { CSUM_SHA256, NNI_EVP, "sha256", NULL },
84 #ifdef SHA_DIGEST_LENGTH
85 { CSUM_SHA1, NNI_EVP, "sha1", NULL },
87 { CSUM_MD5, NNI_BUILTIN|NNI_EVP, "md5", NULL },
88 { CSUM_MD4, NNI_BUILTIN|NNI_EVP, "md4", NULL },
/* Name/number object labelled daemon auth checksum and backed by
 * valid_auth_checksums_items. verify_digest() looks entries up in it by number
 * and init_checksum_choices() walks its list member. */
92 struct name_num_obj valid_auth_checksums = {
93 "daemon auth checksum", NULL, 0, 0, valid_auth_checksums_items
/* Items handed out by parse_csum_name() when no explicit checksum name is
 * given. parse_csum_name() overwrites implied_checksum_md4.num with one of the
 * older MD4 variants depending on protocol_version before returning it. */
96 /* These cannot make use of openssl, so they're marked just as built-in */
97 struct name_num_item implied_checksum_md4 =
98 { CSUM_MD4, NNI_BUILTIN, "md4", NULL };
99 struct name_num_item implied_checksum_md5 =
100 { CSUM_MD5, NNI_BUILTIN, "md5", NULL };
/* Current checksum selections; all of these are assigned by
 * parse_checksum_choice(). The evp_md pointers hold whatever csum_evp_md()
 * returned for the corresponding item. */
102 struct name_num_item *xfer_sum_nni; /* used for the transfer checksum2 computations */
103 const EVP_MD *xfer_sum_evp_md;
105 struct name_num_item *file_sum_nni; /* used for the pre-transfer --checksum computations */
106 const EVP_MD *file_sum_evp_md;
/* file_sum_extra_cnt is file_sum_len divided by EXTRA_LEN, rounded up. */
107 int file_sum_len, file_sum_extra_cnt;
/* EVP context shared by csum_evp_md(), sum_init(), sum_update() and sum_end();
 * it is created on first use with EVP_MD_CTX_create(). */
110 EVP_MD_CTX *ctx_evp = NULL;
/* Set to 1 at the end of init_checksum_choices(), which tests it on entry. */
112 static int initialized_choices = 0;
/* Translate a checksum name into its name_num_item.
 *
 * name: checksum name, or NULL to request the implied default.
 * len:  length of name to consider; callers in this file pass -1 for a whole
 *       string (how a negative value is normalized is not visible here).
 *
 * A NULL name, or a 4-character name matching auto case-insensitively, picks
 * an implied item based on protocol_version: with protocol_version >= 30 and
 * proper_seed_order unset the implied md5 item is returned; the implied md4
 * item has its num set to CSUM_MD4_OLD (>= 27), CSUM_MD4_BUSTED (>= 21) or
 * CSUM_MD4_ARCHAIC before it is returned. Any other name is looked up in
 * valid_checksums with get_nni_by_name(); the visible failure path prints an
 * error and calls exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): braces, some conditions and the final return are missing from
 * this excerpt, so the exact nesting of the branches above should be confirmed
 * against the complete file. */
114 struct name_num_item *parse_csum_name(const char *name, int len)
116 struct name_num_item *nni;
121 init_checksum_choices();
123 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
124 if (protocol_version >= 30) {
125 if (!proper_seed_order)
126 return &implied_checksum_md5;
130 if (protocol_version >= 27)
131 implied_checksum_md4.num = CSUM_MD4_OLD;
132 else if (protocol_version >= 21)
133 implied_checksum_md4.num = CSUM_MD4_BUSTED;
135 implied_checksum_md4.num = CSUM_MD4_ARCHAIC;
136 return &implied_checksum_md4;
140 nni = get_nni_by_name(&valid_checksums, name, len);
143 rprintf(FERROR, "unknown checksum name: %s\n", name);
144 exit_cleanup(RERR_UNSUPPORTED);
/* Look up the OpenSSL EVP digest that corresponds to nni.
 *
 * Items without NNI_EVP are rejected up front, and CSUM_MD5 gets a special
 * case (its handling is not visible here). Otherwise the digest is fetched by
 * name with EVP_get_digestbyname(). The first time a digest is found for an
 * item (NNI_EVP_OK still clear) it is test-initialized on the shared ctx_evp;
 * the flags update that clears NNI_BUILTIN and sets NNI_EVP_OK follows that
 * test, and a later line clears NNI_EVP.
 *
 * NOTE(review): the return statements and the branch structure between the
 * visible lines are missing from this excerpt; presumably the function returns
 * NULL when no usable EVP digest exists - confirm against the complete file. */
150 static const EVP_MD *csum_evp_md(struct name_num_item *nni)
154 if (!(nni->flags & NNI_EVP))
158 if (nni->num == CSUM_MD5)
162 emd = EVP_get_digestbyname(nni->name);
163 if (emd && !(nni->flags & NNI_EVP_OK)) { /* Make sure it works before we advertise it */
164 if (!ctx_evp && !(ctx_evp = EVP_MD_CTX_create()))
165 out_of_memory("csum_evp_md");
166 /* Some routines are marked as legacy and are not enabled in the openssl.cnf file.
167 * If we can't init the emd, we'll fall back to our built-in code. */
168 if (EVP_DigestInit_ex(ctx_evp, emd, NULL) == 0)
171 nni->flags = (nni->flags & ~NNI_BUILTIN) | NNI_EVP_OK;
174 nni->flags &= ~NNI_EVP;
/* Resolve the checksum selections and cache everything derived from them.
 *
 * If valid_checksums.negotiated_nni is set, both xfer_sum_nni and file_sum_nni
 * take that item. Otherwise checksum_choice is parsed: when it contains a
 * comma, the text before the comma names the transfer checksum and the text
 * after it names the file checksum; without a comma one name serves both.
 * When am_server is set and checksum_choice was given, the pair is passed to
 * validate_choice_vs_env().
 *
 * The function then stores the digest lengths, the EVP digests returned by
 * csum_evp_md(), and file_sum_extra_cnt, and makes checksum_choice point at
 * the name actually in use.
 *
 * final_call: when nonzero and the NSTR debug level is at least 3 (server) or
 *             1 (client), the chosen checksum is reported through rprintf().
 *
 * NOTE(review): braces, else lines and the statement guarded by the CSUM_NONE
 * test are missing from this excerpt. */
181 void parse_checksum_choice(int final_call)
183 if (valid_checksums.negotiated_nni)
184 xfer_sum_nni = file_sum_nni = valid_checksums.negotiated_nni;
186 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
188 xfer_sum_nni = parse_csum_name(checksum_choice, cp - checksum_choice);
189 file_sum_nni = parse_csum_name(cp+1, -1);
191 xfer_sum_nni = file_sum_nni = parse_csum_name(checksum_choice, -1);
192 if (am_server && checksum_choice)
193 validate_choice_vs_env(NSTR_CHECKSUM, xfer_sum_nni->num, file_sum_nni->num);
195 xfer_sum_len = csum_len_for_type(xfer_sum_nni->num, 0);
196 file_sum_len = csum_len_for_type(file_sum_nni->num, 0);
197 xfer_sum_evp_md = csum_evp_md(xfer_sum_nni);
198 file_sum_evp_md = csum_evp_md(file_sum_nni);
/* Round file_sum_len up to a whole number of EXTRA_LEN units. */
200 file_sum_extra_cnt = (file_sum_len + EXTRA_LEN - 1) / EXTRA_LEN;
202 if (xfer_sum_nni->num == CSUM_NONE)
205 /* Snag the checksum name for both write_batch's option output & the following debug output. */
206 if (valid_checksums.negotiated_nni)
207 checksum_choice = valid_checksums.negotiated_nni->name;
208 else if (checksum_choice == NULL)
209 checksum_choice = xfer_sum_nni->name;
211 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
212 rprintf(FINFO, "%s%s checksum: %s\n",
213 am_server ? "Server" : "Client",
214 valid_checksums.negotiated_nni ? " negotiated" : "",
/* Return the digest length for checksum type cst.
 *
 * cst:        a CSUM_ number.
 * flist_csum: in the visible code this only affects CSUM_MD4_ARCHAIC, where a
 *             true value selects the 2-byte file-list form instead of
 *             MD4_DIGEST_LEN.
 *
 * The SHA lengths are available only when the matching digest-length macro is
 * defined. An unhandled type ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): the switch header, most case labels and the closing
 * directives are missing from this excerpt. */
219 int csum_len_for_type(int cst, BOOL flist_csum)
224 case CSUM_MD4_ARCHAIC:
225 /* The oldest checksum code is rather weird: the file-list code only sent
226 * 2-byte checksums, but all other checksums were full MD4 length. */
227 return flist_csum ? 2 : MD4_DIGEST_LEN;
230 case CSUM_MD4_BUSTED:
231 return MD4_DIGEST_LEN;
233 return MD5_DIGEST_LEN;
234 #ifdef SHA_DIGEST_LENGTH
236 return SHA_DIGEST_LENGTH;
238 #ifdef SHA256_DIGEST_LENGTH
240 return SHA256_DIGEST_LENGTH;
242 #ifdef SHA512_DIGEST_LENGTH
244 return SHA512_DIGEST_LENGTH;
251 default: /* paranoia to prevent missing case values */
252 exit_cleanup(RERR_UNSUPPORTED);
/* NOTE(review): only the MD4_ARCHAIC and MD4_BUSTED case labels and the
 * default arm, which calls exit_cleanup(RERR_UNSUPPORTED), are visible for
 * canonical_checksum() in this excerpt. The remaining case labels, every
 * return statement and the end of the header comment below are missing, so
 * the mapping from csum_type to 0, 1 or -1 must be taken from the complete
 * file. */
257 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
258 * Returns 1 if the public sum order matches our internal sum order.
259 * Returns -1 if the public sum order is the reverse of our internal sum order.
261 int canonical_checksum(int csum_type)
265 case CSUM_MD4_ARCHAIC:
267 case CSUM_MD4_BUSTED:
279 default: /* paranoia to prevent missing case values */
280 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the 32-bit weak checksum of len bytes starting at buf1.
 *
 * The bytes are read through an schar pointer and CHAR_OFFSET is added for
 * every byte. s1 accumulates the byte sum and s2 accumulates the running
 * values of s1. The result places the low 16 bits of s1 in the low half and
 * s2 shifted left by 16 in the high half.
 *
 * This definition is compiled only when USE_ROLL_SIMD is not defined.
 * NOTE(review): the declarations and initialization of s1, s2 and i, the
 * comment delimiters around the description below and the closing directive
 * are missing from this excerpt. */
285 #ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. */
287 a simple 32 bit checksum that can be updated from either end
288 (inspired by Mark Adler's Adler-32 checksum)
290 uint32 get_checksum1(char *buf1, int32 len)
294 schar *buf = (schar *)buf1;
/* Four bytes per iteration: s2 is advanced by the four intermediate values
 * that s1 would take, then s1 is advanced by the four bytes. */
297 for (i = 0; i < (len-4); i+=4) {
298 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
299 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
/* Remaining bytes are folded in one at a time. */
301 for (; i < len; i++) {
302 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
304 return (s1 & 0xffff) + (s2 << 16);
/* Compute the checksum of len bytes at buf and store the digest in sum, using
 * the transfer checksum held in xfer_sum_nni and xfer_sum_evp_md.
 *
 * - When xfer_sum_evp_md is set the digest is computed with the EVP API on a
 *   function-local static context; a 4-byte encoding of checksum_seed is fed
 *   in ahead of the data (the condition guarding the seed is not visible).
 * - The xxhash variants pass checksum_seed as the hash seed and store the
 *   result with SIVAL64; the 128-bit form stores low64 at offset 0 and high64
 *   at offset 8.
 * - The MD5 arm hashes the seed before the data when proper_seed_order is set
 *   and, judging by the visible statement order, after the data otherwise.
 * - The legacy MD4 arm copies the data into buf1, writes checksum_seed after
 *   it, hashes whole CSUM_CHUNK pieces and then performs a final update when
 *   bytes remain or when the type number is greater than CSUM_MD4_BUSTED.
 * - Any other type ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): local declarations, several case labels, break statements and
 * the delimiters of the MD4 explanation are missing from this excerpt. */
308 void get_checksum2(char *buf, int32 len, char *sum)
311 if (xfer_sum_evp_md) {
312 static EVP_MD_CTX *evp = NULL;
314 if (!evp && !(evp = EVP_MD_CTX_create()))
315 out_of_memory("get_checksum2");
316 EVP_DigestInit_ex(evp, xfer_sum_evp_md, NULL);
318 SIVALu(seedbuf, 0, checksum_seed);
319 EVP_DigestUpdate(evp, seedbuf, 4);
321 EVP_DigestUpdate(evp, (uchar *)buf, len);
322 EVP_DigestFinal_ex(evp, (uchar *)sum, NULL);
325 switch (xfer_sum_nni->num) {
326 #ifdef SUPPORT_XXHASH
328 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
333 SIVAL64(sum, 0, XXH3_64bits_withSeed(buf, len, checksum_seed));
335 case CSUM_XXH3_128: {
336 XXH128_hash_t digest = XXH3_128bits_withSeed(buf, len, checksum_seed);
337 SIVAL64(sum, 0, digest.low64);
338 SIVAL64(sum, 8, digest.high64);
346 if (proper_seed_order) {
348 SIVALu(seedbuf, 0, checksum_seed);
349 md5_update(&m5, seedbuf, 4);
351 md5_update(&m5, (uchar *)buf, len);
353 md5_update(&m5, (uchar *)buf, len);
355 SIVALu(seedbuf, 0, checksum_seed);
356 md5_update(&m5, seedbuf, 4);
359 md5_result(&m5, (uchar *)sum);
364 case CSUM_MD4_BUSTED:
365 case CSUM_MD4_ARCHAIC: {
376 buf1 = new_array(char, len+4);
380 memcpy(buf1, buf, len);
382 SIVAL(buf1,len,checksum_seed);
386 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
387 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
390 * Prior to version 27 an incorrect MD4 checksum was computed
391 * by failing to call mdfour_tail() for block sizes that
392 * are multiples of 64. This is fixed by calling mdfour_update()
393 * even when there are no more bytes.
395 if (len - i > 0 || xfer_sum_nni->num > CSUM_MD4_BUSTED)
396 mdfour_update(&m, (uchar *)(buf1+i), len-i);
398 mdfour_result(&m, (uchar *)sum);
401 default: /* paranoia to prevent missing case values */
402 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the checksum of the whole file fname and store the digest in sum,
 * using the file checksum held in file_sum_nni and file_sum_evp_md.
 *
 * fname: path handed to do_open() with O_RDONLY.
 * st_p:  supplies the file length through st_size.
 * sum:   output buffer; the visible memset clears file_sum_len bytes of it
 *        (presumably on the open-failure path; the guard is not visible).
 *
 * The file is mapped with map_file() and read through map_ptr() in CHUNK_SIZE
 * pieces (CSUM_CHUNK pieces for the legacy MD4 arm), followed by one update
 * for the remainder. Unlike get_checksum2(), no visible line here feeds
 * checksum_seed into the digest: XXH64 is reset with seed 0 and the XXH3
 * states are reset without a seed. The legacy MD4 arm performs its final
 * update when bytes remain or when the type number is greater than
 * CSUM_MD4_BUSTED. An unsupported type reports an error and calls
 * exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): local declarations, case labels, the remainder guards, break
 * statements and the cleanup at the end are missing from this excerpt. */
406 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
408 struct map_struct *buf;
409 OFF_T i, len = st_p->st_size;
413 fd = do_open(fname, O_RDONLY, 0);
415 memset(sum, 0, file_sum_len);
419 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
422 if (file_sum_evp_md) {
423 static EVP_MD_CTX *evp = NULL;
424 if (!evp && !(evp = EVP_MD_CTX_create()))
425 out_of_memory("file_checksum");
427 EVP_DigestInit_ex(evp, file_sum_evp_md, NULL);
429 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
430 EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
432 remainder = (int32)(len - i);
434 EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, remainder), remainder);
436 EVP_DigestFinal_ex(evp, (uchar *)sum, NULL);
439 switch (file_sum_nni->num) {
440 #ifdef SUPPORT_XXHASH
442 static XXH64_state_t* state = NULL;
443 if (!state && !(state = XXH64_createState()))
444 out_of_memory("file_checksum");
446 XXH64_reset(state, 0);
448 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
449 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
451 remainder = (int32)(len - i);
453 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
455 SIVAL64(sum, 0, XXH64_digest(state));
461 static XXH3_state_t* state = NULL;
462 if (!state && !(state = XXH3_createState()))
463 out_of_memory("file_checksum");
465 XXH3_64bits_reset(state);
467 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
468 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
470 remainder = (int32)(len - i);
472 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
474 SIVAL64(sum, 0, XXH3_64bits_digest(state));
477 case CSUM_XXH3_128: {
478 XXH128_hash_t digest;
479 static XXH3_state_t* state = NULL;
480 if (!state && !(state = XXH3_createState()))
481 out_of_memory("file_checksum");
483 XXH3_128bits_reset(state);
485 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
486 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
488 remainder = (int32)(len - i);
490 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
492 digest = XXH3_128bits_digest(state);
493 SIVAL64(sum, 0, digest.low64);
494 SIVAL64(sum, 8, digest.high64);
503 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
504 md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
506 remainder = (int32)(len - i);
508 md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
510 md5_result(&m5, (uchar *)sum);
515 case CSUM_MD4_BUSTED:
516 case CSUM_MD4_ARCHAIC: {
521 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
522 mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
524 /* Prior to version 27 an incorrect MD4 checksum was computed
525 * by failing to call mdfour_tail() for block sizes that
526 * are multiples of 64. This is fixed by calling mdfour_update()
527 * even when there are no more bytes. */
528 remainder = (int32)(len - i);
529 if (remainder > 0 || file_sum_nni->num > CSUM_MD4_BUSTED)
530 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
532 mdfour_result(&m, (uchar *)sum);
536 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
537 file_sum_nni->name, file_sum_nni->num);
538 exit_cleanup(RERR_UNSUPPORTED);
/* State shared by sum_init(), sum_update() and sum_end(). */
/* Number of bytes staged in ctx_md.buffer by the legacy MD4 path of
 * sum_update() and flushed by sum_end(). */
545 static int32 sumresidue;
/* Context used by both the md5 and the mdfour calls in those routines. */
546 static md_context ctx_md;
547 #ifdef SUPPORT_XXHASH
/* xxhash states, allocated on first use in sum_init(). */
548 static XXH64_state_t* xxh64_state;
551 static XXH3_state_t* xxh3_state;
/* Item and EVP digest of the computation in progress; sum_update() and
 * sum_end() dispatch on them. */
553 static struct name_num_item *cur_sum_nni;
554 static const EVP_MD *cur_sum_evp_md;
/* Start an incremental checksum computation that is continued with
 * sum_update() and finished with sum_end().
 *
 * nni:  checksum item to use; the visible fallback calls
 *       parse_csum_name(NULL, 0) (presumably only when nni is NULL - the guard
 *       is not visible in this excerpt).
 * seed: not referenced on any visible line; its use must be confirmed against
 *       the complete file.
 *
 * The function records the digest length and the EVP digest for the item.
 * With an EVP digest it (re)initializes the shared ctx_evp; otherwise it
 * resets the xxhash state or the MD4 context that matches the type. An
 * unhandled type ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * Fix: the out-of-memory label in the EVP branch named file_checksum, a
 * copy-paste leftover; it now names this function like the other
 * out_of_memory() calls below, so the diagnostic points at the right place.
 *
 * NOTE(review): local declarations, several case labels, break statements and
 * the return statement are missing from this excerpt and are left untouched. */
557 int sum_init(struct name_num_item *nni, int seed)
562 nni = parse_csum_name(NULL, 0);
564 cur_sum_len = csum_len_for_type(nni->num, 0);
565 cur_sum_evp_md = csum_evp_md(nni);
568 if (cur_sum_evp_md) {
569 if (!ctx_evp && !(ctx_evp = EVP_MD_CTX_create()))
570 out_of_memory("sum_init");
571 EVP_DigestInit_ex(ctx_evp, cur_sum_evp_md, NULL);
574 switch (cur_sum_nni->num) {
575 #ifdef SUPPORT_XXHASH
577 if (!xxh64_state && !(xxh64_state = XXH64_createState()))
578 out_of_memory("sum_init");
579 XXH64_reset(xxh64_state, 0);
584 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
585 out_of_memory("sum_init");
586 XXH3_64bits_reset(xxh3_state);
589 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
590 out_of_memory("sum_init");
591 XXH3_128bits_reset(xxh3_state);
598 mdfour_begin(&ctx_md);
602 case CSUM_MD4_BUSTED:
603 case CSUM_MD4_ARCHAIC:
604 mdfour_begin(&ctx_md);
611 default: /* paranoia to prevent missing case values */
612 exit_cleanup(RERR_UNSUPPORTED);
/* Feed len bytes at p into the computation started by sum_init().
 *
 * With an EVP digest active the data goes to EVP_DigestUpdate() on ctx_evp.
 * Otherwise the call dispatches on cur_sum_nni->num to the xxhash, md5 or
 * mdfour update routine. On the legacy MD4 path, input shorter than what is
 * needed to complete a CSUM_CHUNK block is staged in ctx_md.buffer; once a
 * block can be completed it is hashed, further whole blocks are hashed
 * straight from p, and the leftover bytes are staged again. An unhandled type
 * ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): the older description kept below still speaks of an MD4
 * accumulator named md, which does not match the visible code; its comment
 * delimiters, the case labels, the sumresidue bookkeeping and the return and
 * break statements are missing from this excerpt. */
619 * Feed data into an MD4 accumulator, md. The results may be
620 * retrieved using sum_end(). md is used for different purposes at
621 * different points during execution.
623 * @todo Perhaps get rid of md and just pass in the address each time.
624 * Very slightly clearer and slower.
626 void sum_update(const char *p, int32 len)
629 if (cur_sum_evp_md) {
630 EVP_DigestUpdate(ctx_evp, (uchar *)p, len);
633 switch (cur_sum_nni->num) {
634 #ifdef SUPPORT_XXHASH
636 XXH64_update(xxh64_state, p, len);
641 XXH3_64bits_update(xxh3_state, p, len);
644 XXH3_128bits_update(xxh3_state, p, len);
648 md5_update(&ctx_md, (uchar *)p, len);
652 case CSUM_MD4_BUSTED:
653 case CSUM_MD4_ARCHAIC:
654 if (len + sumresidue < CSUM_CHUNK) {
655 memcpy(ctx_md.buffer + sumresidue, p, len);
661 int32 i = CSUM_CHUNK - sumresidue;
662 memcpy(ctx_md.buffer + sumresidue, p, i);
663 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, CSUM_CHUNK);
668 while (len >= CSUM_CHUNK) {
669 mdfour_update(&ctx_md, (uchar *)p, CSUM_CHUNK);
676 memcpy(ctx_md.buffer, p, sumresidue);
680 default: /* paranoia to prevent missing case values */
681 exit_cleanup(RERR_UNSUPPORTED);
685 /* The sum buffer only needs to be as long as the current checksum's digest
686 * len, not MAX_DIGEST_LEN. Note that for CSUM_MD4_ARCHAIC that is the full
687 * MD4_DIGEST_LEN even if the file-list code is going to ignore all but the
688 * first 2 bytes of it. */
/* Finish the computation started by sum_init() and store the digest in sum.
 *
 * With an EVP digest active the result comes from EVP_DigestFinal_ex() on
 * ctx_evp. Otherwise the call dispatches on cur_sum_nni->num: xxhash digests
 * are stored with SIVAL64 (the 128-bit form stores low64 at offset 0 and
 * high64 at offset 8), md5 uses md5_result(), and the MD4 arms hash the
 * sumresidue bytes staged in ctx_md.buffer before calling mdfour_result().
 * An unhandled type ends in exit_cleanup(RERR_UNSUPPORTED).
 *
 * NOTE(review): case labels, break statements and the condition in front of
 * the flush in the MD4_BUSTED and MD4_ARCHAIC arm are missing from this
 * excerpt. */
689 void sum_end(char *sum)
692 if (cur_sum_evp_md) {
693 EVP_DigestFinal_ex(ctx_evp, (uchar *)sum, NULL);
696 switch (cur_sum_nni->num) {
697 #ifdef SUPPORT_XXHASH
699 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
704 SIVAL64(sum, 0, XXH3_64bits_digest(xxh3_state));
706 case CSUM_XXH3_128: {
707 XXH128_hash_t digest = XXH3_128bits_digest(xxh3_state);
708 SIVAL64(sum, 0, digest.low64);
709 SIVAL64(sum, 8, digest.high64);
714 md5_result(&ctx_md, (uchar *)sum);
718 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, sumresidue);
719 mdfour_result(&ctx_md, (uchar *)sum);
721 case CSUM_MD4_BUSTED:
722 case CSUM_MD4_ARCHAIC:
724 mdfour_update(&ctx_md, (uchar *)ctx_md.buffer, sumresidue);
725 mdfour_result(&ctx_md, (uchar *)sum);
730 default: /* paranoia to prevent missing case values */
731 exit_cleanup(RERR_UNSUPPORTED);
/* Check that the checksum behind a table entry really works and retire the
 * entry by setting its num to CSUM_gone when it does not.
 *
 * nni:             table entry to verify; its num and flags may be rewritten.
 * check_auth_list: when true, the entry with the same number in
 *                  valid_auth_checksums is verified as well (that nested call
 *                  passes False, so it goes no further).
 *
 * XXH3 entries: a buffer filled with a repeating pattern is hashed in four
 * sum_update() calls and the 64-bit digest is compared with a fixed expected
 * value; the verdict is kept in the static xxh3_result (-1 mismatch, 1 match).
 *
 * EVP entries: entries selected by the BITS_SETnUNSET test (presumably NNI_EVP
 * set with NNI_BUILTIN and NNI_EVP_OK both clear - confirm against the macro)
 * are probed with csum_evp_md(). The statics prior_num, prior_flags and
 * prior_result remember the last probe so that an entry with the same number
 * and flags reuses its outcome.
 *
 * Compiled only when SUPPORT_XXH3 or USE_OPENSSL is defined.
 * NOTE(review): local declarations, the sum_init() call that should precede
 * the updates, the guards around the cached results, return statements and
 * the closing directives are missing from this excerpt. */
735 #if defined SUPPORT_XXH3 || defined USE_OPENSSL
736 static void verify_digest(struct name_num_item *nni, BOOL check_auth_list)
739 static int xxh3_result = 0;
742 static int prior_num = 0, prior_flags = 0, prior_result = 0;
746 if (nni->num == CSUM_XXH3_64 || nni->num == CSUM_XXH3_128) {
750 for (j = 0; j < (int)sizeof buf; j++)
751 buf[j] = ' ' + (j % 96);
753 sum_update(buf, 32816);
754 sum_update(buf, 31152);
755 sum_update(buf, 32474);
756 sum_update(buf, 9322);
757 xxh3_result = XXH3_64bits_digest(xxh3_state) != 0xadbcf16d4678d1de ? -1 : 1;
760 nni->num = CSUM_gone;
766 if (BITS_SETnUNSET(nni->flags, NNI_EVP, NNI_BUILTIN|NNI_EVP_OK)) {
767 if (nni->num == prior_num && nni->flags == prior_flags) {
768 nni->flags = prior_result;
769 if (!(nni->flags & NNI_EVP))
770 nni->num = CSUM_gone;
772 prior_num = nni->num;
773 prior_flags = nni->flags;
774 if (!csum_evp_md(nni))
775 nni->num = CSUM_gone;
776 prior_result = nni->flags;
777 if (check_auth_list && (nni = get_nni_by_num(&valid_auth_checksums, prior_num)) != NULL)
778 verify_digest(nni, False);
/* Verify the checksum tables once.
 *
 * The initialized_choices flag is tested on entry (presumably returning early
 * when already set; the return is not visible) and set to 1 at the end. When
 * SUPPORT_XXH3 or USE_OPENSSL is defined, verify_digest() is run over every
 * entry of valid_checksums.list with check_auth_list True and then over every
 * entry of valid_auth_checksums.list with False. Both loops stop at the first
 * entry whose name is NULL.
 *
 * NOTE(review): braces, the early return and the closing directive are
 * missing from this excerpt. */
785 void init_checksum_choices()
787 struct name_num_item *nni;
789 if (initialized_choices)
792 #if defined SUPPORT_XXH3 || defined USE_OPENSSL
793 for (nni = valid_checksums.list; nni->name; nni++)
794 verify_digest(nni, True);
796 for (nni = valid_auth_checksums.list; nni->name; nni++)
797 verify_digest(nni, False);
800 initialized_choices = 1;