2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2022 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
/* Define SUPPORT_XXH3 when the xxhash header reports an XXH_VERSION_NUMBER of
 * at least 800 (presumably release 0.8.0 -- confirm against xxhash.h). */
33 # if XXH_VERSION_NUMBER >= 800
34 # define SUPPORT_XXH3 1
/* Settings that are defined in other source files. */
/* Not referenced by any of the statements visible in this file. */
39 extern int whole_file;
/* Seed value that get_checksum2() mixes into every checksum it computes. */
40 extern int checksum_seed;
/* Protocol level; parse_csum_name() uses it to pick the default checksum. */
41 extern int protocol_version;
/* Tested by get_checksum2() next to the MD5 calls that hash the seed and the data. */
42 extern int proper_seed_order;
/* The checksum name(s) in effect; read and updated by parse_checksum_choice(). */
43 extern const char *checksum_choice;
/* Table pairing each checksum number (CSUM_*) with the name it is known by.
 * Both "xxh64" and "xxhash" map to CSUM_XXH64. The third member is NULL in
 * every entry shown.
 * NOTE(review): the entry order looks like a preference order -- confirm
 * with the code that consumes the table. */
45 struct name_num_item valid_checksums_items[] = {
47 { CSUM_XXH3_128, "xxh128", NULL },
48 { CSUM_XXH3_64, "xxh3", NULL },
51 { CSUM_XXH64, "xxh64", NULL },
52 { CSUM_XXH64, "xxhash", NULL },
54 { CSUM_MD5, "md5", NULL },
55 { CSUM_MD4, "md4", NULL },
56 { CSUM_NONE, "none", NULL },
/* Lookup object for checksum names: it is what parse_csum_name() and
 * checksum_name() pass to get_nni_by_name() / get_nni_by_num(), and its
 * negotiated_name / negotiated_num members are read by
 * parse_checksum_choice(). The last initializer is valid_checksums_items. */
60 struct name_num_obj valid_checksums = {
61 "checksum", NULL, NULL, 0, 0, valid_checksums_items
/* Both type variables are assigned by parse_checksum_choice(). */
64 int xfersum_type = 0; /* used for the file transfer checksums */
65 int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
/* Becomes 1 at the end of init_checksum_choices(); while it is still 0,
 * parse_csum_name() calls that function before doing a name lookup. */
67 static int initialized_choices = 0;
/* Convert a checksum name into its CSUM_* number.
 *
 * name: the name to look up, or NULL to ask for the default.
 * len:  how many characters of name to consider. Callers in this file also
 *       pass -1 (presumably "use the whole string" -- confirm in
 *       get_nni_by_name()).
 *
 * A NULL name, or a 4-character name matching "auto" without regard to case,
 * selects a default that depends on protocol_version. A name that cannot be
 * resolved is reported with rprintf(FERROR, ...) followed by
 * exit_cleanup(RERR_UNSUPPORTED). */
69 int parse_csum_name(const char *name, int len)
71 struct name_num_item *nni;
76 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
/* Default selection: protocol thresholds are tested from newest to oldest.
 * NOTE(review): the values returned for the >= 30 and >= 27 cases are not
 * visible here. */
77 if (protocol_version >= 30)
79 if (protocol_version >= 27)
81 if (protocol_version >= 21)
82 return CSUM_MD4_BUSTED;
83 return CSUM_MD4_ARCHAIC;
/* Run the one-time init_checksum_choices() pass before consulting
 * valid_checksums. */
86 if (!initialized_choices)
87 init_checksum_choices();
89 nni = get_nni_by_name(&valid_checksums, name, len);
92 rprintf(FERROR, "unknown checksum name: %s\n", name);
93 exit_cleanup(RERR_UNSUPPORTED);
/* Return the name that valid_checksums associates with checksum number num.
 * When the lookup finds no entry, any num below CSUM_MD4 is reported as "md4"
 * and every other value as "UNKNOWN". */
99 static const char *checksum_name(int num)
101 struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
103 return nni ? nni->name : num < CSUM_MD4 ? "md4" : "UNKNOWN";
/* Decide xfersum_type and checksum_type, and settle the checksum_choice
 * string.
 *
 * final_call: when non-zero (and the NSTR debug level is high enough: 3 on
 *             the server, 1 otherwise) the chosen checksum is reported via
 *             rprintf(FINFO, ...).
 *
 * A negotiated checksum (valid_checksums.negotiated_name set) is used for
 * both types. Otherwise checksum_choice is parsed; it may hold two names
 * separated by a comma, in which case the first names the transfer checksum
 * and the second the pre-transfer checksum. */
106 void parse_checksum_choice(int final_call)
108 if (valid_checksums.negotiated_name)
109 xfersum_type = checksum_type = valid_checksums.negotiated_num;
/* cp points at the comma inside checksum_choice, or is NULL when there is
 * no comma (or no choice at all). */
111 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
/* Two-name form: text before the comma, then text after it. */
113 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
114 checksum_type = parse_csum_name(cp+1, -1);
/* Single-name form: the same checksum serves both purposes. */
116 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
/* On the server, an explicit choice is handed to validate_choice_vs_env(). */
117 if (am_server && checksum_choice)
118 validate_choice_vs_env(NSTR_CHECKSUM, xfersum_type, checksum_type);
121 if (xfersum_type == CSUM_NONE)
124 /* Snag the checksum name for both write_batch's option output & the following debug output. */
125 if (valid_checksums.negotiated_name)
126 checksum_choice = valid_checksums.negotiated_name;
127 else if (checksum_choice == NULL)
128 checksum_choice = checksum_name(xfersum_type);
130 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
131 rprintf(FINFO, "%s%s checksum: %s\n",
132 am_server ? "Server" : "Client",
133 valid_checksums.negotiated_name ? " negotiated" : "",
/* Return the digest length, in bytes, for checksum type cst.
 *
 * cst:        a CSUM_* value.
 * flist_csum: true when the length is wanted for a file-list checksum; among
 *             the cases shown it only changes the CSUM_MD4_ARCHAIC answer.
 *
 * A type with no matching case reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED). */
138 int csum_len_for_type(int cst, BOOL flist_csum)
143 case CSUM_MD4_ARCHAIC:
144 /* The oldest checksum code is rather weird: the file-list code only sent
145 * 2-byte checksums, but all other checksums were full MD4 length. */
146 return flist_csum ? 2 : MD4_DIGEST_LEN;
149 case CSUM_MD4_BUSTED:
150 return MD4_DIGEST_LEN;
152 return MD5_DIGEST_LEN;
158 default: /* paranoia to prevent missing case values */
159 exit_cleanup(RERR_UNSUPPORTED);
164 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
165 * Returns 1 if the public sum order matches our internal sum order.
166 * Returns -1 if the public sum order is the reverse of our internal sum order.
 *
 * csum_type is a CSUM_* value. A value with no matching case reaches the
 * default label, which calls exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): which of the three results each case yields is not visible
 * here -- confirm before relying on a particular mapping.
168 int canonical_checksum(int csum_type)
172 case CSUM_MD4_ARCHAIC:
174 case CSUM_MD4_BUSTED:
183 default: /* paranoia to prevent missing case values */
184 exit_cleanup(RERR_UNSUPPORTED);
189 #ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. */
191 a simple 32 bit checksum that can be updated from either end
192 (inspired by Mark Adler's Adler-32 checksum)
/* Compute the 32-bit weak checksum of the len bytes starting at buf1.
 *
 * Two running totals are kept: s1 is the sum of the bytes (each offset by
 * CHAR_OFFSET) and s2 is the sum of every intermediate s1 value. The result
 * carries the low 16 bits of s1 in its lower half and s2 shifted into its
 * upper half. */
194 uint32 get_checksum1(char *buf1, int32 len)
/* The bytes are read through an schar pointer. */
198 schar *buf = (schar *)buf1;
/* Four bytes per pass while more than four bytes remain. The s2 update is
 * the sum of the four s1 values the one-byte loop below would have produced:
 * 4*s1 + 4*b0 + 3*b1 + 2*b2 + b3 + (1+2+3+4)*CHAR_OFFSET. */
201 for (i = 0; i < (len-4); i+=4) {
202 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
203 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
/* Remaining bytes, one at a time. */
205 for (; i < len; i++) {
206 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
208 return (s1 & 0xffff) + (s2 << 16);
/* Compute the strong checksum of the len bytes at buf with the algorithm
 * selected by xfersum_type, and store the digest in sum.
 *
 * buf: data to checksum.
 * len: number of bytes in buf.
 * sum: output buffer; the largest write shown here is 16 bytes (the
 *      CSUM_XXH3_128 case).
 *
 * checksum_seed takes part in every variant shown: it is the seed argument
 * of the xxhash calls and is hashed as 4 extra bytes by the MD5 and MD4
 * code. A type with no matching case reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED). */
212 void get_checksum2(char *buf, int32 len, char *sum)
214 switch (xfersum_type) {
215 #ifdef SUPPORT_XXHASH
/* One-shot seeded XXH64; the 64-bit result is stored at offset 0 of sum. */
217 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
/* One-shot seeded 64-bit XXH3, stored the same way. */
222 SIVAL64(sum, 0, XXH3_64bits_withSeed(buf, len, checksum_seed));
224 case CSUM_XXH3_128: {
225 XXH128_hash_t digest = XXH3_128bits_withSeed(buf, len, checksum_seed);
/* The 128-bit digest goes out as two 64-bit halves: low64 at offset 0,
 * high64 at offset 8. */
226 SIVAL64(sum, 0, digest.low64);
227 SIVAL64(sum, 8, digest.high64);
/* MD5: in the first sequence the 4 seed bytes are hashed ahead of the data,
 * in the second sequence after it.
 * NOTE(review): the statements separating the two sequences are not visible
 * here -- confirm that proper_seed_order selects the seed-first one. */
235 if (proper_seed_order) {
237 SIVALu(seedbuf, 0, checksum_seed);
238 md5_update(&m5, seedbuf, 4);
240 md5_update(&m5, (uchar *)buf, len);
242 md5_update(&m5, (uchar *)buf, len);
244 SIVALu(seedbuf, 0, checksum_seed);
245 md5_update(&m5, seedbuf, 4);
248 md5_result(&m5, (uchar *)sum);
/* MD4 through the MD4_Update/MD4_Final interface: data first, then the 4
 * seed bytes. */
256 MD4_Update(&m4, (uchar *)buf, len);
259 SIVALu(seedbuf, 0, checksum_seed);
260 MD4_Update(&m4, seedbuf, 4);
262 MD4_Final((uchar *)sum, &m4);
267 case CSUM_MD4_BUSTED:
268 case CSUM_MD4_ARCHAIC: {
/* Work on a private copy that has room for the seed, which is stored
 * directly behind the data. */
279 buf1 = new_array(char, len+4);
283 memcpy(buf1, buf, len);
285 SIVAL(buf1,len,checksum_seed);
/* Hash every whole CSUM_CHUNK-sized piece. */
289 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
290 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
293 * Prior to version 27 an incorrect MD4 checksum was computed
294 * by failing to call mdfour_tail() for block sizes that
295 * are multiples of 64. This is fixed by calling mdfour_update()
296 * even when there are no more bytes.
298 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
299 mdfour_update(&m, (uchar *)(buf1+i), len-i);
301 mdfour_result(&m, (uchar *)sum);
304 default: /* paranoia to prevent missing case values */
305 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the checksum of a whole file with the algorithm selected by
 * checksum_type and store the digest in sum.
 *
 * fname: path of the file; it is opened read-only.
 * st_p:  stat data for the file; st_size supplies the number of bytes to hash.
 * sum:   output buffer; its first MAX_DIGEST_LEN bytes are zeroed up front.
 *
 * The file is read through map_file()/map_ptr() in CHUNK_SIZE pieces (or
 * CSUM_CHUNK pieces for the legacy MD4 variants) followed by the remaining
 * tail. None of the statements shown here reference checksum_seed: the
 * xxhash states are reset with seed 0 or with the unseeded reset call. An
 * unsupported checksum_type is reported with rprintf(FERROR, ...) followed
 * by exit_cleanup(RERR_UNSUPPORTED). */
309 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
311 struct map_struct *buf;
312 OFF_T i, len = st_p->st_size;
316 memset(sum, 0, MAX_DIGEST_LEN);
318 fd = do_open(fname, O_RDONLY, 0);
322 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
324 switch (checksum_type) {
325 #ifdef SUPPORT_XXHASH
/* XXH64: the hashing state is static, so it is created on first use and
 * reused by later calls. */
327 static XXH64_state_t* state = NULL;
328 if (!state && !(state = XXH64_createState()))
329 out_of_memory("file_checksum");
331 XXH64_reset(state, 0);
333 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
334 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
336 remainder = (int32)(len - i);
338 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
340 SIVAL64(sum, 0, XXH64_digest(state));
/* 64-bit XXH3: same pattern with its own static state. */
346 static XXH3_state_t* state = NULL;
347 if (!state && !(state = XXH3_createState()))
348 out_of_memory("file_checksum");
350 XXH3_64bits_reset(state);
352 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
353 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
355 remainder = (int32)(len - i);
357 XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
359 SIVAL64(sum, 0, XXH3_64bits_digest(state));
362 case CSUM_XXH3_128: {
363 XXH128_hash_t digest;
364 static XXH3_state_t* state = NULL;
365 if (!state && !(state = XXH3_createState()))
366 out_of_memory("file_checksum");
368 XXH3_128bits_reset(state);
370 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
371 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
373 remainder = (int32)(len - i);
375 XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
377 digest = XXH3_128bits_digest(state);
/* Low half at offset 0, high half at offset 8. */
378 SIVAL64(sum, 0, digest.low64);
379 SIVAL64(sum, 8, digest.high64);
/* MD5 over the mapped file. */
388 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
389 md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
391 remainder = (int32)(len - i);
393 md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
395 md5_result(&m5, (uchar *)sum);
/* MD4 through the MD4_Update/MD4_Final interface. */
405 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
406 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
408 remainder = (int32)(len - i);
410 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
412 MD4_Final((uchar *)sum, &m4);
417 case CSUM_MD4_BUSTED:
418 case CSUM_MD4_ARCHAIC: {
/* Legacy MD4 variants work in CSUM_CHUNK-sized pieces. */
423 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
424 mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
426 /* Prior to version 27 an incorrect MD4 checksum was computed
427 * by failing to call mdfour_tail() for block sizes that
428 * are multiples of 64. This is fixed by calling mdfour_update()
429 * even when there are no more bytes. */
430 remainder = (int32)(len - i);
431 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
432 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
434 mdfour_result(&m, (uchar *)sum);
438 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
439 checksum_name(checksum_type), checksum_type);
440 exit_cleanup(RERR_UNSUPPORTED);
/* State shared by sum_init(), sum_update() and sum_end(). */
/* Count of bytes parked in ctx.md.buffer by sum_update() that have not yet
 * been passed to mdfour_update(). */
447 static int32 sumresidue;
455 #ifdef SUPPORT_XXHASH
/* xxhash streaming states; sum_init() creates each one the first time it is
 * needed and resets it on every call. */
456 static XXH64_state_t* xxh64_state;
459 static XXH3_state_t* xxh3_state;
/* Checksum type of the running sum; stored by sum_init() and switched on by
 * sum_update() and sum_end(). */
461 static int cursum_type;
/* Start a new running checksum of the given type; data is then supplied with
 * sum_update() and the digest collected with sum_end().
 *
 * csum_type: a CSUM_* value; it is recorded in cursum_type.
 * seed:      NOTE(review): not referenced by any statement visible here (the
 *            XXH64 state is reset with 0) -- confirm how it is used.
 *
 * A type with no matching case reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED). */
463 void sum_init(int csum_type, int seed)
/* Fall back to the default checksum that parse_csum_name() derives from the
 * protocol version.
 * NOTE(review): the guard on this fallback is not visible here -- confirm
 * when it applies. */
468 csum_type = parse_csum_name(NULL, 0);
469 cursum_type = csum_type;
472 #ifdef SUPPORT_XXHASH
/* Each xxhash state is allocated on first use only, then reset. */
474 if (!xxh64_state && !(xxh64_state = XXH64_createState()))
475 out_of_memory("sum_init");
476 XXH64_reset(xxh64_state, 0);
481 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
482 out_of_memory("sum_init");
483 XXH3_64bits_reset(xxh3_state);
/* The 128-bit variant shares xxh3_state with the 64-bit one. */
486 if (!xxh3_state && !(xxh3_state = XXH3_createState()))
487 out_of_memory("sum_init");
488 XXH3_128bits_reset(xxh3_state);
498 mdfour_begin(&ctx.md);
503 case CSUM_MD4_BUSTED:
504 case CSUM_MD4_ARCHAIC:
505 mdfour_begin(&ctx.md);
512 default: /* paranoia to prevent missing case values */
513 exit_cleanup(RERR_UNSUPPORTED);
518 * Feed data into the running checksum started by sum_init() (xxhash, MD5 or
519 * MD4). The results may be retrieved using sum_end(). The MD4 state, md, is
520 * used for different purposes at different points during execution.
522 * @todo Perhaps get rid of md and just pass in the address each time.
523 * Very slightly clearer and slower.
/* Add len bytes at p to the running checksum, using whichever state matches
 * cursum_type.
 *
 * The xxhash, MD5 and MD4_Update paths pass the data straight through. The
 * mdfour path only hands whole CSUM_CHUNK-sized pieces to mdfour_update();
 * bytes that do not fill a chunk wait in ctx.md.buffer, with their count
 * kept in sumresidue. A type with no matching case reaches the default
 * label, which calls exit_cleanup(RERR_UNSUPPORTED). */
525 void sum_update(const char *p, int32 len)
527 switch (cursum_type) {
528 #ifdef SUPPORT_XXHASH
530 XXH64_update(xxh64_state, p, len);
535 XXH3_64bits_update(xxh3_state, p, len);
538 XXH3_128bits_update(xxh3_state, p, len);
542 md5_update(&ctx.m5, (uchar *)p, len);
546 MD4_Update(&ctx.m4, (uchar *)p, len);
550 case CSUM_MD4_BUSTED:
551 case CSUM_MD4_ARCHAIC:
/* Still short of a full chunk: just append to the holding buffer. */
552 if (len + sumresidue < CSUM_CHUNK) {
553 memcpy(ctx.md.buffer + sumresidue, p, len);
/* Top the holding buffer up to exactly one chunk and hash it. */
559 int32 i = CSUM_CHUNK - sumresidue;
560 memcpy(ctx.md.buffer + sumresidue, p, i);
561 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
/* Hash further whole chunks directly from the caller's data. */
566 while (len >= CSUM_CHUNK) {
567 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
/* Park the leftover bytes for the next call. */
574 memcpy(ctx.md.buffer, p, sumresidue);
578 default: /* paranoia to prevent missing case values */
579 exit_cleanup(RERR_UNSUPPORTED);
/* Finish the running checksum and write its digest to sum.
 *
 * sum: output buffer for the digest.
 * Returns the value of csum_len_for_type(cursum_type, 0).
 * A type with no matching case reaches the default label, which calls
 * exit_cleanup(RERR_UNSUPPORTED). */
583 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
584 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
585 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
586 * into the "sum" buffer. */
587 int sum_end(char *sum)
589 switch (cursum_type) {
590 #ifdef SUPPORT_XXHASH
592 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
597 SIVAL64(sum, 0, XXH3_64bits_digest(xxh3_state));
599 case CSUM_XXH3_128: {
600 XXH128_hash_t digest = XXH3_128bits_digest(xxh3_state);
/* Low half at offset 0, high half at offset 8. */
601 SIVAL64(sum, 0, digest.low64);
602 SIVAL64(sum, 8, digest.high64);
607 md5_result(&ctx.m5, (uchar *)sum);
611 MD4_Final((uchar *)sum, &ctx.m4);
/* Pass the sumresidue bytes still held in ctx.md.buffer to MD4, then write
 * out the digest. */
615 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
616 mdfour_result(&ctx.md, (uchar *)sum);
618 case CSUM_MD4_BUSTED:
619 case CSUM_MD4_ARCHAIC:
/* Same two steps for the legacy MD4 variants.
 * NOTE(review): confirm whether this flush is conditional on sumresidue. */
621 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
622 mdfour_result(&ctx.md, (uchar *)sum);
627 default: /* paranoia to prevent missing case values */
628 exit_cleanup(RERR_UNSUPPORTED);
631 return csum_len_for_type(cursum_type, 0);
/* One-time setup pass, triggered by parse_csum_name() while
 * initialized_choices is still 0.
 *
 * The visible part is a self-test of the 64-bit XXH3 code: a buffer filled
 * with a fixed byte pattern is hashed in four pieces through sum_init() and
 * sum_update(), and the digest is compared with a known constant. On a
 * mismatch the XXH3 entries of valid_checksums.list are singled out.
 * NOTE(review): what is done with those entries is not visible here;
 * presumably they are removed from the list -- confirm. */
634 void init_checksum_choices()
/* Fill buf with a repeating run of 96 consecutive character codes that
 * starts at ' '. */
639 for (j = 0; j < (int)sizeof buf; j++) {
640 buf[j] = ' ' + (j % 96);
642 sum_init(CSUM_XXH3_64, 0);
643 sum_update(buf, 32816);
644 sum_update(buf, 31152);
645 sum_update(buf, 32474);
646 sum_update(buf, 9322);
/* Any other digest means the XXH3 result cannot be trusted. */
647 if (XXH3_64bits_digest(xxh3_state) != 0xadbcf16d4678d1de) {
649 struct name_num_item *nni = valid_checksums.list;
/* Walk the list up to the entry whose name is NULL. */
650 for (t = f = 0; nni[f].name; f++) {
651 if (nni[f].num == CSUM_XXH3_64 || nni[f].num == CSUM_XXH3_128)
/* Record that the setup has run so that it is not repeated. */
659 initialized_choices = 1;