2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
34 #include "openssl/md4.h"
35 #include "openssl/md5.h"
39 extern int local_server;
40 extern int whole_file;
41 extern int read_batch;
42 extern int checksum_seed;
43 extern int protocol_version;
44 extern int proper_seed_order;
45 extern char *checksum_choice;
/* Codes for the older MD4 checksum variants (parse_csum_name() returns the
 * BUSTED and ARCHAIC codes for protocol versions below 27).
 * NOTE(review): the other CSUM_* codes used in this file (CSUM_MD4,
 * CSUM_XXHASH, CSUM_NONE, ...) are defined on lines not visible here. */
48 #define CSUM_MD4_ARCHAIC 1
49 #define CSUM_MD4_BUSTED 2
50 #define CSUM_MD4_OLD 3
/* Size in bytes of the saw[] arrays, which are indexed by checksum code. */
55 #define CSUM_SAW_BUFLEN 10
60 } valid_checksums[] = {
62 { CSUM_XXHASH, "xxhash" },
66 { CSUM_NONE, "none" },
/* Size of the local buffer that negotiate_checksum() uses to hold a list of
 * checksum names. */
70 #define MAX_CHECKSUM_LIST 1024
/* Map the MD5_* names used below onto md_context and the md5_begin(),
 * md5_update() and md5_result() routines.  MD5_Final() swaps its two
 * arguments when forwarding to md5_result().
 * NOTE(review): presumably these aliases only apply when another MD5
 * implementation is not in use -- the guarding condition is not visible here. */
73 #define MD5_CTX md_context
74 #define MD5_Init md5_begin
75 #define MD5_Update md5_update
76 #define MD5_Final(digest, cptr) md5_result(cptr, digest)
79 int xfersum_type = 0; /* used for the file transfer checksums */
80 int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
/* NULL until negotiate_checksum() stores the agreed-upon checksum name. */
81 const char *negotiated_csum_name = NULL;
/* Translate a checksum name into its CSUM_* code.
 *
 * name:       the name to look up; only the first len characters are compared.
 * len:        number of characters of name to match.
 * allow_auto: when nonzero, a 4-character "auto" (any case) is handled like
 *             a NULL name.
 *
 * A NULL (or "auto") name selects a default based on protocol_version.  Any
 * other name is compared case-insensitively against valid_checksums and must
 * match an entry's name in full.
 * NOTE(review): callers in this file also pass len == -1 and test for a
 * negative result; the lines handling those cases are not visible here --
 * confirm before relying on them. */
83 static int parse_csum_name(const char *name, int len, int allow_auto)
85 struct csum_struct *cs;
90 if (!name || (allow_auto && len == 4 && strncasecmp(name, "auto", 4) == 0)) {
/* Pick the default by protocol tier, newest first. */
91 if (protocol_version >= 30)
93 if (protocol_version >= 27)
95 if (protocol_version >= 21)
96 return CSUM_MD4_BUSTED;
97 return CSUM_MD4_ARCHAIC;
/* The cs->name[len] test rejects a name that is only a prefix of an entry. */
100 for (cs = valid_checksums; cs->name; cs++) {
101 if (strncasecmp(name, cs->name, len) == 0 && cs->name[len] == '\0')
/* Follows the table scan; presumably reached only when no entry matched. */
106 rprintf(FERROR, "unknown checksum name: %s\n", name);
107 exit_cleanup(RERR_UNSUPPORTED);
/* Return the display name for checksum code num by scanning valid_checksums.
 * Falls back to the literal "UNKNOWN" after the scan.
 * NOTE(review): the loop body is not visible here; presumably it returns
 * cs->name for the entry whose code equals num. */
113 static const char *checksum_name(int num)
115 struct csum_struct *cs;
117 for (cs = valid_checksums; cs->name; cs++) {
125 return "UNKNOWN"; /* IMPOSSIBLE */
/* Set xfersum_type and checksum_type from checksum_choice, unless a checksum
 * name was already negotiated.
 *
 * checksum_choice may hold one name (used for both types) or two names
 * separated by a comma: the first is the transfer checksum, the second the
 * pre-transfer checksum.  A NULL checksum_choice is passed through to
 * parse_csum_name(), which then picks a protocol-based default.
 *
 * final_call: when nonzero and CSUM debugging is at level 1 or higher, the
 *             selected checksum name(s) are logged via rprintf(FINFO). */
128 void parse_checksum_choice(int final_call)
130 if (!negotiated_csum_name) {
131 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
/* Two names: the part before the comma has length cp - checksum_choice. */
133 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice, 1);
134 checksum_type = parse_csum_name(cp+1, -1, 1);
/* Single name (or none): both checksum types share the result. */
136 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1, 1);
/* NOTE(review): the statement controlled by this test is not visible here. */
139 if (xfersum_type == CSUM_NONE)
142 if (final_call && DEBUG_GTE(CSUM, 1)) {
143 if (negotiated_csum_name)
144 rprintf(FINFO, "[%s] negotiated checksum: %s\n", who_am_i(), negotiated_csum_name);
145 else if (xfersum_type == checksum_type) {
146 rprintf(FINFO, "[%s] %s checksum: %s\n", who_am_i(),
147 checksum_choice ? "chosen" : "protocol-based",
148 checksum_name(xfersum_type));
/* The two types differ: report each one on its own line. */
150 rprintf(FINFO, "[%s] chosen transfer checksum: %s\n",
151 who_am_i(), checksum_name(xfersum_type));
152 rprintf(FINFO, "[%s] chosen pre-transfer checksum: %s\n",
153 who_am_i(), checksum_name(checksum_type));
/* Copy the checksum names found in from into sumbuf and record what was seen.
 *
 * from:       list of names; a space or the terminating NUL ends a name.
 * sumbuf:     output buffer of sumbuf_len bytes.
 * saw:        array of CSUM_SAW_BUFLEN bytes, cleared first; for each
 *             recognized checksum code not seen before, it receives the
 *             incremented running count (++cnt).
 *
 * NOTE(review): much of the copy loop and the return statement are not
 * visible here; negotiate_checksum() uses the return value as the length of
 * the text left in sumbuf -- confirm. */
158 static int parse_checksum_list(const char *from, char *sumbuf, int sumbuf_len, char *saw)
160 char *to = sumbuf, *tok = NULL;
163 memset(saw, 0, CSUM_SAW_BUFLEN);
166 if (*from == ' ' || !*from) {
/* End of a name: look it up without allowing "auto". */
168 int sum_type = parse_csum_name(tok, to - tok, 0);
169 if (sum_type >= 0 && !saw[sum_type])
170 saw[sum_type] = ++cnt;
/* Rewind the output to the start of this name, and one byte further unless
 * the name sits at the very start of sumbuf.  Presumably taken when the name
 * was unknown or a duplicate -- confirm. */
172 to = tok - (tok != sumbuf);
/* Output buffer is full: rewind over the name being copied. */
184 if (to - sumbuf >= sumbuf_len - 1) {
185 to = tok - (tok != sumbuf);
/* Agree with the other side on the checksum to use.
 *
 * f_in, f_out: passed to read_vstring() and write_vstring() to exchange the
 *              lists of checksum names.
 * csum_list:   optional user-provided list of names; when NULL or empty the
 *              list is built from valid_checksums (CSUM_NONE is singled out
 *              while building it).
 * saw_fail:    when nonzero and the user's list yields nothing, the literal
 *              "FAIL" is placed in the outgoing list instead.
 *
 * When a name is chosen, xfersum_type, checksum_type and negotiated_csum_name
 * are set.  The function ends by reporting "Failed to negotiate a common
 * checksum" and calling exit_cleanup(RERR_UNSUPPORTED); presumably that path
 * is reached only when no name was chosen -- the intervening lines are not
 * visible here. */
195 void negotiate_checksum(int f_in, int f_out, const char *csum_list, int saw_fail)
197 char *tok, sumbuf[MAX_CHECKSUM_LIST], saw[CSUM_SAW_BUFLEN];
200 /* Simplify the user-provided string so that it contains valid
201 * checksum names without any duplicates. The client side also
202 * makes use of the saw values when scanning the server's list. */
203 if (csum_list && *csum_list && (!am_server || local_server)) {
204 len = parse_checksum_list(csum_list, sumbuf, sizeof sumbuf, saw);
205 if (saw_fail && !len)
206 len = strlcpy(sumbuf, "FAIL", sizeof sumbuf);
209 memset(saw, 0, CSUM_SAW_BUFLEN);
213 if (!csum_list || !*csum_list) {
214 struct csum_struct *cs;
216 for (cs = valid_checksums, len = 0; cs->name; cs++) {
217 if (cs->num == CSUM_NONE)
221 len += strlcpy(sumbuf+len, cs->name, sizeof sumbuf - len);
222 if (len >= (int)sizeof sumbuf - 1)
223 exit_cleanup(RERR_UNSUPPORTED); /* IMPOSSIBLE... */
/* Record the order in which each built-in checksum was listed. */
224 saw[cs->num] = ++cnt;
228 /* Each side sends their list of valid checksum names to the other side and
229 * then both sides pick the first name in the client's list that is also in
230 * the server's list. */
232 write_vstring(f_out, sumbuf, len);
234 if (!local_server || read_batch)
235 len = read_vstring(f_in, sumbuf, sizeof sumbuf);
238 int best = CSUM_SAW_BUFLEN; /* We want best == 1 from the client list */
240 memset(saw, 1, CSUM_SAW_BUFLEN); /* The first client's choice is the best choice */
/* NOTE(review): strtok() keeps hidden static state and modifies sumbuf in
 * place; confirm nothing called inside the loop also uses strtok(). */
241 for (tok = strtok(sumbuf, " \t"); tok; tok = strtok(NULL, " \t")) {
242 sum_type = parse_csum_name(tok, -1, 0);
/* Presumably skips names that are unknown, have a zero saw[] entry, or rank
 * worse than the current best -- the controlled statement is not visible. */
243 if (sum_type < 0 || !saw[sum_type] || best < saw[sum_type])
245 xfersum_type = checksum_type = sum_type;
/* tok points into the local sumbuf array, hence the strdup() further down. */
246 negotiated_csum_name = tok;
247 best = saw[sum_type];
251 if (negotiated_csum_name) {
252 negotiated_csum_name = strdup(negotiated_csum_name);
259 rprintf(FERROR, "Failed to negotiate a common checksum\n");
260 exit_cleanup(RERR_UNSUPPORTED);
/* Return the digest length in bytes for checksum code cst.
 *
 * flist_csum: nonzero when the length is wanted for a file-list checksum;
 *             in the visible code it only matters for CSUM_MD4_ARCHAIC.
 *
 * An unhandled code ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): several case labels are on lines not visible here. */
263 int csum_len_for_type(int cst, BOOL flist_csum)
268 case CSUM_MD4_ARCHAIC:
269 /* The oldest checksum code is rather weird: the file-list code only sent
270 * 2-byte checksums, but all other checksums were full MD4 length. */
271 return flist_csum ? 2 : MD4_DIGEST_LEN;
274 case CSUM_MD4_BUSTED:
275 return MD4_DIGEST_LEN;
277 return MD5_DIGEST_LEN;
278 #ifdef SUPPORT_XXHASH
/* The xxhash digest is stored as one XXH64_hash_t value. */
280 return sizeof (XXH64_hash_t);
282 default: /* paranoia to prevent missing case values */
283 exit_cleanup(RERR_UNSUPPORTED);
/* Return 1 when csum_type is CSUM_MD4 or a higher-numbered code, else 0.
 * NOTE(review): what "canonical" implies for callers is not visible here. */
288 int canonical_checksum(int csum_type)
290 return csum_type >= CSUM_MD4 ? 1 : 0;
293 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
295 a simple 32 bit checksum that can be updated from either end
296 (inspired by Mark Adler's Adler-32 checksum)
/* Compute the 32-bit weak checksum of the len bytes at buf1.
 *
 * Two running sums are kept: s1 accumulates each byte plus CHAR_OFFSET, and
 * s2 accumulates the successive values of s1.  The result is the low 16 bits
 * of s1 plus s2 shifted left by 16.  Bytes are read through an schar pointer.
 * NOTE(review): the declarations and initial values of i, s1 and s2 are on
 * lines not visible here. */
298 uint32 get_checksum1(char *buf1, int32 len)
302 schar *buf = (schar *)buf1;
/* Four bytes per pass.  This equals four single-byte steps, which is where
 * the 4/3/2/1 weights and the 10 (= 4+3+2+1) offsets added to s2 come from.
 * The len-4 bound leaves the final one to four bytes to the loop below. */
305 for (i = 0; i < (len-4); i+=4) {
306 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
307 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
/* Remaining bytes, one at a time. */
309 for (; i < len; i++) {
310 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
312 return (s1 & 0xffff) + (s2 << 16);
/* Compute the strong checksum of the len bytes at buf into sum, using the
 * algorithm selected by xfersum_type and mixing in checksum_seed.
 * An unhandled xfersum_type ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): the case labels of the MD5/MD4/xxhash branches, the context
 * declarations and the conditions around the seed updates are on lines not
 * visible here. */
316 void get_checksum2(char *buf, int32 len, char *sum)
318 switch (xfersum_type) {
/* MD5: with proper_seed_order the 4-byte seed is hashed before the data;
 * the other visible sequence hashes the data first and the seed after it. */
323 if (proper_seed_order) {
325 SIVALu(seedbuf, 0, checksum_seed);
326 MD5_Update(&m5, seedbuf, 4);
328 MD5_Update(&m5, (uchar *)buf, len);
330 MD5_Update(&m5, (uchar *)buf, len);
332 SIVALu(seedbuf, 0, checksum_seed);
333 MD5_Update(&m5, seedbuf, 4);
336 MD5_Final((uchar *)sum, &m5);
/* MD4: data first, then the 4-byte seed. */
344 MD4_Update(&m4, (uchar *)buf, len);
347 SIVALu(seedbuf, 0, checksum_seed);
348 MD4_Update(&m4, seedbuf, 4);
350 MD4_Final((uchar *)sum, &m4);
355 case CSUM_MD4_BUSTED:
356 case CSUM_MD4_ARCHAIC: {
/* mdfour path: work on a copy of the data with room for the 4-byte seed
 * stored right after it (hence len+4).
 * NOTE(review): how buf1 is reused or released, and whether len is extended
 * to cover the seed, is decided on lines not visible here. */
367 buf1 = new_array(char, len+4);
370 out_of_memory("get_checksum2");
373 memcpy(buf1, buf, len);
375 SIVAL(buf1,len,checksum_seed);
379 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
380 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
383 * Prior to version 27 an incorrect MD4 checksum was computed
384 * by failing to call mdfour_tail() for block sizes that
385 * are multiples of 64. This is fixed by calling mdfour_update()
386 * even when there are no more bytes.
388 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
389 mdfour_update(&m, (uchar *)(buf1+i), len-i);
391 mdfour_result(&m, (uchar *)sum);
394 #ifdef SUPPORT_XXHASH
/* xxhash: checksum_seed is passed as the XXH64 seed. */
396 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
399 default: /* paranoia to prevent missing case values */
400 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the whole-file checksum of fname into sum using checksum_type.
 *
 * fname: path opened read-only via do_open().
 * st_p:  stat data; st_size gives the number of bytes to hash.
 * sum:   output buffer; its first MAX_DIGEST_LEN bytes are zeroed up front.
 *
 * The file is accessed through map_file()/map_ptr() and hashed in
 * CSUM_CHUNK-sized pieces followed by the remainder.  No use of
 * checksum_seed is visible in this function.  An unsupported checksum_type
 * is reported and ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): handling of a failed do_open() and the release of buf and fd
 * are on lines not visible here. */
404 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
406 struct map_struct *buf;
407 OFF_T i, len = st_p->st_size;
411 memset(sum, 0, MAX_DIGEST_LEN);
413 fd = do_open(fname, O_RDONLY, 0);
417 buf = map_file(fd, len, MAX_MAP_SIZE, CSUM_CHUNK);
419 switch (checksum_type) {
/* MD5: whole chunks first, then the tail. */
425 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
426 MD5_Update(&m5, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
428 remainder = (int32)(len - i);
430 MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
432 MD5_Final((uchar *)sum, &m5);
/* MD4: same chunking as above. */
442 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
443 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
445 remainder = (int32)(len - i);
447 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
449 MD4_Final((uchar *)sum, &m4);
454 case CSUM_MD4_BUSTED:
455 case CSUM_MD4_ARCHAIC: {
460 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
461 mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
463 /* Prior to version 27 an incorrect MD4 checksum was computed
464 * by failing to call mdfour_tail() for block sizes that
465 * are multiples of 64. This is fixed by calling mdfour_update()
466 * even when there are no more bytes. */
467 remainder = (int32)(len - i);
468 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
469 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
471 mdfour_result(&m, (uchar *)sum);
474 #ifdef SUPPORT_XXHASH
/* xxhash: a temporary streaming state is created, reset with seed 0, fed
 * chunk by chunk and freed once the digest has been stored. */
476 XXH64_state_t* state = XXH64_createState();
478 out_of_memory("file_checksum xx64");
480 if (XXH64_reset(state, 0) == XXH_ERROR) {
/* NOTE(review): this message and the one in the loop below lack the trailing
 * newline used by the other rprintf() calls in this file -- confirm intent. */
481 rprintf(FERROR, "error resetting XXH64 seed");
482 exit_cleanup(RERR_STREAMIO);
485 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) {
486 XXH_errorcode const updateResult =
487 XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK);
488 if (updateResult == XXH_ERROR) {
489 rprintf(FERROR, "error computing XX64 hash");
490 exit_cleanup(RERR_STREAMIO);
493 remainder = (int32)(len - i);
/* NOTE(review): this maps CSUM_CHUNK bytes but hashes only remainder of them,
 * whereas the MD5/MD4 branches map exactly remainder bytes -- confirm that
 * map_ptr() tolerates a window reaching past the end of the file.  The result
 * of this XXH64_update() is also not checked, unlike the one in the loop. */
495 XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder);
496 SIVAL64(sum, 0, XXH64_digest(state));
498 XXH64_freeState(state);
503 rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type);
504 exit_cleanup(RERR_UNSUPPORTED);
/* Number of bytes currently held in ctx.md.buffer by the mdfour-based
 * branches of sum_update(); sum_end() hashes that many buffered bytes. */
511 static int32 sumresidue;
/* Checksum code recorded by sum_init(); sum_update() and sum_end() switch on it. */
519 static int cursum_type;
520 #ifdef SUPPORT_XXHASH
/* Streaming xxhash state; sum_init() creates it while it is still NULL and
 * resets it for each new running checksum. */
521 XXH64_state_t* xxh64_state = NULL;
/* Start a new running checksum of type csum_type.  The type is remembered in
 * cursum_type for the following sum_update() and sum_end() calls.
 *
 * csum_type: CSUM_* code selecting the algorithm.
 * seed:      NOTE(review): its use is on lines not visible here.
 *
 * An unhandled type ends in exit_cleanup(RERR_UNSUPPORTED). */
524 void sum_init(int csum_type, int seed)
/* Fall back to the protocol-based default checksum.
 * NOTE(review): the condition guarding this assignment is not visible here. */
529 csum_type = parse_csum_name(NULL, 0, 1);
530 cursum_type = csum_type;
540 mdfour_begin(&ctx.md);
545 case CSUM_MD4_BUSTED:
546 case CSUM_MD4_ARCHAIC:
547 mdfour_begin(&ctx.md);
552 #ifdef SUPPORT_XXHASH
/* Create the shared xxhash state once, then reset it with seed 0. */
554 if (xxh64_state == NULL) {
555 xxh64_state = XXH64_createState();
556 if (xxh64_state == NULL)
557 out_of_memory("sum_init xxh64");
559 if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) {
560 rprintf(FERROR, "error resetting XXH64 state");
561 exit_cleanup(RERR_STREAMIO);
567 default: /* paranoia to prevent missing case values */
568 exit_cleanup(RERR_UNSUPPORTED);
573 * Feed data into the checksum accumulator selected by sum_init() (an MD5,
574 * MD4, or xxhash state). The results may be retrieved using sum_end(). The
575 * accumulator is used for different purposes at different points during execution.
577 * @todo Perhaps get rid of md and just pass in the address each time.
578 * Very slightly clearer and slower.
/* Add the len bytes at p to the running checksum started by sum_init(),
 * dispatching on cursum_type.  An unhandled type ends in
 * exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): several case labels and some statements of the mdfour branch
 * are on lines not visible here. */
580 void sum_update(const char *p, int32 len)
582 switch (cursum_type) {
584 MD5_Update(&ctx.m5, (uchar *)p, len);
588 MD4_Update(&ctx.m4, (uchar *)p, len);
592 case CSUM_MD4_BUSTED:
593 case CSUM_MD4_ARCHAIC:
/* Not enough for a full CSUM_CHUNK yet: just buffer the bytes. */
594 if (len + sumresidue < CSUM_CHUNK) {
595 memcpy(ctx.md.buffer + sumresidue, p, len);
/* Top up the buffered partial chunk to CSUM_CHUNK bytes and hash it. */
601 int32 i = CSUM_CHUNK - sumresidue;
602 memcpy(ctx.md.buffer + sumresidue, p, i);
603 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
/* Hash whole chunks straight from the caller's data. */
608 while (len >= CSUM_CHUNK) {
609 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
/* Keep the leftover bytes in the buffer for a later call. */
616 memcpy(ctx.md.buffer, p, sumresidue);
618 #ifdef SUPPORT_XXHASH
620 if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) {
621 rprintf(FERROR, "error computing XX64 hash");
622 exit_cleanup(RERR_STREAMIO);
628 default: /* paranoia to prevent missing case values */
629 exit_cleanup(RERR_UNSUPPORTED);
633 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
634 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
635 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
636 * into the "sum" buffer. */
/* Finish the running checksum started by sum_init() and store the digest in
 * sum.  Returns csum_len_for_type(cursum_type, 0), i.e. the digest length for
 * the current checksum type.  An unhandled type ends in
 * exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): some case labels and conditions are on lines not visible here. */
637 int sum_end(char *sum)
639 switch (cursum_type) {
641 MD5_Final((uchar *)sum, &ctx.m5);
645 MD4_Final((uchar *)sum, &ctx.m4);
/* Hash the bytes still buffered by sum_update() before producing the digest. */
649 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
650 mdfour_result(&ctx.md, (uchar *)sum);
652 case CSUM_MD4_BUSTED:
653 case CSUM_MD4_ARCHAIC:
655 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
656 mdfour_result(&ctx.md, (uchar *)sum);
658 #ifdef SUPPORT_XXHASH
660 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
666 default: /* paranoia to prevent missing case values */
667 exit_cleanup(RERR_UNSUPPORTED);
670 return csum_len_for_type(cursum_type, 0);