2 * Routines to support checksumming of bytes.
4 * Copyright (C) 1996 Andrew Tridgell
5 * Copyright (C) 1996 Paul Mackerras
6 * Copyright (C) 2004-2020 Wayne Davison
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * In addition, as a special exception, the copyright holders give
14 * permission to dynamically link rsync with the OpenSSL and xxhash
15 * libraries when those libraries are being distributed in compliance
16 * with their license terms, and to distribute a dynamically linked
17 * combination of rsync and these libraries. This is also considered
18 * to be covered under the GPL's System Libraries exception.
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License along
26 * with this program; if not, visit the http://fsf.org website.
35 extern int whole_file;
36 extern int checksum_seed;
37 extern int protocol_version;
38 extern int proper_seed_order;
39 extern const char *checksum_choice;
/* Numbers for the legacy MD4 checksum variants.  When no explicit checksum
 * name is given, parse_csum_name() returns CSUM_MD4_BUSTED once it reaches
 * the protocol_version >= 21 test and CSUM_MD4_ARCHAIC for anything older.
 * Several functions in this file compare a type against CSUM_MD4_BUSTED, so
 * the numeric ordering of these values matters. */
42 #define CSUM_MD4_ARCHAIC 1
43 #define CSUM_MD4_BUSTED 2
44 #define CSUM_MD4_OLD 3
/* Name-to-number table for the checksum choices.  It is searched by name in
 * parse_csum_name() and by number in checksum_name(); its negotiated_name and
 * negotiated_num members are read by parse_checksum_choice().  Both "xxh64"
 * and "xxhash" map to CSUM_XXH64.
 * NOTE(review): the end of this initializer is not visible in this listing. */
49 struct name_num_obj valid_checksums = {
50 "checksum", NULL, NULL, 0, 0, {
52 { CSUM_XXH64, "xxh64", NULL },
53 { CSUM_XXH64, "xxhash", NULL },
55 { CSUM_MD5, "md5", NULL },
56 { CSUM_MD4, "md4", NULL },
57 { CSUM_NONE, "none", NULL },
/* Both values are assigned by parse_checksum_choice().  xfersum_type selects
 * the algorithm in get_checksum2(); checksum_type selects it in file_checksum(). */
62 int xfersum_type = 0; /* used for the file transfer checksums */
63 int checksum_type = 0; /* used for the pre-transfer (--checksum) checksums */
/* Translate a checksum name into a checksum number.
 *
 * name: the text to look up, or NULL to ask for the protocol default.
 * len:  how many bytes of name to consider; name may continue past len
 *       (parse_checksum_choice() passes the part before a comma).
 *       NOTE(review): callers also pass -1; the code that handles a negative
 *       len is not visible in this listing -- presumably it means "use the
 *       whole string"; confirm.
 *
 * A NULL name, or a 4-byte name matching "auto" case-insensitively, selects a
 * default based on protocol_version.  Any other name is looked up in
 * valid_checksums via get_nni_by_name(). */
65 static int parse_csum_name(const char *name, int len)
67 struct name_num_item *nni;
72 if (!name || (len == 4 && strncasecmp(name, "auto", 4) == 0)) {
/* NOTE(review): the values returned for the >= 30 and >= 27 tests are not
 * visible in this listing. */
73 if (protocol_version >= 30)
75 if (protocol_version >= 27)
77 if (protocol_version >= 21)
78 return CSUM_MD4_BUSTED;
79 return CSUM_MD4_ARCHAIC;
82 nni = get_nni_by_name(&valid_checksums, name, len);
/* Error path: report the name and exit with RERR_UNSUPPORTED.  The message
 * prints all of name, not just its first len bytes.
 * NOTE(review): the condition guarding this path and the final return are not
 * visible in this listing. */
85 rprintf(FERROR, "unknown checksum name: %s\n", name);
86 exit_cleanup(RERR_UNSUPPORTED);
/* Return a printable name for checksum number num: the name of the
 * valid_checksums entry when get_nni_by_num() finds one, otherwise "MD4" for
 * any number below CSUM_MD4 and "UNKNOWN" for everything else. */
92 static const char *checksum_name(int num)
94 struct name_num_item *nni = get_nni_by_num(&valid_checksums, num);
96 return nni ? nni->name : num < CSUM_MD4 ? "MD4" : "UNKNOWN";
/* Decide xfersum_type and checksum_type, and make checksum_choice hold a
 * printable name for the result.
 *
 * final_call: when non-zero, the chosen checksum is reported with
 *             rprintf(FINFO, ...) if DEBUG_GTE(NSTR, am_server ? 3 : 1) holds.
 *
 * If valid_checksums.negotiated_name is set, both types take
 * valid_checksums.negotiated_num.  Otherwise checksum_choice is parsed: with a
 * comma, the text before it selects xfersum_type and the text after it selects
 * checksum_type; a single name selects both. */
99 void parse_checksum_choice(int final_call)
101 if (valid_checksums.negotiated_name)
102 xfersum_type = checksum_type = valid_checksums.negotiated_num;
/* cp points at the first comma in checksum_choice, or is NULL when there is
 * no comma or no choice at all. */
104 char *cp = checksum_choice ? strchr(checksum_choice, ',') : NULL;
/* NOTE(review): the test that picks between the two-name form and the
 * single-name form is not visible in this listing. */
106 xfersum_type = parse_csum_name(checksum_choice, cp - checksum_choice);
107 checksum_type = parse_csum_name(cp+1, -1);
109 xfersum_type = checksum_type = parse_csum_name(checksum_choice, -1);
/* NOTE(review): the statement controlled by this test is not visible in this
 * listing. */
112 if (xfersum_type == CSUM_NONE)
115 /* Snag the checksum name for both write_batch's option output & the following debug output. */
116 if (valid_checksums.negotiated_name)
117 checksum_choice = valid_checksums.negotiated_name;
118 else if (checksum_choice == NULL)
119 checksum_choice = checksum_name(xfersum_type);
121 if (final_call && DEBUG_GTE(NSTR, am_server ? 3 : 1)) {
122 rprintf(FINFO, "%s%s checksum: %s\n",
123 am_server ? "Server" : "Client",
124 valid_checksums.negotiated_name ? " negotiated" : "",
/* Return the digest length for checksum type cst.
 *
 * cst:        one of the CSUM_* values.
 * flist_csum: non-zero when the length is wanted for a file-list checksum;
 *             in the visible code it only matters for CSUM_MD4_ARCHAIC, which
 *             then yields 2 instead of MD4_DIGEST_LEN.
 *
 * A type with no matching case ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): several case labels and the XXHASH branch body are not
 * visible in this listing. */
129 int csum_len_for_type(int cst, BOOL flist_csum)
134 case CSUM_MD4_ARCHAIC:
135 /* The oldest checksum code is rather weird: the file-list code only sent
136 * 2-byte checksums, but all other checksums were full MD4 length. */
137 return flist_csum ? 2 : MD4_DIGEST_LEN;
140 case CSUM_MD4_BUSTED:
141 return MD4_DIGEST_LEN;
143 return MD5_DIGEST_LEN;
144 #ifdef SUPPORT_XXHASH
148 default: /* paranoia to prevent missing case values */
149 exit_cleanup(RERR_UNSUPPORTED);
154 /* Returns 0 if the checksum is not canonical (i.e. it includes a seed value).
155 * Returns 1 if the public sum order matches our internal sum order.
156 * Returns -1 if the public sum order is the reverse of our internal sum order.
/* csum_type is one of the CSUM_* values; a type with no matching case ends in
 * exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): the return statement for each case is not visible in this
 * listing, so which types map to 0, 1 or -1 cannot be confirmed from here. */
158 int canonical_checksum(int csum_type)
162 case CSUM_MD4_ARCHAIC:
164 case CSUM_MD4_BUSTED:
169 #ifdef SUPPORT_XXHASH
173 default: /* paranoia to prevent missing case values */
174 exit_cleanup(RERR_UNSUPPORTED);
179 #ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */
181 a simple 32 bit checksum that can be updated from either end
182 (inspired by Mark Adler's Adler-32 checksum)
/* Compute the simple 32-bit checksum of the len bytes at buf1.
 *
 * Two sums are kept: s1 adds up each byte plus CHAR_OFFSET, and s2 adds up
 * the value of s1 after every byte.  The result packs the low 16 bits of s1
 * into the low half and s2 into the high half.
 * NOTE(review): the declarations and initial values of i, s1 and s2 are not
 * visible in this listing. */
184 uint32 get_checksum1(char *buf1, int32 len)
/* Bytes are read through a signed-char pointer, so values above 0x7f
 * contribute as negative numbers. */
188 schar *buf = (schar *)buf1;
/* Four bytes per pass.  The 4/3/2/1 weights and the 10*CHAR_OFFSET term are
 * what four single-byte steps (s1 += b + CHAR_OFFSET; s2 += s1) add to s2 in
 * total, so s2 must be updated before s1 here.  The bound is len-4, not len-3,
 * so the last 1 to 4 bytes always go through the byte-wise loop below. */
191 for (i = 0; i < (len-4); i+=4) {
192 s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3] + 10*CHAR_OFFSET;
193 s1 += (buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3] + 4*CHAR_OFFSET);
/* Remaining bytes, one at a time. */
195 for (; i < len; i++) {
196 s1 += (buf[i]+CHAR_OFFSET); s2 += s1;
198 return (s1 & 0xffff) + (s2 << 16);
/* Compute the checksum of the len bytes at buf into sum, using the algorithm
 * selected by xfersum_type and mixing in checksum_seed.
 *
 * buf: data to checksum.
 * len: number of bytes at buf.
 * sum: output buffer for the digest; its required size is not established
 *      here (see csum_len_for_type() for the per-type lengths).
 *
 * A type with no matching case ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): most case labels, the local declarations, the hash *_Init
 * calls and any tests on checksum_seed are not visible in this listing. */
202 void get_checksum2(char *buf, int32 len, char *sum)
204 switch (xfersum_type) {
205 #ifdef SUPPORT_XXHASH
/* xxHash: checksum_seed is passed as the XXH64 seed argument. */
207 SIVAL64(sum, 0, XXH64(buf, len, checksum_seed));
/* MD5: the seed is hashed as 4 bytes, either before the data (when
 * proper_seed_order is set) or after it. */
214 if (proper_seed_order) {
216 SIVALu(seedbuf, 0, checksum_seed);
217 MD5_Update(&m5, seedbuf, 4);
219 MD5_Update(&m5, (uchar *)buf, len);
221 MD5_Update(&m5, (uchar *)buf, len);
223 SIVALu(seedbuf, 0, checksum_seed);
224 MD5_Update(&m5, seedbuf, 4);
227 MD5_Final((uchar *)sum, &m5);
/* MD4 via the MD4_* API: data first, then the 4 seed bytes. */
235 MD4_Update(&m4, (uchar *)buf, len);
238 SIVALu(seedbuf, 0, checksum_seed);
239 MD4_Update(&m4, seedbuf, 4);
241 MD4_Final((uchar *)sum, &m4);
246 case CSUM_MD4_BUSTED:
247 case CSUM_MD4_ARCHAIC: {
/* Legacy MD4 via the mdfour_* routines: the data is copied into the scratch
 * buffer buf1, which has room for 4 extra bytes, and the seed is stored right
 * after the data. */
258 buf1 = new_array(char, len+4);
261 out_of_memory("get_checksum2");
264 memcpy(buf1, buf, len);
266 SIVAL(buf1,len,checksum_seed);
/* Hash every complete CSUM_CHUNK-sized piece. */
270 for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK)
271 mdfour_update(&m, (uchar *)(buf1+i), CSUM_CHUNK);
274 * Prior to version 27 an incorrect MD4 checksum was computed
275 * by failing to call mdfour_tail() for block sizes that
276 * are multiples of 64. This is fixed by calling mdfour_update()
277 * even when there are no more bytes.
/* The final update is skipped only when no bytes remain and the type is
 * CSUM_MD4_BUSTED or lower, which keeps the old (incorrect) result for those
 * types. */
279 if (len - i > 0 || xfersum_type > CSUM_MD4_BUSTED)
280 mdfour_update(&m, (uchar *)(buf1+i), len-i);
282 mdfour_result(&m, (uchar *)sum);
285 default: /* paranoia to prevent missing case values */
286 exit_cleanup(RERR_UNSUPPORTED);
/* Compute the whole-file checksum of fname into sum, using the algorithm
 * selected by checksum_type.
 *
 * fname: path of the file to read.
 * st_p:  stat data for the file; only st_size is used in the visible code.
 * sum:   output buffer; it is zeroed for MAX_DIGEST_LEN bytes before anything
 *        else happens, so it must be at least that large.
 *
 * The file is opened read-only, mapped with map_file(), and fed to the hash in
 * CHUNK_SIZE pieces followed by one final partial piece.  No seed is visible
 * in any branch here (the XXH64 state is reset with 0).
 * NOTE(review): most case labels, the local declarations, the handling of a
 * failed open, and the close/unmap steps are not visible in this listing. */
290 void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum)
292 struct map_struct *buf;
293 OFF_T i, len = st_p->st_size;
297 memset(sum, 0, MAX_DIGEST_LEN);
299 fd = do_open(fname, O_RDONLY, 0);
303 buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE);
305 switch (checksum_type) {
306 #ifdef SUPPORT_XXHASH
/* The XXH64 state is function-static: it is created on first use and reset
 * for each file. */
308 static XXH64_state_t* state = NULL;
309 if (!state && !(state = XXH64_createState()))
310 out_of_memory("file_checksum");
312 XXH64_reset(state, 0);
314 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
315 XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
317 remainder = (int32)(len - i);
319 XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder);
321 SIVAL64(sum, 0, XXH64_digest(state));
/* MD5: full chunks, then the remainder. */
330 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
331 MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
333 remainder = (int32)(len - i);
335 MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder);
337 MD5_Final((uchar *)sum, &m5);
/* MD4 via the MD4_* API: full chunks, then the remainder. */
347 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
348 MD4_Update(&m4, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
350 remainder = (int32)(len - i);
352 MD4_Update(&m4, (uchar *)map_ptr(buf, i, remainder), remainder);
354 MD4_Final((uchar *)sum, &m4);
359 case CSUM_MD4_BUSTED:
360 case CSUM_MD4_ARCHAIC: {
/* Legacy MD4 via the mdfour_* routines. */
365 for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE)
366 mdfour_update(&m, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE);
368 /* Prior to version 27 an incorrect MD4 checksum was computed
369 * by failing to call mdfour_tail() for block sizes that
370 * are multiples of 64. This is fixed by calling mdfour_update()
371 * even when there are no more bytes. */
372 remainder = (int32)(len - i);
373 if (remainder > 0 || checksum_type > CSUM_MD4_BUSTED)
374 mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder);
376 mdfour_result(&m, (uchar *)sum);
/* Any other checksum_type is reported by name and number, then the program
 * exits with RERR_UNSUPPORTED. */
380 rprintf(FERROR, "Invalid checksum-choice for --checksum: %s (%d)\n",
381 checksum_name(checksum_type), checksum_type);
382 exit_cleanup(RERR_UNSUPPORTED);
/* State shared by sum_init(), sum_update() and sum_end().
 * NOTE(review): the definition of the ctx object used by those functions is
 * not visible in this listing. */
/* Count of bytes held in ctx.md.buffer by the legacy MD4 path of sum_update()
 * that have not yet been passed to mdfour_update(). */
389 static int32 sumresidue;
397 #ifdef SUPPORT_XXHASH
/* Created on first use in sum_init() and reused afterwards. */
398 static XXH64_state_t* xxh64_state;
/* Checksum type of the running sum; set in sum_init(). */
400 static int cursum_type;
/* Begin a new running checksum; data is then added with sum_update() and the
 * digest is fetched with sum_end().
 *
 * csum_type: one of the CSUM_* values.  It is recorded in cursum_type.
 * seed:      NOTE(review): no use of seed is visible in this listing.
 *
 * A type with no matching case ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): most case labels and the MD5/MD4 init calls are not visible
 * in this listing. */
402 void sum_init(int csum_type, int seed)
/* Substitute the protocol default for csum_type.
 * NOTE(review): the condition guarding this assignment is not visible in this
 * listing. */
407 csum_type = parse_csum_name(NULL, 0);
408 cursum_type = csum_type;
411 #ifdef SUPPORT_XXHASH
/* Create the shared XXH64 state on first use, then reset it with seed 0. */
413 if (!xxh64_state && !(xxh64_state = XXH64_createState()))
414 out_of_memory("sum_init");
415 XXH64_reset(xxh64_state, 0);
425 mdfour_begin(&ctx.md);
430 case CSUM_MD4_BUSTED:
431 case CSUM_MD4_ARCHAIC:
432 mdfour_begin(&ctx.md);
439 default: /* paranoia to prevent missing case values */
440 exit_cleanup(RERR_UNSUPPORTED);
445 * Feed data into an MD4 accumulator, md. The results may be
446 * retrieved using sum_end(). md is used for different purposes at
447 * different points during execution.
449 * @todo Perhaps get rid of md and just pass in the address each time.
450 * Very slightly clearer and slower.
/* Add the len bytes at p to the running checksum started by sum_init(),
 * using the algorithm recorded in cursum_type.
 *
 * A type with no matching case ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): most case labels, and the statements that advance p, reduce
 * len and update sumresidue, are not visible in this listing. */
452 void sum_update(const char *p, int32 len)
454 switch (cursum_type) {
455 #ifdef SUPPORT_XXHASH
457 XXH64_update(xxh64_state, p, len);
461 MD5_Update(&ctx.m5, (uchar *)p, len);
465 MD4_Update(&ctx.m4, (uchar *)p, len);
469 case CSUM_MD4_BUSTED:
470 case CSUM_MD4_ARCHAIC:
/* Legacy MD4 path: mdfour_update() is only ever called here with exactly
 * CSUM_CHUNK bytes, so partial data is parked in ctx.md.buffer. */
/* Not enough for a full chunk yet: append to the buffered bytes. */
471 if (len + sumresidue < CSUM_CHUNK) {
472 memcpy(ctx.md.buffer + sumresidue, p, len);
/* Top the buffer up to a full chunk (i is the number of bytes needed) and
 * hash it. */
478 int32 i = CSUM_CHUNK - sumresidue;
479 memcpy(ctx.md.buffer + sumresidue, p, i);
480 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, CSUM_CHUNK);
/* Hash further full chunks straight from the caller's data. */
485 while (len >= CSUM_CHUNK) {
486 mdfour_update(&ctx.md, (uchar *)p, CSUM_CHUNK);
/* Keep the leftover bytes for the next call or for sum_end(). */
493 memcpy(ctx.md.buffer, p, sumresidue);
497 default: /* paranoia to prevent missing case values */
498 exit_cleanup(RERR_UNSUPPORTED);
502 /* NOTE: all the callers of sum_end() pass in a pointer to a buffer that is
503 * MAX_DIGEST_LEN in size, so even if the csum-len is shorter than that (i.e.
504 * CSUM_MD4_ARCHAIC), we don't have to worry about limiting the data we write
505 * into the "sum" buffer. */
/* Finish the running checksum started by sum_init() and store the digest at
 * sum.
 *
 * Returns csum_len_for_type(cursum_type, 0), i.e. the non-file-list digest
 * length for the current type.
 *
 * A type with no matching case ends in exit_cleanup(RERR_UNSUPPORTED).
 * NOTE(review): most case labels are not visible in this listing. */
506 int sum_end(char *sum)
508 switch (cursum_type) {
509 #ifdef SUPPORT_XXHASH
511 SIVAL64(sum, 0, XXH64_digest(xxh64_state));
515 MD5_Final((uchar *)sum, &ctx.m5);
519 MD4_Final((uchar *)sum, &ctx.m4);
/* mdfour path: hash the sumresidue bytes still buffered by sum_update(), then
 * emit the digest. */
523 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
524 mdfour_result(&ctx.md, (uchar *)sum);
526 case CSUM_MD4_BUSTED:
527 case CSUM_MD4_ARCHAIC:
/* NOTE(review): original line 528 is missing from this listing; it may hold
 * a condition guarding the update below -- confirm. */
529 mdfour_update(&ctx.md, (uchar *)ctx.md.buffer, sumresidue);
530 mdfour_result(&ctx.md, (uchar *)sum);
535 default: /* paranoia to prevent missing case values */
536 exit_cleanup(RERR_UNSUPPORTED);
539 return csum_len_for_type(cursum_type, 0);