From de94193353864221280be9fbb6193d92eb133000 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Sat, 3 Aug 2013 09:44:13 -0700 Subject: [PATCH] Remove bypassed checksums in --inplace to improve speed. When checking a checksum that refers to a part of an --inplace file that has been overwritten w/o getting SUMFLG_SAME_OFFSET set, we remove the checksum from the list. This will speed up files that have a lot of identical checksum blocks (e.g. sequences of zeros) that we can't use due to them not getting marked as being the same. Patch provided by Michael Chapman. --- NEWS | 3 +++ match.c | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 040ac2d7..eec631d3 100644 --- a/NEWS +++ b/NEWS @@ -154,6 +154,9 @@ Changes since 3.0.9: file for one way to package the resulting files. (Suggestions for how to make this even easier to install & use are welcomed.) + - Improved the speed of some --inplace updates when there are lots of + identical checksum blocks that end up being unusable. + - Added the --outbuf=N|L|B option for chosing the output buffering. 
- Repating the --fuzzy option now causes the code to look for fuzzy matches diff --git a/match.c b/match.c index bafab9f3..a8bd1f30 100644 --- a/match.c +++ b/match.c @@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s, do { int done_csum2 = 0; - int32 i; + uint32 hash_entry; + int32 i, *prev; if (DEBUG_GTE(DELTASUM, 4)) { rprintf(FINFO, "offset=%s sum=%04x%04x\n", @@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s, } if (tablesize == TRADITIONAL_TABLESIZE) { - if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0) + hash_entry = SUM2HASH2(s1,s2); + if ((i = hash_table[hash_entry]) < 0) goto null_hash; sum = (s1 & 0xffff) | (s2 << 16); } else { sum = (s1 & 0xffff) | (s2 << 16); - if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0) + hash_entry = BIG_SUM2HASH(sum); + if ((i = hash_table[hash_entry]) < 0) goto null_hash; } + prev = &hash_table[hash_entry]; hash_hits++; do { int32 l; + /* When updating in-place, the chunk's offset must be + * either >= our offset or identical data at that offset. + * Remove any bypassed entries that we can never use. */ + if (updating_basis_file && s->sums[i].offset < offset + && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) { + *prev = s->sums[i].chain; + continue; + } + prev = &s->sums[i].chain; + if (sum != s->sums[i].sum1) continue; @@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s, if (l != s->sums[i].len) continue; - /* in-place: ensure chunk's offset is either >= our - * offset or that the data didn't move. */ - if (updating_basis_file && s->sums[i].offset < offset - && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) - continue; - if (DEBUG_GTE(DELTASUM, 3)) { rprintf(FINFO, "potential match at %s i=%ld sum=%08x\n", -- 2.34.1