From: Stefan Metzmacher Date: Fri, 5 Jul 2019 15:31:48 +0000 (+0200) Subject: lib/compression/lzxpress.c... X-Git-Url: http://git.samba.org/?p=metze%2Fsamba%2Fwip.git;a=commitdiff_plain;h=91f5b672d019e15c6423444ed493194a44a48efa lib/compression/lzxpress.c... --- diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c index 024aba4c2ce8..3fe9f04de1bb 100644 --- a/lib/compression/lzxpress.c +++ b/lib/compression/lzxpress.c @@ -63,10 +63,10 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed, uint32_t max_compressed_size) { uint32_t uncompressed_pos, compressed_pos, byte_left; - uint32_t max_offset, best_offset; - int32_t offset; + uint16_t max_offset, best_offset; + int16_t offset; uint32_t max_len, len, best_len; - const uint8_t *str1, *str2; + const uint8_t *str1; uint32_t indic; uint8_t *indic_pos; uint32_t indic_bit, nibble_index; @@ -100,97 +100,144 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed, str1 = &uncompressed[uncompressed_pos]; best_len = 2; - best_offset = 0; + best_offset = UINT16_MAX; /* invalid */ - max_offset = MIN(0x1FFF, max_offset); + max_offset = MIN(0x2000, max_offset); + if (bytes_left < 3) { + max_offset = 0; + } /* search for the longest match in the window for the lookahead buffer */ for (offset = 1; (uint32_t)offset <= max_offset; offset++) { - str2 = &str1[-offset]; + const uint8_t *str2 = &str1[-offset]; + int cmp = 0; + uint32_t tmp_max; - /* maximum len we can encode into metadata */ - max_len = MIN((255 + 15 + 7 + 3), byte_left); + /* + * We compare at least 3 bytes (for the first + * match) or one more than the current best match. + */ + cmp = memcmp(str2, str1, best_len + 1); + if (cmp != 0) { + /* + * We didn't find a better match + */ + continue; + } - for (len = 0; (len < max_len) && (str1[len] == str2[len]); len++); + /* + * Remember we found at least one match + */ + found = true; + best_offset = offset; + best_len += 1; /* - * We check if len is better than the value found before, including the - * sequence of identical bytes + * Now we use some kind of binary search to find the + * length of the match. */ - if (len > best_len) { - found = true; - best_len = len; - best_offset = offset; + tmp_max = bytes_left; + while (best_len < tmp_max) { + const uint32_t cmp_len = + MAX((tmp_max - best_len) / 2, 1); + + cmp = memcmp(&str2[best_len], + &str1[best_len], + cmp_len); + if (cmp == 0) { + /* + * We still match, + * remember it. + */ + best_len += cmp_len; + } else { + /* + * We no longer match + * reduce the max boundary. + */ + uint32_t new_max = best_len + cmp_len; + if (new_max == tmp_max) { + /* + * make sure we always reduce + * at least by one + */ + new_max -= 1; + } + tmp_max = new_max; + } + } + + if (best_len < bytes_left) { + /* + * We check if len is better than the value + * found before, including the sequence of + * identical bytes + */ + continue; } + + break; } if (found) { - metadata_size = 0; - dest = (uint16_t *)&compressed[compressed_pos]; - - if (best_len < 10) { - /* Classical meta-data */ - metadata = (uint16_t)(((best_offset - 1) << 3) | (best_len - 3)); - SSVAL(dest, metadata_size / sizeof(uint16_t), metadata); - metadata_size += sizeof(uint16_t); - } else { - metadata = (uint16_t)(((best_offset - 1) << 3) | 7); - SSVAL(dest, metadata_size / sizeof(uint16_t), metadata); - metadata_size = sizeof(uint16_t); - - if (best_len < (15 + 7 + 3)) { - /* Shared byte */ - if (!nibble_index) { - compressed[compressed_pos + metadata_size] = (best_len - (3 + 7)) & 0xF; - metadata_size += sizeof(uint8_t); - } else { - compressed[nibble_index] &= 0xF; - compressed[nibble_index] |= (best_len - (3 + 7)) * 16; - } - } else if (best_len < (3 + 7 + 15 + 255)) { - /* Shared byte */ - if (!nibble_index) { - compressed[compressed_pos + metadata_size] = 15; - metadata_size += sizeof(uint8_t); - } else { - compressed[nibble_index] &= 0xF; - compressed[nibble_index] |= (15 * 16); - } + uint32_t len = best_len; + uint16_t ofs = best_offset; - /* Additional best_len */ - compressed[compressed_pos + metadata_size] = (best_len - (3 + 7 + 15)) & 0xFF; - metadata_size += sizeof(uint8_t); - } else { - /* Shared byte */ - if (!nibble_index) { - compressed[compressed_pos + metadata_size] |= 15; - metadata_size += sizeof(uint8_t); - } else { - compressed[nibble_index] |= 15 << 4; - } + len -= 3; + ofs -= 1; + ofs <<= 3; + ofs |= MIN(len, 7); - /* Additional best_len */ - compressed[compressed_pos + metadata_size] = 255; + SSVAL(compressed, compressed_pos, ofs); + compressed_pos += sizeof(uint16_t); - metadata_size += sizeof(uint8_t); + if (len >= 7) { + len -= 7; - compressed[compressed_pos + metadata_size] = (best_len - 3) & 0xFF; - compressed[compressed_pos + metadata_size + 1] = ((best_len - 3) >> 8) & 0xFF; - metadata_size += sizeof(uint16_t); + /* Shared byte */ + if (nibble_index == 0) { + compressed[compressed_pos] = MIN(len, 15); + nibble_index = compressed_pos; + compressed_pos += sizeof(uint8_t); + } else { + compressed[nibble_index] |= MIN(len, 15) << 4; + nibble_index = 0; } } - indic |= 1 << (32 - ((indic_bit % 32) + 1)); + if (len >= 15) { + len -= 15; + compressed[compressed_pos] = MIN(len, 255); + compressed_pos += sizeof(uint8_t); + } - if (best_len > 9) { - if (nibble_index == 0) { - nibble_index = compressed_pos + sizeof(uint16_t); - } else { - nibble_index = 0; + if (len >= 255) { + len += 15 + 7; + + if (len > UINT16_MAX) { + /* + * 2 zero bytes indicate that a 3 bytes length + * follows + */ + SSVAL(compressed, compressed_pos, 0); + compressed_pos += sizeof(uint16_t); + } + + SSVAL(compressed, compressed_pos, len); + compressed_pos += sizeof(uint16_t); + + len >>= 16; + if (len > 0) { + /* + * add the 3rd byte + */ + compressed[compressed_pos] = (uint8_t)len; + compressed_pos += sizeof(uint8_t); } } - compressed_pos += metadata_size; + indic |= 1 << (32 - ((indic_bit % 32) + 1)); + uncompressed_pos += best_len; byte_left -= best_len; } else { @@ -291,6 +338,14 @@ ssize_t lzxpress_decompress(const uint8_t *input, if (length == 255) { length = PULL_LE_UINT16(input, input_index); input_index += sizeof(uint16_t); + if (length == 0) { + length = PULL_LE_UINT32(input, input_index); + input_index += sizeof(uint32_t); + } + if (length < (15 + 7)) { + errno = EINVAL; + return -1; + } length -= (15 + 7); } length += 15;