uint32_t max_compressed_size)
{
uint32_t uncompressed_pos, compressed_pos, byte_left;
- uint32_t max_offset, best_offset;
- int32_t offset;
+ uint16_t max_offset, best_offset;
+ int16_t offset;
uint32_t max_len, len, best_len;
- const uint8_t *str1, *str2;
+ const uint8_t *str1;
uint32_t indic;
uint8_t *indic_pos;
uint32_t indic_bit, nibble_index;
str1 = &uncompressed[uncompressed_pos];
best_len = 2;
- best_offset = 0;
+ best_offset = UINT16_MAX; /* invalid */
- max_offset = MIN(0x1FFF, max_offset);
+ max_offset = MIN(0x2000, max_offset);
+ if (bytes_left < 3) {
+ max_offset = 0;
+ }
/* search for the longest match in the window for the lookahead buffer */
for (offset = 1; (uint32_t)offset <= max_offset; offset++) {
- str2 = &str1[-offset];
+ const uint8_t *str2 = &str1[-offset];
+ int cmp = 0;
+ uint32_t tmp_max;
- /* maximum len we can encode into metadata */
- max_len = MIN((255 + 15 + 7 + 3), byte_left);
+ /*
+ * We compare at least 3 bytes (for the first
+ * match) or one more than the current best match.
+ */
+ cmp = memcmp(str2, str1, best_len + 1);
+ if (cmp != 0) {
+ /*
+ * We didn't find a better match
+ */
+ continue;
+ }
- for (len = 0; (len < max_len) && (str1[len] == str2[len]); len++);
+ /*
+ * Remember we found at least one match
+ */
+ found = true;
+ best_offset = offset;
+ best_len += 1;
/*
- * We check if len is better than the value found before, including the
- * sequence of identical bytes
+ * Now we use some kind of binary search to find the
+ * length of the match.
*/
- if (len > best_len) {
- found = true;
- best_len = len;
- best_offset = offset;
+ tmp_max = bytes_left;
+ while (best_len < tmp_max) {
+ const uint32_t cmp_len =
+ MAX((tmp_max - best_len) / 2, 1);
+
+ cmp = memcmp(&str2[best_len],
+ &str1[best_len],
+ cmp_len);
+ if (cmp == 0) {
+ /*
+ * We still match,
+ * remember it.
+ */
+ best_len += cmp_len;
+ } else {
+ /*
+ * We no longer match
+ * reduce the max boundary.
+ */
+ uint32_t new_max = best_len + cmp_len;
+ if (new_max == tmp_max) {
+ /*
+ * make sure we always reduce
+ * at least by one
+ */
+ new_max -= 1;
+ }
+ tmp_max = new_max;
+ }
+ }
+
+ if (best_len < bytes_left) {
+ /*
+ * We check if len is better than the value
+ * found before, including the sequence of
+ * identical bytes
+ */
+ continue;
}
+
+ break;
}
if (found) {
- metadata_size = 0;
- dest = (uint16_t *)&compressed[compressed_pos];
-
- if (best_len < 10) {
- /* Classical meta-data */
- metadata = (uint16_t)(((best_offset - 1) << 3) | (best_len - 3));
- SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
- metadata_size += sizeof(uint16_t);
- } else {
- metadata = (uint16_t)(((best_offset - 1) << 3) | 7);
- SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
- metadata_size = sizeof(uint16_t);
-
- if (best_len < (15 + 7 + 3)) {
- /* Shared byte */
- if (!nibble_index) {
- compressed[compressed_pos + metadata_size] = (best_len - (3 + 7)) & 0xF;
- metadata_size += sizeof(uint8_t);
- } else {
- compressed[nibble_index] &= 0xF;
- compressed[nibble_index] |= (best_len - (3 + 7)) * 16;
- }
- } else if (best_len < (3 + 7 + 15 + 255)) {
- /* Shared byte */
- if (!nibble_index) {
- compressed[compressed_pos + metadata_size] = 15;
- metadata_size += sizeof(uint8_t);
- } else {
- compressed[nibble_index] &= 0xF;
- compressed[nibble_index] |= (15 * 16);
- }
+ uint32_t len = best_len;
+ uint16_t ofs = best_offset;
- /* Additional best_len */
- compressed[compressed_pos + metadata_size] = (best_len - (3 + 7 + 15)) & 0xFF;
- metadata_size += sizeof(uint8_t);
- } else {
- /* Shared byte */
- if (!nibble_index) {
- compressed[compressed_pos + metadata_size] |= 15;
- metadata_size += sizeof(uint8_t);
- } else {
- compressed[nibble_index] |= 15 << 4;
- }
+ len -= 3;
+ ofs -= 1;
+ ofs <<= 3;
+ ofs |= MIN(len, 7);
- /* Additional best_len */
- compressed[compressed_pos + metadata_size] = 255;
+ SSVAL(compressed, compressed_pos, ofs);
+ compressed_pos += sizeof(uint16_t);
- metadata_size += sizeof(uint8_t);
+ if (len >= 7) {
+ len -= 7;
- compressed[compressed_pos + metadata_size] = (best_len - 3) & 0xFF;
- compressed[compressed_pos + metadata_size + 1] = ((best_len - 3) >> 8) & 0xFF;
- metadata_size += sizeof(uint16_t);
+ /* Shared byte */
+ if (nibble_index == 0) {
+ compressed[compressed_pos] = MIN(len, 15);
+ nibble_index = compressed_pos;
+ compressed_pos += sizeof(uint8_t);
+ } else {
+ compressed[nibble_index] |= MIN(len, 15) << 4;
+ nibble_index = 0;
}
}
- indic |= 1 << (32 - ((indic_bit % 32) + 1));
+ if (len >= 15) {
+ len -= 15;
+ compressed[compressed_pos] = MIN(len, 255);
+ compressed_pos += sizeof(uint8_t);
+ }
- if (best_len > 9) {
- if (nibble_index == 0) {
- nibble_index = compressed_pos + sizeof(uint16_t);
- } else {
- nibble_index = 0;
+ if (len >= 255) {
+ len += 15 + 7;
+
+ if (len > UINT16_MAX) {
+ /*
+ * 2 zero bytes indicate that a 3 bytes length
+ * follows
+ */
+ SSVAL(compressed, compressed_pos, 0);
+ compressed_pos += sizeof(uint16_t);
+ }
+
+ SSVAL(compressed, compressed_pos, len);
+ compressed_pos += sizeof(uint16_t);
+
+ len >>= 16;
+ if (len > 0) {
+ /*
+ * add the 3rd byte
+ */
+ compressed[compressed_pos] = (uint8_t)len;
+ compressed_pos += sizeof(uint8_t);
}
}
- compressed_pos += metadata_size;
+ indic |= 1 << (32 - ((indic_bit % 32) + 1));
+
uncompressed_pos += best_len;
byte_left -= best_len;
} else {
if (length == 255) {
length = PULL_LE_UINT16(input, input_index);
input_index += sizeof(uint16_t);
+ if (length == 0) {
+ length = PULL_LE_UINT32(input, input_index);
+ input_index += sizeof(uint32_t);
+ }
+ if (length < (15 + 7)) {
+ errno = EINVAL;
+ return -1;
+ }
length -= (15 + 7);
}
length += 15;