uint32_t max_compressed_size)
{
uint32_t uncompressed_pos, compressed_pos, byte_left;
- uint16_t max_offset, best_offset;
- int16_t offset;
+ uint32_t max_offset, best_offset;
+ int32_t offset;
uint32_t max_len, len, best_len;
- const uint8_t *str1;
+ const uint8_t *str1, *str2;
uint32_t indic;
uint8_t *indic_pos;
uint32_t indic_bit, nibble_index;
str1 = &uncompressed[uncompressed_pos];
best_len = 2;
- best_offset = UINT16_MAX; /* invalid */
+ best_offset = 0;
- max_offset = MIN(0x2000, max_offset);
- if (bytes_left < 3) {
- max_offset = 0;
- }
+ max_offset = MIN(0x1FFF, max_offset);
/* search for the longest match in the window for the lookahead buffer */
for (offset = 1; (uint32_t)offset <= max_offset; offset++) {
- const uint8_t *str2 = &str1[-offset];
- int cmp = 0;
- uint32_t tmp_max;
+ str2 = &str1[-offset];
- /*
- * We compare at least 3 bytes (for the first
- * match) or one more than the current best match.
- */
- cmp = memcmp(str2, str1, best_len + 1);
- if (cmp != 0) {
- /*
- * We didn't find a better match
- */
- continue;
- }
+ /* maximum len we can encode into metadata */
+ max_len = MIN((255 + 15 + 7 + 3), byte_left);
- /*
- * Remember we found at least one match
- */
- found = true;
- best_offset = offset;
- best_len += 1;
+ for (len = 0; (len < max_len) && (str1[len] == str2[len]); len++);
/*
- * Now we use some kind of binary search to find the
- * length of the match.
+ * We check if len is better than the value found before, including the
+ * sequence of identical bytes
*/
- tmp_max = bytes_left;
- while (best_len < tmp_max) {
- const uint32_t cmp_len =
- MAX((tmp_max - best_len) / 2, 1);
-
- cmp = memcmp(&str2[best_len],
- &str1[best_len],
- cmp_len);
- if (cmp == 0) {
- /*
- * We still match,
- * remember it.
- */
- best_len += cmp_len;
- } else {
- /*
- * We no longer match
- * reduce the max boundary.
- */
- uint32_t new_max = best_len + cmp_len;
- if (new_max == tmp_max) {
- /*
- * make sure we always reduce
- * at least by one
- */
- new_max -= 1;
- }
- tmp_max = new_max;
- }
- }
-
- if (best_len < bytes_left) {
- /*
- * We check if len is better than the value
- * found before, including the sequence of
- * identical bytes
- */
- continue;
+ if (len > best_len) {
+ found = true;
+ best_len = len;
+ best_offset = offset;
}
-
- break;
}
if (found) {
- uint32_t len = best_len;
- uint16_t ofs = best_offset;
-
- len -= 3;
- ofs -= 1;
- ofs <<= 3;
- ofs |= MIN(len, 7);
-
- SSVAL(compressed, compressed_pos, ofs);
- compressed_pos += sizeof(uint16_t);
-
- if (len >= 7) {
- len -= 7;
+ metadata_size = 0;
+ dest = (uint16_t *)&compressed[compressed_pos];
+
+ if (best_len < 10) {
+ /* Classical meta-data */
+ metadata = (uint16_t)(((best_offset - 1) << 3) | (best_len - 3));
+ SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
+ metadata_size += sizeof(uint16_t);
+ } else {
+ metadata = (uint16_t)(((best_offset - 1) << 3) | 7);
+ SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
+ metadata_size = sizeof(uint16_t);
+
+ if (best_len < (15 + 7 + 3)) {
+ /* Shared byte */
+ if (!nibble_index) {
+ compressed[compressed_pos + metadata_size] = (best_len - (3 + 7)) & 0xF;
+ metadata_size += sizeof(uint8_t);
+ } else {
+ compressed[nibble_index] &= 0xF;
+ compressed[nibble_index] |= (best_len - (3 + 7)) * 16;
+ }
+ } else if (best_len < (3 + 7 + 15 + 255)) {
+ /* Shared byte */
+ if (!nibble_index) {
+ compressed[compressed_pos + metadata_size] = 15;
+ metadata_size += sizeof(uint8_t);
+ } else {
+ compressed[nibble_index] &= 0xF;
+ compressed[nibble_index] |= (15 * 16);
+ }
- /* Shared byte */
- if (nibble_index == 0) {
- compressed[compressed_pos] = MIN(len, 15);
- nibble_index = compressed_pos;
- compressed_pos += sizeof(uint8_t);
+ /* Additional best_len */
+ compressed[compressed_pos + metadata_size] = (best_len - (3 + 7 + 15)) & 0xFF;
+ metadata_size += sizeof(uint8_t);
} else {
- compressed[nibble_index] |= MIN(len, 15) << 4;
- nibble_index = 0;
- }
- }
+ /* Shared byte */
+ if (!nibble_index) {
+ compressed[compressed_pos + metadata_size] |= 15;
+ metadata_size += sizeof(uint8_t);
+ } else {
+ compressed[nibble_index] |= 15 << 4;
+ }
- if (len >= 15) {
- len -= 15;
- compressed[compressed_pos] = MIN(len, 255);
- compressed_pos += sizeof(uint8_t);
- }
+ /* Additional best_len */
+ compressed[compressed_pos + metadata_size] = 255;
- if (len >= 255) {
- len += 15 + 7;
+ metadata_size += sizeof(uint8_t);
- if (len > UINT16_MAX) {
- /*
- * 2 zero bytes indicate that a 3 bytes length
- * follows
- */
- SSVAL(compressed, compressed_pos, 0);
- compressed_pos += sizeof(uint16_t);
+ compressed[compressed_pos + metadata_size] = (best_len - 3) & 0xFF;
+ compressed[compressed_pos + metadata_size + 1] = ((best_len - 3) >> 8) & 0xFF;
+ metadata_size += sizeof(uint16_t);
}
+ }
- SSVAL(compressed, compressed_pos, len);
- compressed_pos += sizeof(uint16_t);
+ indic |= 1 << (32 - ((indic_bit % 32) + 1));
- len >>= 16;
- if (len > 0) {
- /*
- * add the 3rd byte
- */
- compressed[compressed_pos] = (uint8_t)len;
- compressed_pos += sizeof(uint8_t);
+ if (best_len > 9) {
+ if (nibble_index == 0) {
+ nibble_index = compressed_pos + sizeof(uint16_t);
+ } else {
+ nibble_index = 0;
}
}
- indic |= 1 << (32 - ((indic_bit % 32) + 1));
-
+ compressed_pos += metadata_size;
uncompressed_pos += best_len;
byte_left -= best_len;
} else {
if (length == 255) {
length = PULL_LE_UINT16(input, input_index);
input_index += sizeof(uint16_t);
- if (length == 0) {
- length = PULL_LE_UINT32(input, input_index);
- input_index += sizeof(uint32_t);
- }
- if (length < (15 + 7)) {
- errno = EINVAL;
- return -1;
- }
length -= (15 + 7);
}
length += 15;