lib/compression/lzxpress.c...
authorStefan Metzmacher <metze@samba.org>
Fri, 5 Jul 2019 15:31:48 +0000 (17:31 +0200)
committerStefan Metzmacher <metze@samba.org>
Tue, 15 Oct 2019 07:36:38 +0000 (09:36 +0200)
lib/compression/lzxpress.c

index 024aba4c2ce8aa1b868a1001c36cc5091cb894a0..3fe9f04de1bb7173acdfd3159d42ca5187db8047 100644 (file)
@@ -63,10 +63,10 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed,
                          uint32_t max_compressed_size)
 {
        uint32_t uncompressed_pos, compressed_pos, byte_left;
-       uint32_t max_offset, best_offset;
-       int32_t offset;
+       uint16_t max_offset, best_offset;
+       int16_t offset;
        uint32_t max_len, len, best_len;
-       const uint8_t *str1, *str2;
+       const uint8_t *str1;
        uint32_t indic;
        uint8_t *indic_pos;
        uint32_t indic_bit, nibble_index;
@@ -100,97 +100,144 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed,
                str1 = &uncompressed[uncompressed_pos];
 
                best_len = 2;
-               best_offset = 0;
+               best_offset = UINT16_MAX; /* invalid */
 
-               max_offset = MIN(0x1FFF, max_offset);
+               max_offset = MIN(0x2000, max_offset);
+               if (bytes_left < 3) {
+                       max_offset = 0;
+               }
 
                /* search for the longest match in the window for the lookahead buffer */
                for (offset = 1; (uint32_t)offset <= max_offset; offset++) {
-                       str2 = &str1[-offset];
+                       const uint8_t *str2 = &str1[-offset];
+                       int cmp = 0;
+                       uint32_t tmp_max;
 
-                       /* maximum len we can encode into metadata */
-                       max_len = MIN((255 + 15 + 7 + 3), byte_left);
+                       /*
+                        * We compare at least 3 bytes (for the first
+                        * match) or one more than the current best match.
+                        */
+                       cmp = memcmp(str2, str1, best_len + 1);
+                       if (cmp != 0) {
+                               /*
+                                * We didn't find a better match
+                                */
+                               continue;
+                       }
 
-                       for (len = 0; (len < max_len) && (str1[len] == str2[len]); len++);
+                       /*
+                        * Remember we found at least one match
+                        */
+                       found = true;
+                       best_offset = offset;
+                       best_len += 1;
 
                        /*
-                        * We check if len is better than the value found before, including the
-                        * sequence of identical bytes
+                        * Now we use some kind of binary search to find the
+                        * length of the match.
                         */
-                       if (len > best_len) {
-                               found = true;
-                               best_len = len;
-                               best_offset = offset;
+                       tmp_max = bytes_left;
+                       while (best_len < tmp_max) {
+                               const uint32_t cmp_len =
+                                       MAX((tmp_max - best_len) / 2, 1);
+
+                               cmp = memcmp(&str2[best_len],
+                                            &str1[best_len],
+                                            cmp_len);
+                               if (cmp == 0) {
+                                       /*
+                                        * We still match,
+                                        * remember it.
+                                        */
+                                       best_len += cmp_len;
+                               } else {
+                                       /*
+                                        * We no longer match
+                                        * reduce the max boundary.
+                                        */
+                                       uint32_t new_max = best_len + cmp_len;
+                                       if (new_max == tmp_max) {
+                                               /*
+                                                * make sure we always reduce
+                                                * at least by one
+                                                */
+                                               new_max -= 1;
+                                       }
+                                       tmp_max = new_max;
+                               }
+                       }
+
+                       if (best_len < bytes_left) {
+                               /*
+                                * We check if len is better than the value
+                                * found before, including the sequence of
+                                * identical bytes
+                                */
+                               continue;
                        }
+
+                       break;
                }
 
                if (found) {
-                       metadata_size = 0;
-                       dest = (uint16_t *)&compressed[compressed_pos];
-
-                       if (best_len < 10) {
-                               /* Classical meta-data */
-                               metadata = (uint16_t)(((best_offset - 1) << 3) | (best_len - 3));
-                               SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
-                               metadata_size += sizeof(uint16_t);
-                       } else {
-                               metadata = (uint16_t)(((best_offset - 1) << 3) | 7);
-                               SSVAL(dest, metadata_size / sizeof(uint16_t), metadata);
-                               metadata_size = sizeof(uint16_t);
-
-                               if (best_len < (15 + 7 + 3)) {
-                                       /* Shared byte */
-                                       if (!nibble_index) {
-                                               compressed[compressed_pos + metadata_size] = (best_len - (3 + 7)) & 0xF;
-                                               metadata_size += sizeof(uint8_t);
-                                       } else {
-                                               compressed[nibble_index] &= 0xF;
-                                               compressed[nibble_index] |= (best_len - (3 + 7)) * 16;
-                                       }
-                               } else if (best_len < (3 + 7 + 15 + 255)) {
-                                       /* Shared byte */
-                                       if (!nibble_index) {
-                                               compressed[compressed_pos + metadata_size] = 15;
-                                               metadata_size += sizeof(uint8_t);
-                                       } else {
-                                               compressed[nibble_index] &= 0xF;
-                                               compressed[nibble_index] |= (15 * 16);
-                                       }
+                       uint32_t len = best_len;
+                       uint16_t ofs = best_offset;
 
-                                       /* Additional best_len */
-                                       compressed[compressed_pos + metadata_size] = (best_len - (3 + 7 + 15)) & 0xFF;
-                                       metadata_size += sizeof(uint8_t);
-                               } else {
-                                       /* Shared byte */
-                                       if (!nibble_index) {
-                                               compressed[compressed_pos + metadata_size] |= 15;
-                                               metadata_size += sizeof(uint8_t);
-                                       } else {
-                                               compressed[nibble_index] |= 15 << 4;
-                                       }
+                       len -= 3;
+                       ofs -= 1;
+                       ofs <<= 3;
+                       ofs |= MIN(len, 7);
 
-                                       /* Additional best_len */
-                                       compressed[compressed_pos + metadata_size] = 255;
+                       SSVAL(compressed, compressed_pos, ofs);
+                       compressed_pos += sizeof(uint16_t);
 
-                                       metadata_size += sizeof(uint8_t);
+                       if (len >= 7) {
+                               len -= 7;
 
-                                       compressed[compressed_pos + metadata_size] = (best_len - 3) & 0xFF;
-                                       compressed[compressed_pos + metadata_size + 1] = ((best_len - 3) >> 8) & 0xFF;
-                                       metadata_size += sizeof(uint16_t);
+                               /* Shared byte */
+                               if (nibble_index == 0) {
+                                       compressed[compressed_pos] = MIN(len, 15);
+                                       nibble_index = compressed_pos;
+                                       compressed_pos += sizeof(uint8_t);
+                               } else {
+                                       compressed[nibble_index] |= MIN(len, 15) << 4;
+                                       nibble_index = 0;
                                }
                        }
 
-                       indic |= 1 << (32 - ((indic_bit % 32) + 1));
+                       if (len >= 15) {
+                               len -= 15;
+                               compressed[compressed_pos] = MIN(len, 255);
+                               compressed_pos += sizeof(uint8_t);
+                       }
 
-                       if (best_len > 9) {
-                               if (nibble_index == 0) {
-                                       nibble_index = compressed_pos + sizeof(uint16_t);
-                               } else {
-                                       nibble_index = 0;
+                       if (len >= 255) {
+                               len += 15 + 7;
+
+                               if (len > UINT16_MAX) {
+                                       /*
+                                        * 2 zero bytes indicate that a 3 bytes length
+                                        * follows
+                                        */
+                                       SSVAL(compressed, compressed_pos, 0);
+                                       compressed_pos += sizeof(uint16_t);
+                               }
+
+                               SSVAL(compressed, compressed_pos, len);
+                               compressed_pos += sizeof(uint16_t);
+
+                               len >>= 16;
+                               if (len > 0) {
+                                       /*
+                                        * add the 3rd byte
+                                        */
+                                       compressed[compressed_pos] = (uint8_t)len;
+                                       compressed_pos += sizeof(uint8_t);
                                }
                        }
 
-                       compressed_pos += metadata_size;
+                       indic |= 1 << (32 - ((indic_bit % 32) + 1));
+
                        uncompressed_pos += best_len;
                        byte_left -= best_len;
                } else {
@@ -291,6 +338,14 @@ ssize_t lzxpress_decompress(const uint8_t *input,
                                        if (length == 255) {
                                                length = PULL_LE_UINT16(input, input_index);
                                                input_index += sizeof(uint16_t);
+                                               if (length == 0) {
+                                                       length = PULL_LE_UINT32(input, input_index);
+                                                       input_index += sizeof(uint32_t);
+                                               }
+                                               if (length < (15 + 7)) {
+                                                       errno = EINVAL;
+                                                       return -1;
+                                               }
                                                length -= (15 + 7);
                                        }
                                        length += 15;