1 /* gzappend -- command to append to a gzip file
3 Copyright (C) 2003, 2012 Mark Adler, all rights reserved
4 version 1.2, 13 Aug 2012
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the author be held liable for any damages
8 arising from the use of this software.
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
22 Mark Adler madler@alumni.caltech.edu
28 * 1.0 19 Oct 2003 - First version
29 * 1.1 4 Nov 2003 - Expand and clarify some comments and notes
30 * - Add version and copyright to help
31 * - Send help to stdout instead of stderr
32 * - Add some preemptive typecasts
33 * - Add L to constants in lseek() calls
34 * - Remove some debugging information in error messages
35 * - Use new data_type definition for zlib 1.2.1
36 * - Simplify and unify file operations
37 * - Finish off gzip file in gztack()
38 * - Use deflatePrime() instead of adding empty blocks
39 * - Keep gzip file clean on appended file read errors
40 * - Use in-place rotate instead of auxiliary buffer
41 * (Why you ask? Because it was fun to write!)
42 * 1.2 13 Aug 2012 - Fix for proper z_const usage
46 gzappend takes a gzip file and appends to it, compressing files from the
47 command line or data from stdin. The gzip file is written to directly, to
48 avoid copying that file, in case it's large. Note that this results in the
49 unfriendly behavior that if gzappend fails, the gzip file is corrupted.
51 This program was written to illustrate the use of the new Z_BLOCK option of
52 zlib 1.2.x's inflate() function. This option returns from inflate() at each
53 block boundary to facilitate locating and modifying the last block bit at
54 the start of the final deflate block. Also whether using Z_BLOCK or not,
55 another required feature of zlib 1.2.x is that inflate() now provides the
56 number of unused bits in the last input byte used. gzappend will not work
57 with versions of zlib earlier than 1.2.1.
59 gzappend first decompresses the gzip file internally, discarding all but
60 the last 32K of uncompressed data, and noting the location of the last block
61 bit and the number of unused bits in the last byte of the compressed data.
62 The gzip trailer containing the CRC-32 and length of the uncompressed data
63 is verified. This trailer will be later overwritten.
65 Then the last block bit is cleared by seeking back in the file and rewriting
66 the byte that contains it. Seeking forward, the last byte of the compressed
67 data is saved along with the number of unused bits to initialize deflate.
69 A deflate process is initialized, using the last 32K of the uncompressed
70 data from the gzip file to initialize the dictionary. If the total
71 uncompressed data was less than 32K, then all of it is used to initialize
72 the dictionary. The deflate output bit buffer is also initialized with the
73 last bits from the original deflate stream. From here on, the data to
74 append is simply compressed using deflate, and written to the gzip file.
75 When that is complete, the new CRC-32 and uncompressed length are written
76 as the trailer of the gzip file.
/* size in bytes of each I/O buffer allocated with malloc(CHUNK): two to the
   power LGCHUNK */
#define CHUNK (1U << LGCHUNK)
91 /* print an error message and terminate with extreme prejudice */
92 local void bye(char *msg1, char *msg2)
94 fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
/* Return the greatest common divisor of a and b using Euclid's algorithm,
   modified to be fast when one argument is much greater than the other, and
   coded to avoid unnecessary swapping.  If either argument is zero, the other
   argument is returned. */
local unsigned gcd(unsigned a, unsigned b)
{
    unsigned c;

    while (a && b)
        if (a > b) {
            /* remove the largest b * 2^k that does not exceed a */
            c = b;
            while (a - c >= c)
                c <<= 1;
            a -= c;
        }
        else {
            /* remove the largest a * 2^k that does not exceed b */
            c = a;
            while (b - c >= c)
                c <<= 1;
            b -= c;
        }
    return a + b;                       /* at least one of them is zero */
}

/* Rotate list[0..len-1] left by rot positions, in place.  rot may be any
   value; it is reduced modulo len.  Lists shorter than two entries are left
   untouched. */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned char *start, *last, *to, *from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* pointer to last entry in list */
    last = list + (len - 1);

    /* do simple left shift by one -- source and destination overlap, so
       memmove() is required here (memcpy() would be undefined behavior) */
    if (rot == 1) {
        tmp = *list;
        memmove(list, list + 1, len - 1);
        *last = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = *last;
        memmove(list + 1, list, len - 1);
        *list = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = list + cycles;   /* start index is arbitrary */
        tmp = *from;                    /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */

            /* go right rot positions, wrapping around the end of the list --
               the wrap is decided before moving so that the pointer never
               leaves the array */
            if ((unsigned)(last - from) < rot)
                from -= len - rot;
            else
                from += rot;
            if (from == start) break;   /* all but one shifted */
            *to = *from;                /* shift left */
        }
        *to = tmp;                      /* complete the circle */
    } while (--cycles);
}
168 /* structure for gzip file read operations */
170 int fd; /* file descriptor */
171 int size; /* 1 << size is bytes in buf */
172 unsigned left; /* bytes available at next */
173 unsigned char *buf; /* buffer */
174 z_const unsigned char *next; /* next byte in buffer */
175 char *name; /* file name for error messages */
179 local int readin(file *in)
183 len = read(in->fd, in->buf, 1 << in->size);
184 if (len == -1) bye("error reading ", in->name);
185 in->left = (unsigned)len;
190 /* read from file in, exit if end-of-file */
191 local int readmore(file *in)
193 if (readin(in) == 0) bye("unexpected end of ", in->name);
/* Read one byte from in, reloading the buffer with readmore() first when it
   is empty (which exits at end-of-file).  The argument is parenthesized so
   that any pointer expression may be passed, but it is evaluated more than
   once and therefore must not have side effects. */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
200 /* skip over n bytes of in */
201 local void skip(file *in, unsigned n)
207 bypass = n & ~((1U << in->size) - 1);
209 if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
210 bye("seeking ", in->name);
215 bye("unexpected end of ", in->name);
221 /* read a four-byte unsigned integer, little-endian, from in */
222 unsigned long read4(file *in)
227 val += (unsigned)read1(in) << 8;
228 val += (unsigned long)read1(in) << 16;
229 val += (unsigned long)read1(in) << 24;
233 /* skip over gzip header */
234 local void gzheader(file *in)
239 if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
240 if (read1(in) != 8) bye("unknown compression method in", in->name);
242 if (flags & 0xe0) bye("unknown header flags set in", in->name);
246 n += (unsigned)(read1(in)) << 8;
249 if (flags & 8) while (read1(in) != 0) ;
250 if (flags & 16) while (read1(in) != 0) ;
251 if (flags & 2) skip(in, 2);
254 /* decompress gzip file "name", return strm with a deflate stream ready to
255 continue compression of the data in the gzip file, and return a file
256 descriptor pointing to where to write the compressed data -- the deflate
257 stream is initialized to compress using level "level" */
258 local int gzscan(char *name, z_stream *strm, int level)
260 int ret, lastbit, left, full;
262 unsigned long crc, tot;
263 unsigned char *window;
269 gz.fd = open(name, O_RDWR, 0);
270 if (gz.fd == -1) bye("cannot open ", name);
271 gz.buf = malloc(CHUNK);
272 if (gz.buf == NULL) bye("out of memory", "");
276 /* skip gzip header */
279 /* prepare to decompress */
280 window = malloc(DSIZE);
281 if (window == NULL) bye("out of memory", "");
282 strm->zalloc = Z_NULL;
283 strm->zfree = Z_NULL;
284 strm->opaque = Z_NULL;
285 ret = inflateInit2(strm, -15);
286 if (ret != Z_OK) bye("out of memory", " or library mismatch");
288 /* decompress the deflate stream, saving append information */
290 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
292 strm->avail_in = gz.left;
293 strm->next_in = gz.next;
294 crc = crc32(0L, Z_NULL, 0);
297 /* if needed, get more input */
298 if (strm->avail_in == 0) {
300 strm->avail_in = gz.left;
301 strm->next_in = gz.next;
304 /* set up output to next available section of sliding window */
305 strm->avail_out = DSIZE - have;
306 strm->next_out = window + have;
308 /* inflate and check for errors */
309 ret = inflate(strm, Z_BLOCK);
310 if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
311 if (ret == Z_MEM_ERROR) bye("out of memory", "");
312 if (ret == Z_DATA_ERROR)
313 bye("invalid compressed data--format violated in", name);
315 /* update crc and sliding window pointer */
316 crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
318 have = DSIZE - strm->avail_out;
324 /* process end of block */
325 if (strm->data_type & 128) {
326 if (strm->data_type & 64)
327 left = strm->data_type & 0x1f;
329 lastbit = strm->data_type & 0x1f;
330 lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
333 } while (ret != Z_STREAM_END);
335 gz.left = strm->avail_in;
336 gz.next = strm->next_in;
338 /* save the location of the end of the compressed data */
339 end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
341 /* check gzip trailer and save total for deflate */
342 if (crc != read4(&gz))
343 bye("invalid compressed data--crc mismatch in ", name);
344 tot = strm->total_out;
345 if ((tot & 0xffffffffUL) != read4(&gz))
346 bye("invalid compressed data--length mismatch in", name);
348 /* if not at end of file, warn */
349 if (gz.left || readin(&gz))
351 "gzappend warning: junk at end of gzip file overwritten\n");
353 /* clear last block bit */
354 lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
355 if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
356 *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
357 lseek(gz.fd, -1L, SEEK_CUR);
358 if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
360 /* if window wrapped, build dictionary from window by rotating */
362 rotate(window, DSIZE, have);
366 /* set up deflate stream with window, crc, total_in, and leftover bits */
367 ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
368 if (ret != Z_OK) bye("out of memory", "");
369 deflateSetDictionary(strm, window, have);
371 strm->total_in = tot;
373 lseek(gz.fd, --end, SEEK_SET);
374 if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
375 deflatePrime(strm, 8 - left, *gz.buf);
377 lseek(gz.fd, end, SEEK_SET);
379 /* clean up and return */
385 /* append file "name" to gzip file gd using deflate stream strm -- if last
386 is true, then finish off the deflate stream at the end */
387 local void gztack(char *name, int gd, z_stream *strm, int last)
391 unsigned char *in, *out;
393 /* open file to compress and append */
396 fd = open(name, O_RDONLY, 0);
398 fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
402 /* allocate buffers */
403 in = fd == -1 ? NULL : malloc(CHUNK);
405 if (out == NULL) bye("out of memory", "");
407 /* compress input file and append to gzip file */
410 len = fd == -1 ? 0 : read(fd, in, CHUNK);
413 "gzappend warning: error reading %s, skipping rest ...\n",
417 strm->avail_in = (unsigned)len;
419 if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
421 /* compress and write all available output */
423 strm->avail_out = CHUNK;
424 strm->next_out = out;
425 ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
426 left = CHUNK - strm->avail_out;
428 len = write(gd, out + CHUNK - strm->avail_out - left, left);
429 if (len == -1) bye("writing gzip file", "");
430 left -= (unsigned)len;
432 } while (strm->avail_out == 0 && ret != Z_STREAM_END);
435 /* write trailer after last entry */
438 out[0] = (unsigned char)(strm->adler);
439 out[1] = (unsigned char)(strm->adler >> 8);
440 out[2] = (unsigned char)(strm->adler >> 16);
441 out[3] = (unsigned char)(strm->adler >> 24);
442 out[4] = (unsigned char)(strm->total_in);
443 out[5] = (unsigned char)(strm->total_in >> 8);
444 out[6] = (unsigned char)(strm->total_in >> 16);
445 out[7] = (unsigned char)(strm->total_in >> 24);
448 ret = write(gd, out + 8 - len, len);
449 if (ret == -1) bye("writing gzip file", "");
455 /* clean up and return */
457 if (in != NULL) free(in);
458 if (fd > 0) close(fd);
461 /* process the compression level option if present, scan the gzip file, and
462 append the specified files, or append the data from stdin if no other file
463 names are provided on the command line -- the gzip file must be writable
465 int main(int argc, char **argv)
470 /* ignore command name */
473 /* provide usage if no arguments */
475 printf("gzappend 1.1 (4 Nov 2003) Copyright (C) 2003 Mark Adler\n");
477 "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
481 /* set compression level */
482 level = Z_DEFAULT_COMPRESSION;
483 if (argv[0][0] == '-') {
484 if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
485 bye("invalid compression level", "");
486 level = argv[0][1] - '0';
487 if (*++argv == NULL) bye("no gzip file name after options", "");
490 /* prepare to append to gzip file */
491 gd = gzscan(*argv++, &strm, level);
493 /* append files on command line, or from stdin if none */
495 gztack(NULL, gd, &strm, 1);
498 gztack(*argv, gd, &strm, argv[1] == NULL);
499 } while (*++argv != NULL);