1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 static int pytdbpack_calc_reqd_len(char *format_str,
33 static PyObject *pytdbpack_unpack_item(char,
37 pytdbpack_calc_item_len(char format_ch,
40 static PyObject *pytdbpack_pack_data(const char *format_str,
46 static const char * pytdbpack_docstring =
47 "Convert between Python values and Samba binary encodings.
49 This module is conceptually similar to the standard 'struct' module, but it
50 uses both a different binary format and a different description string.
52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
53 little-endian, unpadded, non-self-describing binary format. It is intended
54 that these functions be as similar as possible to the routines in Samba's
55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
57 Python strings are used to specify the format of data to be packed or
60 Strings in TDBs are typically stored in DOS codepages. The caller of this
61 module must make appropriate translations if necessary, typically to and from
64 tdbpack format strings:
66 'f': NULL-terminated string in DOS codepage
70 'd': 4 byte little-endian number
72 'w': 2 byte little-endian number
74 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
75 really just an \"exists\" or \"does not exist\" flag. The boolean
76 value of the Python object is used.
78 'B': 4-byte LE length, followed by that many bytes of binary data.
79 Corresponds to a Python byte string of the appropriate length.
81 '$': Special flag indicating that the preceding format code should be
82 repeated while data remains. This is only supported for unpacking.
84 Every code corresponds to a single Python object, except 'B' which
85 corresponds to two values (length and contents), and '$', which produces
86 however many make sense.
90 static char const pytdbpack_pack_doc[] =
91 "pack(format, values) -> buffer
92 Pack Python objects into Samba binary format according to format string.
95 format -- string of tdbpack format characters
96 values -- sequence of value objects corresponding 1:1 to format characters
99 buffer -- string containing packed data
102 IndexError -- if there are too few values for the format
103 ValueError -- if any of the format characters is illegal
104 TypeError -- if the format is not a string, or values is not a sequence,
105 or any of the values is of the wrong type for the corresponding
109 For historical reasons, it is not an error to pass more values than are consumed
114 static char const pytdbpack_unpack_doc[] =
115 "unpack(format, buffer) -> (values, rest)
116 Unpack Samba binary data according to format string.
119 format -- string of tdbpack characters
120 buffer -- string of packed binary data
124 values -- sequence of values corresponding 1:1 to format characters
125 rest -- string containing data that was not decoded, or '' if the
126 whole string was consumed
129 IndexError -- if there is insufficient data in the buffer for the
130 format (or if the data is corrupt and contains a variable-length
131 field extending past the end)
132 ValueError -- if any of the format characters is illegal
135 Because unconsumed data is returned, you can feed it back in to the
136 unpacker to extract further fields. Alternatively, if you wish to modify
137 some fields near the start of the data, you may be able to save time by
138 only unpacking and repacking the necessary part.
144 Game plan is to first of all walk through the arguments and calculate the
145 total length that will be required. We allocate a Python string of that
146 size, then walk through again and fill it in.
148 We just borrow references to all the passed arguments, since none of them
149 need to be permanently stored. We transfer ownership to the returned
153 pytdbpack_pack(PyObject *self,
157 PyObject *val_seq, *fast_seq, *buf_str;
161 /* TODO: Test passing wrong types or too many arguments */
162 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
165 /* Convert into a list or tuple (if not already one), so that we can
166 * index more easily. */
167 fast_seq = PySequence_Fast(val_seq,
168 __FUNCTION__ ": argument 2 must be sequence");
172 reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
173 if (reqd_len == -1) /* exception was thrown */
178 This design causes an unnecessary copying of the data when Python
179 constructs an object, and that might possibly be avoided by using a
180 Buffer object of some kind instead. I'm not doing that for now
182 packed_buf = malloc(reqd_len);
184 PyErr_Format(PyExc_MemoryError,
185 "%s: couldn't allocate %d bytes for packed buffer",
186 __FUNCTION__, reqd_len);
190 if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
195 buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
196 free(packed_buf); /* get rid of tmp buf */
204 pytdbpack_unpack(PyObject *self,
207 char *format_str, *packed_str, *ppacked;
208 PyObject *val_list = NULL, *ret_tuple = NULL;
209 PyObject *rest_string = NULL;
210 int format_len, packed_len;
212 char last_format = '#';
215 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
218 format_len = strlen(format_str);
220 /* allocate list to hold results */
221 val_list = PyList_New(format_len);
224 ret_tuple = PyTuple_New(2);
228 /* For every object, unpack. */
229 for (ppacked = packed_str, i = 0; i < format_len; i++) {
233 format = format_str[i];
236 PyErr_Format(PyExc_ValueError,
237 "%s: '$' may not be first character in format",
242 format = last_format; /* repeat */
246 val_obj = pytdbpack_unpack_item(format,
252 PyList_SET_ITEM(val_list, i, val_obj);
253 last_format = format;
256 /* put leftovers in box for lunch tomorrow */
257 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
261 /* return (values, rest) tuple; give up references to them */
262 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
264 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
269 /* handle failure: deallocate anything */
270 Py_XDECREF(val_list);
271 Py_XDECREF(ret_tuple);
272 Py_XDECREF(rest_string);
278 Internal routine that calculates how many bytes will be required to
279 encode the values in the format.
281 Also checks that the value list is the right size for the format list.
283 Returns number of bytes (may be 0), or -1 if there's something wrong, in
284 which case a Python exception has been raised.
288 val_seq: a Fast Sequence (list or tuple), being all the values
291 pytdbpack_calc_reqd_len(char *format_str,
299 val_len = PySequence_Length(val_seq);
303 for (p = format_str, val_i = 0; *p; p++, val_i++) {
308 if (val_i >= val_len) {
309 PyErr_Format(PyExc_IndexError,
310 "samba.tdbpack.pack: value list is too short for format string");
314 /* borrow a reference to the item */
315 val_obj = PySequence_GetItem(val_seq, val_i);
319 item_len = pytdbpack_calc_item_len(ch, val_obj);
326 if (val_i != val_len) {
327 PyErr_Format(PyExc_IndexError,
328 "%s: value list is wrong length for format string",
337 static PyObject *pytdbpack_bad_type(char ch,
338 const char *expected,
341 PyObject *r = PyObject_Repr(val_obj);
344 PyErr_Format(PyExc_TypeError,
345 "tdbpack: format '%c' requires %s, not %s",
346 ch, expected, PyString_AS_STRING(r));
353 * Calculate the number of bytes required to pack a single value. While doing
354 * this, also conduct some initial checks that the argument types are
357 * Returns -1 on exception.
360 pytdbpack_calc_item_len(char ch,
363 if (ch == 'd' || ch == 'w') {
364 if (!PyInt_Check(val_obj)) {
365 pytdbpack_bad_type(ch, "Int", val_obj);
372 } else if (ch == 'p') {
375 else if (ch == 'f' || ch == 'P' || ch == 'B') {
376 /* nul-terminated 8-bit string */
377 if (!PyString_Check(val_obj)) {
378 pytdbpack_bad_type(ch, "String", val_obj);
383 /* byte buffer; just use Python string's length, plus
385 return 4 + PyString_GET_SIZE(val_obj);
388 /* one nul character */
389 return 1 + PyString_GET_SIZE(val_obj);
393 PyErr_Format(PyExc_ValueError,
394 "tdbpack: format character '%c' is not supported",
/*
  Store the low 32 bits of VAL_LONG at *PBUF as four little-endian bytes and
  advance *PBUF past them.  The caller guarantees at least four writable
  bytes at *PBUF.

  XXX: glib and Samba have quicker macros for doing the endianness
  conversions, but I don't know of one in plain libc, and it's probably not
  a big deal.  Writing the bytes one at a time is independent of the host
  byte order, so being safe costs little even though we'll almost always be
  on x86.
*/
static void pack_int32(unsigned long val_long, unsigned char **pbuf)
{
    (*pbuf)[0] = val_long & 0xff;
    (*pbuf)[1] = (val_long >> 8) & 0xff;
    (*pbuf)[2] = (val_long >> 16) & 0xff;
    (*pbuf)[3] = (val_long >> 24) & 0xff;
    (*pbuf) += 4;               /* callers rely on the cursor moving on */
}
/*
  Copy LEN bytes from FROM to *PBUF and advance *PBUF past them.  The caller
  guarantees that the destination has room and does not overlap FROM.
*/
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    /* A negative length would turn into an enormous size_t inside memcpy;
       treat it, like zero, as nothing to copy. */
    if (len <= 0)
        return;

    memcpy(*pbuf, from, (size_t) len);
    (*pbuf) += len;
}
427 unpack_err_too_short(void)
429 PyErr_Format(PyExc_IndexError,
430 __FUNCTION__ ": data too short for unpack format");
435 unpack_int32(char **pbuf, int *plen)
441 unpack_err_too_short();
446 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
451 return PyInt_FromLong(v);
455 static PyObject *unpack_int16(char **pbuf, int *plen)
461 unpack_err_too_short();
471 return PyInt_FromLong(v);
476 unpack_string(char **pbuf, int *plen)
479 char *nul_ptr, *start;
483 nul_ptr = memchr(start, '\0', *plen);
485 unpack_err_too_short();
489 len = nul_ptr - start;
491 *pbuf += len + 1; /* skip \0 */
494 return PyString_FromStringAndSize(start, len);
499 unpack_buffer(char **pbuf, int *plen)
501 /* first get 32-bit len */
504 unsigned char *start;
507 unpack_err_too_short();
512 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
514 if (slen < 0) { /* surely you jest */
515 PyErr_Format(PyExc_ValueError,
516 __FUNCTION__ ": buffer seems to have negative length");
525 PyErr_Format(PyExc_IndexError,
526 __FUNCTION__ ": not enough data to unpack buffer: "
527 "need %d bytes, have %d",
535 return PyString_FromStringAndSize(start, slen);
539 /* Unpack a single field from packed data, according to format character CH.
540 Remaining data is at *PBUF, of *PLEN.
542 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
545 Returns a reference to the unpacked Python object, or NULL for failure.
547 static PyObject *pytdbpack_unpack_item(char ch,
551 if (ch == 'w') { /* 16-bit int */
552 return unpack_int16(pbuf, plen);
554 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
555 /* pointers can just come through as integers */
556 return unpack_int32(pbuf, plen);
558 else if (ch == 'f' || ch == 'P') { /* nul-term string */
559 return unpack_string(pbuf, plen);
561 else if (ch == 'B') { /* length, buffer */
562 return unpack_buffer(pbuf, plen);
565 PyErr_Format(PyExc_ValueError,
566 __FUNCTION__ ": format character '%c' is not supported",
576 Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
577 and advance the pointer. Buffer length has been pre-calculated so we are
578 sure that there is enough space.
582 pytdbpack_pack_item(char ch,
584 unsigned char **pbuf)
587 unsigned long val_long = PyInt_AsLong(val_obj);
588 (*pbuf)[0] = val_long & 0xff;
589 (*pbuf)[1] = (val_long >> 8) & 0xff;
592 else if (ch == 'd') {
593 /* 4-byte LE number */
594 pack_int32(PyInt_AsLong(val_obj), pbuf);
596 else if (ch == 'p') {
597 /* "Pointer" value -- in the subset of DCERPC used by Samba,
598 this is really just an "exists" or "does not exist"
600 pack_int32(PyObject_IsTrue(val_obj), pbuf);
602 else if (ch == 'f' || ch == 'P') {
606 size = PyString_GET_SIZE(val_obj);
607 sval = PyString_AS_STRING(val_obj);
608 pack_bytes(size+1, sval, pbuf); /* include nul */
610 else if (ch == 'B') {
614 size = PyString_GET_SIZE(val_obj);
615 pack_int32(size, pbuf);
616 sval = PyString_AS_STRING(val_obj);
617 pack_bytes(size, sval, pbuf); /* do not include nul */
620 /* this ought to be caught while calculating the length, but
622 PyErr_Format(PyExc_ValueError,
623 "%s: format character '%c' is not supported",
634 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
637 The string has already been checked out, so we know that VAL_SEQ is large
638 enough to hold the packed data, and that there are enough value items.
639 (However, their types may not have been thoroughly checked yet.)
641 In addition, val_seq is a Python Fast sequence.
643 Returns NULL for error (with exception set), or None.
646 pytdbpack_pack_data(const char *format_str,
648 unsigned char *packed_buf)
652 for (i = 0; format_str[i]; i++) {
653 char ch = format_str[i];
656 /* borrow a reference to the item */
657 val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
661 if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
672 static PyMethodDef pytdbpack_methods[] = {
673 { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
674 { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
680 Py_InitModule3("tdbpack", pytdbpack_methods,
681 (char *) pytdbpack_docstring);