1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 static int pytdbpack_calc_reqd_len(char *format_str,
33 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
35 static PyObject *pytdbpack_pack_data(const char *format_str,
42 static PyObject *pytdbpack_bad_type(char ch,
/* Module-level documentation string; it is handed to Py_InitModule3() when
   the "tdbpack" module is registered.
   NOTE(review): the text below describes 'P' as the "Pointer" flag, but the
   pack/unpack code in this file treats lower-case 'p' as the 4-byte pointer
   flag and upper-case 'P' as a nul-terminated string (same as 'f') --
   confirm and correct the documentation text.
   NOTE(review): this literal spans raw newlines, which standard C does not
   allow inside a string literal. */
46 static const char * pytdbpack_docstring =
47 "Convert between Python values and Samba binary encodings.
49 This module is conceptually similar to the standard 'struct' module, but it
50 uses both a different binary format and a different description string.
52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
53 little-endian, unpadded, non-self-describing binary format. It is intended
54 that these functions be as similar as possible to the routines in Samba's
55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
57 Python strings are used to specify the format of data to be packed or
60 Strings in TDBs are typically stored in DOS codepages. The caller of this
61 module must make appropriate translations if necessary, typically to and from
64 tdbpack format strings:
66 'f': NULL-terminated string in DOS codepage
70 'd': 4 byte little-endian unsigned number
72 'w': 2 byte little-endian unsigned number
74 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
75 really just an \"exists\" or \"does not exist\" flag. The boolean
76 value of the Python object is used.
78 'B': 4-byte LE length, followed by that many bytes of binary data.
79 Corresponds to a Python integer giving the length, followed by a byte
80 string of the appropriate length.
82 '$': Special flag indicating that the preceding format code should be
83 repeated while data remains. This is only supported for unpacking.
85 Every code corresponds to a single Python object, except 'B' which
86 corresponds to two values (length and contents), and '$', which produces
87 however many make sense.
/* Documentation string for the "pack" entry of the module's method table.
   It documents the exceptions pack() can raise and notes that surplus
   values are tolerated. */
91 static char const pytdbpack_pack_doc[] =
92 "pack(format, values) -> buffer
93 Pack Python objects into Samba binary format according to format string.
96 format -- string of tdbpack format characters
97 values -- sequence of value objects corresponding 1:1 to format characters
100 buffer -- string containing packed data
103 IndexError -- if there are too few values for the format
104 ValueError -- if any of the format characters is illegal
105 TypeError -- if the format is not a string, or values is not a sequence,
106 or any of the values is of the wrong type for the corresponding
110 For historical reasons, it is not an error to pass more values than are consumed
/* Documentation string for the "unpack" entry of the module's method table.
   NOTE(review): "corresponding 1:1 to format characters" is not exact -- a
   'B' field appends two values (length and contents) and '$' repeats the
   preceding code while data remains. */
115 static char const pytdbpack_unpack_doc[] =
116 "unpack(format, buffer) -> (values, rest)
117 Unpack Samba binary data according to format string.
120 format -- string of tdbpack characters
121 buffer -- string of packed binary data
125 values -- sequence of values corresponding 1:1 to format characters
126 rest -- string containing data that was not decoded, or '' if the
127 whole string was consumed
130 IndexError -- if there is insufficient data in the buffer for the
131 format (or if the data is corrupt and contains a variable-length
132 field extending past the end)
133 ValueError -- if any of the format characters is illegal
136 Because unconsumed data is returned, you can feed it back in to the
137 unpacker to extract further fields. Alternatively, if you wish to modify
138 some fields near the start of the data, you may be able to save time by
139 only unpacking and repacking the necessary part.
145 Game plan is to first of all walk through the arguments and calculate the
146 total length that will be required. We allocate a Python string of that
147 size, then walk through again and fill it in.
149 We just borrow references to all the passed arguments, since none of them
150 need to be permanently stored. We transfer ownership to the returned
154 pytdbpack_pack(PyObject *self,
158 PyObject *val_seq, *fast_seq, *buf_str;
162 /* TODO: Test passing wrong types or too many arguments */
163 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
166 /* Convert into a list or tuple (if not already one), so that we can
167 * index more easily. */
168 fast_seq = PySequence_Fast(val_seq,
169 __FUNCTION__ ": argument 2 must be sequence");
173 reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
174 if (reqd_len == -1) /* exception was thrown */
179 This design causes an unnecessary copying of the data when Python
180 constructs an object, and that might possibly be avoided by using a
181 Buffer object of some kind instead. I'm not doing that for now
183 packed_buf = malloc(reqd_len);
185 PyErr_Format(PyExc_MemoryError,
186 "%s: couldn't allocate %d bytes for packed buffer",
187 __FUNCTION__, reqd_len);
191 if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
196 buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
197 free(packed_buf); /* get rid of tmp buf */
205 pytdbpack_unpack(PyObject *self,
208 char *format_str, *packed_str, *ppacked;
209 PyObject *val_list = NULL, *ret_tuple = NULL;
210 PyObject *rest_string = NULL;
211 int format_len, packed_len;
212 char last_format = '#'; /* invalid */
216 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
219 format_len = strlen(format_str);
221 /* Allocate list to hold results. Initially empty, and we append
222 results as we go along. */
223 val_list = PyList_New(0);
226 ret_tuple = PyTuple_New(2);
230 /* For every object, unpack. */
231 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
232 last_format = format_str[i];
233 /* packed_len is reduced in place */
234 if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
238 /* If the last character was '$', keep going until out of space */
239 if (format_str[i] == '$') {
241 PyErr_Format(PyExc_ValueError,
242 "%s: '$' may not be first character in format",
246 while (packed_len > 0)
247 if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
251 /* save leftovers for next time */
252 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
256 /* return (values, rest) tuple; give up references to them */
257 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
259 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
264 /* handle failure: deallocate anything. XDECREF forms handle NULL
265 pointers for objects that haven't been allocated yet. */
266 Py_XDECREF(val_list);
267 Py_XDECREF(ret_tuple);
268 Py_XDECREF(rest_string);
274 Internal routine that calculates how many bytes will be required to
275 encode the values in the format.
277 Also checks that the value list is the right size for the format list.
279 Returns number of bytes (may be 0), or -1 if there's something wrong, in
280 which case a Python exception has been raised.
284 val_seq: a Fast Sequence (list or tuple), being all the values
287 pytdbpack_calc_reqd_len(char *format_str,
295 val_len = PySequence_Length(val_seq);
299 for (p = format_str, val_i = 0; *p; p++, val_i++) {
302 if (val_i >= val_len) {
303 PyErr_Format(PyExc_IndexError,
304 "%s: value list is too short for format string",
309 /* borrow a reference to the item */
310 if (ch == 'd' || ch == 'p')
314 else if (ch == 'f' || ch == 'P') {
315 /* nul-terminated 8-bit string */
319 str_obj = PySequence_GetItem(val_seq, val_i);
323 if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
324 pytdbpack_bad_type(ch, "String", str_obj);
330 else if (ch == 'B') {
331 /* length-preceded byte buffer: n bytes, plus a preceding
336 len_obj = PySequence_GetItem(val_seq, val_i);
337 val_i++; /* skip over buffer */
339 if (!PyNumber_Check(len_obj)) {
340 pytdbpack_bad_type(ch, "Number", len_obj);
344 len_val = PyInt_AsLong(len_obj);
346 PyErr_Format(PyExc_ValueError,
347 "%s: format 'B' requires positive integer", __FUNCTION__);
354 PyErr_Format(PyExc_ValueError,
355 "%s: format character '%c' is not supported",
366 static PyObject *pytdbpack_bad_type(char ch,
367 const char *expected,
370 PyObject *r = PyObject_Repr(val_obj);
373 PyErr_Format(PyExc_TypeError,
374 "tdbpack: format '%c' requires %s, not %s",
375 ch, expected, PyString_AS_STRING(r));
/*
  Store the low 32 bits of VAL_LONG at *PBUF in little-endian byte order and
  advance *PBUF past the four bytes written.

  XXX: glib and Samba have quicker macros for doing the endianness
  conversions, but I don't know of one in plain libc, and it's probably not a
  big deal.  I realize this is kind of dumb because we'll almost always be on
  x86, but being safe is important.
*/
static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
{
    unsigned char *out = *pbuf;

    out[0] = (unsigned char) (val_long & 0xff);
    out[1] = (unsigned char) ((val_long >> 8) & 0xff);
    out[2] = (unsigned char) ((val_long >> 16) & 0xff);
    out[3] = (unsigned char) ((val_long >> 24) & 0xff);

    /* callers pack fields back to back and rely on the cursor moving */
    *pbuf = out + 4;
}
/*
  Copy LEN bytes from FROM to *PBUF and advance *PBUF past them.

  A LEN of zero or less copies nothing and leaves *PBUF alone, so a bogus
  negative length can never be turned into a huge memcpy() size.
*/
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    if (len <= 0)
        return;

    memcpy(*pbuf, from, (size_t) len);
    (*pbuf) += len;
}
406 unpack_err_too_short(void)
408 PyErr_Format(PyExc_IndexError,
409 __FUNCTION__ ": data too short for unpack format");
414 unpack_uint32(char **pbuf, int *plen)
420 unpack_err_too_short();
425 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
430 return PyLong_FromUnsignedLong(v);
434 static PyObject *unpack_int16(char **pbuf, int *plen)
440 unpack_err_too_short();
450 return PyInt_FromLong(v);
455 unpack_string(char **pbuf, int *plen)
458 char *nul_ptr, *start;
462 nul_ptr = memchr(start, '\0', *plen);
464 unpack_err_too_short();
468 len = nul_ptr - start;
470 *pbuf += len + 1; /* skip \0 */
473 return PyString_FromStringAndSize(start, len);
478 unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
480 /* first get 32-bit len */
483 unsigned char *start;
484 PyObject *str_obj = NULL, *len_obj = NULL;
487 unpack_err_too_short();
492 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
494 if (slen < 0) { /* surely you jest */
495 PyErr_Format(PyExc_ValueError,
496 __FUNCTION__ ": buffer seems to have negative length");
505 PyErr_Format(PyExc_IndexError,
506 __FUNCTION__ ": not enough data to unpack buffer: "
507 "need %d bytes, have %d",
515 if (!(len_obj = PyInt_FromLong(slen)))
518 if (PyList_Append(val_list, len_obj) == -1)
521 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
524 if (PyList_Append(val_list, str_obj) == -1)
530 Py_XDECREF(len_obj); /* handles NULL */
536 /* Unpack a single field from packed data, according to format character CH.
537 Remaining data is at *PBUF, of *PLEN.
539 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
542 Returns a reference to None, or NULL for failure.
544 static PyObject *pytdbpack_unpack_item(char ch,
551 if (ch == 'w') { /* 16-bit int */
552 result = unpack_int16(pbuf, plen);
554 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
555 /* pointers can just come through as integers */
556 result = unpack_uint32(pbuf, plen);
558 else if (ch == 'f' || ch == 'P') { /* nul-term string */
559 result = unpack_string(pbuf, plen);
561 else if (ch == 'B') { /* length, buffer */
562 return unpack_buffer(pbuf, plen, val_list);
565 PyErr_Format(PyExc_ValueError,
566 __FUNCTION__ ": format character '%c' is not supported",
575 if (PyList_Append(val_list, result) == -1)
585 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
588 The string has already been checked out, so we know that VAL_SEQ is large
589 enough to hold the packed data, and that there are enough value items.
590 (However, their types may not have been thoroughly checked yet.)
592 In addition, val_seq is a Python Fast sequence.
594 Returns NULL for error (with exception set), or None.
597 pytdbpack_pack_data(const char *format_str,
599 unsigned char *packed)
601 int format_i, val_i = 0;
603 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
604 char ch = format_str[format_i];
607 /* borrow a reference to the item */
608 val_obj = PySequence_GetItem(val_seq, val_i++);
613 unsigned long val_long;
616 if (!(long_obj = PyNumber_Long(val_obj))) {
617 pytdbpack_bad_type(ch, "Long", val_obj);
621 val_long = PyLong_AsUnsignedLong(long_obj);
622 (packed)[0] = val_long & 0xff;
623 (packed)[1] = (val_long >> 8) & 0xff;
627 else if (ch == 'd') {
628 /* 4-byte LE number */
631 if (!(long_obj = PyNumber_Long(val_obj))) {
632 pytdbpack_bad_type(ch, "Long", val_obj);
636 pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
640 else if (ch == 'p') {
641 /* "Pointer" value -- in the subset of DCERPC used by Samba,
642 this is really just an "exists" or "does not exist"
644 pack_uint32(PyObject_IsTrue(val_obj), &packed);
646 else if (ch == 'f' || ch == 'P') {
650 size = PySequence_Length(val_obj);
653 sval = PyString_AsString(val_obj);
656 pack_bytes(size+1, sval, &packed); /* include nul */
658 else if (ch == 'B') {
662 if (!PyNumber_Check(val_obj)) {
663 pytdbpack_bad_type(ch, "Number", val_obj);
667 if (!(val_obj = PyNumber_Long(val_obj)))
670 size = PyLong_AsLong(val_obj);
671 pack_uint32(size, &packed);
673 /* Release the new reference created by the cast */
676 val_obj = PySequence_GetItem(val_seq, val_i++);
680 sval = PyString_AsString(val_obj);
684 pack_bytes(size, sval, &packed); /* do not include nul */
687 /* this ought to be caught while calculating the length, but
689 PyErr_Format(PyExc_ValueError,
690 "%s: format character '%c' is not supported",
702 static PyMethodDef pytdbpack_methods[] = {
703 { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
704 { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
/* Register the extension module under the name "tdbpack", with the method
   table and module docstring defined above.
   NOTE(review): the enclosing init function's header and braces are not
   visible in this view -- confirm its name matches the module name. */
710 Py_InitModule3("tdbpack", pytdbpack_methods,
711 (char *) pytdbpack_docstring);