1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
30 static int pytdbpack_calc_reqd_len(char *format_str,
33 static PyObject *pytdbpack_unpack_item(char,
37 pytdbpack_calc_item_len(char format_ch,
40 static PyObject *pytdbpack_pack_data(const char *format_str,
46 static const char * pytdbpack_docstring =
47 "Convert between Python values and Samba binary encodings.
49 This module is conceptually similar to the standard 'struct' module, but it
50 uses both a different binary format and a different description string.
52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
53 little-endian, unpadded, non-self-describing binary format. It is intended
54 that these functions be as similar as possible to the routines in Samba's
55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
57 Python strings are used to specify the format of data to be packed or
60 Strings in TDBs are typically stored in DOS codepages. The caller of this
61 module must make appropriate translations if necessary, typically to and from
64 tdbpack format strings:
66 'f': NULL-terminated string in DOS codepage
70 'd': 4 byte little-endian number
72 'w': 2 byte little-endian number
74 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
75 really just an \"exists\" or \"does not exist\" flag. The boolean
76 value of the Python object is used.
78 'B': 4-byte LE length, followed by that many bytes of binary data.
79 Corresponds to a Python byte string of the appropriate length.
81 '$': Special flag indicating that the preceding format code should be
82 repeated while data remains. This is only supported for unpacking.
84 Every code corresponds to a single Python object, except 'B' which
85 corresponds to two values (length and contents), and '$', which produces
86 however many make sense.
90 static char const pytdbpack_pack_doc[] =
91 "pack(format, values) -> buffer
92 Pack Python objects into Samba binary format according to format string.
95 format -- string of tdbpack format characters
96 values -- sequence of value objects corresponding 1:1 to format characters
99 buffer -- string containing packed data
102 IndexError -- if there are not the same number of format codes as of
104 ValueError -- if any of the format characters is illegal
105 TypeError -- if the format is not a string, or values is not a sequence,
106 or any of the values is of the wrong type for the corresponding
111 static char const pytdbpack_unpack_doc[] =
112 "unpack(format, buffer) -> (values, rest)
113 Unpack Samba binary data according to format string.
116 format -- string of tdbpack characters
117 buffer -- string of packed binary data
121 values -- sequence of values corresponding 1:1 to format characters
122 rest -- string containing data that was not decoded, or '' if the
123 whole string was consumed
126 IndexError -- if there is insufficient data in the buffer for the
127 format (or if the data is corrupt and contains a variable-length
128 field extending past the end)
129 ValueError -- if any of the format characters is illegal
132 Because unconsumed data is returned, you can feed it back in to the
133 unpacker to extract further fields. Alternatively, if you wish to modify
134 some fields near the start of the data, you may be able to save time by
135 only unpacking and repacking the necessary part.
141 Game plan is to first of all walk through the arguments and calculate the
142 total length that will be required. We allocate a Python string of that
143 size, then walk through again and fill it in.
145 We just borrow references to all the passed arguments, since none of them
146 need to be permanently stored. We transfer ownership to the returned
150 pytdbpack_pack(PyObject *self,
154 PyObject *val_seq, *fast_seq, *buf_str;
158 /* TODO: Test passing wrong types or too many arguments */
159 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
162 /* Convert into a list or tuple (if not already one), so that we can
163 * index more easily. */
164 fast_seq = PySequence_Fast(val_seq,
165 __FUNCTION__ ": argument 2 must be sequence");
169 reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
170 if (reqd_len == -1) /* exception was thrown */
175 This design causes an unnecessary copying of the data when Python
176 constructs an object, and that might possibly be avoided by using a
177 Buffer object of some kind instead. I'm not doing that for now
179 packed_buf = malloc(reqd_len);
181 PyErr_Format(PyExc_MemoryError,
182 "%s: couldn't allocate %d bytes for packed buffer",
183 __FUNCTION__, reqd_len);
187 if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
192 buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
193 free(packed_buf); /* get rid of tmp buf */
201 pytdbpack_unpack(PyObject *self,
204 char *format_str, *packed_str, *ppacked;
205 PyObject *val_list = NULL, *ret_tuple = NULL;
206 PyObject *rest_string = NULL;
207 int format_len, packed_len;
209 char last_format = '#';
212 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
215 format_len = strlen(format_str);
217 /* allocate list to hold results */
218 val_list = PyList_New(format_len);
221 ret_tuple = PyTuple_New(2);
225 /* For every object, unpack. */
226 for (ppacked = packed_str, i = 0; i < format_len; i++) {
230 format = format_str[i];
233 PyErr_Format(PyExc_ValueError,
234 "%s: '$' may not be first character in format",
239 format = last_format; /* repeat */
243 val_obj = pytdbpack_unpack_item(format,
249 PyList_SET_ITEM(val_list, i, val_obj);
250 last_format = format;
253 /* put leftovers in box for lunch tomorrow */
254 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
258 /* return (values, rest) tuple; give up references to them */
259 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
261 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
266 /* handle failure: deallocate anything */
267 Py_XDECREF(val_list);
268 Py_XDECREF(ret_tuple);
269 Py_XDECREF(rest_string);
275 Internal routine that calculates how many bytes will be required to
276 encode the values in the format.
278 Also checks that the value list is the right size for the format list.
280 Returns number of bytes (may be 0), or -1 if there's something wrong, in
281 which case a Python exception has been raised.
285 val_seq: a Fast Sequence (list or tuple), being all the values
288 pytdbpack_calc_reqd_len(char *format_str,
296 val_len = PySequence_Length(val_seq);
300 for (p = format_str, val_i = 0; *p; p++, val_i++) {
305 if (val_i >= val_len) {
306 PyErr_Format(PyExc_IndexError,
307 "samba.tdbpack.pack: value list is too short for format string");
311 /* PySequence_GetItem returns a new reference (not a borrowed one), so it must be released after use */
312 val_obj = PySequence_GetItem(val_seq, val_i);
316 item_len = pytdbpack_calc_item_len(ch, val_obj);
323 if (val_i != val_len) {
324 PyErr_Format(PyExc_IndexError,
325 "%s: value list is wrong length for format string",
334 static PyObject *pytdbpack_bad_type(char ch,
335 const char *expected,
338 PyObject *r = PyObject_Repr(val_obj);
341 PyErr_Format(PyExc_TypeError,
342 "tdbpack: format '%c' requires %s, not %s",
343 ch, expected, PyString_AS_STRING(r));
350 * Calculate the number of bytes required to pack a single value. While doing
351 * this, also conduct some initial checks that the argument types are
354 * Returns -1 on exception.
357 pytdbpack_calc_item_len(char ch,
360 if (ch == 'd' || ch == 'w') {
361 if (!PyInt_Check(val_obj)) {
362 pytdbpack_bad_type(ch, "Int", val_obj);
369 } else if (ch == 'p') {
372 else if (ch == 'f' || ch == 'P' || ch == 'B') {
373 /* nul-terminated 8-bit string */
374 if (!PyString_Check(val_obj)) {
375 pytdbpack_bad_type(ch, "String", val_obj);
380 /* byte buffer; just use Python string's length, plus
382 return 4 + PyString_GET_SIZE(val_obj);
385 /* one nul character */
386 return 1 + PyString_GET_SIZE(val_obj);
390 PyErr_Format(PyExc_ValueError,
391 "tdbpack: format character '%c' is not supported",
400 XXX: glib and Samba have quicker macros for doing the endianness conversions,
401 but I don't know of one in plain libc, and it's probably not a big deal. I
402 realize this is kind of dumb because we'll almost always be on x86, but
403 being safe is important.
405 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
407 (*pbuf)[0] = val_long & 0xff;
408 (*pbuf)[1] = (val_long >> 8) & 0xff;
409 (*pbuf)[2] = (val_long >> 16) & 0xff;
410 (*pbuf)[3] = (val_long >> 24) & 0xff;
415 static void pack_bytes(long len, const char *from,
416 unsigned char **pbuf)
418 memcpy(*pbuf, from, len);
424 unpack_err_too_short(void)
426 PyErr_Format(PyExc_IndexError,
427 __FUNCTION__ ": data too short for unpack format");
432 unpack_int32(char **pbuf, int *plen)
438 unpack_err_too_short();
443 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
448 return PyInt_FromLong(v);
452 static PyObject *unpack_int16(char **pbuf, int *plen)
458 unpack_err_too_short();
468 return PyInt_FromLong(v);
473 unpack_string(char **pbuf, int *plen)
476 char *nul_ptr, *start;
480 nul_ptr = memchr(start, '\0', *plen);
482 unpack_err_too_short();
486 len = nul_ptr - start;
488 *pbuf += len + 1; /* skip \0 */
491 return PyString_FromStringAndSize(start, len);
496 unpack_buffer(char **pbuf, int *plen)
498 /* first get 32-bit len */
501 unsigned char *start;
504 unpack_err_too_short();
509 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
511 if (slen < 0) { /* surely you jest */
512 PyErr_Format(PyExc_ValueError,
513 __FUNCTION__ ": buffer seems to have negative length");
522 PyErr_Format(PyExc_IndexError,
523 __FUNCTION__ ": not enough data to unpack buffer: "
524 "need %d bytes, have %d",
532 return PyString_FromStringAndSize(start, slen);
536 /* Unpack a single field from packed data, according to format character CH.
537 The remaining data starts at *PBUF and is *PLEN bytes long.
539 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
542 Returns a reference to the unpacked Python object, or NULL for failure.
544 static PyObject *pytdbpack_unpack_item(char ch,
548 if (ch == 'w') { /* 16-bit int */
549 return unpack_int16(pbuf, plen);
551 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
552 /* pointers can just come through as integers */
553 return unpack_int32(pbuf, plen);
555 else if (ch == 'f' || ch == 'P') { /* nul-term string */
556 return unpack_string(pbuf, plen);
558 else if (ch == 'B') { /* length, buffer */
559 return unpack_buffer(pbuf, plen);
562 PyErr_Format(PyExc_ValueError,
563 __FUNCTION__ ": format character '%c' is not supported",
573 Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
574 and advance the pointer. Buffer length has been pre-calculated so we are
575 sure that there is enough space.
579 pytdbpack_pack_item(char ch,
581 unsigned char **pbuf)
584 unsigned long val_long = PyInt_AsLong(val_obj);
585 (*pbuf)[0] = val_long & 0xff;
586 (*pbuf)[1] = (val_long >> 8) & 0xff;
589 else if (ch == 'd') {
590 /* 4-byte LE number */
591 pack_int32(PyInt_AsLong(val_obj), pbuf);
593 else if (ch == 'p') {
594 /* "Pointer" value -- in the subset of DCERPC used by Samba,
595 this is really just an "exists" or "does not exist"
597 pack_int32(PyObject_IsTrue(val_obj), pbuf);
599 else if (ch == 'f' || ch == 'P') {
603 size = PyString_GET_SIZE(val_obj);
604 sval = PyString_AS_STRING(val_obj);
605 pack_bytes(size+1, sval, pbuf); /* include nul */
607 else if (ch == 'B') {
611 size = PyString_GET_SIZE(val_obj);
612 pack_int32(size, pbuf);
613 sval = PyString_AS_STRING(val_obj);
614 pack_bytes(size, sval, pbuf); /* do not include nul */
617 /* this ought to be caught while calculating the length, but
619 PyErr_Format(PyExc_ValueError,
620 "%s: format character '%c' is not supported",
631 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
634 The format string has already been checked, so we know that PACKED_BUF is
635 large enough to hold the packed data, and that there are enough value items.
636 (However, their types may not have been thoroughly checked yet.)
638 In addition, val_seq is a Python Fast sequence.
640 Returns NULL for error (with exception set), or None.
643 pytdbpack_pack_data(const char *format_str,
645 unsigned char *packed_buf)
649 for (i = 0; format_str[i]; i++) {
650 char ch = format_str[i];
653 /* borrow a reference to the item */
654 val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
658 if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
669 static PyMethodDef pytdbpack_methods[] = {
670 { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
671 { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
677 Py_InitModule3("tdbpack", pytdbpack_methods,
678 (char *) pytdbpack_docstring);