1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Forward declarations for the file-local helpers defined further down.
 *
 * NOTE(review): the pytdbpack_data and pytdbpack_bad_type prototypes continue
 * on lines that are not visible in this view; only their first line appears
 * here.
 */
30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
31 static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
32 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
34 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
36 static PyObject *pytdbpack_data(const char *format_str,
40 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
43 static PyObject *pytdbpack_bad_type(char ch,
/* Module-level docstring; it is handed to Py_InitModule3() as the
 * documentation of the "tdbpack" module.
 *
 * NOTE(review): the text below lists 'P' as the "Pointer" flag, but
 * pytdbpack_unpack_item() and pytdbpack_calc_reqd_len() treat 'p' like 'd'
 * (a 32-bit value) and 'P' like 'f' (a NUL-terminated string).  The pointer
 * entry presumably should read 'p' -- confirm before changing the string,
 * since it is user-visible text.
 */
47 static const char * pytdbpack_docstring =
48 "Convert between Python values and Samba binary encodings.
50 This module is conceptually similar to the standard 'struct' module, but it
51 uses both a different binary format and a different description string.
53 Samba's encoding is based on that used inside DCE-RPC and SMB: a
54 little-endian, unpadded, non-self-describing binary format. It is intended
55 that these functions be as similar as possible to the routines in Samba's
56 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
58 Python strings are used to specify the format of data to be packed or
61 Strings are always stored in codepage 850. Unicode objects are translated
62 to cp850; plain strings are assumed to be in latin-1 and are also
65 This may be a problem in the future if it is different to the Samba codepage.
66 It might be better to have the caller do the conversion, but that would conflict
67 with existing CMI code.
69 tdbpack format strings:
71 'f': NULL-terminated string in codepage 850
75 'd': 4 byte little-endian unsigned number
77 'w': 2 byte little-endian unsigned number
79 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
80 really just an \"exists\" or \"does not exist\" flag. The boolean
81 value of the Python object is used.
83 'B': 4-byte LE length, followed by that many bytes of binary data.
84 Corresponds to a Python integer giving the length, followed by a byte
85 string of the appropriate length.
87 '$': Special flag indicating that the preceding format code should be
88 repeated while data remains. This is only supported for unpacking.
90 Every code corresponds to a single Python object, except 'B' which
91 corresponds to two values (length and contents), and '$', which produces
92 however many make sense.
/* Docstring for the Python-visible "pack" method; it is referenced from the
 * "pack" entry of pytdbpack_methods.
 */
96 static char const pytdbpack_doc[] =
97 "pack(format, values) -> buffer
98 Pack Python objects into Samba binary format according to format string.
101 format -- string of tdbpack format characters
102 values -- sequence of value objects corresponding 1:1 to format characters
105 buffer -- string containing packed data
108 IndexError -- if there are too few values for the format
109 ValueError -- if any of the format characters is illegal
110 TypeError -- if the format is not a string, or values is not a sequence,
111 or any of the values is of the wrong type for the corresponding
115 For historical reasons, it is not an error to pass more values than are consumed
/* Docstring for the Python-visible "unpack" method; it is referenced from the
 * "unpack" entry of pytdbpack_methods.
 */
120 static char const pytdbpack_unpack_doc[] =
121 "unpack(format, buffer) -> (values, rest)
122 Unpack Samba binary data according to format string.
125 format -- string of tdbpack characters
126 buffer -- string of packed binary data
130 values -- sequence of values corresponding 1:1 to format characters
131 rest -- string containing data that was not decoded, or '' if the
132 whole string was consumed
135 IndexError -- if there is insufficient data in the buffer for the
136 format (or if the data is corrupt and contains a variable-length
137 field extending past the end)
138 ValueError -- if any of the format characters is illegal
141 Because unconsumed data is returned, you can feed it back in to the
142 unpacker to extract further fields. Alternatively, if you wish to modify
143 some fields near the start of the data, you may be able to save time by
144 only unpacking and repacking the necessary part.
151 * Pack objects to bytes.
153 * All objects are first individually encoded onto a list, and then the list
154 * of strings is concatenated. This is faster than concatenating strings,
155 * and reasonably simple to code.
/* Implementation of the Python-visible pack(format, values) method.
 *
 * Visible steps:
 *   1. parse ARGS as (C string, arbitrary object) into format_str / val_seq;
 *   2. obtain an iterator over val_seq;
 *   3. create an empty list that collects one encoded string per field;
 *   4. let pytdbpack_data() consume the iterator and fill that list;
 *   5. join the list on an empty string to produce packed_str;
 *   6. release empty_str, val_iter and packed_list.
 *
 * NOTE(review): the error branches, any cleanup label and the return
 * statement sit on lines not visible in this view.  Presumably each failed
 * check skips to the shared cleanup and packed_str (NULL on failure) is
 * returned -- confirm against the full file.
 */
158 pytdbpack(PyObject *self,
162 PyObject *val_seq, *val_iter = NULL,
163 *packed_list = NULL, *packed_str = NULL,
166 /* TODO: Test passing wrong types or too many arguments */
167 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
/* Any iterable is accepted here, not only lists or tuples. */
170 if (!(val_iter = PyObject_GetIter(val_seq)))
173 /* Create list to hold strings until we're done, then join them all. */
174 if (!(packed_list = PyList_New(0)))
177 if (!pytdbpack_data(format_str, val_iter, packed_list))
180 /* this function is not officially documented but it works */
181 if (!(empty_str = PyString_InternFromString("")))
/* Joining on "" concatenates all the encoded pieces in order. */
184 packed_str = _PyString_Join(empty_str, packed_list);
/* The X forms tolerate NULL, so this is safe for objects never created. */
187 Py_XDECREF(empty_str);
188 Py_XDECREF(val_iter);
189 Py_XDECREF(packed_list);
196 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
199 The string has already been checked out, so we know that VAL_SEQ is large
200 enough to hold the packed data, and that there are enough value items.
201 (However, their types may not have been thoroughly checked yet.)
203 In addition, val_seq is a Python Fast sequence.
205 Returns NULL for error (with exception set), or None.
/* Walk FORMAT_STR one character at a time and hand each field to the matching
 * packer (pytdbpack_number, pytdbpack_str_850 or pytdbpack_buffer).  Each
 * packer pulls its value(s) from the value iterator and appends the encoded
 * string(s) to PACKED_LIST; a NULL result from a packer is treated as failure.
 * A format character with no packer raises ValueError.
 *
 * NOTE(review): the switch/case labels are not visible in this view, so which
 * format characters map to which packer cannot be confirmed from here.
 * NOTE(review): the older comment block preceding this function still speaks
 * of VAL_SEQ and a pre-checked buffer, while the visible code works on an
 * iterator and a list -- that text looks stale.
 * NOTE(review): val_i is initialised twice and not otherwise used in the
 * visible lines; possibly a leftover.
 */
208 pytdbpack_data(const char *format_str,
210 PyObject *packed_list)
212 int format_i, val_i = 0;
214 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
215 char ch = format_str[format_i];
218 /* dispatch to the appropriate packer for this type,
219 which should pull things off the iterator, and
220 append them to the packed_list */
224 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
230 if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
235 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
240 PyErr_Format(PyExc_ValueError,
241 "%s: format character '%c' is not supported",
/* Pack one numeric value.
 *
 * Takes the next object from VAL_ITER, converts it to a Python long, encodes
 * it as little-endian bytes and appends the resulting string to PACKED_LIST.
 * The value is always encoded as 4 bytes; for CH == 'w' only the first 2 bytes
 * are kept, for any other CH all 4.
 *
 * new_list is set to PACKED_LIST only when the append succeeds and stays NULL
 * otherwise.  NOTE(review): the return statement is not visible; presumably
 * new_list is what is returned -- confirm.
 *
 * NOTE(review): the result of PyLong_AsUnsignedLong() is used without a
 * visible error check; negative or oversized values make it fail.
 * NOTE(review): pack_buf is unsigned char[] but is passed where the string
 * constructor takes a char pointer; a cast would silence the warning.
 */
252 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
254 unsigned long val_long;
255 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
256 PyObject *new_list = NULL;
257 unsigned char pack_buf[4];
259 if (!(val_obj = PyIter_Next(val_iter)))
262 if (!(long_obj = PyNumber_Long(val_obj))) {
263 pytdbpack_bad_type(ch, "Number", val_obj);
267 val_long = PyLong_AsUnsignedLong(long_obj);
268 pack_le_uint32(val_long, pack_buf);
270 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
271 the first two bytes. */
273 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
276 if (PyList_Append(packed_list, result_obj) != -1)
277 new_list = packed_list;
281 Py_XDECREF(long_obj);
282 Py_XDECREF(result_obj);
289 * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
292 * If the input is neither a string nor Unicode, an exception is raised.
294 * If the input is Unicode, then it is converted to CP850.
296 * If the input is a String, then it is converted to Unicode using the default
297 * decoding method, and then converted to CP850. This in effect gives
298 * conversion from latin-1 (currently the PSA's default) to CP850, without
299 * needing a custom translation table.
301 * I hope this approach avoids being too fragile w.r.t. being passed either
302 * Unicode or String objects.
/* Pack one text value as a NUL-terminated cp850 string.
 *
 * Takes the next object from VAL_ITER.  A Unicode object is used as is; any
 * other object goes through PyString_AsDecodedObject() with default encoding
 * and error handling to obtain a Unicode object.  The Unicode object is then
 * encoded to "cp850", and two items are appended to PACKED_LIST: the encoded
 * string and a one-byte string holding the terminating NUL.
 *
 * new_list is set to PACKED_LIST only when both appends succeed.
 * NOTE(review): the return statement is not visible; presumably new_list is
 * returned -- confirm.
 *
 * NOTE(review): nul_str is a plain local that is created on every call, while
 * its comment says "hold it forever"; whether it is released is not visible
 * here -- check for a per-call reference leak.
 * NOTE(review): in the Unicode branch unicode_obj aliases val_obj, and
 * unicode_obj is released at the end; make sure val_obj is not released a
 * second time on lines outside this view.
 */
305 pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
307 PyObject *val_obj = NULL;
308 PyObject *unicode_obj = NULL;
309 PyObject *cp850_str = NULL;
310 PyObject *nul_str = NULL;
311 PyObject *new_list = NULL;
313 if (!(val_obj = PyIter_Next(val_iter)))
316 if (PyUnicode_Check(val_obj)) {
317 unicode_obj = val_obj;
321 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
327 if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
331 /* this is constant and often-used; hold it forever */
/* Size 1 with an empty literal: the string is the single byte '\0'. */
332 if (!(nul_str = PyString_FromStringAndSize("", 1)))
335 if ((PyList_Append(packed_list, cp850_str) != -1)
336 && (PyList_Append(packed_list, nul_str) != -1))
337 new_list = packed_list;
340 Py_XDECREF(unicode_obj);
341 Py_XDECREF(cp850_str);
348 * Pack (LENGTH, BUFFER) pair onto the list.
350 * The buffer must already be a String, not Unicode, because it contains 8-bit
351 * untranslated data. In some cases it will actually be UTF_16_LE data.
/* Pack a 'B' field, which consumes two values from VAL_ITER.
 *
 * The first value is packed through pytdbpack_number() with format 'd', i.e.
 * as a 4-byte length.  The second value must be a byte string (anything else
 * is reported through pytdbpack_bad_type) and is appended to PACKED_LIST
 * unchanged.  The length value is not compared with the actual size of the
 * string.
 *
 * new_list is set to PACKED_LIST only when the append succeeds.
 * NOTE(review): the declaration of val_obj, the error jumps, the release of
 * val_obj and the return statement are not visible in this view.
 */
354 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
357 PyObject *new_list = NULL;
359 /* pull off integer and stick onto list */
360 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
363 /* this assumes that the string is the right length; the old code did the same. */
364 if (!(val_obj = PyIter_Next(val_iter)))
367 if (!PyString_Check(val_obj)) {
368 pytdbpack_bad_type('B', "String", val_obj);
372 if (PyList_Append(packed_list, val_obj) != -1)
373 new_list = packed_list;
/* Implementation of the Python-visible unpack(format, buffer) method.
 *
 * Visible steps:
 *   1. parse ARGS as (format string, data string with length);
 *   2. create an empty result list and a 2-tuple for the return value;
 *   3. unpack one item per format character, stopping at the end of the
 *      format or at the first '$'; each call advances ppacked and reduces
 *      packed_len;
 *   4. if the loop stopped on '$', keep unpacking with the last real format
 *      character while packed_len is positive; a '$' with no preceding
 *      character raises ValueError;
 *   5. copy whatever remains into rest_string;
 *   6. store the list and rest_string in the tuple, which takes over the
 *      references.
 * The trailing XDECREF calls are the failure cleanup.
 *
 * In the visible lines nothing processes format characters that follow a '$'.
 *
 * NOTE(review): the declaration of i, the NULL checks after the allocations,
 * the jumps to the failure path and both return statements are not visible in
 * this view.
 */
383 pytdbpack_unpack(PyObject *self,
386 char *format_str, *packed_str, *ppacked;
387 PyObject *val_list = NULL, *ret_tuple = NULL;
388 PyObject *rest_string = NULL;
389 int format_len, packed_len;
390 char last_format = '#'; /* invalid */
394 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
397 format_len = strlen(format_str);
399 /* Allocate list to hold results. Initially empty, and we append
400 results as we go along. */
401 val_list = PyList_New(0);
404 ret_tuple = PyTuple_New(2);
408 /* For every object, unpack. */
409 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
410 last_format = format_str[i];
411 /* packed_len is reduced in place */
412 if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
416 /* If the last character was '$', keep going until out of space */
417 if (format_str[i] == '$') {
419 PyErr_Format(PyExc_ValueError,
420 "%s: '$' may not be first character in format",
424 while (packed_len > 0)
425 if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
429 /* save leftovers for next time */
430 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
434 /* return (values, rest) tuple; give up references to them */
435 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
437 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
442 /* handle failure: deallocate anything. XDECREF forms handle NULL
443 pointers for objects that haven't been allocated yet. */
444 Py_XDECREF(val_list);
445 Py_XDECREF(ret_tuple);
446 Py_XDECREF(rest_string);
454 Internal routine that calculates how many bytes will be required to
455 encode the values in the format.
457 Also checks that the value list is the right size for the format list.
459 Returns number of bytes (may be 0), or -1 if there's something wrong, in
460 which case a Python exception has been raised.
464 val_seq: a Fast Sequence (list or tuple), being all the values
/* Size calculation for a (format, values) pair.
 *
 * Walks FORMAT_STR and the value sequence in step.  Running out of values
 * before the format ends raises IndexError.  Per format character:
 *   'd' / 'p'  -- handled together (body not visible here);
 *   'f' / 'P'  -- the value must be a byte string, whose size is read;
 *   'B'        -- the value must be a number giving the length; the value
 *                 index is advanced once more to step over the buffer that
 *                 follows it;
 *   otherwise  -- ValueError for an unsupported format character.
 *
 * NOTE(review): no caller of this function is visible in this view; it may
 * be unused now that packing works from an iterator.
 * NOTE(review): PySequence_GetItem() returns a new reference, although the
 * nearby comment speaks of borrowing; whether str_obj / len_obj are released
 * is not visible here -- check for leaks.
 * NOTE(review): the declarations, the length accumulation, the condition
 * guarding the "requires positive integer" error and the return statements
 * are on lines not visible in this view.
 */
467 pytdbpack_calc_reqd_len(char *format_str,
475 val_len = PySequence_Length(val_seq);
479 for (p = format_str, val_i = 0; *p; p++, val_i++) {
482 if (val_i >= val_len) {
483 PyErr_Format(PyExc_IndexError,
484 "%s: value list is too short for format string",
489 /* borrow a reference to the item */
490 if (ch == 'd' || ch == 'p')
494 else if (ch == 'f' || ch == 'P') {
495 /* nul-terminated 8-bit string */
499 str_obj = PySequence_GetItem(val_seq, val_i);
503 if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
504 pytdbpack_bad_type(ch, "String", str_obj);
510 else if (ch == 'B') {
511 /* length-preceded byte buffer: n bytes, plus a preceding
516 len_obj = PySequence_GetItem(val_seq, val_i);
517 val_i++; /* skip over buffer */
519 if (!PyNumber_Check(len_obj)) {
520 pytdbpack_bad_type(ch, "Number", len_obj);
524 len_val = PyInt_AsLong(len_obj);
526 PyErr_Format(PyExc_ValueError,
527 "%s: format 'B' requires positive integer", __FUNCTION__);
534 PyErr_Format(PyExc_ValueError,
535 "%s: format character '%c' is not supported",
/* Raise TypeError for a value that does not fit its format character.
 *
 * ch:       the format character being processed.
 * expected: human-readable name of the required type ("Number", "String").
 * val_obj:  the offending value; its repr() is embedded in the message.
 *
 * The callers visible in this file ignore the return value and only rely on
 * the exception being set.
 *
 * NOTE(review): the NULL check on r, the release of r and the return
 * statement are on lines not visible in this view -- confirm r is released.
 */
547 static PyObject *pytdbpack_bad_type(char ch,
548 const char *expected,
551 PyObject *r = PyObject_Repr(val_obj);
554 PyErr_Format(PyExc_TypeError,
555 "tdbpack: format '%c' requires %s, not %s",
556 ch, expected, PyString_AS_STRING(r));
/*
 * Store the low 32 bits of VAL_LONG in PBUF as four bytes, least-significant
 * byte first (little-endian).
 *
 * val_long: value to encode; any bits above the low 32 are discarded.
 * pbuf:     destination, must have room for at least 4 bytes.
 *
 * The bytes are written one at a time with shifts, so the result does not
 * depend on the byte order of the host.
 *
 * XXX: glib and Samba have quicker macros for doing the endianness
 * conversions, but I don't know of one in plain libc, and it's probably not a
 * big deal.  I realize this is kind of dumb because we'll almost always be on
 * x86, but being safe is important.
 */
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    pbuf[0] = (unsigned char) (val_long & 0xff);
    pbuf[1] = (unsigned char) ((val_long >> 8) & 0xff);
    pbuf[2] = (unsigned char) ((val_long >> 16) & 0xff);
    pbuf[3] = (unsigned char) ((val_long >> 24) & 0xff);
}
/* Copy LEN bytes from FROM to the location *PBUF points at.
 *
 * NOTE(review): PBUF is a pointer to the write cursor, which suggests the
 * cursor is advanced by LEN after the copy, but that statement is not visible
 * in this view -- confirm.  No caller of this helper is visible here either.
 */
577 static void pack_bytes(long len, const char *from,
578 unsigned char **pbuf)
580 memcpy(*pbuf, from, len);
586 unpack_err_too_short(void)
588 PyErr_Format(PyExc_IndexError,
589 __FUNCTION__ ": data too short for unpack format");
/* Decode a 4-byte little-endian value from the packed data and return it as
 * a Python long.  When the data is too short, unpack_err_too_short() sets the
 * exception.
 *
 * NOTE(review): the declarations of v and b, the length check, and the lines
 * that advance *pbuf and reduce *plen are not visible in this view.
 * NOTE(review): if b is an unsigned char pointer, b[3]<<24 is evaluated as
 * int; bytes of 0x80 and above shift into the sign bit and would sign-extend
 * when widened to a 64-bit unsigned long.  Casting each byte to unsigned long
 * before shifting would avoid that -- confirm the declared types first.
 */
594 unpack_uint32(char **pbuf, int *plen)
600 unpack_err_too_short();
605 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
610 return PyLong_FromUnsignedLong(v);
/* Decode the 16-bit field used for format 'w' and return it as a Python int.
 * When the data is too short, unpack_err_too_short() sets the exception.
 *
 * NOTE(review): the length check, the byte decoding and the cursor updates
 * are on lines not visible in this view; presumably two bytes are read
 * little-endian, mirroring the 32-bit decoder -- confirm.
 */
614 static PyObject *unpack_int16(char **pbuf, int *plen)
620 unpack_err_too_short();
630 return PyInt_FromLong(v);
/* Decode a NUL-terminated string field.
 *
 * Searches the remaining *PLEN bytes for the terminating NUL, returns the
 * bytes before it as a Python string and moves *PBUF past the terminator.
 * The returned string is the raw bytes; no character-set conversion happens
 * in the visible lines.
 *
 * NOTE(review): the assignment of start, the "NUL not found" check guarding
 * unpack_err_too_short(), and the matching reduction of *plen are on lines
 * not visible in this view.
 */
635 unpack_string(char **pbuf, int *plen)
638 char *nul_ptr, *start;
642 nul_ptr = memchr(start, '\0', *plen);
644 unpack_err_too_short();
648 len = nul_ptr - start;
650 *pbuf += len + 1; /* skip \0 */
653 return PyString_FromStringAndSize(start, len);
/* Decode a 'B' field: a 4-byte little-endian length followed by that many
 * bytes.  Unlike the other decoders this one appends to VAL_LIST itself, and
 * it appends two items: the length as a Python int, then the data as a
 * Python string.
 *
 * Errors visible here: unpack_err_too_short() when the length prefix is
 * missing, ValueError for a negative length, IndexError when fewer data bytes
 * remain than the length asks for.
 *
 * NOTE(review): the __FUNCTION__ ": ..." messages rely on __FUNCTION__ being
 * a string literal; current compilers treat it as a variable, so these lines
 * do not compile there.  The "%s: ...", __FUNCTION__ form used elsewhere in
 * this file avoids that.
 * NOTE(review): b[3]<<24 has the same int-promotion hazard as in the 32-bit
 * decoder; the declarations of b and slen are not visible here.
 * NOTE(review): the length checks, cursor updates, release of str_obj and the
 * return statements are on lines not visible in this view.
 */
658 unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
660 /* first get 32-bit len */
663 unsigned char *start;
664 PyObject *str_obj = NULL, *len_obj = NULL;
667 unpack_err_too_short();
672 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
674 if (slen < 0) { /* surely you jest */
675 PyErr_Format(PyExc_ValueError,
676 __FUNCTION__ ": buffer seems to have negative length");
685 PyErr_Format(PyExc_IndexError,
686 __FUNCTION__ ": not enough data to unpack buffer: "
687 "need %d bytes, have %d",
695 if (!(len_obj = PyInt_FromLong(slen)))
698 if (PyList_Append(val_list, len_obj) == -1)
701 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
704 if (PyList_Append(val_list, str_obj) == -1)
710 Py_XDECREF(len_obj); /* handles NULL */
716 /* Unpack a single field from packed data, according to format character CH.
717 Remaining data is at *PBUF, of *PLEN.
719 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
722 Returns a reference to None, or NULL for failure.
/* Decode one field selected by format character CH and add it to VAL_LIST.
 *
 *   'w'        -> unpack_int16
 *   'd' / 'p'  -> unpack_uint32
 *   'f' / 'P'  -> unpack_string
 *   'B'        -> unpack_buffer, which appends its two values itself, so its
 *                 result is returned directly
 *   otherwise  -> ValueError
 *
 * For every code except 'B' the decoded object is appended to VAL_LIST here.
 *
 * NOTE(review): the __FUNCTION__ ": ..." message relies on __FUNCTION__ being
 * a string literal, which current compilers reject; pass it through "%s"
 * instead.
 * NOTE(review): the remaining parameters, the NULL check on result, the
 * release of result after the append and the return statements are on lines
 * not visible in this view.
 */
724 static PyObject *pytdbpack_unpack_item(char ch,
731 if (ch == 'w') { /* 16-bit int */
732 result = unpack_int16(pbuf, plen);
734 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
735 /* pointers can just come through as integers */
736 result = unpack_uint32(pbuf, plen);
738 else if (ch == 'f' || ch == 'P') { /* nul-term string */
739 result = unpack_string(pbuf, plen);
741 else if (ch == 'B') { /* length, buffer */
742 return unpack_buffer(pbuf, plen, val_list);
745 PyErr_Format(PyExc_ValueError,
746 __FUNCTION__ ": format character '%c' is not supported",
755 if (PyList_Append(val_list, result) == -1)
/* Method table of the module: maps the Python names "pack" and "unpack" to
 * their C implementations, both taking positional arguments only, and
 * attaches their docstrings.  The casts drop the const qualifier from the
 * docstring arrays.
 *
 * NOTE(review): the terminating sentinel entry and the closing brace are on
 * lines not visible in this view.
 */
766 static PyMethodDef pytdbpack_methods[] = {
767 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
768 { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
/* Register the extension module under the name "tdbpack", with the method
 * table and the module docstring defined above in this file.
 *
 * NOTE(review): the header of the enclosing init function is not visible in
 * this view.
 */
774 Py_InitModule3("tdbpack", pytdbpack_methods,
775 (char *) pytdbpack_docstring);