1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Forward declarations for the static pack/unpack helpers that are defined
   further down in this file.
   NOTE(review): some entries below end in a comma or start without a return
   type, so their continuation lines appear to be missing -- confirm each one
   against the matching definition. */
30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
31 static PyObject * pytdbpack_str(char ch,
32 PyObject *val_iter, PyObject *packed_list,
33 const char *encoding);
34 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
36 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
38 static PyObject *pytdbpack_data(const char *format_str,
43 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
45 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
48 static PyObject *pytdbpack_bad_type(char ch,
/* Module-level documentation text; it is handed to Py_InitModule3() as the
   docstring of the "tdbpack" module.
   NOTE(review): the text below documents 'P' as the pointer flag, whereas
   pytdbunpack_item() decodes lowercase 'p' as a 32-bit integer and 'P' as a
   NUL-terminated string in the DOS encoding -- confirm which is intended. */
52 static const char * pytdbpack_docstring =
53 "Convert between Python values and Samba binary encodings.
55 This module is conceptually similar to the standard 'struct' module, but it
56 uses both a different binary format and a different description string.
58 Samba's encoding is based on that used inside DCE-RPC and SMB: a
59 little-endian, unpadded, non-self-describing binary format. It is intended
60 that these functions be as similar as possible to the routines in Samba's
61 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
63 Python strings are used to specify the format of data to be packed or
66 String encodings are implied by the database format: they may be either DOS
67 codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
68 to be the same as the default Python encoding).
70 tdbpack format strings:
72 'f': NUL-terminated string in codepage 850
76 'F': NUL-terminated string in iso-8859-1
78 'd': 4 byte little-endian unsigned number
80 'w': 2 byte little-endian unsigned number
82 'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
83 really just an \"exists\" or \"does not exist\" flag. The boolean
84 value of the Python object is used.
86 'B': 4-byte LE length, followed by that many bytes of binary data.
87 Corresponds to a Python integer giving the length, followed by a byte
88 string of the appropriate length.
90 '$': Special flag indicating that the preceding format code should be
91 repeated while data remains. This is only supported for unpacking.
93 Every code corresponds to a single Python object, except 'B' which
94 corresponds to two values (length and contents), and '$', which produces
95 however many make sense.
/* Docstring for the "pack" method; referenced from the pytdbpack_methods
   table. */
99 static char const pytdbpack_doc[] =
100 "pack(format, values) -> buffer
101 Pack Python objects into Samba binary format according to format string.
104 format -- string of tdbpack format characters
105 values -- sequence of value objects corresponding 1:1 to format characters
108 buffer -- string containing packed data
111 IndexError -- if there are too few values for the format
112 ValueError -- if any of the format characters is illegal
113 TypeError -- if the format is not a string, or values is not a sequence,
114 or any of the values is of the wrong type for the corresponding
118 For historical reasons, it is not an error to pass more values than are consumed
/* Docstring for the "unpack" method; referenced from the pytdbpack_methods
   table. */
123 static char const pytdbunpack_doc[] =
124 "unpack(format, buffer) -> (values, rest)
125 Unpack Samba binary data according to format string.
128 format -- string of tdbpack characters
129 buffer -- string of packed binary data
133 values -- sequence of values corresponding 1:1 to format characters
134 rest -- string containing data that was not decoded, or '' if the
135 whole string was consumed
138 IndexError -- if there is insufficient data in the buffer for the
139 format (or if the data is corrupt and contains a variable-length
140 field extending past the end)
141 ValueError -- if any of the format characters is illegal
144 Because unconsumed data is returned, you can feed it back in to the
145 unpacker to extract further fields. Alternatively, if you wish to modify
146 some fields near the start of the data, you may be able to save time by
147 only unpacking and repacking the necessary part.
/* Codec names handed to the string packers and unpackers.  On the unpack
   side pytdbunpack_item() uses pytdb_dos_encoding for 'f' and 'P' and
   pytdb_unix_encoding for 'F'. */
151 const char *pytdb_dos_encoding = "cp850";
153 /* NULL, meaning that the Samba default encoding *must* be the same as the
154 Python default encoding. */
155 const char *pytdb_unix_encoding = NULL;
/* Entry point for pack(format, values).
 *
 * Parses a C string and one arbitrary object from args ("sO"), obtains an
 * iterator over that object, and lets pytdbpack_data() append the encoded
 * pieces to a freshly created list.  The pieces are then joined with an
 * empty separator through _PyString_Join().  empty_str, val_iter and
 * packed_list are released with Py_XDECREF before leaving.
 *
 * NOTE(review): the statements executed when one of the checks fails, and
 * the final return, are not visible here; presumably packed_str is returned
 * and is still NULL on failure -- confirm.
 */
159 * Pack objects to bytes.
161 * All objects are first individually encoded onto a list, and then the list
162 * of strings is concatenated. This is faster than concatenating strings,
163 * and reasonably simple to code.
166 pytdbpack(PyObject *self,
170 PyObject *val_seq, *val_iter = NULL,
171 *packed_list = NULL, *packed_str = NULL,
174 /* TODO: Test passing wrong types or too many arguments */
175 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
178 if (!(val_iter = PyObject_GetIter(val_seq)))
181 /* Create list to hold strings until we're done, then join them all. */
182 if (!(packed_list = PyList_New(0)))
185 if (!pytdbpack_data(format_str, val_iter, packed_list))
188 /* this function is not officially documented but it works */
189 if (!(empty_str = PyString_InternFromString("")))
192 packed_str = _PyString_Join(empty_str, packed_list);
195 Py_XDECREF(empty_str);
196 Py_XDECREF(val_iter);
197 Py_XDECREF(packed_list);
/* Walk format_str one character at a time and hand each character to the
 * matching packer: pytdbpack_number(), pytdbpack_str() with either
 * pytdb_dos_encoding or pytdb_unix_encoding, or pytdbpack_buffer().  Every
 * packer pulls its value(s) from val_iter and appends the encoded bytes to
 * packed_list; its return value is stored back into packed_list, so a NULL
 * return marks failure.  The final branch raises ValueError for a format
 * character that is not supported.
 *
 * NOTE(review): the older description below still talks about VAL_SEQ and a
 * "Fast sequence", but the code consumes an iterator (val_iter).  It also
 * says "or None", while every visible packer hands back the list itself.
 * The format characters selecting each branch and the return statement are
 * not visible here -- confirm.
 */
204 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
207 The string has already been checked out, so we know that VAL_SEQ is large
208 enough to hold the packed data, and that there are enough value items.
209 (However, their types may not have been thoroughly checked yet.)
211 In addition, val_seq is a Python Fast sequence.
213 Returns NULL for error (with exception set), or None.
216 pytdbpack_data(const char *format_str,
218 PyObject *packed_list)
220 int format_i, val_i = 0;
222 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
223 char ch = format_str[format_i];
226 /* dispatch to the appropriate packer for this type,
227 which should pull things off the iterator, and
228 append them to the packed_list */
232 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
238 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_dos_encoding)))
243 /* We specify NULL encoding: Samba databases in this
244 form are written in the default Python encoding. */
245 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
250 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
255 PyErr_Format(PyExc_ValueError,
256 "%s: format character '%c' is not supported",
/* Pack one integer taken from val_iter.
 *
 * ch          -- format character; 'w' emits 2 bytes, anything else 4
 * val_iter    -- iterator supplying the value
 * packed_list -- list that receives the encoded bytes as a string object
 *
 * The value is coerced with PyNumber_Long(); when that fails,
 * pytdbpack_bad_type() raises TypeError.  The number is always encoded as
 * 4 little-endian bytes by pack_le_uint32(), and for 'w' only the first two
 * bytes are kept, so larger values are truncated silently.  new_list is set
 * to packed_list only when PyList_Append() succeeds.
 *
 * NOTE(review): the result of PyLong_AsUnsignedLong() is packed straight
 * away with no error check, so a negative or oversized value would still be
 * encoded while an exception is pending -- confirm and handle.  The return
 * statement is not visible here; presumably new_list is returned.
 */
267 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
269 unsigned long val_long;
270 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
271 PyObject *new_list = NULL;
272 unsigned char pack_buf[4];
274 if (!(val_obj = PyIter_Next(val_iter)))
277 if (!(long_obj = PyNumber_Long(val_obj))) {
278 pytdbpack_bad_type(ch, "Number", val_obj);
282 val_long = PyLong_AsUnsignedLong(long_obj);
283 pack_le_uint32(val_long, pack_buf);
285 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
286 the first two bytes. */
288 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
291 if (PyList_Append(packed_list, result_obj) != -1)
292 new_list = packed_list;
296 Py_XDECREF(long_obj);
297 Py_XDECREF(result_obj);
/* Pack one NUL-terminated string taken from val_iter.
 *
 * ch          -- format character, used only in the TypeError message
 * val_iter    -- iterator supplying the value
 * packed_list -- list that receives the encoded string and then a NUL byte
 * encoding    -- codec name, or NULL to write byte strings unchanged
 *
 * Three input shapes are accepted: a Unicode object is encoded directly, a
 * byte string with a NULL encoding is used as it is, and a byte string with
 * a non-NULL encoding is first decoded with the default codec and then
 * re-encoded.  Any other type raises TypeError via pytdbpack_bad_type().
 * new_list is set to packed_list only when both appends succeed.
 *
 * NOTE(review): nul_str is a local that starts as NULL on every call, so
 * the one-byte "\0" string is rebuilt each time despite the "hold it
 * forever" remark; confirm it is either released or cached in a static.
 * The assignment that precedes Py_INCREF(coded_str) and the return
 * statement are not visible here.
 */
304 * Take one string from the iterator val_iter, convert it to 8-bit, and return
307 * If the input is neither a string nor Unicode, an exception is raised.
309 * If the input is Unicode, then it is converted to the appropriate encoding.
311 * If the input is a String, and encoding is not null, then it is converted to
312 * Unicode using the default decoding method, and then converted to the
313 * encoding. If the encoding is NULL, then the string is written out as-is --
314 * this is used when the default Python encoding is the same as the Samba
317 * I hope this approach avoids being too fragile w.r.t. being passed either
318 * Unicode or String objects.
321 pytdbpack_str(char ch,
322 PyObject *val_iter, PyObject *packed_list, const char *encoding)
324 PyObject *val_obj = NULL;
325 PyObject *unicode_obj = NULL;
326 PyObject *coded_str = NULL;
327 PyObject *nul_str = NULL;
328 PyObject *new_list = NULL;
330 if (!(val_obj = PyIter_Next(val_iter)))
333 if (PyUnicode_Check(val_obj)) {
334 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
337 else if (PyString_Check(val_obj) && !encoding) {
338 /* For efficiency, we assume that the Python interpreter has
339 the same default string encoding as Samba's native string
340 encoding. On the PSA, both are always 8859-1. */
342 Py_INCREF(coded_str);
344 else if (PyString_Check(val_obj)) {
345 /* String, but needs to be converted */
346 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
348 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
352 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
357 /* this is constant and often-used; hold it forever */
358 if (!(nul_str = PyString_FromStringAndSize("", 1)))
361 if ((PyList_Append(packed_list, coded_str) != -1)
362 && (PyList_Append(packed_list, nul_str) != -1))
363 new_list = packed_list;
367 Py_XDECREF(unicode_obj);
368 Py_XDECREF(coded_str);
/* Pack a 'B' item, which consumes two values from val_iter.
 *
 * The first value is packed as a 4-byte number by calling
 * pytdbpack_number() with 'd'.  The second must be a byte string
 * (PyString_Check); otherwise pytdbpack_bad_type() raises TypeError.  The
 * string object itself is appended to packed_list without copying, and
 * new_list is set to packed_list only when that append succeeds.
 *
 * The packed length is not compared with the actual size of the string;
 * the comment inside the body states this assumption.
 * NOTE(review): the return statement and any release of val_obj are not
 * visible here -- confirm.
 */
375 * Pack (LENGTH, BUFFER) pair onto the list.
377 * The buffer must already be a String, not Unicode, because it contains 8-bit
378 * untranslated data. In some cases it will actually be UTF_16_LE data.
381 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
384 PyObject *new_list = NULL;
386 /* pull off integer and stick onto list */
387 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
390 /* this assumes that the string is the right length; the old code did
392 if (!(val_obj = PyIter_Next(val_iter)))
395 if (!PyString_Check(val_obj)) {
396 pytdbpack_bad_type('B', "String", val_obj);
400 if (PyList_Append(packed_list, val_obj) != -1)
401 new_list = packed_list;
/* Raise TypeError saying that format character ch requires a value of the
 * kind named by `expected`, and include repr(val_obj) in the message.
 *
 * NOTE(review): whether the result of PyObject_Repr() is checked for NULL
 * before PyString_AS_STRING() reads it, whether r is released afterwards,
 * and what the function returns are not visible here -- confirm.
 */
409 static PyObject *pytdbpack_bad_type(char ch,
410 const char *expected,
413 PyObject *r = PyObject_Repr(val_obj);
416 PyErr_Format(PyExc_TypeError,
417 "tdbpack: format '%c' requires %s, not %s",
418 ch, expected, PyString_AS_STRING(r));
/*
 * Store the low 32 bits of val_long into pbuf[0..3] in little-endian order
 * (least significant byte first).
 *
 * val_long -- value to encode; any bits above bit 31 are ignored
 * pbuf     -- destination with room for at least 4 bytes
 *
 * The bytes are produced with shifts and masks, so the result depends
 * neither on the byte order of the host nor on the width of long.
 *
 * XXX: glib and Samba have quicker macros for doing the endianness
 * conversions, but I don't know of one in plain libc, and it's probably not
 * a big deal.  I realize this is kind of dumb because we'll almost always be
 * on x86, but being safe is important.
 */
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    pbuf[0] = (unsigned char) (val_long & 0xff);
    pbuf[1] = (unsigned char) ((val_long >> 8) & 0xff);
    pbuf[2] = (unsigned char) ((val_long >> 16) & 0xff);
    pbuf[3] = (unsigned char) ((val_long >> 24) & 0xff);
}
/* Copy len bytes from `from` to the location that *pbuf points at.
 *
 * NOTE(review): pbuf is passed by address, which suggests the write cursor
 * is advanced after the copy, but no such statement is visible here.  No
 * caller of this helper is visible in this part of the file either --
 * confirm both.
 */
439 static void pack_bytes(long len, const char *from,
440 unsigned char **pbuf)
442 memcpy(*pbuf, from, len);
/* Entry point for unpack(format, buffer) -> (values, rest).
 *
 * Parses a format string and a sized byte buffer from args ("ss#").  Each
 * format character up to the end of the string or the first '$' is decoded
 * by pytdbunpack_item(), which appends to val_list and moves
 * ppacked/packed_len forward.  If the loop stopped at '$', the last format
 * character seen is applied again and again while packed_len > 0; a '$'
 * with no preceding character raises ValueError.  Whatever is left of the
 * buffer becomes rest_string, and both objects are stored in the 2-tuple
 * with PyTuple_SET_ITEM.
 *
 * On failure the objects created so far are released with Py_XDECREF.
 * NOTE(review): the labels, early exits and return statements are not
 * visible here -- confirm that the success path does not fall into the
 * failure cleanup.
 */
449 pytdbunpack(PyObject *self,
452 char *format_str, *packed_str, *ppacked;
453 PyObject *val_list = NULL, *ret_tuple = NULL;
454 PyObject *rest_string = NULL;
455 int format_len, packed_len;
456 char last_format = '#'; /* invalid */
460 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
463 format_len = strlen(format_str);
465 /* Allocate list to hold results. Initially empty, and we append
466 results as we go along. */
467 val_list = PyList_New(0);
470 ret_tuple = PyTuple_New(2);
474 /* For every object, unpack. */
475 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
476 last_format = format_str[i];
477 /* packed_len is reduced in place */
478 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
482 /* If the last character was '$', keep going until out of space */
483 if (format_str[i] == '$') {
485 PyErr_Format(PyExc_ValueError,
486 "%s: '$' may not be first character in format",
490 while (packed_len > 0)
491 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
495 /* save leftovers for next time */
496 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
500 /* return (values, rest) tuple; give up references to them */
501 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
503 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
508 /* handle failure: deallocate anything. XDECREF forms handle NULL
509 pointers for objects that haven't been allocated yet. */
510 Py_XDECREF(val_list);
511 Py_XDECREF(ret_tuple);
512 Py_XDECREF(rest_string);
518 pytdbunpack_err_too_short(void)
520 PyErr_Format(PyExc_IndexError,
521 __FUNCTION__ ": data too short for unpack format");
526 pytdbunpack_uint32(char **pbuf, int *plen)
532 pytdbunpack_err_too_short();
537 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
542 return PyLong_FromUnsignedLong(v);
/* Decode one 16-bit item from the packed data at *pbuf and return it as a
 * Python int built with PyInt_FromLong().  pytdbunpack_err_too_short() is
 * called on the short-data path.
 *
 * NOTE(review): the length check, the byte decoding and the updates of
 * *pbuf and *plen are not visible here; presumably 2 bytes are read in
 * little-endian order and consumed -- confirm.
 */
546 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
552 pytdbunpack_err_too_short();
562 return PyInt_FromLong(v);
/* Decode one NUL-terminated string from the packed data.
 *
 * pbuf     -- in/out cursor; moved past the string and its terminating NUL
 * plen     -- in/out count of bytes remaining at *pbuf
 * encoding -- codec name given to PyString_Decode(); may be NULL
 *
 * The terminator is searched for with memchr() within the remaining *plen
 * bytes only.  The too-short error is raised on the failure path, which is
 * presumably taken when no NUL is found.  The bytes before the NUL are then
 * decoded and returned.
 *
 * NOTE(review): the initialisation of start and the matching reduction of
 * *plen are not visible here -- confirm.
 */
567 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
570 char *nul_ptr, *start;
574 nul_ptr = memchr(start, '\0', *plen);
576 pytdbunpack_err_too_short();
580 len = nul_ptr - start;
582 *pbuf += len + 1; /* skip \0 */
585 return PyString_Decode(start, len, encoding, NULL);
/* Decode a 'B' item: a 32-bit little-endian length followed by that many
 * raw bytes.  Two objects are appended to val_list, the length as a Python
 * int and the bytes as a string.
 *
 * Errors: the too-short error when the length field itself is incomplete,
 * ValueError when the decoded length is negative, and IndexError when the
 * buffer holds fewer bytes than the length announces.
 *
 * NOTE(review): b[3]<<24 is presumably evaluated as int (the declaration of
 * b is not visible), so a top byte of 0x80 or more shifts into the sign bit;
 * the slen < 0 test appears to rely on that.  The __FUNCTION__ "..." pasting
 * in the two PyErr_Format calls is rejected by current compilers; the "%s"
 * form used elsewhere in this file avoids it.  The return value, the
 * updates of *pbuf and *plen, and the release of str_obj are not visible
 * here -- confirm.
 */
590 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
592 /* first get 32-bit len */
595 unsigned char *start;
596 PyObject *str_obj = NULL, *len_obj = NULL;
599 pytdbunpack_err_too_short();
604 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
606 if (slen < 0) { /* surely you jest */
607 PyErr_Format(PyExc_ValueError,
608 __FUNCTION__ ": buffer seems to have negative length");
617 PyErr_Format(PyExc_IndexError,
618 __FUNCTION__ ": not enough data to unpack buffer: "
619 "need %d bytes, have %d",
627 if (!(len_obj = PyInt_FromLong(slen)))
630 if (PyList_Append(val_list, len_obj) == -1)
633 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
636 if (PyList_Append(val_list, str_obj) == -1)
642 Py_XDECREF(len_obj); /* handles NULL */
/* Dispatch on the format character ch:
 *   'w'        -> pytdbunpack_int16()
 *   'd', 'p'   -> pytdbunpack_uint32()
 *   'f', 'P'   -> pytdbunpack_string() with pytdb_dos_encoding
 *   'F'        -> pytdbunpack_string() with pytdb_unix_encoding
 *   'B'        -> pytdbunpack_buffer(), which appends to val_list itself
 *                 and whose result is returned directly
 *   other      -> ValueError
 * For every case except 'B' the decoded object is appended to val_list
 * here.
 *
 * NOTE(review): the __FUNCTION__ "..." pasting in the ValueError call is
 * rejected by current compilers; the "%s" form used elsewhere in this file
 * avoids it.  The NULL check on result, the release of result after the
 * append and the return statement are not visible here -- confirm.
 */
648 /* Unpack a single field from packed data, according to format character CH.
649 Remaining data is at *PBUF, of *PLEN.
651 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
654 Returns a reference to None, or NULL for failure.
656 static PyObject *pytdbunpack_item(char ch,
663 if (ch == 'w') { /* 16-bit int */
664 result = pytdbunpack_int16(pbuf, plen);
666 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
667 /* pointers can just come through as integers */
668 result = pytdbunpack_uint32(pbuf, plen);
670 else if (ch == 'f' || ch == 'P') { /* nul-term string */
671 result = pytdbunpack_string(pbuf, plen, pytdb_dos_encoding);
673 else if (ch == 'F') { /* nul-term string */
674 result = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
676 else if (ch == 'B') { /* length, buffer */
677 return pytdbunpack_buffer(pbuf, plen, val_list);
680 PyErr_Format(PyExc_ValueError,
681 __FUNCTION__ ": format character '%c' is not supported",
690 if (PyList_Append(val_list, result) == -1)
/* Method table of the module: exposes pytdbpack() as "pack" and
 * pytdbunpack() as "unpack", both taking positional arguments
 * (METH_VARARGS).  The (char *) casts drop the const qualifier of the
 * docstring arrays.
 * NOTE(review): the terminating sentinel entry is not visible here --
 * confirm it is present.
 */
701 static PyMethodDef pytdbpack_methods[] = {
702 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
703 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
/* Register the extension module under the name "tdbpack" with its method
 * table and module docstring.
 * NOTE(review): the enclosing module-init function is not visible here --
 * confirm its name matches the module name.
 */
709 Py_InitModule3("tdbpack", pytdbpack_methods,
710 (char *) pytdbpack_docstring);