source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
  31 static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
  32 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  33
  34 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
  35
  36 static PyObject *pytdbpack_data(const char *format_str,
  37                                      PyObject *val_seq,
  38                                      PyObject *val_list);
  39
  40 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  41
  42
  43 static PyObject *pytdbpack_bad_type(char ch,
  44                                     const char *expected,
  45                                     PyObject *val_obj);
  46
  47 static const char * pytdbpack_docstring =
  48 "Convert between Python values and Samba binary encodings.
  49
  50 This module is conceptually similar to the standard 'struct' module, but it
  51 uses both a different binary format and a different description string.
  52
  53 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  54 little-endian, unpadded, non-self-describing binary format.  It is intended
  55 that these functions be as similar as possible to the routines in Samba's
  56 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  57
  58 Python strings are used to specify the format of data to be packed or
  59 unpacked.
  60
  61 Strings are always stored in codepage 850.  Unicode objects are translated
  62 to cp850; plain strings are assumed to be in latin-1 and are also
  63 translated.
  64
  65 This may be a problem in the future if it is different to the Samba codepage.
  66 It might be better to have the caller do the conversion, but that would conflict
  67 with existing CMI code.
  68
  69 tdbpack format strings:
  70
  71     'f':  NULL-terminated string in codepage 850
  72
  73     'P':  same as 'f'
  74
  75     'd':  4 byte little-endian unsigned number
  76
  77     'w':  2 byte little-endian unsigned number
  78
  79     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  80           really just an \"exists\" or \"does not exist\" flag.  The boolean
  81           value of the Python object is used.
  82
  83     'B': 4-byte LE length, followed by that many bytes of binary data.
  84          Corresponds to a Python integer giving the length, followed by a byte
  85          string of the appropriate length.
  86
  87     '$': Special flag indicating that the preceding format code should be
  88          repeated while data remains.  This is only supported for unpacking.
  89
  90     Every code corresponds to a single Python object, except 'B' which
  91     corresponds to two values (length and contents), and '$', which produces
  92     however many make sense.
  93 ";
  94
  95
  96 static char const pytdbpack_doc[] =
  97 "pack(format, values) -> buffer
  98 Pack Python objects into Samba binary format according to format string.
  99
 100 arguments:
 101     format -- string of tdbpack format characters
 102     values -- sequence of value objects corresponding 1:1 to format characters
 103
 104 returns:
 105     buffer -- string containing packed data
 106
 107 raises:
 108     IndexError -- if there are too few values for the format
 109     ValueError -- if any of the format characters is illegal
 110     TypeError  -- if the format is not a string, or values is not a sequence,
 111         or any of the values is of the wrong type for the corresponding
 112         format character
 113
 114 notes:
 115     For historical reasons, it is not an error to pass more values than are consumed
 116     by the format.
 117 ";
 118
 119
 120 static char const pytdbpack_unpack_doc[] =
 121 "unpack(format, buffer) -> (values, rest)
 122 Unpack Samba binary data according to format string.
 123
 124 arguments:
 125     format -- string of tdbpack characters
 126     buffer -- string of packed binary data
 127
 128 returns:
 129     2-tuple of:
 130         values -- sequence of values corresponding 1:1 to format characters
 131         rest -- string containing data that was not decoded, or '' if the
 132             whole string was consumed
 133
 134 raises:
 135     IndexError -- if there is insufficient data in the buffer for the
 136         format (or if the data is corrupt and contains a variable-length
 137         field extending past the end)
 138     ValueError -- if any of the format characters is illegal
 139
 140 notes:
 141     Because unconsumed data is returned, you can feed it back in to the
 142     unpacker to extract further fields.  Alternatively, if you wish to modify
 143     some fields near the start of the data, you may be able to save time by
 144     only unpacking and repacking the necessary part.
 145 ";
 146
 147
 148
 149
 150 /*
 151   * Pack objects to bytes.
 152   *
 153   * All objects are first individually encoded onto a list, and then the list
 154   * of strings is concatenated.  This is faster than concatenating strings,
 155   * and reasonably simple to code.
 156   */
 157 static PyObject *
 158 pytdbpack(PyObject *self,
 159                PyObject *args)
 160 {
 161         char *format_str;
 162         PyObject *val_seq, *val_iter = NULL,
 163                 *packed_list = NULL, *packed_str = NULL,
 164                 *empty_str = NULL;
 165
 166         /* TODO: Test passing wrong types or too many arguments */
 167         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 168                 return NULL;
 169
 170         if (!(val_iter = PyObject_GetIter(val_seq)))
 171                 goto out;
 172
 173         /* Create list to hold strings until we're done, then join them all. */
 174         if (!(packed_list = PyList_New(0)))
 175                 goto out;
 176
 177         if (!pytdbpack_data(format_str, val_iter, packed_list))
 178                 goto out;
 179
 180         /* this function is not officially documented but it works */
 181         if (!(empty_str = PyString_InternFromString("")))
 182                 goto out;
 183
 184         packed_str = _PyString_Join(empty_str, packed_list);
 185
 186   out:
 187         Py_XDECREF(empty_str);
 188         Py_XDECREF(val_iter);
 189         Py_XDECREF(packed_list);
 190
 191         return packed_str;
 192 }
 193
 194
 195 /*
 196   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 197   PACKED_BUF.
 198
 199   The string has already been checked out, so we know that VAL_SEQ is large
 200   enough to hold the packed data, and that there are enough value items.
 201   (However, their types may not have been thoroughly checked yet.)
 202
 203   In addition, val_seq is a Python Fast sequence.
 204
 205   Returns NULL for error (with exception set), or None.
 206 */
 207 PyObject *
 208 pytdbpack_data(const char *format_str,
 209                     PyObject *val_iter,
 210                     PyObject *packed_list)
 211 {
 212         int format_i, val_i = 0;
 213
 214         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 215                 char ch = format_str[format_i];
 216
 217                 switch (ch) {
 218                         /* dispatch to the appropriate packer for this type,
 219                            which should pull things off the iterator, and
 220                            append them to the packed_list */
 221                 case 'w':
 222                 case 'd':
 223                 case 'p':
 224                         if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
 225                                 return NULL;
 226                         break;
 227
 228                 case 'f':
 229                 case 'P':
 230                         if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
 231                                 return NULL;
 232                         break;
 233
 234                 case 'B':
 235                         if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
 236                                 return NULL;
 237                         break;
 238
 239                 default:
 240                         PyErr_Format(PyExc_ValueError,
 241                                      "%s: format character '%c' is not supported",
 242                                      __FUNCTION__, ch);
 243                         return NULL;
 244                 }
 245         }
 246
 247         return packed_list;
 248 }
 249
 250
 251 static PyObject *
 252 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 253 {
 254         unsigned long val_long;
 255         PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
 256         PyObject *new_list = NULL;
 257         unsigned char pack_buf[4];
 258
 259         if (!(val_obj = PyIter_Next(val_iter)))
 260                 goto out;
 261
 262         if (!(long_obj = PyNumber_Long(val_obj))) {
 263                 pytdbpack_bad_type(ch, "Number", val_obj);
 264                 goto out;
 265         }
 266
 267         val_long = PyLong_AsUnsignedLong(long_obj);
 268         pack_le_uint32(val_long, pack_buf);
 269
 270         /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
 271            the first two bytes. */
 272
 273         if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
 274                 goto out;
 275
 276         if (PyList_Append(packed_list, result_obj) != -1)
 277                 new_list = packed_list;
 278
 279   out:
 280         Py_XDECREF(val_obj);
 281         Py_XDECREF(long_obj);
 282         Py_XDECREF(result_obj);
 283
 284         return new_list;
 285 }
 286
 287
 288 /*
 289  * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
 290  * return it.
 291  *
 292  * If the input is neither a string nor Unicode, an exception is raised.
 293  *
 294  * If the input is Unicode, then it is converted to CP850.
 295  *
 296  * If the input is a String, then it is converted to Unicode using the default
 297  * decoding method, and then converted to CP850.  This in effect gives
 298  * conversion from latin-1 (currently the PSA's default) to CP850, without
 299  * needing a custom translation table.
 300  *
 301  * I hope this approach avoids being too fragile w.r.t. being passed either
 302  * Unicode or String objects.
 303  */
 304 static PyObject *
 305 pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
 306 {
 307         PyObject *val_obj = NULL;
 308         PyObject *unicode_obj = NULL;
 309         PyObject *cp850_str = NULL;
 310         PyObject *nul_str = NULL;
 311         PyObject *new_list = NULL;
 312
 313         if (!(val_obj = PyIter_Next(val_iter)))
 314                 goto out;
 315
 316         if (PyUnicode_Check(val_obj)) {
 317                 unicode_obj = val_obj;
 318         }
 319         else {
 320                 /* string */
 321                 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 322                         goto out;
 323                 Py_XDECREF(val_obj);
 324                 val_obj = NULL;
 325         }
 326
 327         if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
 328                 goto out;
 329
 330         if (!nul_str)
 331                 /* this is constant and often-used; hold it forever */
 332                 if (!(nul_str = PyString_FromStringAndSize("", 1)))
 333                         goto out;
 334
 335         if ((PyList_Append(packed_list, cp850_str) != -1)
 336             && (PyList_Append(packed_list, nul_str) != -1))
 337                 new_list = packed_list;
 338
 339   out:
 340         Py_XDECREF(unicode_obj);
 341         Py_XDECREF(cp850_str);
 342
 343         return new_list;
 344 }
 345
 346
 347 /*
 348  * Pack (LENGTH, BUFFER) pair onto the list.
 349  *
 350  * The buffer must already be a String, not Unicode, because it contains 8-bit
 351  * untranslated data.  In some cases it will actually be UTF_16_LE data.
 352  */
 353 static PyObject *
 354 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 355 {
 356         PyObject *val_obj;
 357         PyObject *new_list = NULL;
 358
 359         /* pull off integer and stick onto list */
 360         if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 361                 return NULL;
 362
 363         /* this assumes that the string is the right length; the old code did the same. */
 364         if (!(val_obj = PyIter_Next(val_iter)))
 365                 return NULL;
 366
 367         if (!PyString_Check(val_obj)) {
 368                 pytdbpack_bad_type('B', "String", val_obj);
 369                 goto out;
 370         }
 371
 372         if (PyList_Append(packed_list, val_obj) != -1)
 373                 new_list = packed_list;
 374
 375   out:
 376         Py_XDECREF(val_obj);
 377         return new_list;
 378 }
 379
 380
 381 static PyObject *pytdbpack_bad_type(char ch,
 382                                     const char *expected,
 383                                     PyObject *val_obj)
 384 {
 385         PyObject *r = PyObject_Repr(val_obj);
 386         if (!r)
 387                 return NULL;
 388         PyErr_Format(PyExc_TypeError,
 389                      "tdbpack: format '%c' requires %s, not %s",
 390                      ch, expected, PyString_AS_STRING(r));
 391         Py_DECREF(r);
 392         return val_obj;
 393 }
 394
 395
 396 /*
 397   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 398   but I don't know of one in plain libc, and it's probably not a big deal.  I
 399   realize this is kind of dumb because we'll almost always be on x86, but
 400   being safe is important.
 401 */
 402 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 403 {
 404         pbuf[0] =         val_long & 0xff;
 405         pbuf[1] = (val_long >> 8)  & 0xff;
 406         pbuf[2] = (val_long >> 16) & 0xff;
 407         pbuf[3] = (val_long >> 24) & 0xff;
 408 }
 409
 410
 411 static void pack_bytes(long len, const char *from,
 412                        unsigned char **pbuf)
 413 {
 414         memcpy(*pbuf, from, len);
 415         (*pbuf) += len;
 416 }
 417
 418
 419
 420 static PyObject *
 421 pytdbpack_unpack(PyObject *self,
 422                  PyObject *args)
 423 {
 424         char *format_str, *packed_str, *ppacked;
 425         PyObject *val_list = NULL, *ret_tuple = NULL;
 426         PyObject *rest_string = NULL;
 427         int format_len, packed_len;
 428         char last_format = '#'; /* invalid */
 429         int i;
 430
 431         /* get arguments */
 432         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 433                 return NULL;
 434
 435         format_len = strlen(format_str);
 436
 437         /* Allocate list to hold results.  Initially empty, and we append
 438            results as we go along. */
 439         val_list = PyList_New(0);
 440         if (!val_list)
 441                 goto failed;
 442         ret_tuple = PyTuple_New(2);
 443         if (!ret_tuple)
 444                 goto failed;
 445
 446         /* For every object, unpack.  */
 447         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 448                 last_format = format_str[i];
 449                 /* packed_len is reduced in place */
 450                 if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
 451                         goto failed;
 452         }
 453
 454         /* If the last character was '$', keep going until out of space */
 455         if (format_str[i] == '$') {
 456                 if (i == 0) {
 457                         PyErr_Format(PyExc_ValueError,
 458                                      "%s: '$' may not be first character in format",
 459                                      __FUNCTION__);
 460                         return NULL;
 461                 }
 462                 while (packed_len > 0)
 463                         if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
 464                                 goto failed;
 465         }
 466
 467         /* save leftovers for next time */
 468         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 469         if (!rest_string)
 470                 goto failed;
 471
 472         /* return (values, rest) tuple; give up references to them */
 473         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 474         val_list = NULL;
 475         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 476         val_list = NULL;
 477         return ret_tuple;
 478
 479   failed:
 480         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 481            pointers for objects that haven't been allocated yet. */
 482         Py_XDECREF(val_list);
 483         Py_XDECREF(ret_tuple);
 484         Py_XDECREF(rest_string);
 485         return NULL;
 486 }
 487
 488
 489 static void
 490 unpack_err_too_short(void)
 491 {
 492         PyErr_Format(PyExc_IndexError,
 493                      __FUNCTION__ ": data too short for unpack format");
 494 }
 495
 496
 497 static PyObject *
 498 unpack_uint32(char **pbuf, int *plen)
 499 {
 500         unsigned long v;
 501         unsigned char *b;
 502
 503         if (*plen < 4) {
 504                 unpack_err_too_short();
 505                 return NULL;
 506         }
 507
 508         b = *pbuf;
 509         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 510
 511         (*pbuf) += 4;
 512         (*plen) -= 4;
 513
 514         return PyLong_FromUnsignedLong(v);
 515 }
 516
 517
 518 static PyObject *unpack_int16(char **pbuf, int *plen)
 519 {
 520         long v;
 521         unsigned char *b;
 522
 523         if (*plen < 2) {
 524                 unpack_err_too_short();
 525                 return NULL;
 526         }
 527
 528         b = *pbuf;
 529         v = b[0] | b[1]<<8;
 530
 531         (*pbuf) += 2;
 532         (*plen) -= 2;
 533
 534         return PyInt_FromLong(v);
 535 }
 536
 537
 538 static PyObject *
 539 unpack_string(char **pbuf, int *plen)
 540 {
 541         int len;
 542         char *nul_ptr, *start;
 543
 544         start = *pbuf;
 545
 546         nul_ptr = memchr(start, '\0', *plen);
 547         if (!nul_ptr) {
 548                 unpack_err_too_short();
 549                 return NULL;
 550         }
 551
 552         len = nul_ptr - start;
 553
 554         *pbuf += len + 1;       /* skip \0 */
 555         *plen -= len + 1;
 556
 557         return PyString_FromStringAndSize(start, len);
 558 }
 559
 560
 561 static PyObject *
 562 unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 563 {
 564         /* first get 32-bit len */
 565         long slen;
 566         unsigned char *b;
 567         unsigned char *start;
 568         PyObject *str_obj = NULL, *len_obj = NULL;
 569
 570         if (*plen < 4) {
 571                 unpack_err_too_short();
 572                 return NULL;
 573         }
 574
 575         b = *pbuf;
 576         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 577
 578         if (slen < 0) { /* surely you jest */
 579                 PyErr_Format(PyExc_ValueError,
 580                              __FUNCTION__ ": buffer seems to have negative length");
 581                 return NULL;
 582         }
 583
 584         (*pbuf) += 4;
 585         (*plen) -= 4;
 586         start = *pbuf;
 587
 588         if (*plen < slen) {
 589                 PyErr_Format(PyExc_IndexError,
 590                              __FUNCTION__ ": not enough data to unpack buffer: "
 591                              "need %d bytes, have %d",
 592                              (int) slen, *plen);
 593                 return NULL;
 594         }
 595
 596         (*pbuf) += slen;
 597         (*plen) -= slen;
 598
 599         if (!(len_obj = PyInt_FromLong(slen)))
 600                 goto failed;
 601
 602         if (PyList_Append(val_list, len_obj) == -1)
 603                 goto failed;
 604
 605         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 606                 goto failed;
 607
 608         if (PyList_Append(val_list, str_obj) == -1)
 609                 goto failed;
 610
 611         return val_list;
 612
 613   failed:
 614         Py_XDECREF(len_obj);    /* handles NULL */
 615         Py_XDECREF(str_obj);
 616         return NULL;
 617 }
 618
 619
 620 /* Unpack a single field from packed data, according to format character CH.
 621    Remaining data is at *PBUF, of *PLEN.
 622
 623    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 624    been consumed.
 625
 626    Returns a reference to None, or NULL for failure.
 627 */
 628 static PyObject *pytdbpack_unpack_item(char ch,
 629                                        char **pbuf,
 630                                        int *plen,
 631                                        PyObject *val_list)
 632 {
 633         PyObject *result;
 634
 635         if (ch == 'w') {        /* 16-bit int */
 636                 result = unpack_int16(pbuf, plen);
 637         }
 638         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 639                 /* pointers can just come through as integers */
 640                 result = unpack_uint32(pbuf, plen);
 641         }
 642         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 643                 result = unpack_string(pbuf, plen);
 644         }
 645         else if (ch == 'B') { /* length, buffer */
 646                 return unpack_buffer(pbuf, plen, val_list);
 647         }
 648         else {
 649                 PyErr_Format(PyExc_ValueError,
 650                              __FUNCTION__ ": format character '%c' is not supported",
 651                              ch);
 652
 653                 return NULL;
 654         }
 655
 656         /* otherwise OK */
 657         if (!result)
 658                 return NULL;
 659         if (PyList_Append(val_list, result) == -1)
 660                 return NULL;
 661
 662         return val_list;
 663 }
 664
 665
 666
 667
 668
 669
 670 static PyMethodDef pytdbpack_methods[] = {
 671         { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 672         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 673 };
 674
 675 DL_EXPORT(void)
 676 inittdbpack(void)
 677 {
 678         Py_InitModule3("tdbpack", pytdbpack_methods,
 679                        (char *) pytdbpack_docstring);
 680 }