source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
  31 static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
  32 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  33
  34 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
  35
  36 static PyObject *pytdbpack_data(const char *format_str,
  37                                      PyObject *val_seq,
  38                                      PyObject *val_list);
  39
  40 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  41
  42
  43 static PyObject *pytdbpack_bad_type(char ch,
  44                                     const char *expected,
  45                                     PyObject *val_obj);
  46
  47 static const char * pytdbpack_docstring =
  48 "Convert between Python values and Samba binary encodings.
  49
  50 This module is conceptually similar to the standard 'struct' module, but it
  51 uses both a different binary format and a different description string.
  52
  53 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  54 little-endian, unpadded, non-self-describing binary format.  It is intended
  55 that these functions be as similar as possible to the routines in Samba's
  56 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  57
  58 Python strings are used to specify the format of data to be packed or
  59 unpacked.
  60
  61 Strings are always stored in codepage 850.  Unicode objects are translated
  62 to cp850; plain strings are assumed to be in latin-1 and are also
  63 translated.
  64
  65 This may be a problem in the future if it is different to the Samba codepage.
  66 It might be better to have the caller do the conversion, but that would conflict
  67 with existing CMI code.
  68
  69 tdbpack format strings:
  70
  71     'f':  NULL-terminated string in codepage 850
  72
  73     'P':  same as 'f'
  74
  75     'd':  4 byte little-endian unsigned number
  76
  77     'w':  2 byte little-endian unsigned number
  78
  79     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  80           really just an \"exists\" or \"does not exist\" flag.  The boolean
  81           value of the Python object is used.
  82
  83     'B': 4-byte LE length, followed by that many bytes of binary data.
  84          Corresponds to a Python integer giving the length, followed by a byte
  85          string of the appropriate length.
  86
  87     '$': Special flag indicating that the preceding format code should be
  88          repeated while data remains.  This is only supported for unpacking.
  89
  90     Every code corresponds to a single Python object, except 'B' which
  91     corresponds to two values (length and contents), and '$', which produces
  92     however many make sense.
  93 ";
  94
  95
  96 static char const pytdbpack_doc[] =
  97 "pack(format, values) -> buffer
  98 Pack Python objects into Samba binary format according to format string.
  99
 100 arguments:
 101     format -- string of tdbpack format characters
 102     values -- sequence of value objects corresponding 1:1 to format characters
 103
 104 returns:
 105     buffer -- string containing packed data
 106
 107 raises:
 108     IndexError -- if there are too few values for the format
 109     ValueError -- if any of the format characters is illegal
 110     TypeError  -- if the format is not a string, or values is not a sequence,
 111         or any of the values is of the wrong type for the corresponding
 112         format character
 113
 114 notes:
 115     For historical reasons, it is not an error to pass more values than are consumed
 116     by the format.
 117 ";
 118
 119
 120 static char const pytdbunpack_doc[] =
 121 "unpack(format, buffer) -> (values, rest)
 122 Unpack Samba binary data according to format string.
 123
 124 arguments:
 125     format -- string of tdbpack characters
 126     buffer -- string of packed binary data
 127
 128 returns:
 129     2-tuple of:
 130         values -- sequence of values corresponding 1:1 to format characters
 131         rest -- string containing data that was not decoded, or '' if the
 132             whole string was consumed
 133
 134 raises:
 135     IndexError -- if there is insufficient data in the buffer for the
 136         format (or if the data is corrupt and contains a variable-length
 137         field extending past the end)
 138     ValueError -- if any of the format characters is illegal
 139
 140 notes:
 141     Because unconsumed data is returned, you can feed it back in to the
 142     unpacker to extract further fields.  Alternatively, if you wish to modify
 143     some fields near the start of the data, you may be able to save time by
 144     only unpacking and repacking the necessary part.
 145 ";
 146
 147
/* Codec name used both to encode strings when packing and to decode them
   when unpacking. */
const char *pytdb_string_encoding = "cp850";
 149
 150
 151 /*
 152   * Pack objects to bytes.
 153   *
 154   * All objects are first individually encoded onto a list, and then the list
 155   * of strings is concatenated.  This is faster than concatenating strings,
 156   * and reasonably simple to code.
 157   */
 158 static PyObject *
 159 pytdbpack(PyObject *self,
 160                PyObject *args)
 161 {
 162         char *format_str;
 163         PyObject *val_seq, *val_iter = NULL,
 164                 *packed_list = NULL, *packed_str = NULL,
 165                 *empty_str = NULL;
 166
 167         /* TODO: Test passing wrong types or too many arguments */
 168         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 169                 return NULL;
 170
 171         if (!(val_iter = PyObject_GetIter(val_seq)))
 172                 goto out;
 173
 174         /* Create list to hold strings until we're done, then join them all. */
 175         if (!(packed_list = PyList_New(0)))
 176                 goto out;
 177
 178         if (!pytdbpack_data(format_str, val_iter, packed_list))
 179                 goto out;
 180
 181         /* this function is not officially documented but it works */
 182         if (!(empty_str = PyString_InternFromString("")))
 183                 goto out;
 184
 185         packed_str = _PyString_Join(empty_str, packed_list);
 186
 187   out:
 188         Py_XDECREF(empty_str);
 189         Py_XDECREF(val_iter);
 190         Py_XDECREF(packed_list);
 191
 192         return packed_str;
 193 }
 194
 195
 196 /*
 197   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 198   PACKED_BUF.
 199
 200   The string has already been checked out, so we know that VAL_SEQ is large
 201   enough to hold the packed data, and that there are enough value items.
 202   (However, their types may not have been thoroughly checked yet.)
 203
 204   In addition, val_seq is a Python Fast sequence.
 205
 206   Returns NULL for error (with exception set), or None.
 207 */
 208 PyObject *
 209 pytdbpack_data(const char *format_str,
 210                     PyObject *val_iter,
 211                     PyObject *packed_list)
 212 {
 213         int format_i, val_i = 0;
 214
 215         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 216                 char ch = format_str[format_i];
 217
 218                 switch (ch) {
 219                         /* dispatch to the appropriate packer for this type,
 220                            which should pull things off the iterator, and
 221                            append them to the packed_list */
 222                 case 'w':
 223                 case 'd':
 224                 case 'p':
 225                         if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
 226                                 return NULL;
 227                         break;
 228
 229                 case 'f':
 230                 case 'P':
 231                         if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
 232                                 return NULL;
 233                         break;
 234
 235                 case 'B':
 236                         if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
 237                                 return NULL;
 238                         break;
 239
 240                 default:
 241                         PyErr_Format(PyExc_ValueError,
 242                                      "%s: format character '%c' is not supported",
 243                                      __FUNCTION__, ch);
 244                         return NULL;
 245                 }
 246         }
 247
 248         return packed_list;
 249 }
 250
 251
 252 static PyObject *
 253 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 254 {
 255         unsigned long val_long;
 256         PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
 257         PyObject *new_list = NULL;
 258         unsigned char pack_buf[4];
 259
 260         if (!(val_obj = PyIter_Next(val_iter)))
 261                 goto out;
 262
 263         if (!(long_obj = PyNumber_Long(val_obj))) {
 264                 pytdbpack_bad_type(ch, "Number", val_obj);
 265                 goto out;
 266         }
 267
 268         val_long = PyLong_AsUnsignedLong(long_obj);
 269         pack_le_uint32(val_long, pack_buf);
 270
 271         /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
 272            the first two bytes. */
 273
 274         if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
 275                 goto out;
 276
 277         if (PyList_Append(packed_list, result_obj) != -1)
 278                 new_list = packed_list;
 279
 280   out:
 281         Py_XDECREF(val_obj);
 282         Py_XDECREF(long_obj);
 283         Py_XDECREF(result_obj);
 284
 285         return new_list;
 286 }
 287
 288
 289 /*
 290  * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
 291  * return it.
 292  *
 293  * If the input is neither a string nor Unicode, an exception is raised.
 294  *
 295  * If the input is Unicode, then it is converted to CP850.
 296  *
 297  * If the input is a String, then it is converted to Unicode using the default
 298  * decoding method, and then converted to CP850.  This in effect gives
 299  * conversion from latin-1 (currently the PSA's default) to CP850, without
 300  * needing a custom translation table.
 301  *
 302  * I hope this approach avoids being too fragile w.r.t. being passed either
 303  * Unicode or String objects.
 304  */
 305 static PyObject *
 306 pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
 307 {
 308         PyObject *val_obj = NULL;
 309         PyObject *unicode_obj = NULL;
 310         PyObject *cp850_str = NULL;
 311         PyObject *nul_str = NULL;
 312         PyObject *new_list = NULL;
 313
 314         if (!(val_obj = PyIter_Next(val_iter)))
 315                 goto out;
 316
 317         if (PyUnicode_Check(val_obj)) {
 318                 unicode_obj = val_obj;
 319         }
 320         else {
 321                 /* string */
 322                 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 323                         goto out;
 324                 Py_XDECREF(val_obj);
 325                 val_obj = NULL;
 326         }
 327
 328         if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, pytdb_string_encoding, NULL)))
 329                 goto out;
 330
 331         if (!nul_str)
 332                 /* this is constant and often-used; hold it forever */
 333                 if (!(nul_str = PyString_FromStringAndSize("", 1)))
 334                         goto out;
 335
 336         if ((PyList_Append(packed_list, cp850_str) != -1)
 337             && (PyList_Append(packed_list, nul_str) != -1))
 338                 new_list = packed_list;
 339
 340   out:
 341         Py_XDECREF(unicode_obj);
 342         Py_XDECREF(cp850_str);
 343
 344         return new_list;
 345 }
 346
 347
 348 /*
 349  * Pack (LENGTH, BUFFER) pair onto the list.
 350  *
 351  * The buffer must already be a String, not Unicode, because it contains 8-bit
 352  * untranslated data.  In some cases it will actually be UTF_16_LE data.
 353  */
 354 static PyObject *
 355 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 356 {
 357         PyObject *val_obj;
 358         PyObject *new_list = NULL;
 359
 360         /* pull off integer and stick onto list */
 361         if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 362                 return NULL;
 363
 364         /* this assumes that the string is the right length; the old code did the same. */
 365         if (!(val_obj = PyIter_Next(val_iter)))
 366                 return NULL;
 367
 368         if (!PyString_Check(val_obj)) {
 369                 pytdbpack_bad_type('B', "String", val_obj);
 370                 goto out;
 371         }
 372
 373         if (PyList_Append(packed_list, val_obj) != -1)
 374                 new_list = packed_list;
 375
 376   out:
 377         Py_XDECREF(val_obj);
 378         return new_list;
 379 }
 380
 381
 382 static PyObject *pytdbpack_bad_type(char ch,
 383                                     const char *expected,
 384                                     PyObject *val_obj)
 385 {
 386         PyObject *r = PyObject_Repr(val_obj);
 387         if (!r)
 388                 return NULL;
 389         PyErr_Format(PyExc_TypeError,
 390                      "tdbpack: format '%c' requires %s, not %s",
 391                      ch, expected, PyString_AS_STRING(r));
 392         Py_DECREF(r);
 393         return val_obj;
 394 }
 395
 396
 397 /*
 398   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 399   but I don't know of one in plain libc, and it's probably not a big deal.  I
 400   realize this is kind of dumb because we'll almost always be on x86, but
 401   being safe is important.
 402 */
 403 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 404 {
 405         pbuf[0] =         val_long & 0xff;
 406         pbuf[1] = (val_long >> 8)  & 0xff;
 407         pbuf[2] = (val_long >> 16) & 0xff;
 408         pbuf[3] = (val_long >> 24) & 0xff;
 409 }
 410
 411
 412 static void pack_bytes(long len, const char *from,
 413                        unsigned char **pbuf)
 414 {
 415         memcpy(*pbuf, from, len);
 416         (*pbuf) += len;
 417 }
 418
 419
 420
 421 static PyObject *
 422 pytdbunpack(PyObject *self,
 423                  PyObject *args)
 424 {
 425         char *format_str, *packed_str, *ppacked;
 426         PyObject *val_list = NULL, *ret_tuple = NULL;
 427         PyObject *rest_string = NULL;
 428         int format_len, packed_len;
 429         char last_format = '#'; /* invalid */
 430         int i;
 431
 432         /* get arguments */
 433         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 434                 return NULL;
 435
 436         format_len = strlen(format_str);
 437
 438         /* Allocate list to hold results.  Initially empty, and we append
 439            results as we go along. */
 440         val_list = PyList_New(0);
 441         if (!val_list)
 442                 goto failed;
 443         ret_tuple = PyTuple_New(2);
 444         if (!ret_tuple)
 445                 goto failed;
 446
 447         /* For every object, unpack.  */
 448         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 449                 last_format = format_str[i];
 450                 /* packed_len is reduced in place */
 451                 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
 452                         goto failed;
 453         }
 454
 455         /* If the last character was '$', keep going until out of space */
 456         if (format_str[i] == '$') {
 457                 if (i == 0) {
 458                         PyErr_Format(PyExc_ValueError,
 459                                      "%s: '$' may not be first character in format",
 460                                      __FUNCTION__);
 461                         return NULL;
 462                 }
 463                 while (packed_len > 0)
 464                         if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
 465                                 goto failed;
 466         }
 467
 468         /* save leftovers for next time */
 469         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 470         if (!rest_string)
 471                 goto failed;
 472
 473         /* return (values, rest) tuple; give up references to them */
 474         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 475         val_list = NULL;
 476         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 477         val_list = NULL;
 478         return ret_tuple;
 479
 480   failed:
 481         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 482            pointers for objects that haven't been allocated yet. */
 483         Py_XDECREF(val_list);
 484         Py_XDECREF(ret_tuple);
 485         Py_XDECREF(rest_string);
 486         return NULL;
 487 }
 488
 489
 490 static void
 491 pytdbunpack_err_too_short(void)
 492 {
 493         PyErr_Format(PyExc_IndexError,
 494                      __FUNCTION__ ": data too short for unpack format");
 495 }
 496
 497
 498 static PyObject *
 499 pytdbunpack_uint32(char **pbuf, int *plen)
 500 {
 501         unsigned long v;
 502         unsigned char *b;
 503
 504         if (*plen < 4) {
 505                 pytdbunpack_err_too_short();
 506                 return NULL;
 507         }
 508
 509         b = *pbuf;
 510         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 511
 512         (*pbuf) += 4;
 513         (*plen) -= 4;
 514
 515         return PyLong_FromUnsignedLong(v);
 516 }
 517
 518
 519 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
 520 {
 521         long v;
 522         unsigned char *b;
 523
 524         if (*plen < 2) {
 525                 pytdbunpack_err_too_short();
 526                 return NULL;
 527         }
 528
 529         b = *pbuf;
 530         v = b[0] | b[1]<<8;
 531
 532         (*pbuf) += 2;
 533         (*plen) -= 2;
 534
 535         return PyInt_FromLong(v);
 536 }
 537
 538
 539 static PyObject *
 540 pytdbunpack_string(char **pbuf, int *plen)
 541 {
 542         int len;
 543         char *nul_ptr, *start;
 544
 545         start = *pbuf;
 546
 547         nul_ptr = memchr(start, '\0', *plen);
 548         if (!nul_ptr) {
 549                 pytdbunpack_err_too_short();
 550                 return NULL;
 551         }
 552
 553         len = nul_ptr - start;
 554
 555         *pbuf += len + 1;       /* skip \0 */
 556         *plen -= len + 1;
 557
 558         return PyString_Decode(start, len, pytdb_string_encoding, NULL);
 559 }
 560
 561
 562 static PyObject *
 563 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 564 {
 565         /* first get 32-bit len */
 566         long slen;
 567         unsigned char *b;
 568         unsigned char *start;
 569         PyObject *str_obj = NULL, *len_obj = NULL;
 570
 571         if (*plen < 4) {
 572                 pytdbunpack_err_too_short();
 573                 return NULL;
 574         }
 575
 576         b = *pbuf;
 577         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 578
 579         if (slen < 0) { /* surely you jest */
 580                 PyErr_Format(PyExc_ValueError,
 581                              __FUNCTION__ ": buffer seems to have negative length");
 582                 return NULL;
 583         }
 584
 585         (*pbuf) += 4;
 586         (*plen) -= 4;
 587         start = *pbuf;
 588
 589         if (*plen < slen) {
 590                 PyErr_Format(PyExc_IndexError,
 591                              __FUNCTION__ ": not enough data to unpack buffer: "
 592                              "need %d bytes, have %d",
 593                              (int) slen, *plen);
 594                 return NULL;
 595         }
 596
 597         (*pbuf) += slen;
 598         (*plen) -= slen;
 599
 600         if (!(len_obj = PyInt_FromLong(slen)))
 601                 goto failed;
 602
 603         if (PyList_Append(val_list, len_obj) == -1)
 604                 goto failed;
 605
 606         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 607                 goto failed;
 608
 609         if (PyList_Append(val_list, str_obj) == -1)
 610                 goto failed;
 611
 612         return val_list;
 613
 614   failed:
 615         Py_XDECREF(len_obj);    /* handles NULL */
 616         Py_XDECREF(str_obj);
 617         return NULL;
 618 }
 619
 620
 621 /* Unpack a single field from packed data, according to format character CH.
 622    Remaining data is at *PBUF, of *PLEN.
 623
 624    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 625    been consumed.
 626
 627    Returns a reference to None, or NULL for failure.
 628 */
 629 static PyObject *pytdbunpack_item(char ch,
 630                                        char **pbuf,
 631                                        int *plen,
 632                                        PyObject *val_list)
 633 {
 634         PyObject *result;
 635
 636         if (ch == 'w') {        /* 16-bit int */
 637                 result = pytdbunpack_int16(pbuf, plen);
 638         }
 639         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 640                 /* pointers can just come through as integers */
 641                 result = pytdbunpack_uint32(pbuf, plen);
 642         }
 643         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 644                 result = pytdbunpack_string(pbuf, plen);
 645         }
 646         else if (ch == 'B') { /* length, buffer */
 647                 return pytdbunpack_buffer(pbuf, plen, val_list);
 648         }
 649         else {
 650                 PyErr_Format(PyExc_ValueError,
 651                              __FUNCTION__ ": format character '%c' is not supported",
 652                              ch);
 653
 654                 return NULL;
 655         }
 656
 657         /* otherwise OK */
 658         if (!result)
 659                 return NULL;
 660         if (PyList_Append(val_list, result) == -1)
 661                 return NULL;
 662
 663         return val_list;
 664 }
 665
 666
 667
 668
 669
 670
 671 static PyMethodDef pytdbpack_methods[] = {
 672         { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 673         { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
 674 };
 675
/* Module initialization function: creates the "tdbpack" module with the
   pack/unpack method table and the module docstring. */
DL_EXPORT(void)
inittdbpack(void)
{
        Py_InitModule3("tdbpack", pytdbpack_methods,
                       (char *) pytdbpack_docstring);
}