source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
  31 static PyObject * pytdbpack_str(char ch,
  32                                 PyObject *val_iter, PyObject *packed_list,
  33                                 const char *encoding);
  34 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  35
  36 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
  37
  38 static PyObject *pytdbpack_data(const char *format_str,
  39                                      PyObject *val_seq,
  40                                      PyObject *val_list);
  41
  42 static PyObject *
  43 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
  44
  45 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  46
  47
  48 static PyObject *pytdbpack_bad_type(char ch,
  49                                     const char *expected,
  50                                     PyObject *val_obj);
  51
  52 static const char * pytdbpack_docstring =
  53 "Convert between Python values and Samba binary encodings.
  54
  55 This module is conceptually similar to the standard 'struct' module, but it
  56 uses both a different binary format and a different description string.
  57
  58 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  59 little-endian, unpadded, non-self-describing binary format.  It is intended
  60 that these functions be as similar as possible to the routines in Samba's
  61 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  62
  63 Python strings are used to specify the format of data to be packed or
  64 unpacked.
  65
  66 String encodings are implied by the database format: they may be either DOS
  67 codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
  68 to be the same as the default Python encoding).
  69
  70 tdbpack format strings:
  71
  72     'f': NUL-terminated string in codepage 850
  73
  74     'P': same as 'f'
  75
  76     'F': NUL-terminated string in iso-8859-1
  77
  78     'd':  4 byte little-endian unsigned number
  79
  80     'w':  2 byte little-endian unsigned number
  81
  82     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  83           really just an \"exists\" or \"does not exist\" flag.  The boolean
  84           value of the Python object is used.
  85
  86     'B': 4-byte LE length, followed by that many bytes of binary data.
  87          Corresponds to a Python integer giving the length, followed by a byte
  88          string of the appropriate length.
  89
  90     '$': Special flag indicating that the preceding format code should be
  91          repeated while data remains.  This is only supported for unpacking.
  92
  93     Every code corresponds to a single Python object, except 'B' which
  94     corresponds to two values (length and contents), and '$', which produces
  95     however many make sense.
  96 ";
  97
  98
  99 static char const pytdbpack_doc[] =
 100 "pack(format, values) -> buffer
 101 Pack Python objects into Samba binary format according to format string.
 102
 103 arguments:
 104     format -- string of tdbpack format characters
 105     values -- sequence of value objects corresponding 1:1 to format characters
 106
 107 returns:
 108     buffer -- string containing packed data
 109
 110 raises:
 111     IndexError -- if there are too few values for the format
 112     ValueError -- if any of the format characters is illegal
 113     TypeError  -- if the format is not a string, or values is not a sequence,
 114         or any of the values is of the wrong type for the corresponding
 115         format character
 116
 117 notes:
 118     For historical reasons, it is not an error to pass more values than are consumed
 119     by the format.
 120 ";
 121
 122
 123 static char const pytdbunpack_doc[] =
 124 "unpack(format, buffer) -> (values, rest)
 125 Unpack Samba binary data according to format string.
 126
 127 arguments:
 128     format -- string of tdbpack characters
 129     buffer -- string of packed binary data
 130
 131 returns:
 132     2-tuple of:
 133         values -- sequence of values corresponding 1:1 to format characters
 134         rest -- string containing data that was not decoded, or '' if the
 135             whole string was consumed
 136
 137 raises:
 138     IndexError -- if there is insufficient data in the buffer for the
 139         format (or if the data is corrupt and contains a variable-length
 140         field extending past the end)
 141     ValueError -- if any of the format characters is illegal
 142
 143 notes:
 144     Because unconsumed data is returned, you can feed it back in to the
 145     unpacker to extract further fields.  Alternatively, if you wish to modify
 146     some fields near the start of the data, you may be able to save time by
 147     only unpacking and repacking the necessary part.
 148 ";
 149
 150
/* Codec name used for the DOS-codepage string formats ('f' and 'P'), both
   when packing and when unpacking. */
const char *pytdb_dos_encoding = "cp850";

/* Codec used for the Unix-codepage string format ('F').
   NULL, meaning that the Samba default encoding *must* be the same as the
   Python default encoding. */
const char *pytdb_unix_encoding = NULL;
 156
 157
 158 /*
 159   * Pack objects to bytes.
 160   *
 161   * All objects are first individually encoded onto a list, and then the list
 162   * of strings is concatenated.  This is faster than concatenating strings,
 163   * and reasonably simple to code.
 164   */
 165 static PyObject *
 166 pytdbpack(PyObject *self,
 167                PyObject *args)
 168 {
 169         char *format_str;
 170         PyObject *val_seq, *val_iter = NULL,
 171                 *packed_list = NULL, *packed_str = NULL,
 172                 *empty_str = NULL;
 173
 174         /* TODO: Test passing wrong types or too many arguments */
 175         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 176                 return NULL;
 177
 178         if (!(val_iter = PyObject_GetIter(val_seq)))
 179                 goto out;
 180
 181         /* Create list to hold strings until we're done, then join them all. */
 182         if (!(packed_list = PyList_New(0)))
 183                 goto out;
 184
 185         if (!pytdbpack_data(format_str, val_iter, packed_list))
 186                 goto out;
 187
 188         /* this function is not officially documented but it works */
 189         if (!(empty_str = PyString_InternFromString("")))
 190                 goto out;
 191
 192         packed_str = _PyString_Join(empty_str, packed_list);
 193
 194   out:
 195         Py_XDECREF(empty_str);
 196         Py_XDECREF(val_iter);
 197         Py_XDECREF(packed_list);
 198
 199         return packed_str;
 200 }
 201
 202
 203 /*
 204   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 205   PACKED_BUF.
 206
 207   The string has already been checked out, so we know that VAL_SEQ is large
 208   enough to hold the packed data, and that there are enough value items.
 209   (However, their types may not have been thoroughly checked yet.)
 210
 211   In addition, val_seq is a Python Fast sequence.
 212
 213   Returns NULL for error (with exception set), or None.
 214 */
 215 PyObject *
 216 pytdbpack_data(const char *format_str,
 217                     PyObject *val_iter,
 218                     PyObject *packed_list)
 219 {
 220         int format_i, val_i = 0;
 221
 222         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 223                 char ch = format_str[format_i];
 224
 225                 switch (ch) {
 226                         /* dispatch to the appropriate packer for this type,
 227                            which should pull things off the iterator, and
 228                            append them to the packed_list */
 229                 case 'w':
 230                 case 'd':
 231                 case 'p':
 232                         if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
 233                                 return NULL;
 234                         break;
 235
 236                 case 'f':
 237                 case 'P':
 238                         if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_dos_encoding)))
 239                                 return NULL;
 240                         break;
 241
 242                 case 'F':
 243                         /* We specify NULL encoding: Samba databases in this
 244                            form are written in the default Python encoding. */
 245                         if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
 246                                 return NULL;
 247                         break;
 248
 249                 case 'B':
 250                         if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
 251                                 return NULL;
 252                         break;
 253
 254                 default:
 255                         PyErr_Format(PyExc_ValueError,
 256                                      "%s: format character '%c' is not supported",
 257                                      __FUNCTION__, ch);
 258                         return NULL;
 259                 }
 260         }
 261
 262         return packed_list;
 263 }
 264
 265
 266 static PyObject *
 267 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 268 {
 269         unsigned long val_long;
 270         PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
 271         PyObject *new_list = NULL;
 272         unsigned char pack_buf[4];
 273
 274         if (!(val_obj = PyIter_Next(val_iter)))
 275                 goto out;
 276
 277         if (!(long_obj = PyNumber_Long(val_obj))) {
 278                 pytdbpack_bad_type(ch, "Number", val_obj);
 279                 goto out;
 280         }
 281
 282         val_long = PyLong_AsUnsignedLong(long_obj);
 283         pack_le_uint32(val_long, pack_buf);
 284
 285         /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
 286            the first two bytes. */
 287
 288         if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
 289                 goto out;
 290
 291         if (PyList_Append(packed_list, result_obj) != -1)
 292                 new_list = packed_list;
 293
 294   out:
 295         Py_XDECREF(val_obj);
 296         Py_XDECREF(long_obj);
 297         Py_XDECREF(result_obj);
 298
 299         return new_list;
 300 }
 301
 302
 303 /*
 304  * Take one string from the iterator val_iter, convert it to 8-bit, and return
 305  * it.
 306  *
 307  * If the input is neither a string nor Unicode, an exception is raised.
 308  *
 309  * If the input is Unicode, then it is converted to the appropriate encoding.
 310  *
 311  * If the input is a String, and encoding is not null, then it is converted to
 312  * Unicode using the default decoding method, and then converted to the
 313  * encoding.  If the encoding is NULL, then the string is written out as-is --
 314  * this is used when the default Python encoding is the same as the Samba
 315  * encoding.
 316  *
 317  * I hope this approach avoids being too fragile w.r.t. being passed either
 318  * Unicode or String objects.
 319  */
 320 static PyObject *
 321 pytdbpack_str(char ch,
 322               PyObject *val_iter, PyObject *packed_list, const char *encoding)
 323 {
 324         PyObject *val_obj = NULL;
 325         PyObject *unicode_obj = NULL;
 326         PyObject *coded_str = NULL;
 327         PyObject *nul_str = NULL;
 328         PyObject *new_list = NULL;
 329
 330         if (!(val_obj = PyIter_Next(val_iter)))
 331                 goto out;
 332
 333         if (PyUnicode_Check(val_obj)) {
 334                 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
 335                         goto out;
 336         }
 337         else if (PyString_Check(val_obj) && !encoding) {
 338                 /* For efficiency, we assume that the Python interpreter has
 339                    the same default string encoding as Samba's native string
 340                    encoding.  On the PSA, both are always 8859-1. */
 341                 coded_str = val_obj;
 342                 Py_INCREF(coded_str);
 343         }
 344         else if (PyString_Check(val_obj)) {
 345                 /* String, but needs to be converted */
 346                 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 347                         goto out;
 348                 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
 349                         goto out;
 350         }
 351         else {
 352                 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
 353                 goto out;
 354         }
 355
 356         if (!nul_str)
 357                 /* this is constant and often-used; hold it forever */
 358                 if (!(nul_str = PyString_FromStringAndSize("", 1)))
 359                         goto out;
 360
 361         if ((PyList_Append(packed_list, coded_str) != -1)
 362             && (PyList_Append(packed_list, nul_str) != -1))
 363                 new_list = packed_list;
 364
 365   out:
 366         Py_XDECREF(val_obj);
 367         Py_XDECREF(unicode_obj);
 368         Py_XDECREF(coded_str);
 369
 370         return new_list;
 371 }
 372
 373
 374 /*
 375  * Pack (LENGTH, BUFFER) pair onto the list.
 376  *
 377  * The buffer must already be a String, not Unicode, because it contains 8-bit
 378  * untranslated data.  In some cases it will actually be UTF_16_LE data.
 379  */
 380 static PyObject *
 381 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 382 {
 383         PyObject *val_obj;
 384         PyObject *new_list = NULL;
 385
 386         /* pull off integer and stick onto list */
 387         if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 388                 return NULL;
 389
 390         /* this assumes that the string is the right length; the old code did
 391            the same. */
 392         if (!(val_obj = PyIter_Next(val_iter)))
 393                 return NULL;
 394
 395         if (!PyString_Check(val_obj)) {
 396                 pytdbpack_bad_type('B', "String", val_obj);
 397                 goto out;
 398         }
 399
 400         if (PyList_Append(packed_list, val_obj) != -1)
 401                 new_list = packed_list;
 402
 403   out:
 404         Py_XDECREF(val_obj);
 405         return new_list;
 406 }
 407
 408
 409 static PyObject *pytdbpack_bad_type(char ch,
 410                                     const char *expected,
 411                                     PyObject *val_obj)
 412 {
 413         PyObject *r = PyObject_Repr(val_obj);
 414         if (!r)
 415                 return NULL;
 416         PyErr_Format(PyExc_TypeError,
 417                      "tdbpack: format '%c' requires %s, not %s",
 418                      ch, expected, PyString_AS_STRING(r));
 419         Py_DECREF(r);
 420         return val_obj;
 421 }
 422
 423
 424 /*
 425   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 426   but I don't know of one in plain libc, and it's probably not a big deal.  I
 427   realize this is kind of dumb because we'll almost always be on x86, but
 428   being safe is important.
 429 */
 430 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 431 {
 432         pbuf[0] =         val_long & 0xff;
 433         pbuf[1] = (val_long >> 8)  & 0xff;
 434         pbuf[2] = (val_long >> 16) & 0xff;
 435         pbuf[3] = (val_long >> 24) & 0xff;
 436 }
 437
 438
 439 static void pack_bytes(long len, const char *from,
 440                        unsigned char **pbuf)
 441 {
 442         memcpy(*pbuf, from, len);
 443         (*pbuf) += len;
 444 }
 445
 446
 447
 448 static PyObject *
 449 pytdbunpack(PyObject *self,
 450                  PyObject *args)
 451 {
 452         char *format_str, *packed_str, *ppacked;
 453         PyObject *val_list = NULL, *ret_tuple = NULL;
 454         PyObject *rest_string = NULL;
 455         int format_len, packed_len;
 456         char last_format = '#'; /* invalid */
 457         int i;
 458
 459         /* get arguments */
 460         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 461                 return NULL;
 462
 463         format_len = strlen(format_str);
 464
 465         /* Allocate list to hold results.  Initially empty, and we append
 466            results as we go along. */
 467         val_list = PyList_New(0);
 468         if (!val_list)
 469                 goto failed;
 470         ret_tuple = PyTuple_New(2);
 471         if (!ret_tuple)
 472                 goto failed;
 473
 474         /* For every object, unpack.  */
 475         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 476                 last_format = format_str[i];
 477                 /* packed_len is reduced in place */
 478                 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
 479                         goto failed;
 480         }
 481
 482         /* If the last character was '$', keep going until out of space */
 483         if (format_str[i] == '$') {
 484                 if (i == 0) {
 485                         PyErr_Format(PyExc_ValueError,
 486                                      "%s: '$' may not be first character in format",
 487                                      __FUNCTION__);
 488                         return NULL;
 489                 }
 490                 while (packed_len > 0)
 491                         if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
 492                                 goto failed;
 493         }
 494
 495         /* save leftovers for next time */
 496         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 497         if (!rest_string)
 498                 goto failed;
 499
 500         /* return (values, rest) tuple; give up references to them */
 501         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 502         val_list = NULL;
 503         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 504         val_list = NULL;
 505         return ret_tuple;
 506
 507   failed:
 508         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 509            pointers for objects that haven't been allocated yet. */
 510         Py_XDECREF(val_list);
 511         Py_XDECREF(ret_tuple);
 512         Py_XDECREF(rest_string);
 513         return NULL;
 514 }
 515
 516
 517 static void
 518 pytdbunpack_err_too_short(void)
 519 {
 520         PyErr_Format(PyExc_IndexError,
 521                      __FUNCTION__ ": data too short for unpack format");
 522 }
 523
 524
 525 static PyObject *
 526 pytdbunpack_uint32(char **pbuf, int *plen)
 527 {
 528         unsigned long v;
 529         unsigned char *b;
 530
 531         if (*plen < 4) {
 532                 pytdbunpack_err_too_short();
 533                 return NULL;
 534         }
 535
 536         b = *pbuf;
 537         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 538
 539         (*pbuf) += 4;
 540         (*plen) -= 4;
 541
 542         return PyLong_FromUnsignedLong(v);
 543 }
 544
 545
 546 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
 547 {
 548         long v;
 549         unsigned char *b;
 550
 551         if (*plen < 2) {
 552                 pytdbunpack_err_too_short();
 553                 return NULL;
 554         }
 555
 556         b = *pbuf;
 557         v = b[0] | b[1]<<8;
 558
 559         (*pbuf) += 2;
 560         (*plen) -= 2;
 561
 562         return PyInt_FromLong(v);
 563 }
 564
 565
 566 static PyObject *
 567 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
 568 {
 569         int len;
 570         char *nul_ptr, *start;
 571
 572         start = *pbuf;
 573
 574         nul_ptr = memchr(start, '\0', *plen);
 575         if (!nul_ptr) {
 576                 pytdbunpack_err_too_short();
 577                 return NULL;
 578         }
 579
 580         len = nul_ptr - start;
 581
 582         *pbuf += len + 1;       /* skip \0 */
 583         *plen -= len + 1;
 584
 585         return PyString_Decode(start, len, encoding, NULL);
 586 }
 587
 588
 589 static PyObject *
 590 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 591 {
 592         /* first get 32-bit len */
 593         long slen;
 594         unsigned char *b;
 595         unsigned char *start;
 596         PyObject *str_obj = NULL, *len_obj = NULL;
 597
 598         if (*plen < 4) {
 599                 pytdbunpack_err_too_short();
 600                 return NULL;
 601         }
 602
 603         b = *pbuf;
 604         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 605
 606         if (slen < 0) { /* surely you jest */
 607                 PyErr_Format(PyExc_ValueError,
 608                              __FUNCTION__ ": buffer seems to have negative length");
 609                 return NULL;
 610         }
 611
 612         (*pbuf) += 4;
 613         (*plen) -= 4;
 614         start = *pbuf;
 615
 616         if (*plen < slen) {
 617                 PyErr_Format(PyExc_IndexError,
 618                              __FUNCTION__ ": not enough data to unpack buffer: "
 619                              "need %d bytes, have %d",
 620                              (int) slen, *plen);
 621                 return NULL;
 622         }
 623
 624         (*pbuf) += slen;
 625         (*plen) -= slen;
 626
 627         if (!(len_obj = PyInt_FromLong(slen)))
 628                 goto failed;
 629
 630         if (PyList_Append(val_list, len_obj) == -1)
 631                 goto failed;
 632
 633         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 634                 goto failed;
 635
 636         if (PyList_Append(val_list, str_obj) == -1)
 637                 goto failed;
 638
 639         return val_list;
 640
 641   failed:
 642         Py_XDECREF(len_obj);    /* handles NULL */
 643         Py_XDECREF(str_obj);
 644         return NULL;
 645 }
 646
 647
 648 /* Unpack a single field from packed data, according to format character CH.
 649    Remaining data is at *PBUF, of *PLEN.
 650
 651    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 652    been consumed.
 653
 654    Returns a reference to None, or NULL for failure.
 655 */
 656 static PyObject *pytdbunpack_item(char ch,
 657                                        char **pbuf,
 658                                        int *plen,
 659                                        PyObject *val_list)
 660 {
 661         PyObject *result;
 662
 663         if (ch == 'w') {        /* 16-bit int */
 664                 result = pytdbunpack_int16(pbuf, plen);
 665         }
 666         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 667                 /* pointers can just come through as integers */
 668                 result = pytdbunpack_uint32(pbuf, plen);
 669         }
 670         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 671                 result = pytdbunpack_string(pbuf, plen, pytdb_dos_encoding);
 672         }
 673         else if (ch == 'F') { /* nul-term string  */
 674                 result = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
 675         }
 676         else if (ch == 'B') { /* length, buffer */
 677                 return pytdbunpack_buffer(pbuf, plen, val_list);
 678         }
 679         else {
 680                 PyErr_Format(PyExc_ValueError,
 681                              __FUNCTION__ ": format character '%c' is not supported",
 682                              ch);
 683
 684                 return NULL;
 685         }
 686
 687         /* otherwise OK */
 688         if (!result)
 689                 return NULL;
 690         if (PyList_Append(val_list, result) == -1)
 691                 return NULL;
 692
 693         return val_list;
 694 }
 695
 696
 697
 698
 699
 700
 701 static PyMethodDef pytdbpack_methods[] = {
 702         { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 703         { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
 704 };
 705
 706 DL_EXPORT(void)
 707 inittdbpack(void)
 708 {
 709         Py_InitModule3("tdbpack", pytdbpack_methods,
 710                        (char *) pytdbpack_docstring);
 711 }