source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
  31 static PyObject * pytdbpack_str(char ch,
  32                                 PyObject *val_iter, PyObject *packed_list,
  33                                 const char *encoding);
  34 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  35
  36 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
  37
  38 static PyObject *pytdbpack_data(const char *format_str,
  39                                      PyObject *val_seq,
  40                                      PyObject *val_list);
  41
  42 static PyObject *
  43 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
  44
  45 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  46
  47
  48 static PyObject *pytdbpack_bad_type(char ch,
  49                                     const char *expected,
  50                                     PyObject *val_obj);
  51
  52 static const char * pytdbpack_docstring =
  53 "Convert between Python values and Samba binary encodings.
  54
  55 This module is conceptually similar to the standard 'struct' module, but it
  56 uses both a different binary format and a different description string.
  57
  58 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  59 little-endian, unpadded, non-self-describing binary format.  It is intended
  60 that these functions be as similar as possible to the routines in Samba's
  61 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  62
  63 Python strings are used to specify the format of data to be packed or
  64 unpacked.
  65
  66 String encodings are implied by the database format: they may be either DOS
  67 codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
  68 to be the same as the default Python encoding).
  69
  70 tdbpack format strings:
  71
  72     'f': NUL-terminated string in codepage iso8859-1
  73
  74     'P': same as 'f'
  75
  76     'F': NUL-terminated string in iso-8859-1
  77
  78     'd':  4 byte little-endian unsigned number
  79
  80     'w':  2 byte little-endian unsigned number
  81
  82     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  83           really just an \"exists\" or \"does not exist\" flag.  The boolean
  84           value of the Python object is used.
  85
  86     'B': 4-byte LE length, followed by that many bytes of binary data.
  87          Corresponds to a Python integer giving the length, followed by a byte
  88          string of the appropriate length.
  89
  90     '$': Special flag indicating that the preceding format code should be
  91          repeated while data remains.  This is only supported for unpacking.
  92
  93     Every code corresponds to a single Python object, except 'B' which
  94     corresponds to two values (length and contents), and '$', which produces
  95     however many make sense.
  96 ";
  97
  98
  99 static char const pytdbpack_doc[] =
 100 "pack(format, values) -> buffer
 101 Pack Python objects into Samba binary format according to format string.
 102
 103 arguments:
 104     format -- string of tdbpack format characters
 105     values -- sequence of value objects corresponding 1:1 to format characters
 106
 107 returns:
 108     buffer -- string containing packed data
 109
 110 raises:
 111     IndexError -- if there are too few values for the format
 112     ValueError -- if any of the format characters is illegal
 113     TypeError  -- if the format is not a string, or values is not a sequence,
 114         or any of the values is of the wrong type for the corresponding
 115         format character
 116
 117 notes:
 118     For historical reasons, it is not an error to pass more values than are consumed
 119     by the format.
 120 ";
 121
 122
 123 static char const pytdbunpack_doc[] =
 124 "unpack(format, buffer) -> (values, rest)
 125 Unpack Samba binary data according to format string.
 126
 127 arguments:
 128     format -- string of tdbpack characters
 129     buffer -- string of packed binary data
 130
 131 returns:
 132     2-tuple of:
 133         values -- sequence of values corresponding 1:1 to format characters
 134         rest -- string containing data that was not decoded, or '' if the
 135             whole string was consumed
 136
 137 raises:
 138     IndexError -- if there is insufficient data in the buffer for the
 139         format (or if the data is corrupt and contains a variable-length
 140         field extending past the end)
 141     ValueError -- if any of the format characters is illegal
 142
 143 notes:
 144     Because unconsumed data is returned, you can feed it back in to the
 145     unpacker to extract further fields.  Alternatively, if you wish to modify
 146     some fields near the start of the data, you may be able to save time by
 147     only unpacking and repacking the necessary part.
 148 ";
 149
 150
/* Codec name for the DOS codepage.
   NOTE(review): nothing in the visible source reads this variable. */
const char *pytdb_dos_encoding = "cp850";

/* NULL, meaning that the Samba default encoding *must* be the same as the
   Python default encoding.  This is the encoding argument handed to the
   string packer and unpacker for the 'f' and 'P' format codes. */
const char *pytdb_unix_encoding = NULL;
 156
 157
 158 /*
 159   * Pack objects to bytes.
 160   *
 161   * All objects are first individually encoded onto a list, and then the list
 162   * of strings is concatenated.  This is faster than concatenating strings,
 163   * and reasonably simple to code.
 164   */
 165 static PyObject *
 166 pytdbpack(PyObject *self,
 167                PyObject *args)
 168 {
 169         char *format_str;
 170         PyObject *val_seq, *val_iter = NULL,
 171                 *packed_list = NULL, *packed_str = NULL,
 172                 *empty_str = NULL;
 173
 174         /* TODO: Test passing wrong types or too many arguments */
 175         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 176                 return NULL;
 177
 178         if (!(val_iter = PyObject_GetIter(val_seq)))
 179                 goto out;
 180
 181         /* Create list to hold strings until we're done, then join them all. */
 182         if (!(packed_list = PyList_New(0)))
 183                 goto out;
 184
 185         if (!pytdbpack_data(format_str, val_iter, packed_list))
 186                 goto out;
 187
 188         /* this function is not officially documented but it works */
 189         if (!(empty_str = PyString_InternFromString("")))
 190                 goto out;
 191
 192         packed_str = _PyString_Join(empty_str, packed_list);
 193
 194   out:
 195         Py_XDECREF(empty_str);
 196         Py_XDECREF(val_iter);
 197         Py_XDECREF(packed_list);
 198
 199         return packed_str;
 200 }
 201
 202
 203 /*
 204   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 205   PACKED_BUF.
 206
 207   The string has already been checked out, so we know that VAL_SEQ is large
 208   enough to hold the packed data, and that there are enough value items.
 209   (However, their types may not have been thoroughly checked yet.)
 210
 211   In addition, val_seq is a Python Fast sequence.
 212
 213   Returns NULL for error (with exception set), or None.
 214 */
 215 PyObject *
 216 pytdbpack_data(const char *format_str,
 217                     PyObject *val_iter,
 218                     PyObject *packed_list)
 219 {
 220         int format_i, val_i = 0;
 221
 222         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 223                 char ch = format_str[format_i];
 224
 225                 switch (ch) {
 226                         /* dispatch to the appropriate packer for this type,
 227                            which should pull things off the iterator, and
 228                            append them to the packed_list */
 229                 case 'w':
 230                 case 'd':
 231                 case 'p':
 232                         if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
 233                                 return NULL;
 234                         break;
 235
 236                 case 'f':
 237                 case 'P':
 238                         if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
 239                                 return NULL;
 240                         break;
 241
 242                 case 'B':
 243                         if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
 244                                 return NULL;
 245                         break;
 246
 247                 default:
 248                         PyErr_Format(PyExc_ValueError,
 249                                      "%s: format character '%c' is not supported",
 250                                      FUNCTION_MACRO, ch);
 251                         return NULL;
 252                 }
 253         }
 254
 255         return packed_list;
 256 }
 257
 258
 259 static PyObject *
 260 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 261 {
 262         unsigned long val_long;
 263         PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
 264         PyObject *new_list = NULL;
 265         unsigned char pack_buf[4];
 266
 267         if (!(val_obj = PyIter_Next(val_iter)))
 268                 goto out;
 269
 270         if (!(long_obj = PyNumber_Long(val_obj))) {
 271                 pytdbpack_bad_type(ch, "Number", val_obj);
 272                 goto out;
 273         }
 274
 275         val_long = PyLong_AsUnsignedLong(long_obj);
 276         pack_le_uint32(val_long, pack_buf);
 277
 278         /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
 279            the first two bytes. */
 280
 281         if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
 282                 goto out;
 283
 284         if (PyList_Append(packed_list, result_obj) != -1)
 285                 new_list = packed_list;
 286
 287   out:
 288         Py_XDECREF(val_obj);
 289         Py_XDECREF(long_obj);
 290         Py_XDECREF(result_obj);
 291
 292         return new_list;
 293 }
 294
 295
 296 /*
 297  * Take one string from the iterator val_iter, convert it to 8-bit, and return
 298  * it.
 299  *
 300  * If the input is neither a string nor Unicode, an exception is raised.
 301  *
 302  * If the input is Unicode, then it is converted to the appropriate encoding.
 303  *
 304  * If the input is a String, and encoding is not null, then it is converted to
 305  * Unicode using the default decoding method, and then converted to the
 306  * encoding.  If the encoding is NULL, then the string is written out as-is --
 307  * this is used when the default Python encoding is the same as the Samba
 308  * encoding.
 309  *
 310  * I hope this approach avoids being too fragile w.r.t. being passed either
 311  * Unicode or String objects.
 312  */
 313 static PyObject *
 314 pytdbpack_str(char ch,
 315               PyObject *val_iter, PyObject *packed_list, const char *encoding)
 316 {
 317         PyObject *val_obj = NULL;
 318         PyObject *unicode_obj = NULL;
 319         PyObject *coded_str = NULL;
 320         PyObject *nul_str = NULL;
 321         PyObject *new_list = NULL;
 322
 323         if (!(val_obj = PyIter_Next(val_iter)))
 324                 goto out;
 325
 326         if (PyUnicode_Check(val_obj)) {
 327                 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
 328                         goto out;
 329         }
 330         else if (PyString_Check(val_obj) && !encoding) {
 331                 /* For efficiency, we assume that the Python interpreter has
 332                    the same default string encoding as Samba's native string
 333                    encoding.  On the PSA, both are always 8859-1. */
 334                 coded_str = val_obj;
 335                 Py_INCREF(coded_str);
 336         }
 337         else if (PyString_Check(val_obj)) {
 338                 /* String, but needs to be converted */
 339                 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 340                         goto out;
 341                 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
 342                         goto out;
 343         }
 344         else {
 345                 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
 346                 goto out;
 347         }
 348
 349         if (!nul_str)
 350                 /* this is constant and often-used; hold it forever */
 351                 if (!(nul_str = PyString_FromStringAndSize("", 1)))
 352                         goto out;
 353
 354         if ((PyList_Append(packed_list, coded_str) != -1)
 355             && (PyList_Append(packed_list, nul_str) != -1))
 356                 new_list = packed_list;
 357
 358   out:
 359         Py_XDECREF(val_obj);
 360         Py_XDECREF(unicode_obj);
 361         Py_XDECREF(coded_str);
 362
 363         return new_list;
 364 }
 365
 366
 367 /*
 368  * Pack (LENGTH, BUFFER) pair onto the list.
 369  *
 370  * The buffer must already be a String, not Unicode, because it contains 8-bit
 371  * untranslated data.  In some cases it will actually be UTF_16_LE data.
 372  */
 373 static PyObject *
 374 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 375 {
 376         PyObject *val_obj;
 377         PyObject *new_list = NULL;
 378
 379         /* pull off integer and stick onto list */
 380         if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 381                 return NULL;
 382
 383         /* this assumes that the string is the right length; the old code did
 384            the same. */
 385         if (!(val_obj = PyIter_Next(val_iter)))
 386                 return NULL;
 387
 388         if (!PyString_Check(val_obj)) {
 389                 pytdbpack_bad_type('B', "String", val_obj);
 390                 goto out;
 391         }
 392
 393         if (PyList_Append(packed_list, val_obj) != -1)
 394                 new_list = packed_list;
 395
 396   out:
 397         Py_XDECREF(val_obj);
 398         return new_list;
 399 }
 400
 401
 402 static PyObject *pytdbpack_bad_type(char ch,
 403                                     const char *expected,
 404                                     PyObject *val_obj)
 405 {
 406         PyObject *r = PyObject_Repr(val_obj);
 407         if (!r)
 408                 return NULL;
 409         PyErr_Format(PyExc_TypeError,
 410                      "tdbpack: format '%c' requires %s, not %s",
 411                      ch, expected, PyString_AS_STRING(r));
 412         Py_DECREF(r);
 413         return val_obj;
 414 }
 415
 416
 417 /*
 418   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 419   but I don't know of one in plain libc, and it's probably not a big deal.  I
 420   realize this is kind of dumb because we'll almost always be on x86, but
 421   being safe is important.
 422 */
 423 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 424 {
 425         pbuf[0] =         val_long & 0xff;
 426         pbuf[1] = (val_long >> 8)  & 0xff;
 427         pbuf[2] = (val_long >> 16) & 0xff;
 428         pbuf[3] = (val_long >> 24) & 0xff;
 429 }
 430
 431
 432 static void pack_bytes(long len, const char *from,
 433                        unsigned char **pbuf)
 434 {
 435         memcpy(*pbuf, from, len);
 436         (*pbuf) += len;
 437 }
 438
 439
 440
 441 static PyObject *
 442 pytdbunpack(PyObject *self,
 443                  PyObject *args)
 444 {
 445         char *format_str, *packed_str, *ppacked;
 446         PyObject *val_list = NULL, *ret_tuple = NULL;
 447         PyObject *rest_string = NULL;
 448         int format_len, packed_len;
 449         char last_format = '#'; /* invalid */
 450         int i;
 451
 452         /* get arguments */
 453         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 454                 return NULL;
 455
 456         format_len = strlen(format_str);
 457
 458         /* Allocate list to hold results.  Initially empty, and we append
 459            results as we go along. */
 460         val_list = PyList_New(0);
 461         if (!val_list)
 462                 goto failed;
 463         ret_tuple = PyTuple_New(2);
 464         if (!ret_tuple)
 465                 goto failed;
 466
 467         /* For every object, unpack.  */
 468         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 469                 last_format = format_str[i];
 470                 /* packed_len is reduced in place */
 471                 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
 472                         goto failed;
 473         }
 474
 475         /* If the last character was '$', keep going until out of space */
 476         if (format_str[i] == '$') {
 477                 if (i == 0) {
 478                         PyErr_Format(PyExc_ValueError,
 479                                      "%s: '$' may not be first character in format",
 480                                      FUNCTION_MACRO);
 481                         return NULL;
 482                 }
 483                 while (packed_len > 0)
 484                         if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
 485                                 goto failed;
 486         }
 487
 488         /* save leftovers for next time */
 489         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 490         if (!rest_string)
 491                 goto failed;
 492
 493         /* return (values, rest) tuple; give up references to them */
 494         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 495         val_list = NULL;
 496         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 497         val_list = NULL;
 498         return ret_tuple;
 499
 500   failed:
 501         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 502            pointers for objects that haven't been allocated yet. */
 503         Py_XDECREF(val_list);
 504         Py_XDECREF(ret_tuple);
 505         Py_XDECREF(rest_string);
 506         return NULL;
 507 }
 508
 509
 510 static void
 511 pytdbunpack_err_too_short(void)
 512 {
 513         PyErr_Format(PyExc_IndexError,
 514                      FUNCTION_MACRO ": data too short for unpack format");
 515 }
 516
 517
 518 static PyObject *
 519 pytdbunpack_uint32(char **pbuf, int *plen)
 520 {
 521         unsigned long v;
 522         unsigned char *b;
 523
 524         if (*plen < 4) {
 525                 pytdbunpack_err_too_short();
 526                 return NULL;
 527         }
 528
 529         b = *pbuf;
 530         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 531
 532         (*pbuf) += 4;
 533         (*plen) -= 4;
 534
 535         return PyLong_FromUnsignedLong(v);
 536 }
 537
 538
 539 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
 540 {
 541         long v;
 542         unsigned char *b;
 543
 544         if (*plen < 2) {
 545                 pytdbunpack_err_too_short();
 546                 return NULL;
 547         }
 548
 549         b = *pbuf;
 550         v = b[0] | b[1]<<8;
 551
 552         (*pbuf) += 2;
 553         (*plen) -= 2;
 554
 555         return PyInt_FromLong(v);
 556 }
 557
 558
 559 static PyObject *
 560 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
 561 {
 562         int len;
 563         char *nul_ptr, *start;
 564
 565         start = *pbuf;
 566
 567         nul_ptr = memchr(start, '\0', *plen);
 568         if (!nul_ptr) {
 569                 pytdbunpack_err_too_short();
 570                 return NULL;
 571         }
 572
 573         len = nul_ptr - start;
 574
 575         *pbuf += len + 1;       /* skip \0 */
 576         *plen -= len + 1;
 577
 578         return PyString_Decode(start, len, encoding, NULL);
 579 }
 580
 581
 582 static PyObject *
 583 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 584 {
 585         /* first get 32-bit len */
 586         long slen;
 587         unsigned char *b;
 588         unsigned char *start;
 589         PyObject *str_obj = NULL, *len_obj = NULL;
 590
 591         if (*plen < 4) {
 592                 pytdbunpack_err_too_short();
 593                 return NULL;
 594         }
 595
 596         b = *pbuf;
 597         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 598
 599         if (slen < 0) { /* surely you jest */
 600                 PyErr_Format(PyExc_ValueError,
 601                              FUNCTION_MACRO ": buffer seems to have negative length");
 602                 return NULL;
 603         }
 604
 605         (*pbuf) += 4;
 606         (*plen) -= 4;
 607         start = *pbuf;
 608
 609         if (*plen < slen) {
 610                 PyErr_Format(PyExc_IndexError,
 611                              FUNCTION_MACRO ": not enough data to unpack buffer: "
 612                              "need %d bytes, have %d",
 613                              (int) slen, *plen);
 614                 return NULL;
 615         }
 616
 617         (*pbuf) += slen;
 618         (*plen) -= slen;
 619
 620         if (!(len_obj = PyInt_FromLong(slen)))
 621                 goto failed;
 622
 623         if (PyList_Append(val_list, len_obj) == -1)
 624                 goto failed;
 625
 626         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 627                 goto failed;
 628
 629         if (PyList_Append(val_list, str_obj) == -1)
 630                 goto failed;
 631
 632         return val_list;
 633
 634   failed:
 635         Py_XDECREF(len_obj);    /* handles NULL */
 636         Py_XDECREF(str_obj);
 637         return NULL;
 638 }
 639
 640
 641 /* Unpack a single field from packed data, according to format character CH.
 642    Remaining data is at *PBUF, of *PLEN.
 643
 644    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 645    been consumed.
 646
 647    Returns a reference to None, or NULL for failure.
 648 */
 649 static PyObject *pytdbunpack_item(char ch,
 650                                        char **pbuf,
 651                                        int *plen,
 652                                        PyObject *val_list)
 653 {
 654         PyObject *result;
 655
 656         if (ch == 'w') {        /* 16-bit int */
 657                 result = pytdbunpack_int16(pbuf, plen);
 658         }
 659         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 660                 /* pointers can just come through as integers */
 661                 result = pytdbunpack_uint32(pbuf, plen);
 662         }
 663         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 664                 result = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
 665         }
 666         else if (ch == 'B') { /* length, buffer */
 667                 return pytdbunpack_buffer(pbuf, plen, val_list);
 668         }
 669         else {
 670                 PyErr_Format(PyExc_ValueError,
 671                              FUNCTION_MACRO ": format character '%c' is not supported",
 672                              ch);
 673
 674                 return NULL;
 675         }
 676
 677         /* otherwise OK */
 678         if (!result)
 679                 return NULL;
 680         if (PyList_Append(val_list, result) == -1)
 681                 return NULL;
 682
 683         return val_list;
 684 }
 685
 686
 687
 688
 689
 690
 691 static PyMethodDef pytdbpack_methods[] = {
 692         { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 693         { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
 694 };
 695
 696 DL_EXPORT(void)
 697 inittdbpack(void)
 698 {
 699         Py_InitModule3("tdbpack", pytdbpack_methods,
 700                        (char *) pytdbpack_docstring);
 701 }