source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26 #include "Python.h"
  27
  28 /* This module is supposed to be standalone, however for portability
  29    it would be good to use the FUNCTION_MACRO preprocessor define. */
  30
  31 #include "include/config.h"
  32
  33 #ifdef HAVE_FUNCTION_MACRO
  34 #define FUNCTION_MACRO  (__FUNCTION__)
  35 #else
  36 #define FUNCTION_MACRO  (__FILE__)
  37 #endif
  38
  39 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
  40 static PyObject * pytdbpack_str(char ch,
  41                                 PyObject *val_iter, PyObject *packed_list,
  42                                 const char *encoding);
  43 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
  44
  45 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
  46
  47 static PyObject *pytdbpack_data(const char *format_str,
  48                                      PyObject *val_seq,
  49                                      PyObject *val_list);
  50
  51 static PyObject *
  52 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
  53
  54 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
  55
  56
  57 static PyObject *pytdbpack_bad_type(char ch,
  58                                     const char *expected,
  59                                     PyObject *val_obj);
  60
  61 static const char * pytdbpack_docstring =
  62 "Convert between Python values and Samba binary encodings.
  63
  64 This module is conceptually similar to the standard 'struct' module, but it
  65 uses both a different binary format and a different description string.
  66
  67 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  68 little-endian, unpadded, non-self-describing binary format.  It is intended
  69 that these functions be as similar as possible to the routines in Samba's
  70 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  71
  72 Python strings are used to specify the format of data to be packed or
  73 unpacked.
  74
  75 String encodings are implied by the database format: they may be either DOS
  76 codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
  77 to be the same as the default Python encoding).
  78
  79 tdbpack format strings:
  80
  81     'f': NUL-terminated string in codepage iso8859-1
  82
  83     'P': same as 'f'
  84
  85     'F': NUL-terminated string in iso-8859-1
  86
  87     'd':  4 byte little-endian unsigned number
  88
  89     'w':  2 byte little-endian unsigned number
  90
  91     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  92           really just an \"exists\" or \"does not exist\" flag.  The boolean
  93           value of the Python object is used.
  94
  95     'B': 4-byte LE length, followed by that many bytes of binary data.
  96          Corresponds to a Python integer giving the length, followed by a byte
  97          string of the appropriate length.
  98
  99     '$': Special flag indicating that the preceding format code should be
 100          repeated while data remains.  This is only supported for unpacking.
 101
 102     Every code corresponds to a single Python object, except 'B' which
 103     corresponds to two values (length and contents), and '$', which produces
 104     however many make sense.
 105 ";
 106
 107
 108 static char const pytdbpack_doc[] =
 109 "pack(format, values) -> buffer
 110 Pack Python objects into Samba binary format according to format string.
 111
 112 arguments:
 113     format -- string of tdbpack format characters
 114     values -- sequence of value objects corresponding 1:1 to format characters
 115
 116 returns:
 117     buffer -- string containing packed data
 118
 119 raises:
 120     IndexError -- if there are too few values for the format
 121     ValueError -- if any of the format characters is illegal
 122     TypeError  -- if the format is not a string, or values is not a sequence,
 123         or any of the values is of the wrong type for the corresponding
 124         format character
 125
 126 notes:
 127     For historical reasons, it is not an error to pass more values than are consumed
 128     by the format.
 129 ";
 130
 131
 132 static char const pytdbunpack_doc[] =
 133 "unpack(format, buffer) -> (values, rest)
 134 Unpack Samba binary data according to format string.
 135
 136 arguments:
 137     format -- string of tdbpack characters
 138     buffer -- string of packed binary data
 139
 140 returns:
 141     2-tuple of:
 142         values -- sequence of values corresponding 1:1 to format characters
 143         rest -- string containing data that was not decoded, or '' if the
 144             whole string was consumed
 145
 146 raises:
 147     IndexError -- if there is insufficient data in the buffer for the
 148         format (or if the data is corrupt and contains a variable-length
 149         field extending past the end)
 150     ValueError -- if any of the format characters is illegal
 151
 152 notes:
 153     Because unconsumed data is returned, you can feed it back in to the
 154     unpacker to extract further fields.  Alternatively, if you wish to modify
 155     some fields near the start of the data, you may be able to save time by
 156     only unpacking and repacking the necessary part.
 157 ";
 158
 159
 160 const char *pytdb_dos_encoding = "cp850";
 161
 162 /* NULL, meaning that the Samba default encoding *must* be the same as the
 163    Python default encoding. */
 164 const char *pytdb_unix_encoding = NULL;
 165
 166
 167 /*
 168   * Pack objects to bytes.
 169   *
 170   * All objects are first individually encoded onto a list, and then the list
 171   * of strings is concatenated.  This is faster than concatenating strings,
 172   * and reasonably simple to code.
 173   */
 174 static PyObject *
 175 pytdbpack(PyObject *self,
 176                PyObject *args)
 177 {
 178         char *format_str;
 179         PyObject *val_seq, *val_iter = NULL,
 180                 *packed_list = NULL, *packed_str = NULL,
 181                 *empty_str = NULL;
 182
 183         /* TODO: Test passing wrong types or too many arguments */
 184         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 185                 return NULL;
 186
 187         if (!(val_iter = PyObject_GetIter(val_seq)))
 188                 goto out;
 189
 190         /* Create list to hold strings until we're done, then join them all. */
 191         if (!(packed_list = PyList_New(0)))
 192                 goto out;
 193
 194         if (!pytdbpack_data(format_str, val_iter, packed_list))
 195                 goto out;
 196
 197         /* this function is not officially documented but it works */
 198         if (!(empty_str = PyString_InternFromString("")))
 199                 goto out;
 200
 201         packed_str = _PyString_Join(empty_str, packed_list);
 202
 203   out:
 204         Py_XDECREF(empty_str);
 205         Py_XDECREF(val_iter);
 206         Py_XDECREF(packed_list);
 207
 208         return packed_str;
 209 }
 210
 211
 212 /*
 213   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 214   PACKED_BUF.
 215
 216   The string has already been checked out, so we know that VAL_SEQ is large
 217   enough to hold the packed data, and that there are enough value items.
 218   (However, their types may not have been thoroughly checked yet.)
 219
 220   In addition, val_seq is a Python Fast sequence.
 221
 222   Returns NULL for error (with exception set), or None.
 223 */
 224 PyObject *
 225 pytdbpack_data(const char *format_str,
 226                     PyObject *val_iter,
 227                     PyObject *packed_list)
 228 {
 229         int format_i, val_i = 0;
 230
 231         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 232                 char ch = format_str[format_i];
 233
 234                 switch (ch) {
 235                         /* dispatch to the appropriate packer for this type,
 236                            which should pull things off the iterator, and
 237                            append them to the packed_list */
 238                 case 'w':
 239                 case 'd':
 240                 case 'p':
 241                         if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
 242                                 return NULL;
 243                         break;
 244
 245                 case 'f':
 246                 case 'P':
 247                         if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
 248                                 return NULL;
 249                         break;
 250
 251                 case 'B':
 252                         if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
 253                                 return NULL;
 254                         break;
 255
 256                 default:
 257                         PyErr_Format(PyExc_ValueError,
 258                                      "%s: format character '%c' is not supported",
 259                                      FUNCTION_MACRO, ch);
 260                         return NULL;
 261                 }
 262         }
 263
 264         return packed_list;
 265 }
 266
 267
 268 static PyObject *
 269 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 270 {
 271         unsigned long val_long;
 272         PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
 273         PyObject *new_list = NULL;
 274         unsigned char pack_buf[4];
 275
 276         if (!(val_obj = PyIter_Next(val_iter)))
 277                 goto out;
 278
 279         if (!(long_obj = PyNumber_Long(val_obj))) {
 280                 pytdbpack_bad_type(ch, "Number", val_obj);
 281                 goto out;
 282         }
 283
 284         val_long = PyLong_AsUnsignedLong(long_obj);
 285         pack_le_uint32(val_long, pack_buf);
 286
 287         /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
 288            the first two bytes. */
 289
 290         if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
 291                 goto out;
 292
 293         if (PyList_Append(packed_list, result_obj) != -1)
 294                 new_list = packed_list;
 295
 296   out:
 297         Py_XDECREF(val_obj);
 298         Py_XDECREF(long_obj);
 299         Py_XDECREF(result_obj);
 300
 301         return new_list;
 302 }
 303
 304
 305 /*
 306  * Take one string from the iterator val_iter, convert it to 8-bit, and return
 307  * it.
 308  *
 309  * If the input is neither a string nor Unicode, an exception is raised.
 310  *
 311  * If the input is Unicode, then it is converted to the appropriate encoding.
 312  *
 313  * If the input is a String, and encoding is not null, then it is converted to
 314  * Unicode using the default decoding method, and then converted to the
 315  * encoding.  If the encoding is NULL, then the string is written out as-is --
 316  * this is used when the default Python encoding is the same as the Samba
 317  * encoding.
 318  *
 319  * I hope this approach avoids being too fragile w.r.t. being passed either
 320  * Unicode or String objects.
 321  */
 322 static PyObject *
 323 pytdbpack_str(char ch,
 324               PyObject *val_iter, PyObject *packed_list, const char *encoding)
 325 {
 326         PyObject *val_obj = NULL;
 327         PyObject *unicode_obj = NULL;
 328         PyObject *coded_str = NULL;
 329         PyObject *nul_str = NULL;
 330         PyObject *new_list = NULL;
 331
 332         if (!(val_obj = PyIter_Next(val_iter)))
 333                 goto out;
 334
 335         if (PyUnicode_Check(val_obj)) {
 336                 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
 337                         goto out;
 338         }
 339         else if (PyString_Check(val_obj) && !encoding) {
 340                 /* For efficiency, we assume that the Python interpreter has
 341                    the same default string encoding as Samba's native string
 342                    encoding.  On the PSA, both are always 8859-1. */
 343                 coded_str = val_obj;
 344                 Py_INCREF(coded_str);
 345         }
 346         else if (PyString_Check(val_obj)) {
 347                 /* String, but needs to be converted */
 348                 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 349                         goto out;
 350                 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
 351                         goto out;
 352         }
 353         else {
 354                 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
 355                 goto out;
 356         }
 357
 358         if (!nul_str)
 359                 /* this is constant and often-used; hold it forever */
 360                 if (!(nul_str = PyString_FromStringAndSize("", 1)))
 361                         goto out;
 362
 363         if ((PyList_Append(packed_list, coded_str) != -1)
 364             && (PyList_Append(packed_list, nul_str) != -1))
 365                 new_list = packed_list;
 366
 367   out:
 368         Py_XDECREF(val_obj);
 369         Py_XDECREF(unicode_obj);
 370         Py_XDECREF(coded_str);
 371
 372         return new_list;
 373 }
 374
 375
 376 /*
 377  * Pack (LENGTH, BUFFER) pair onto the list.
 378  *
 379  * The buffer must already be a String, not Unicode, because it contains 8-bit
 380  * untranslated data.  In some cases it will actually be UTF_16_LE data.
 381  */
 382 static PyObject *
 383 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 384 {
 385         PyObject *val_obj;
 386         PyObject *new_list = NULL;
 387
 388         /* pull off integer and stick onto list */
 389         if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 390                 return NULL;
 391
 392         /* this assumes that the string is the right length; the old code did
 393            the same. */
 394         if (!(val_obj = PyIter_Next(val_iter)))
 395                 return NULL;
 396
 397         if (!PyString_Check(val_obj)) {
 398                 pytdbpack_bad_type('B', "String", val_obj);
 399                 goto out;
 400         }
 401
 402         if (PyList_Append(packed_list, val_obj) != -1)
 403                 new_list = packed_list;
 404
 405   out:
 406         Py_XDECREF(val_obj);
 407         return new_list;
 408 }
 409
 410
 411 static PyObject *pytdbpack_bad_type(char ch,
 412                                     const char *expected,
 413                                     PyObject *val_obj)
 414 {
 415         PyObject *r = PyObject_Repr(val_obj);
 416         if (!r)
 417                 return NULL;
 418         PyErr_Format(PyExc_TypeError,
 419                      "tdbpack: format '%c' requires %s, not %s",
 420                      ch, expected, PyString_AS_STRING(r));
 421         Py_DECREF(r);
 422         return val_obj;
 423 }
 424
 425
 426 /*
 427   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 428   but I don't know of one in plain libc, and it's probably not a big deal.  I
 429   realize this is kind of dumb because we'll almost always be on x86, but
 430   being safe is important.
 431 */
 432 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 433 {
 434         pbuf[0] =         val_long & 0xff;
 435         pbuf[1] = (val_long >> 8)  & 0xff;
 436         pbuf[2] = (val_long >> 16) & 0xff;
 437         pbuf[3] = (val_long >> 24) & 0xff;
 438 }
 439
 440
 441 static void pack_bytes(long len, const char *from,
 442                        unsigned char **pbuf)
 443 {
 444         memcpy(*pbuf, from, len);
 445         (*pbuf) += len;
 446 }
 447
 448
 449
 450 static PyObject *
 451 pytdbunpack(PyObject *self,
 452                  PyObject *args)
 453 {
 454         char *format_str, *packed_str, *ppacked;
 455         PyObject *val_list = NULL, *ret_tuple = NULL;
 456         PyObject *rest_string = NULL;
 457         int format_len, packed_len;
 458         char last_format = '#'; /* invalid */
 459         int i;
 460
 461         /* get arguments */
 462         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 463                 return NULL;
 464
 465         format_len = strlen(format_str);
 466
 467         /* Allocate list to hold results.  Initially empty, and we append
 468            results as we go along. */
 469         val_list = PyList_New(0);
 470         if (!val_list)
 471                 goto failed;
 472         ret_tuple = PyTuple_New(2);
 473         if (!ret_tuple)
 474                 goto failed;
 475
 476         /* For every object, unpack.  */
 477         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 478                 last_format = format_str[i];
 479                 /* packed_len is reduced in place */
 480                 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
 481                         goto failed;
 482         }
 483
 484         /* If the last character was '$', keep going until out of space */
 485         if (format_str[i] == '$') {
 486                 if (i == 0) {
 487                         PyErr_Format(PyExc_ValueError,
 488                                      "%s: '$' may not be first character in format",
 489                                      FUNCTION_MACRO);
 490                         return NULL;
 491                 }
 492                 while (packed_len > 0)
 493                         if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
 494                                 goto failed;
 495         }
 496
 497         /* save leftovers for next time */
 498         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 499         if (!rest_string)
 500                 goto failed;
 501
 502         /* return (values, rest) tuple; give up references to them */
 503         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 504         val_list = NULL;
 505         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 506         val_list = NULL;
 507         return ret_tuple;
 508
 509   failed:
 510         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 511            pointers for objects that haven't been allocated yet. */
 512         Py_XDECREF(val_list);
 513         Py_XDECREF(ret_tuple);
 514         Py_XDECREF(rest_string);
 515         return NULL;
 516 }
 517
 518
 519 static void
 520 pytdbunpack_err_too_short(void)
 521 {
 522         PyErr_Format(PyExc_IndexError,
 523                      "%s: data too short for unpack format", FUNCTION_MACRO);
 524 }
 525
 526
 527 static PyObject *
 528 pytdbunpack_uint32(char **pbuf, int *plen)
 529 {
 530         unsigned long v;
 531         unsigned char *b;
 532
 533         if (*plen < 4) {
 534                 pytdbunpack_err_too_short();
 535                 return NULL;
 536         }
 537
 538         b = *pbuf;
 539         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 540
 541         (*pbuf) += 4;
 542         (*plen) -= 4;
 543
 544         return PyLong_FromUnsignedLong(v);
 545 }
 546
 547
 548 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
 549 {
 550         long v;
 551         unsigned char *b;
 552
 553         if (*plen < 2) {
 554                 pytdbunpack_err_too_short();
 555                 return NULL;
 556         }
 557
 558         b = *pbuf;
 559         v = b[0] | b[1]<<8;
 560
 561         (*pbuf) += 2;
 562         (*plen) -= 2;
 563
 564         return PyInt_FromLong(v);
 565 }
 566
 567
 568 static PyObject *
 569 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
 570 {
 571         int len;
 572         char *nul_ptr, *start;
 573
 574         start = *pbuf;
 575
 576         nul_ptr = memchr(start, '\0', *plen);
 577         if (!nul_ptr) {
 578                 pytdbunpack_err_too_short();
 579                 return NULL;
 580         }
 581
 582         len = nul_ptr - start;
 583
 584         *pbuf += len + 1;       /* skip \0 */
 585         *plen -= len + 1;
 586
 587         return PyString_Decode(start, len, encoding, NULL);
 588 }
 589
 590
 591 static PyObject *
 592 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 593 {
 594         /* first get 32-bit len */
 595         long slen;
 596         unsigned char *b;
 597         unsigned char *start;
 598         PyObject *str_obj = NULL, *len_obj = NULL;
 599
 600         if (*plen < 4) {
 601                 pytdbunpack_err_too_short();
 602                 return NULL;
 603         }
 604
 605         b = *pbuf;
 606         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 607
 608         if (slen < 0) { /* surely you jest */
 609                 PyErr_Format(PyExc_ValueError,
 610                              "%s: buffer seems to have negative length", FUNCTION_MACRO);
 611                 return NULL;
 612         }
 613
 614         (*pbuf) += 4;
 615         (*plen) -= 4;
 616         start = *pbuf;
 617
 618         if (*plen < slen) {
 619                 PyErr_Format(PyExc_IndexError,
 620                              "%s: not enough data to unpack buffer: "
 621                              "need %d bytes, have %d", FUNCTION_MACRO,
 622                              (int) slen, *plen);
 623                 return NULL;
 624         }
 625
 626         (*pbuf) += slen;
 627         (*plen) -= slen;
 628
 629         if (!(len_obj = PyInt_FromLong(slen)))
 630                 goto failed;
 631
 632         if (PyList_Append(val_list, len_obj) == -1)
 633                 goto failed;
 634
 635         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 636                 goto failed;
 637
 638         if (PyList_Append(val_list, str_obj) == -1)
 639                 goto failed;
 640
 641         Py_DECREF(len_obj);
 642         Py_DECREF(str_obj);
 643
 644         return val_list;
 645
 646   failed:
 647         Py_XDECREF(len_obj);    /* handles NULL */
 648         Py_XDECREF(str_obj);
 649         return NULL;
 650 }
 651
 652
 653 /* Unpack a single field from packed data, according to format character CH.
 654    Remaining data is at *PBUF, of *PLEN.
 655
 656    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 657    been consumed.
 658
 659    Returns a reference to None, or NULL for failure.
 660 */
 661 static PyObject *pytdbunpack_item(char ch,
 662                                   char **pbuf,
 663                                   int *plen,
 664                                   PyObject *val_list)
 665 {
 666         PyObject *unpacked;
 667
 668         if (ch == 'w') {        /* 16-bit int */
 669                 unpacked = pytdbunpack_int16(pbuf, plen);
 670         }
 671         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 672                 /* pointers can just come through as integers */
 673                 unpacked = pytdbunpack_uint32(pbuf, plen);
 674         }
 675         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 676                 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
 677         }
 678         else if (ch == 'B') { /* length, buffer */
 679                 return pytdbunpack_buffer(pbuf, plen, val_list);
 680         }
 681         else {
 682                 PyErr_Format(PyExc_ValueError,
 683                              "%s: format character '%c' is not supported",
 684                              FUNCTION_MACRO, ch);
 685
 686                 return NULL;
 687         }
 688
 689         /* otherwise OK */
 690         if (!unpacked)
 691                 return NULL;
 692
 693         if (PyList_Append(val_list, unpacked) == -1)
 694                 val_list = NULL;
 695
 696         /* PyList_Append takes a new reference to the inserted object.
 697            Therefore, we no longer need the original reference. */
 698         Py_DECREF(unpacked);
 699
 700         return val_list;
 701 }
 702
 703
 704
 705
 706
 707
 708 static PyMethodDef pytdbpack_methods[] = {
 709         { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 710         { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
 711 };
 712
 713 DL_EXPORT(void)
 714 inittdbpack(void)
 715 {
 716         Py_InitModule3("tdbpack", pytdbpack_methods,
 717                        (char *) pytdbpack_docstring);
 718 }