source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
  34
  35 static PyObject *pytdbpack_pack_data(const char *format_str,
  36                                      PyObject *val_seq,
  37                                      unsigned char *buf);
  38
  39
  40
  41
  42 static PyObject *pytdbpack_bad_type(char ch,
  43                                     const char *expected,
  44                                     PyObject *val_obj);
  45
  46 static const char * pytdbpack_docstring =
  47 "Convert between Python values and Samba binary encodings.
  48
  49 This module is conceptually similar to the standard 'struct' module, but it
  50 uses both a different binary format and a different description string.
  51
  52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  53 little-endian, unpadded, non-self-describing binary format.  It is intended
  54 that these functions be as similar as possible to the routines in Samba's
  55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  56
  57 Python strings are used to specify the format of data to be packed or
  58 unpacked.
  59
  60 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  61 module must make appropriate translations if necessary, typically to and from
  62 Unicode objects.
  63
  64 tdbpack format strings:
  65
  66     'f':  NULL-terminated string in DOS codepage
  67
  68     'P':  same as 'f'
  69
  70     'd':  4 byte little-endian unsigned number
  71
  72     'w':  2 byte little-endian unsigned number
  73
  74     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  75           really just an \"exists\" or \"does not exist\" flag.  The boolean
  76           value of the Python object is used.
  77
  78     'B': 4-byte LE length, followed by that many bytes of binary data.
  79          Corresponds to a Python integer giving the length, followed by a byte
  80          string of the appropriate length.
  81
  82     '$': Special flag indicating that the preceding format code should be
  83          repeated while data remains.  This is only supported for unpacking.
  84
  85     Every code corresponds to a single Python object, except 'B' which
  86     corresponds to two values (length and contents), and '$', which produces
  87     however many make sense.
  88 ";
  89
  90
  91 static char const pytdbpack_pack_doc[] =
  92 "pack(format, values) -> buffer
  93 Pack Python objects into Samba binary format according to format string.
  94
  95 arguments:
  96     format -- string of tdbpack format characters
  97     values -- sequence of value objects corresponding 1:1 to format characters
  98
  99 returns:
 100     buffer -- string containing packed data
 101
 102 raises:
 103     IndexError -- if there are too few values for the format
 104     ValueError -- if any of the format characters is illegal
 105     TypeError  -- if the format is not a string, or values is not a sequence,
 106         or any of the values is of the wrong type for the corresponding
 107         format character
 108
 109 notes:
 110     For historical reasons, it is not an error to pass more values than are consumed
 111     by the format.
 112 ";
 113
 114
 115 static char const pytdbpack_unpack_doc[] =
 116 "unpack(format, buffer) -> (values, rest)
 117 Unpack Samba binary data according to format string.
 118
 119 arguments:
 120     format -- string of tdbpack characters
 121     buffer -- string of packed binary data
 122
 123 returns:
 124     2-tuple of:
 125         values -- sequence of values corresponding 1:1 to format characters
 126         rest -- string containing data that was not decoded, or '' if the
 127             whole string was consumed
 128
 129 raises:
 130     IndexError -- if there is insufficient data in the buffer for the
 131         format (or if the data is corrupt and contains a variable-length
 132         field extending past the end)
 133     ValueError -- if any of the format characters is illegal
 134
 135 notes:
 136     Because unconsumed data is returned, you can feed it back in to the
 137     unpacker to extract further fields.  Alternatively, if you wish to modify
 138     some fields near the start of the data, you may be able to save time by
 139     only unpacking and repacking the necessary part.
 140 ";
 141
 142
 143
 144 /*
 145   Game plan is to first of all walk through the arguments and calculate the
 146   total length that will be required.  We allocate a Python string of that
 147   size, then walk through again and fill it in.
 148
 149   We just borrow references to all the passed arguments, since none of them
 150   need to be permanently stored.  We transfer ownership to the returned
 151   object.
 152  */
 153 static PyObject *
 154 pytdbpack_pack(PyObject *self,
 155                PyObject *args)
 156 {
 157         char *format_str;
 158         PyObject *val_seq, *fast_seq, *buf_str;
 159         int reqd_len;
 160         char *packed_buf;
 161
 162         /* TODO: Test passing wrong types or too many arguments */
 163         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 164                 return NULL;
 165
 166         /* Convert into a list or tuple (if not already one), so that we can
 167          * index more easily. */
 168         fast_seq = PySequence_Fast(val_seq,
 169                                    __FUNCTION__ ": argument 2 must be sequence");
 170         if (!fast_seq)
 171                 return NULL;
 172
 173         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 174         if (reqd_len == -1)     /* exception was thrown */
 175                 return NULL;
 176
 177         /* Allocate space.
 178
 179            This design causes an unnecessary copying of the data when Python
 180            constructs an object, and that might possibly be avoided by using a
 181            Buffer object of some kind instead.  I'm not doing that for now
 182            though.  */
 183         packed_buf = malloc(reqd_len);
 184         if (!packed_buf) {
 185                 PyErr_Format(PyExc_MemoryError,
 186                              "%s: couldn't allocate %d bytes for packed buffer",
 187                              __FUNCTION__, reqd_len);
 188                 return NULL;
 189         }
 190
 191         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 192                 free(packed_buf);
 193                 return NULL;
 194         }
 195
 196         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 197         free(packed_buf);       /* get rid of tmp buf */
 198
 199         return buf_str;
 200 }
 201
 202
 203
 204 static PyObject *
 205 pytdbpack_unpack(PyObject *self,
 206                  PyObject *args)
 207 {
 208         char *format_str, *packed_str, *ppacked;
 209         PyObject *val_list = NULL, *ret_tuple = NULL;
 210         PyObject *rest_string = NULL;
 211         int format_len, packed_len;
 212         char last_format = '#'; /* invalid */
 213         int i;
 214
 215         /* get arguments */
 216         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 217                 return NULL;
 218
 219         format_len = strlen(format_str);
 220
 221         /* Allocate list to hold results.  Initially empty, and we append
 222            results as we go along. */
 223         val_list = PyList_New(0);
 224         if (!val_list)
 225                 goto failed;
 226         ret_tuple = PyTuple_New(2);
 227         if (!ret_tuple)
 228                 goto failed;
 229
 230         /* For every object, unpack.  */
 231         for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
 232                 last_format = format_str[i];
 233                 /* packed_len is reduced in place */
 234                 if (!pytdbpack_unpack_item(format_str[i], &ppacked, &packed_len, val_list))
 235                         goto failed;
 236         }
 237
 238         /* If the last character was '$', keep going until out of space */
 239         if (format_str[i] == '$') {
 240                 if (i == 0) {
 241                         PyErr_Format(PyExc_ValueError,
 242                                      "%s: '$' may not be first character in format",
 243                                      __FUNCTION__);
 244                         return NULL;
 245                 }
 246                 while (packed_len > 0)
 247                         if (!pytdbpack_unpack_item(last_format, &ppacked, &packed_len, val_list))
 248                                 goto failed;
 249         }
 250
 251         /* save leftovers for next time */
 252         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 253         if (!rest_string)
 254                 goto failed;
 255
 256         /* return (values, rest) tuple; give up references to them */
 257         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 258         val_list = NULL;
 259         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 260         val_list = NULL;
 261         return ret_tuple;
 262
 263   failed:
 264         /* handle failure: deallocate anything.  XDECREF forms handle NULL
 265            pointers for objects that haven't been allocated yet. */
 266         Py_XDECREF(val_list);
 267         Py_XDECREF(ret_tuple);
 268         Py_XDECREF(rest_string);
 269         return NULL;
 270 }
 271
 272
 273 /*
 274   Internal routine that calculates how many bytes will be required to
 275   encode the values in the format.
 276
 277   Also checks that the value list is the right size for the format list.
 278
 279   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 280   which case a Python exception has been raised.
 281
 282   Arguments:
 283
 284     val_seq: a Fast Sequence (list or tuple), being all the values
 285 */
 286 static int
 287 pytdbpack_calc_reqd_len(char *format_str,
 288                         PyObject *val_seq)
 289 {
 290         int len = 0;
 291         char *p;
 292         int val_i;
 293         int val_len;
 294
 295         val_len = PySequence_Length(val_seq);
 296         if (val_len == -1)
 297                 return -1;
 298
 299         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 300                 char ch = *p;
 301
 302                 if (val_i >= val_len) {
 303                         PyErr_Format(PyExc_IndexError,
 304                                      "%s: value list is too short for format string",
 305                                      __FUNCTION__);
 306                         return -1;
 307                 }
 308
 309                 /* borrow a reference to the item */
 310                 if (ch == 'd' || ch == 'p')
 311                         len += 4;
 312                 else if (ch == 'w')
 313                         len += 2;
 314                 else if (ch == 'f' || ch == 'P') {
 315                         /* nul-terminated 8-bit string */
 316                         int item_len;
 317                         PyObject *str_obj;
 318
 319                         str_obj = PySequence_GetItem(val_seq, val_i);
 320                         if (!str_obj)
 321                                 return -1;
 322
 323                         if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
 324                                 pytdbpack_bad_type(ch, "String", str_obj);
 325                                 return -1;
 326                         }
 327
 328                         len += 1 + item_len;
 329                 }
 330                 else if (ch == 'B') {
 331                         /* length-preceded byte buffer: n bytes, plus a preceding
 332                          * word */
 333                         PyObject *len_obj;
 334                         long len_val;
 335
 336                         len_obj = PySequence_GetItem(val_seq, val_i);
 337                         val_i++; /* skip over buffer */
 338
 339                         if (!PyNumber_Check(len_obj)) {
 340                                 pytdbpack_bad_type(ch, "Number", len_obj);
 341                                 return -1;
 342                         }
 343
 344                         len_val = PyInt_AsLong(len_obj);
 345                         if (len_val < 0) {
 346                                 PyErr_Format(PyExc_ValueError,
 347                                              "%s: format 'B' requires positive integer", __FUNCTION__);
 348                                 return -1;
 349                         }
 350
 351                         len += 4 + len_val;
 352                 }
 353                 else {
 354                         PyErr_Format(PyExc_ValueError,
 355                                      "%s: format character '%c' is not supported",
 356                                      __FUNCTION__, ch);
 357
 358                         return -1;
 359                 }
 360         }
 361
 362         return len;
 363 }
 364
 365
 366 static PyObject *pytdbpack_bad_type(char ch,
 367                                     const char *expected,
 368                                     PyObject *val_obj)
 369 {
 370         PyObject *r = PyObject_Repr(val_obj);
 371         if (!r)
 372                 return NULL;
 373         PyErr_Format(PyExc_TypeError,
 374                      "tdbpack: format '%c' requires %s, not %s",
 375                      ch, expected, PyString_AS_STRING(r));
 376         Py_DECREF(r);
 377         return val_obj;
 378 }
 379
 380
 381 /*
 382   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 383   but I don't know of one in plain libc, and it's probably not a big deal.  I
 384   realize this is kind of dumb because we'll almost always be on x86, but
 385   being safe is important.
 386 */
 387 static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
 388 {
 389         (*pbuf)[0] =         val_long & 0xff;
 390         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 391         (*pbuf)[2] = (val_long >> 16) & 0xff;
 392         (*pbuf)[3] = (val_long >> 24) & 0xff;
 393         (*pbuf) += 4;
 394 }
 395
 396
 397 static void pack_bytes(long len, const char *from,
 398                        unsigned char **pbuf)
 399 {
 400         memcpy(*pbuf, from, len);
 401         (*pbuf) += len;
 402 }
 403
 404
 405 static void
 406 unpack_err_too_short(void)
 407 {
 408         PyErr_Format(PyExc_IndexError,
 409                      __FUNCTION__ ": data too short for unpack format");
 410 }
 411
 412
 413 static PyObject *
 414 unpack_uint32(char **pbuf, int *plen)
 415 {
 416         unsigned long v;
 417         unsigned char *b;
 418
 419         if (*plen < 4) {
 420                 unpack_err_too_short();
 421                 return NULL;
 422         }
 423
 424         b = *pbuf;
 425         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 426
 427         (*pbuf) += 4;
 428         (*plen) -= 4;
 429
 430         return PyLong_FromUnsignedLong(v);
 431 }
 432
 433
 434 static PyObject *unpack_int16(char **pbuf, int *plen)
 435 {
 436         long v;
 437         unsigned char *b;
 438
 439         if (*plen < 2) {
 440                 unpack_err_too_short();
 441                 return NULL;
 442         }
 443
 444         b = *pbuf;
 445         v = b[0] | b[1]<<8;
 446
 447         (*pbuf) += 2;
 448         (*plen) -= 2;
 449
 450         return PyInt_FromLong(v);
 451 }
 452
 453
 454 static PyObject *
 455 unpack_string(char **pbuf, int *plen)
 456 {
 457         int len;
 458         char *nul_ptr, *start;
 459
 460         start = *pbuf;
 461
 462         nul_ptr = memchr(start, '\0', *plen);
 463         if (!nul_ptr) {
 464                 unpack_err_too_short();
 465                 return NULL;
 466         }
 467
 468         len = nul_ptr - start;
 469
 470         *pbuf += len + 1;       /* skip \0 */
 471         *plen -= len + 1;
 472
 473         return PyString_FromStringAndSize(start, len);
 474 }
 475
 476
 477 static PyObject *
 478 unpack_buffer(char **pbuf, int *plen, PyObject *val_list)
 479 {
 480         /* first get 32-bit len */
 481         long slen;
 482         unsigned char *b;
 483         unsigned char *start;
 484         PyObject *str_obj = NULL, *len_obj = NULL;
 485
 486         if (*plen < 4) {
 487                 unpack_err_too_short();
 488                 return NULL;
 489         }
 490
 491         b = *pbuf;
 492         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 493
 494         if (slen < 0) { /* surely you jest */
 495                 PyErr_Format(PyExc_ValueError,
 496                              __FUNCTION__ ": buffer seems to have negative length");
 497                 return NULL;
 498         }
 499
 500         (*pbuf) += 4;
 501         (*plen) -= 4;
 502         start = *pbuf;
 503
 504         if (*plen < slen) {
 505                 PyErr_Format(PyExc_IndexError,
 506                              __FUNCTION__ ": not enough data to unpack buffer: "
 507                              "need %d bytes, have %d",
 508                              (int) slen, *plen);
 509                 return NULL;
 510         }
 511
 512         (*pbuf) += slen;
 513         (*plen) -= slen;
 514
 515         if (!(len_obj = PyInt_FromLong(slen)))
 516                 goto failed;
 517
 518         if (PyList_Append(val_list, len_obj) == -1)
 519                 goto failed;
 520
 521         if (!(str_obj = PyString_FromStringAndSize(start, slen)))
 522                 goto failed;
 523
 524         if (PyList_Append(val_list, str_obj) == -1)
 525                 goto failed;
 526
 527         return val_list;
 528
 529   failed:
 530         Py_XDECREF(len_obj);    /* handles NULL */
 531         Py_XDECREF(str_obj);
 532         return NULL;
 533 }
 534
 535
 536 /* Unpack a single field from packed data, according to format character CH.
 537    Remaining data is at *PBUF, of *PLEN.
 538
 539    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 540    been consumed.
 541
 542    Returns a reference to None, or NULL for failure.
 543 */
 544 static PyObject *pytdbpack_unpack_item(char ch,
 545                                        char **pbuf,
 546                                        int *plen,
 547                                        PyObject *val_list)
 548 {
 549         PyObject *result;
 550
 551         if (ch == 'w') {        /* 16-bit int */
 552                 result = unpack_int16(pbuf, plen);
 553         }
 554         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 555                 /* pointers can just come through as integers */
 556                 result = unpack_uint32(pbuf, plen);
 557         }
 558         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 559                 result = unpack_string(pbuf, plen);
 560         }
 561         else if (ch == 'B') { /* length, buffer */
 562                 return unpack_buffer(pbuf, plen, val_list);
 563         }
 564         else {
 565                 PyErr_Format(PyExc_ValueError,
 566                              __FUNCTION__ ": format character '%c' is not supported",
 567                              ch);
 568
 569                 return NULL;
 570         }
 571
 572         /* otherwise OK */
 573         if (!result)
 574                 return NULL;
 575         if (PyList_Append(val_list, result) == -1)
 576                 return NULL;
 577
 578         return val_list;
 579 }
 580
 581
 582
 583
 584 /*
 585   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 586   PACKED_BUF.
 587
 588   The string has already been checked out, so we know that VAL_SEQ is large
 589   enough to hold the packed data, and that there are enough value items.
 590   (However, their types may not have been thoroughly checked yet.)
 591
 592   In addition, val_seq is a Python Fast sequence.
 593
 594   Returns NULL for error (with exception set), or None.
 595 */
 596 PyObject *
 597 pytdbpack_pack_data(const char *format_str,
 598                     PyObject *val_seq,
 599                     unsigned char *packed)
 600 {
 601         int format_i, val_i = 0;
 602
 603         for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
 604                 char ch = format_str[format_i];
 605                 PyObject *val_obj;
 606
 607                 /* borrow a reference to the item */
 608                 val_obj = PySequence_GetItem(val_seq, val_i++);
 609                 if (!val_obj)
 610                         return NULL;
 611
 612                 if (ch == 'w') {
 613                         unsigned long val_long;
 614                         PyObject *long_obj;
 615
 616                         if (!(long_obj = PyNumber_Long(val_obj))) {
 617                                 pytdbpack_bad_type(ch, "Long", val_obj);
 618                                 return NULL;
 619                         }
 620
 621                         val_long = PyLong_AsUnsignedLong(long_obj);
 622                         (packed)[0] = val_long & 0xff;
 623                         (packed)[1] = (val_long >> 8) & 0xff;
 624                         (packed) += 2;
 625                         Py_DECREF(long_obj);
 626                 }
 627                 else if (ch == 'd') {
 628                         /* 4-byte LE number */
 629                         PyObject *long_obj;
 630
 631                         if (!(long_obj = PyNumber_Long(val_obj))) {
 632                                 pytdbpack_bad_type(ch, "Long", val_obj);
 633                                 return NULL;
 634                         }
 635
 636                         pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
 637
 638                         Py_DECREF(long_obj);
 639                 }
 640                 else if (ch == 'p') {
 641                         /* "Pointer" value -- in the subset of DCERPC used by Samba,
 642                            this is really just an "exists" or "does not exist"
 643                            flag. */
 644                         pack_uint32(PyObject_IsTrue(val_obj), &packed);
 645                 }
 646                 else if (ch == 'f' || ch == 'P') {
 647                         int size;
 648                         char *sval;
 649
 650                         size = PySequence_Length(val_obj);
 651                         if (size < 0)
 652                                 return NULL;
 653                         sval = PyString_AsString(val_obj);
 654                         if (!sval)
 655                                 return NULL;
 656                         pack_bytes(size+1, sval, &packed); /* include nul */
 657                 }
 658                 else if (ch == 'B') {
 659                         long size;
 660                         char *sval;
 661
 662                         if (!PyNumber_Check(val_obj)) {
 663                                 pytdbpack_bad_type(ch, "Number", val_obj);
 664                                 return NULL;
 665                         }
 666
 667                         if (!(val_obj = PyNumber_Long(val_obj)))
 668                                 return NULL;
 669
 670                         size = PyLong_AsLong(val_obj);
 671                         pack_uint32(size, &packed);
 672
 673                         /* Release the new reference created by the cast */
 674                         Py_DECREF(val_obj);
 675
 676                         val_obj = PySequence_GetItem(val_seq, val_i++);
 677                         if (!val_obj)
 678                                 return NULL;
 679
 680                         sval = PyString_AsString(val_obj);
 681                         if (!sval)
 682                                 return NULL;
 683
 684                         pack_bytes(size, sval, &packed); /* do not include nul */
 685                 }
 686                 else {
 687                         /* this ought to be caught while calculating the length, but
 688                            just in case. */
 689                         PyErr_Format(PyExc_ValueError,
 690                                      "%s: format character '%c' is not supported",
 691                                      __FUNCTION__, ch);
 692
 693                         return NULL;
 694                 }
 695         }
 696
 697         return Py_None;
 698 }
 699
 700
 701
 702 static PyMethodDef pytdbpack_methods[] = {
 703         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 704         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 705 };
 706
 707 DL_EXPORT(void)
 708 inittdbpack(void)
 709 {
 710         Py_InitModule3("tdbpack", pytdbpack_methods,
 711                        (char *) pytdbpack_docstring);
 712 }