source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char,
  34                                        char **pbuf,
  35                                        int *plen);
  36
  37 static PyObject *pytdbpack_pack_data(const char *format_str,
  38                                      PyObject *val_seq,
  39                                      unsigned char *buf);
  40
  41
  42
  43
  44 static PyObject *pytdbpack_bad_type(char ch,
  45                                     const char *expected,
  46                                     PyObject *val_obj);
  47
  48 static const char * pytdbpack_docstring =
  49 "Convert between Python values and Samba binary encodings.
  50
  51 This module is conceptually similar to the standard 'struct' module, but it
  52 uses both a different binary format and a different description string.
  53
  54 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  55 little-endian, unpadded, non-self-describing binary format.  It is intended
  56 that these functions be as similar as possible to the routines in Samba's
  57 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  58
  59 Python strings are used to specify the format of data to be packed or
  60 unpacked.
  61
  62 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  63 module must make appropriate translations if necessary, typically to and from
  64 Unicode objects.
  65
  66 tdbpack format strings:
  67
  68     'f':  NULL-terminated string in DOS codepage
  69
  70     'P':  same as 'f'
  71
  72     'd':  4 byte little-endian unsigned number
  73
  74     'w':  2 byte little-endian unsigned number
  75
  76     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  77           really just an \"exists\" or \"does not exist\" flag.  The boolean
  78           value of the Python object is used.
  79
  80     'B': 4-byte LE length, followed by that many bytes of binary data.
  81          Corresponds to a Python integer giving the length, followed by a byte
  82          string of the appropriate length.
  83
  84     '$': Special flag indicating that the preceding format code should be
  85          repeated while data remains.  This is only supported for unpacking.
  86
  87     Every code corresponds to a single Python object, except 'B' which
  88     corresponds to two values (length and contents), and '$', which produces
  89     however many make sense.
  90 ";
  91
  92
  93 static char const pytdbpack_pack_doc[] =
  94 "pack(format, values) -> buffer
  95 Pack Python objects into Samba binary format according to format string.
  96
  97 arguments:
  98     format -- string of tdbpack format characters
  99     values -- sequence of value objects corresponding 1:1 to format characters
 100
 101 returns:
 102     buffer -- string containing packed data
 103
 104 raises:
 105     IndexError -- if there are too few values for the format
 106     ValueError -- if any of the format characters is illegal
 107     TypeError  -- if the format is not a string, or values is not a sequence,
 108         or any of the values is of the wrong type for the corresponding
 109         format character
 110
 111 notes:
 112     For historical reasons, it is not an error to pass more values than are consumed
 113     by the format.
 114 ";
 115
 116
 117 static char const pytdbpack_unpack_doc[] =
 118 "unpack(format, buffer) -> (values, rest)
 119 Unpack Samba binary data according to format string.
 120
 121 arguments:
 122     format -- string of tdbpack characters
 123     buffer -- string of packed binary data
 124
 125 returns:
 126     2-tuple of:
 127         values -- sequence of values corresponding 1:1 to format characters
 128         rest -- string containing data that was not decoded, or '' if the
 129             whole string was consumed
 130
 131 raises:
 132     IndexError -- if there is insufficient data in the buffer for the
 133         format (or if the data is corrupt and contains a variable-length
 134         field extending past the end)
 135     ValueError -- if any of the format characters is illegal
 136
 137 notes:
 138     Because unconsumed data is returned, you can feed it back in to the
 139     unpacker to extract further fields.  Alternatively, if you wish to modify
 140     some fields near the start of the data, you may be able to save time by
 141     only unpacking and repacking the necessary part.
 142 ";
 143
 144
 145
 146 /*
 147   Game plan is to first of all walk through the arguments and calculate the
 148   total length that will be required.  We allocate a Python string of that
 149   size, then walk through again and fill it in.
 150
 151   We just borrow references to all the passed arguments, since none of them
 152   need to be permanently stored.  We transfer ownership to the returned
 153   object.
 154  */
 155 static PyObject *
 156 pytdbpack_pack(PyObject *self,
 157                PyObject *args)
 158 {
 159         char *format_str;
 160         PyObject *val_seq, *fast_seq, *buf_str;
 161         int reqd_len;
 162         char *packed_buf;
 163
 164         /* TODO: Test passing wrong types or too many arguments */
 165         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 166                 return NULL;
 167
 168         /* Convert into a list or tuple (if not already one), so that we can
 169          * index more easily. */
 170         fast_seq = PySequence_Fast(val_seq,
 171                                    __FUNCTION__ ": argument 2 must be sequence");
 172         if (!fast_seq)
 173                 return NULL;
 174
 175         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 176         if (reqd_len == -1)     /* exception was thrown */
 177                 return NULL;
 178
 179         /* Allocate space.
 180
 181            This design causes an unnecessary copying of the data when Python
 182            constructs an object, and that might possibly be avoided by using a
 183            Buffer object of some kind instead.  I'm not doing that for now
 184            though.  */
 185         packed_buf = malloc(reqd_len);
 186         if (!packed_buf) {
 187                 PyErr_Format(PyExc_MemoryError,
 188                              "%s: couldn't allocate %d bytes for packed buffer",
 189                              __FUNCTION__, reqd_len);
 190                 return NULL;
 191         }
 192
 193         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 194                 free(packed_buf);
 195                 return NULL;
 196         }
 197
 198         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 199         free(packed_buf);       /* get rid of tmp buf */
 200
 201         return buf_str;
 202 }
 203
 204
 205
 206 static PyObject *
 207 pytdbpack_unpack(PyObject *self,
 208                  PyObject *args)
 209 {
 210         char *format_str, *packed_str, *ppacked;
 211         PyObject *val_list = NULL, *ret_tuple = NULL;
 212         PyObject *rest_string = NULL;
 213         int format_len, packed_len;
 214         int i;
 215         char last_format = '#';
 216
 217         /* get arguments */
 218         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 219                 return NULL;
 220
 221         format_len = strlen(format_str);
 222
 223         /* allocate list to hold results */
 224         val_list = PyList_New(format_len);
 225         if (!val_list)
 226                 goto failed;
 227         ret_tuple = PyTuple_New(2);
 228         if (!ret_tuple)
 229                 goto failed;
 230
 231         /* For every object, unpack.  */
 232         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 233                 PyObject *val_obj;
 234                 char format;
 235
 236                 format = format_str[i];
 237                 if (format == '$') {
 238                         if (i == 0) {
 239                                 PyErr_Format(PyExc_ValueError,
 240                                              "%s: '$' may not be first character in format",
 241                                              __FUNCTION__);
 242                                 goto failed;
 243                         }
 244                         else {
 245                                 format = last_format; /* repeat */
 246                         }
 247                 }
 248
 249                 val_obj = pytdbpack_unpack_item(format,
 250                                                 &ppacked,
 251                                                 &packed_len);
 252                 if (!val_obj)
 253                         goto failed;
 254
 255                 PyList_SET_ITEM(val_list, i, val_obj);
 256                 last_format = format;
 257         }
 258
 259         /* save leftovers for next time */
 260         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 261         if (!rest_string)
 262                 goto failed;
 263
 264         /* return (values, rest) tuple; give up references to them */
 265         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 266         val_list = NULL;
 267         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 268         val_list = NULL;
 269         return ret_tuple;
 270
 271   failed:
 272         /* handle failure: deallocate anything */
 273         Py_XDECREF(val_list);
 274         Py_XDECREF(ret_tuple);
 275         Py_XDECREF(rest_string);
 276         return NULL;
 277 }
 278
 279
 280 /*
 281   Internal routine that calculates how many bytes will be required to
 282   encode the values in the format.
 283
 284   Also checks that the value list is the right size for the format list.
 285
 286   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 287   which case a Python exception has been raised.
 288
 289   Arguments:
 290
 291     val_seq: a Fast Sequence (list or tuple), being all the values
 292 */
 293 static int
 294 pytdbpack_calc_reqd_len(char *format_str,
 295                         PyObject *val_seq)
 296 {
 297         int len = 0;
 298         char *p;
 299         int val_i;
 300         int val_len;
 301
 302         val_len = PySequence_Length(val_seq);
 303         if (val_len == -1)
 304                 return -1;
 305
 306         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 307                 char ch = *p;
 308
 309                 if (val_i >= val_len) {
 310                         PyErr_Format(PyExc_IndexError,
 311                                      "%s: value list is too short for format string",
 312                                      __FUNCTION__);
 313                         return -1;
 314                 }
 315
 316                 /* borrow a reference to the item */
 317                 if (ch == 'd' || ch == 'p')
 318                         len += 4;
 319                 else if (ch == 'w')
 320                         len += 2;
 321                 else if (ch == 'f' || ch == 'P') {
 322                         /* nul-terminated 8-bit string */
 323                         int item_len;
 324                         PyObject *str_obj;
 325
 326                         str_obj = PySequence_GetItem(val_seq, val_i);
 327                         if (!str_obj)
 328                                 return -1;
 329
 330                         if (!PyString_Check(str_obj) || ((item_len = PyString_Size(str_obj)) == -1)) {
 331                                 pytdbpack_bad_type(ch, "String", str_obj);
 332                                 return -1;
 333                         }
 334
 335                         len += 1 + item_len;
 336                 }
 337                 else if (ch == 'B') {
 338                         /* length-preceded byte buffer: n bytes, plus a preceding
 339                          * word */
 340                         PyObject *len_obj;
 341                         long len_val;
 342
 343                         len_obj = PySequence_GetItem(val_seq, val_i);
 344                         val_i++; /* skip over buffer */
 345
 346                         if (!PyNumber_Check(len_obj)) {
 347                                 pytdbpack_bad_type(ch, "Number", len_obj);
 348                                 return -1;
 349                         }
 350
 351                         len_val = PyInt_AsLong(len_obj);
 352                         if (len_val < 0) {
 353                                 PyErr_Format(PyExc_ValueError,
 354                                              "%s: format 'B' requires positive integer", __FUNCTION__);
 355                                 return -1;
 356                         }
 357
 358                         len += 4 + len_val;
 359                 }
 360                 else {
 361                         PyErr_Format(PyExc_ValueError,
 362                                      "%s: format character '%c' is not supported",
 363                                      __FUNCTION__, ch);
 364
 365                         return -1;
 366                 }
 367         }
 368
 369         return len;
 370 }
 371
 372
 373 static PyObject *pytdbpack_bad_type(char ch,
 374                                     const char *expected,
 375                                     PyObject *val_obj)
 376 {
 377         PyObject *r = PyObject_Repr(val_obj);
 378         if (!r)
 379                 return NULL;
 380         PyErr_Format(PyExc_TypeError,
 381                      "tdbpack: format '%c' requires %s, not %s",
 382                      ch, expected, PyString_AS_STRING(r));
 383         Py_DECREF(r);
 384         return val_obj;
 385 }
 386
 387
 388 /*
 389   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 390   but I don't know of one in plain libc, and it's probably not a big deal.  I
 391   realize this is kind of dumb because we'll almost always be on x86, but
 392   being safe is important.
 393 */
 394 static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
 395 {
 396         (*pbuf)[0] =         val_long & 0xff;
 397         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 398         (*pbuf)[2] = (val_long >> 16) & 0xff;
 399         (*pbuf)[3] = (val_long >> 24) & 0xff;
 400         (*pbuf) += 4;
 401 }
 402
 403
 404 static void pack_bytes(long len, const char *from,
 405                        unsigned char **pbuf)
 406 {
 407         memcpy(*pbuf, from, len);
 408         (*pbuf) += len;
 409 }
 410
 411
 412 static void
 413 unpack_err_too_short(void)
 414 {
 415         PyErr_Format(PyExc_IndexError,
 416                      __FUNCTION__ ": data too short for unpack format");
 417 }
 418
 419
 420 static PyObject *
 421 unpack_uint32(char **pbuf, int *plen)
 422 {
 423         unsigned long v;
 424         unsigned char *b;
 425
 426         if (*plen < 4) {
 427                 unpack_err_too_short();
 428                 return NULL;
 429         }
 430
 431         b = *pbuf;
 432         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 433
 434         (*pbuf) += 4;
 435         (*plen) -= 4;
 436
 437         return PyLong_FromUnsignedLong(v);
 438 }
 439
 440
 441 static PyObject *unpack_int16(char **pbuf, int *plen)
 442 {
 443         long v;
 444         unsigned char *b;
 445
 446         if (*plen < 2) {
 447                 unpack_err_too_short();
 448                 return NULL;
 449         }
 450
 451         b = *pbuf;
 452         v = b[0] | b[1]<<8;
 453
 454         (*pbuf) += 2;
 455         (*plen) -= 2;
 456
 457         return PyInt_FromLong(v);
 458 }
 459
 460
 461 static PyObject *
 462 unpack_string(char **pbuf, int *plen)
 463 {
 464         int len;
 465         char *nul_ptr, *start;
 466
 467         start = *pbuf;
 468
 469         nul_ptr = memchr(start, '\0', *plen);
 470         if (!nul_ptr) {
 471                 unpack_err_too_short();
 472                 return NULL;
 473         }
 474
 475         len = nul_ptr - start;
 476
 477         *pbuf += len + 1;       /* skip \0 */
 478         *plen -= len + 1;
 479
 480         return PyString_FromStringAndSize(start, len);
 481 }
 482
 483
 484 static PyObject *
 485 unpack_buffer(char **pbuf, int *plen)
 486 {
 487         /* first get 32-bit len */
 488         long slen;
 489         unsigned char *b;
 490         unsigned char *start;
 491
 492         if (*plen < 4) {
 493                 unpack_err_too_short();
 494                 return NULL;
 495         }
 496
 497         b = *pbuf;
 498         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 499
 500         if (slen < 0) { /* surely you jest */
 501                 PyErr_Format(PyExc_ValueError,
 502                              __FUNCTION__ ": buffer seems to have negative length");
 503                 return NULL;
 504         }
 505
 506         (*pbuf) += 4;
 507         (*plen) -= 4;
 508         start = *pbuf;
 509
 510         if (*plen < slen) {
 511                 PyErr_Format(PyExc_IndexError,
 512                              __FUNCTION__ ": not enough data to unpack buffer: "
 513                              "need %d bytes, have %d",
 514                              (int) slen, *plen);
 515                 return NULL;
 516         }
 517
 518         (*pbuf) += slen;
 519         (*plen) -= slen;
 520
 521         return PyString_FromStringAndSize(start, slen);
 522 }
 523
 524
 525 /* Unpack a single field from packed data, according to format character CH.
 526    Remaining data is at *PBUF, of *PLEN.
 527
 528    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 529    been consumed.
 530
 531    Returns a reference to the unpacked Python object, or NULL for failure.
 532 */
 533 static PyObject *pytdbpack_unpack_item(char ch,
 534                                        char **pbuf,
 535                                        int *plen)
 536 {
 537         if (ch == 'w') {        /* 16-bit int */
 538                 return unpack_int16(pbuf, plen);
 539         }
 540         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 541                 /* pointers can just come through as integers */
 542                 return unpack_uint32(pbuf, plen);
 543         }
 544         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 545                 return unpack_string(pbuf, plen);
 546         }
 547         else if (ch == 'B') { /* length, buffer */
 548                 return unpack_buffer(pbuf, plen);
 549         }
 550         else {
 551                 PyErr_Format(PyExc_ValueError,
 552                              __FUNCTION__ ": format character '%c' is not supported",
 553                              ch);
 554
 555                 return NULL;
 556         }
 557 }
 558
 559
 560
 561
 562 /*
 563   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 564   PACKED_BUF.
 565
 566   The string has already been checked out, so we know that VAL_SEQ is large
 567   enough to hold the packed data, and that there are enough value items.
 568   (However, their types may not have been thoroughly checked yet.)
 569
 570   In addition, val_seq is a Python Fast sequence.
 571
 572   Returns NULL for error (with exception set), or None.
 573 */
 574 PyObject *
 575 pytdbpack_pack_data(const char *format_str,
 576                     PyObject *val_seq,
 577                     unsigned char *packed)
 578 {
 579         int i;
 580
 581         for (i = 0; format_str[i]; i++) {
 582                 char ch = format_str[i];
 583                 PyObject *val_obj;
 584
 585                 /* borrow a reference to the item */
 586                 val_obj = PySequence_GetItem(val_seq, i);
 587                 if (!val_obj)
 588                         return NULL;
 589
 590                 if (ch == 'w') {
 591                         unsigned long val_long;
 592                         PyObject *long_obj;
 593
 594                         if (!(long_obj = PyNumber_Long(val_obj))) {
 595                                 pytdbpack_bad_type(ch, "Long", val_obj);
 596                                 return NULL;
 597                         }
 598
 599                         val_long = PyLong_AsUnsignedLong(long_obj);
 600                         (packed)[0] = val_long & 0xff;
 601                         (packed)[1] = (val_long >> 8) & 0xff;
 602                         (packed) += 2;
 603                         Py_DECREF(long_obj);
 604                 }
 605                 else if (ch == 'd') {
 606                         /* 4-byte LE number */
 607                         PyObject *long_obj;
 608
 609                         if (!(long_obj = PyNumber_Long(val_obj))) {
 610                                 pytdbpack_bad_type(ch, "Long", val_obj);
 611                                 return NULL;
 612                         }
 613
 614                         pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
 615
 616                         Py_DECREF(long_obj);
 617                 }
 618                 else if (ch == 'p') {
 619                         /* "Pointer" value -- in the subset of DCERPC used by Samba,
 620                            this is really just an "exists" or "does not exist"
 621                            flag. */
 622                         pack_uint32(PyObject_IsTrue(val_obj), &packed);
 623                 }
 624                 else if (ch == 'f' || ch == 'P') {
 625                         int size;
 626                         char *sval;
 627
 628                         size = PySequence_Length(val_obj);
 629                         if (size < 0)
 630                                 return NULL;
 631                         sval = PyString_AsString(val_obj);
 632                         if (!sval)
 633                                 return NULL;
 634                         pack_bytes(size+1, sval, &packed); /* include nul */
 635                 }
 636                 else if (ch == 'B') {
 637                         long size;
 638                         char *sval;
 639
 640                         size = PyInt_AsLong(val_obj);
 641                         pack_uint32(size, &packed);
 642
 643                         val_obj = PySequence_GetItem(val_seq, ++i);
 644                         if (!val_obj)
 645                                 return NULL;
 646
 647                         sval = PyString_AsString(val_obj);
 648                         if (!sval)
 649                                 return NULL;
 650
 651                         pack_bytes(size, sval, &packed); /* do not include nul */
 652                 }
 653                 else {
 654                         /* this ought to be caught while calculating the length, but
 655                            just in case. */
 656                         PyErr_Format(PyExc_ValueError,
 657                                      "%s: format character '%c' is not supported",
 658                                      __FUNCTION__, ch);
 659
 660                         return NULL;
 661                 }
 662         }
 663
 664         return Py_None;
 665 }
 666
 667
 668
 669 static PyMethodDef pytdbpack_methods[] = {
 670         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 671         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 672 };
 673
 674 DL_EXPORT(void)
 675 inittdbpack(void)
 676 {
 677         Py_InitModule3("tdbpack", pytdbpack_methods,
 678                        (char *) pytdbpack_docstring);
 679 }