source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char,
  34                                       char **pbuf,
  35                                       int *plen);
  36 static int
  37 pytdbpack_calc_item_len(char format_ch,
  38                         PyObject *val_obj);
  39
  40 static PyObject *pytdbpack_pack_data(const char *format_str,
  41                                      PyObject *val_seq,
  42                                      unsigned char *buf);
  43
  44
  45
  46 static const char * pytdbpack_docstring =
  47 "Convert between Python values and Samba binary encodings.
  48
  49 This module is conceptually similar to the standard 'struct' module, but it
  50 uses both a different binary format and a different description string.
  51
  52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  53 little-endian, unpadded, non-self-describing binary format.  It is intended
  54 that these functions be as similar as possible to the routines in Samba's
  55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  56
  57 Python strings are used to specify the format of data to be packed or
  58 unpacked.
  59
  60 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  61 module must make appropriate translations if necessary, typically to and from
  62 Unicode objects.
  63
  64 tdbpack format strings:
  65
  66     'f':  NULL-terminated string in DOS codepage
  67
  68     'P':  same as 'f'
  69
  70     'd':  4 byte little-endian number
  71
  72     'w':  2 byte little-endian number
  73
  74     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  75           really just an \"exists\" or \"does not exist\" flag.  The boolean
  76           value of the Python object is used.
  77
  78     'B': 4-byte LE length, followed by that many bytes of binary data.
  79          Corresponds to a Python byte string of the appropriate length.
  80
  81     '$': Special flag indicating that the preceding format code should be
  82          repeated while data remains.  This is only supported for unpacking.
  83
  84     Every code corresponds to a single Python object, except 'B' which
  85     corresponds to two values (length and contents), and '$', which produces
  86     however many make sense.
  87 ";
  88
  89
  90 static char const pytdbpack_pack_doc[] =
  91 "pack(format, values) -> buffer
  92 Pack Python objects into Samba binary format according to format string.
  93
  94 arguments:
  95     format -- string of tdbpack format characters
  96     values -- sequence of value objects corresponding 1:1 to format characters
  97
  98 returns:
  99     buffer -- string containing packed data
 100
 101 raises:
 102     IndexError -- if there are too few values for the format
 103     ValueError -- if any of the format characters is illegal
 104     TypeError  -- if the format is not a string, or values is not a sequence,
 105         or any of the values is of the wrong type for the corresponding
 106         format character
 107
 108 notes:
 109     For historical reasons, it is not an error to pass more values than are consumed
 110     by the format.
 111 ";
 112
 113
 114 static char const pytdbpack_unpack_doc[] =
 115 "unpack(format, buffer) -> (values, rest)
 116 Unpack Samba binary data according to format string.
 117
 118 arguments:
 119     format -- string of tdbpack characters
 120     buffer -- string of packed binary data
 121
 122 returns:
 123     2-tuple of:
 124         values -- sequence of values corresponding 1:1 to format characters
 125         rest -- string containing data that was not decoded, or '' if the
 126             whole string was consumed
 127
 128 raises:
 129     IndexError -- if there is insufficient data in the buffer for the
 130         format (or if the data is corrupt and contains a variable-length
 131         field extending past the end)
 132     ValueError -- if any of the format characters is illegal
 133
 134 notes:
 135     Because unconsumed data is returned, you can feed it back in to the
 136     unpacker to extract further fields.  Alternatively, if you wish to modify
 137     some fields near the start of the data, you may be able to save time by
 138     only unpacking and repacking the necessary part.
 139 ";
 140
 141
 142
 143 /*
 144   Game plan is to first of all walk through the arguments and calculate the
 145   total length that will be required.  We allocate a Python string of that
 146   size, then walk through again and fill it in.
 147
 148   We just borrow references to all the passed arguments, since none of them
 149   need to be permanently stored.  We transfer ownership to the returned
 150   object.
 151  */
 152 static PyObject *
 153 pytdbpack_pack(PyObject *self,
 154                PyObject *args)
 155 {
 156         char *format_str;
 157         PyObject *val_seq, *fast_seq, *buf_str;
 158         int reqd_len;
 159         char *packed_buf;
 160
 161         /* TODO: Test passing wrong types or too many arguments */
 162         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 163                 return NULL;
 164
 165         /* Convert into a list or tuple (if not already one), so that we can
 166          * index more easily. */
 167         fast_seq = PySequence_Fast(val_seq,
 168                                    __FUNCTION__ ": argument 2 must be sequence");
 169         if (!fast_seq)
 170                 return NULL;
 171
 172         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 173         if (reqd_len == -1)     /* exception was thrown */
 174                 return NULL;
 175
 176         /* Allocate space.
 177
 178            This design causes an unnecessary copying of the data when Python
 179            constructs an object, and that might possibly be avoided by using a
 180            Buffer object of some kind instead.  I'm not doing that for now
 181            though.  */
 182         packed_buf = malloc(reqd_len);
 183         if (!packed_buf) {
 184                 PyErr_Format(PyExc_MemoryError,
 185                              "%s: couldn't allocate %d bytes for packed buffer",
 186                              __FUNCTION__, reqd_len);
 187                 return NULL;
 188         }
 189
 190         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 191                 free(packed_buf);
 192                 return NULL;
 193         }
 194
 195         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 196         free(packed_buf);       /* get rid of tmp buf */
 197
 198         return buf_str;
 199 }
 200
 201
 202
 203 static PyObject *
 204 pytdbpack_unpack(PyObject *self,
 205                  PyObject *args)
 206 {
 207         char *format_str, *packed_str, *ppacked;
 208         PyObject *val_list = NULL, *ret_tuple = NULL;
 209         PyObject *rest_string = NULL;
 210         int format_len, packed_len;
 211         int i;
 212         char last_format = '#';
 213
 214         /* get arguments */
 215         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 216                 return NULL;
 217
 218         format_len = strlen(format_str);
 219
 220         /* allocate list to hold results */
 221         val_list = PyList_New(format_len);
 222         if (!val_list)
 223                 goto failed;
 224         ret_tuple = PyTuple_New(2);
 225         if (!ret_tuple)
 226                 goto failed;
 227
 228         /* For every object, unpack.  */
 229         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 230                 PyObject *val_obj;
 231                 char format;
 232
 233                 format = format_str[i];
 234                 if (format == '$') {
 235                         if (i == 0) {
 236                                 PyErr_Format(PyExc_ValueError,
 237                                              "%s: '$' may not be first character in format",
 238                                              __FUNCTION__);
 239                                 goto failed;
 240                         }
 241                         else {
 242                                 format = last_format; /* repeat */
 243                         }
 244                 }
 245
 246                 val_obj = pytdbpack_unpack_item(format,
 247                                                 &ppacked,
 248                                                 &packed_len);
 249                 if (!val_obj)
 250                         goto failed;
 251
 252                 PyList_SET_ITEM(val_list, i, val_obj);
 253                 last_format = format;
 254         }
 255
 256         /* put leftovers in box for lunch tomorrow */
 257         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 258         if (!rest_string)
 259                 goto failed;
 260
 261         /* return (values, rest) tuple; give up references to them */
 262         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 263         val_list = NULL;
 264         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 265         val_list = NULL;
 266         return ret_tuple;
 267
 268   failed:
 269         /* handle failure: deallocate anything */
 270         Py_XDECREF(val_list);
 271         Py_XDECREF(ret_tuple);
 272         Py_XDECREF(rest_string);
 273         return NULL;
 274 }
 275
 276
 277 /*
 278   Internal routine that calculates how many bytes will be required to
 279   encode the values in the format.
 280
 281   Also checks that the value list is the right size for the format list.
 282
 283   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 284   which case a Python exception has been raised.
 285
 286   Arguments:
 287
 288     val_seq: a Fast Sequence (list or tuple), being all the values
 289 */
 290 static int
 291 pytdbpack_calc_reqd_len(char *format_str,
 292                         PyObject *val_seq)
 293 {
 294         int len = 0;
 295         char *p;
 296         int val_i;
 297         int val_len;
 298
 299         val_len = PySequence_Length(val_seq);
 300         if (val_len == -1)
 301                 return -1;
 302
 303         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 304                 char ch = *p;
 305                 PyObject *val_obj;
 306                 int item_len;
 307
 308                 if (val_i >= val_len) {
 309                         PyErr_Format(PyExc_IndexError,
 310                                      "samba.tdbpack.pack: value list is too short for format string");
 311                         return -1;
 312                 }
 313
 314                 /* borrow a reference to the item */
 315                 val_obj = PySequence_GetItem(val_seq, val_i);
 316                 if (!val_obj)
 317                         return -1;
 318
 319                 item_len = pytdbpack_calc_item_len(ch, val_obj);
 320                 if (item_len == -1)
 321                         return -1;
 322                 else
 323                         len += item_len;
 324         }
 325
 326         if (val_i != val_len) {
 327                 PyErr_Format(PyExc_IndexError,
 328                              "%s: value list is wrong length for format string",
 329                              __FUNCTION__);
 330                 return -1;
 331         }
 332
 333         return len;
 334 }
 335
 336
 337 static PyObject *pytdbpack_bad_type(char ch,
 338                                     const char *expected,
 339                                     PyObject *val_obj)
 340 {
 341         PyObject *r = PyObject_Repr(val_obj);
 342         if (!r)
 343                 return NULL;
 344         PyErr_Format(PyExc_TypeError,
 345                      "tdbpack: format '%c' requires %s, not %s",
 346                      ch, expected, PyString_AS_STRING(r));
 347         Py_DECREF(r);
 348         return val_obj;
 349 }
 350
 351
 352 /*
 353  * Calculate the number of bytes required to pack a single value.  While doing
 354  * this, also conduct some initial checks that the argument types are
 355  * reasonable.
 356  *
 357  * Returns -1 on exception.
 358  */
 359 static int
 360 pytdbpack_calc_item_len(char ch,
 361                         PyObject *val_obj)
 362 {
 363         if (ch == 'd' || ch == 'w') {
 364                 if (!PyInt_Check(val_obj)) {
 365                         pytdbpack_bad_type(ch, "Int", val_obj);
 366                         return -1;
 367                 }
 368                 if (ch == 'w')
 369                         return 2;
 370                 else
 371                         return 4;
 372         } else if (ch == 'p') {
 373                 return 4;
 374         }
 375         else if (ch == 'f' || ch == 'P' || ch == 'B') {
 376                 /* nul-terminated 8-bit string */
 377                 if (!PyString_Check(val_obj)) {
 378                         pytdbpack_bad_type(ch, "String", val_obj);
 379                         return -1;
 380                 }
 381
 382                 if (ch == 'B') {
 383                         /* byte buffer; just use Python string's length, plus
 384                            a preceding word */
 385                         return 4 + PyString_GET_SIZE(val_obj);
 386                 }
 387                 else {
 388                         /* one nul character */
 389                         return 1 + PyString_GET_SIZE(val_obj);
 390                 }
 391         }
 392         else {
 393                 PyErr_Format(PyExc_ValueError,
 394                              "tdbpack: format character '%c' is not supported",
 395                              ch);
 396
 397                 return -1;
 398         }
 399 }
 400
 401
 402 /*
 403   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 404   but I don't know of one in plain libc, and it's probably not a big deal.  I
 405   realize this is kind of dumb because we'll almost always be on x86, but
 406   being safe is important.
 407 */
 408 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
 409 {
 410         (*pbuf)[0] =         val_long & 0xff;
 411         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 412         (*pbuf)[2] = (val_long >> 16) & 0xff;
 413         (*pbuf)[3] = (val_long >> 24) & 0xff;
 414         (*pbuf) += 4;
 415 }
 416
 417
 418 static void pack_bytes(long len, const char *from,
 419                        unsigned char **pbuf)
 420 {
 421         memcpy(*pbuf, from, len);
 422         (*pbuf) += len;
 423 }
 424
 425
 426 static void
 427 unpack_err_too_short(void)
 428 {
 429         PyErr_Format(PyExc_IndexError,
 430                      __FUNCTION__ ": data too short for unpack format");
 431 }
 432
 433
 434 static PyObject *
 435 unpack_int32(char **pbuf, int *plen)
 436 {
 437         long v;
 438         unsigned char *b;
 439
 440         if (*plen < 4) {
 441                 unpack_err_too_short();
 442                 return NULL;
 443         }
 444
 445         b = *pbuf;
 446         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 447
 448         (*pbuf) += 4;
 449         (*plen) -= 4;
 450
 451         return PyInt_FromLong(v);
 452 }
 453
 454
 455 static PyObject *unpack_int16(char **pbuf, int *plen)
 456 {
 457         long v;
 458         unsigned char *b;
 459
 460         if (*plen < 2) {
 461                 unpack_err_too_short();
 462                 return NULL;
 463         }
 464
 465         b = *pbuf;
 466         v = b[0] | b[1]<<8;
 467
 468         (*pbuf) += 2;
 469         (*plen) -= 2;
 470
 471         return PyInt_FromLong(v);
 472 }
 473
 474
 475 static PyObject *
 476 unpack_string(char **pbuf, int *plen)
 477 {
 478         int len;
 479         char *nul_ptr, *start;
 480
 481         start = *pbuf;
 482
 483         nul_ptr = memchr(start, '\0', *plen);
 484         if (!nul_ptr) {
 485                 unpack_err_too_short();
 486                 return NULL;
 487         }
 488
 489         len = nul_ptr - start;
 490
 491         *pbuf += len + 1;       /* skip \0 */
 492         *plen -= len + 1;
 493
 494         return PyString_FromStringAndSize(start, len);
 495 }
 496
 497
 498 static PyObject *
 499 unpack_buffer(char **pbuf, int *plen)
 500 {
 501         /* first get 32-bit len */
 502         long slen;
 503         unsigned char *b;
 504         unsigned char *start;
 505
 506         if (*plen < 4) {
 507                 unpack_err_too_short();
 508                 return NULL;
 509         }
 510
 511         b = *pbuf;
 512         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 513
 514         if (slen < 0) { /* surely you jest */
 515                 PyErr_Format(PyExc_ValueError,
 516                              __FUNCTION__ ": buffer seems to have negative length");
 517                 return NULL;
 518         }
 519
 520         (*pbuf) += 4;
 521         (*plen) -= 4;
 522         start = *pbuf;
 523
 524         if (*plen < slen) {
 525                 PyErr_Format(PyExc_IndexError,
 526                              __FUNCTION__ ": not enough data to unpack buffer: "
 527                              "need %d bytes, have %d",
 528                              (int) slen, *plen);
 529                 return NULL;
 530         }
 531
 532         (*pbuf) += slen;
 533         (*plen) -= slen;
 534
 535         return PyString_FromStringAndSize(start, slen);
 536 }
 537
 538
 539 /* Unpack a single field from packed data, according to format character CH.
 540    Remaining data is at *PBUF, of *PLEN.
 541
 542    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 543    been consumed.
 544
 545    Returns a reference to the unpacked Python object, or NULL for failure.
 546 */
 547 static PyObject *pytdbpack_unpack_item(char ch,
 548                                        char **pbuf,
 549                                        int *plen)
 550 {
 551         if (ch == 'w') {        /* 16-bit int */
 552                 return unpack_int16(pbuf, plen);
 553         }
 554         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 555                 /* pointers can just come through as integers */
 556                 return unpack_int32(pbuf, plen);
 557         }
 558         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 559                 return unpack_string(pbuf, plen);
 560         }
 561         else if (ch == 'B') { /* length, buffer */
 562                 return unpack_buffer(pbuf, plen);
 563         }
 564         else {
 565                 PyErr_Format(PyExc_ValueError,
 566                              __FUNCTION__ ": format character '%c' is not supported",
 567                              ch);
 568
 569                 return NULL;
 570         }
 571 }
 572
 573
 574
 575 /*
 576   Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
 577   and advance the pointer.  Buffer length has been pre-calculated so we are
 578   sure that there is enough space.
 579
 580 */
 581 static PyObject *
 582 pytdbpack_pack_item(char ch,
 583                     PyObject *val_obj,
 584                     unsigned char **pbuf)
 585 {
 586         if (ch == 'w') {
 587                 unsigned long val_long = PyInt_AsLong(val_obj);
 588                 (*pbuf)[0] = val_long & 0xff;
 589                 (*pbuf)[1] = (val_long >> 8) & 0xff;
 590                 (*pbuf) += 2;
 591         }
 592         else if (ch == 'd') {
 593                 /* 4-byte LE number */
 594                 pack_int32(PyInt_AsLong(val_obj), pbuf);
 595         }
 596         else if (ch == 'p') {
 597                 /* "Pointer" value -- in the subset of DCERPC used by Samba,
 598                    this is really just an "exists" or "does not exist"
 599                    flag. */
 600                 pack_int32(PyObject_IsTrue(val_obj), pbuf);
 601         }
 602         else if (ch == 'f' || ch == 'P') {
 603                 int size;
 604                 char *sval;
 605
 606                 size = PyString_GET_SIZE(val_obj);
 607                 sval = PyString_AS_STRING(val_obj);
 608                 pack_bytes(size+1, sval, pbuf); /* include nul */
 609         }
 610         else if (ch == 'B') {
 611                 int size;
 612                 char *sval;
 613
 614                 size = PyString_GET_SIZE(val_obj);
 615                 pack_int32(size, pbuf);
 616                 sval = PyString_AS_STRING(val_obj);
 617                 pack_bytes(size, sval, pbuf); /* do not include nul */
 618         }
 619         else {
 620                 /* this ought to be caught while calculating the length, but
 621                    just in case. */
 622                 PyErr_Format(PyExc_ValueError,
 623                              "%s: format character '%c' is not supported",
 624                              __FUNCTION__, ch);
 625
 626                 return NULL;
 627         }
 628
 629         return Py_None;
 630 }
 631
 632
 633 /*
 634   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 635   PACKED_BUF.
 636
 637   The string has already been checked out, so we know that VAL_SEQ is large
 638   enough to hold the packed data, and that there are enough value items.
 639   (However, their types may not have been thoroughly checked yet.)
 640
 641   In addition, val_seq is a Python Fast sequence.
 642
 643   Returns NULL for error (with exception set), or None.
 644 */
 645 PyObject *
 646 pytdbpack_pack_data(const char *format_str,
 647                     PyObject *val_seq,
 648                     unsigned char *packed_buf)
 649 {
 650         int i;
 651
 652         for (i = 0; format_str[i]; i++) {
 653                 char ch = format_str[i];
 654                 PyObject *val_obj;
 655
 656                 /* borrow a reference to the item */
 657                 val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
 658                 if (!val_obj)
 659                         return NULL;
 660
 661                 if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
 662                         return NULL;
 663         }
 664
 665         return Py_None;
 666 }
 667
 668
 669
 670
 671
 672 static PyMethodDef pytdbpack_methods[] = {
 673         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 674         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 675 };
 676
 677 DL_EXPORT(void)
 678 inittdbpack(void)
 679 {
 680         Py_InitModule3("tdbpack", pytdbpack_methods,
 681                        (char *) pytdbpack_docstring);
 682 }