source/python/py_tdbpack.c

   1 /* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
   2
   3    Python wrapper for Samba tdb pack/unpack functions
   4    Copyright (C) Martin Pool 2002
   5
   6
   7    NOTE PYTHON STYLE GUIDE
   8    http://www.python.org/peps/pep-0007.html
   9
  10
  11    This program is free software; you can redistribute it and/or modify
  12    it under the terms of the GNU General Public License as published by
  13    the Free Software Foundation; either version 2 of the License, or
  14    (at your option) any later version.
  15
  16    This program is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19    GNU General Public License for more details.
  20
  21    You should have received a copy of the GNU General Public License
  22    along with this program; if not, write to the Free Software
  23    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  24 */
  25
  26
  27
  28 #include "Python.h"
  29
  30 static int pytdbpack_calc_reqd_len(char *format_str,
  31                                    PyObject *val_seq);
  32
  33 static PyObject *pytdbpack_unpack_item(char,
  34                                       char **pbuf,
  35                                       int *plen);
  36 static int
  37 pytdbpack_calc_item_len(char format_ch,
  38                         PyObject *val_obj);
  39
  40 static PyObject *pytdbpack_pack_data(const char *format_str,
  41                                      PyObject *val_seq,
  42                                      unsigned char *buf);
  43
  44
  45
  46 static const char * pytdbpack_docstring =
  47 "Convert between Python values and Samba binary encodings.
  48
  49 This module is conceptually similar to the standard 'struct' module, but it
  50 uses both a different binary format and a different description string.
  51
  52 Samba's encoding is based on that used inside DCE-RPC and SMB: a
  53 little-endian, unpadded, non-self-describing binary format.  It is intended
  54 that these functions be as similar as possible to the routines in Samba's
  55 tdb/tdbutil module, with appropriate adjustments for Python datatypes.
  56
  57 Python strings are used to specify the format of data to be packed or
  58 unpacked.
  59
  60 Strings in TDBs are typically stored in DOS codepages.  The caller of this
  61 module must make appropriate translations if necessary, typically to and from
  62 Unicode objects.
  63
  64 tdbpack format strings:
  65
  66     'f':  NULL-terminated string in DOS codepage
  67
  68     'P':  same as 'f'
  69
  70     'd':  4 byte little-endian number
  71
  72     'w':  2 byte little-endian number
  73
  74     'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
  75           really just an \"exists\" or \"does not exist\" flag.  The boolean
  76           value of the Python object is used.
  77
  78     'B': 4-byte LE length, followed by that many bytes of binary data.
  79          Corresponds to a Python byte string of the appropriate length.
  80
  81     '$': Special flag indicating that the preceding format code should be
  82          repeated while data remains.  This is only supported for unpacking.
  83
  84     Every code corresponds to a single Python object, except 'B' which
  85     corresponds to two values (length and contents), and '$', which produces
  86     however many make sense.
  87 ";
  88
  89
  90 static char const pytdbpack_pack_doc[] =
  91 "pack(format, values) -> buffer
  92 Pack Python objects into Samba binary format according to format string.
  93
  94 arguments:
  95     format -- string of tdbpack format characters
  96     values -- sequence of value objects corresponding 1:1 to format characters
  97
  98 returns:
  99     buffer -- string containing packed data
 100
 101 raises:
 102     IndexError -- if there are not the same number of format codes as of
 103         values
 104     ValueError -- if any of the format characters is illegal
 105     TypeError  -- if the format is not a string, or values is not a sequence,
 106         or any of the values is of the wrong type for the corresponding
 107         format character
 108 ";
 109
 110
 111 static char const pytdbpack_unpack_doc[] =
 112 "unpack(format, buffer) -> (values, rest)
 113 Unpack Samba binary data according to format string.
 114
 115 arguments:
 116     format -- string of tdbpack characters
 117     buffer -- string of packed binary data
 118
 119 returns:
 120     2-tuple of:
 121         values -- sequence of values corresponding 1:1 to format characters
 122         rest -- string containing data that was not decoded, or '' if the
 123             whole string was consumed
 124
 125 raises:
 126     IndexError -- if there is insufficient data in the buffer for the
 127         format (or if the data is corrupt and contains a variable-length
 128         field extending past the end)
 129     ValueError -- if any of the format characters is illegal
 130
 131 notes:
 132     Because unconsumed data is returned, you can feed it back in to the
 133     unpacker to extract further fields.  Alternatively, if you wish to modify
 134     some fields near the start of the data, you may be able to save time by
 135     only unpacking and repacking the necessary part.
 136 ";
 137
 138
 139
 140 /*
 141   Game plan is to first of all walk through the arguments and calculate the
 142   total length that will be required.  We allocate a Python string of that
 143   size, then walk through again and fill it in.
 144
 145   We just borrow references to all the passed arguments, since none of them
 146   need to be permanently stored.  We transfer ownership to the returned
 147   object.
 148  */
 149 static PyObject *
 150 pytdbpack_pack(PyObject *self,
 151                PyObject *args)
 152 {
 153         char *format_str;
 154         PyObject *val_seq, *fast_seq, *buf_str;
 155         int reqd_len;
 156         char *packed_buf;
 157
 158         /* TODO: Test passing wrong types or too many arguments */
 159         if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 160                 return NULL;
 161
 162         /* Convert into a list or tuple (if not already one), so that we can
 163          * index more easily. */
 164         fast_seq = PySequence_Fast(val_seq,
 165                                    __FUNCTION__ ": argument 2 must be sequence");
 166         if (!fast_seq)
 167                 return NULL;
 168
 169         reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
 170         if (reqd_len == -1)     /* exception was thrown */
 171                 return NULL;
 172
 173         /* Allocate space.
 174
 175            This design causes an unnecessary copying of the data when Python
 176            constructs an object, and that might possibly be avoided by using a
 177            Buffer object of some kind instead.  I'm not doing that for now
 178            though.  */
 179         packed_buf = malloc(reqd_len);
 180         if (!packed_buf) {
 181                 PyErr_Format(PyExc_MemoryError,
 182                              "%s: couldn't allocate %d bytes for packed buffer",
 183                              __FUNCTION__, reqd_len);
 184                 return NULL;
 185         }
 186
 187         if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
 188                 free(packed_buf);
 189                 return NULL;
 190         }
 191
 192         buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
 193         free(packed_buf);       /* get rid of tmp buf */
 194
 195         return buf_str;
 196 }
 197
 198
 199
 200 static PyObject *
 201 pytdbpack_unpack(PyObject *self,
 202                  PyObject *args)
 203 {
 204         char *format_str, *packed_str, *ppacked;
 205         PyObject *val_list = NULL, *ret_tuple = NULL;
 206         PyObject *rest_string = NULL;
 207         int format_len, packed_len;
 208         int i;
 209         char last_format = '#';
 210
 211         /* get arguments */
 212         if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
 213                 return NULL;
 214
 215         format_len = strlen(format_str);
 216
 217         /* allocate list to hold results */
 218         val_list = PyList_New(format_len);
 219         if (!val_list)
 220                 goto failed;
 221         ret_tuple = PyTuple_New(2);
 222         if (!ret_tuple)
 223                 goto failed;
 224
 225         /* For every object, unpack.  */
 226         for (ppacked = packed_str, i = 0; i < format_len; i++) {
 227                 PyObject *val_obj;
 228                 char format;
 229
 230                 format = format_str[i];
 231                 if (format == '$') {
 232                         if (i == 0) {
 233                                 PyErr_Format(PyExc_ValueError,
 234                                              "%s: '$' may not be first character in format",
 235                                              __FUNCTION__);
 236                                 goto failed;
 237                         }
 238                         else {
 239                                 format = last_format; /* repeat */
 240                         }
 241                 }
 242
 243                 val_obj = pytdbpack_unpack_item(format,
 244                                                 &ppacked,
 245                                                 &packed_len);
 246                 if (!val_obj)
 247                         goto failed;
 248
 249                 PyList_SET_ITEM(val_list, i, val_obj);
 250                 last_format = format;
 251         }
 252
 253         /* put leftovers in box for lunch tomorrow */
 254         rest_string = PyString_FromStringAndSize(ppacked, packed_len);
 255         if (!rest_string)
 256                 goto failed;
 257
 258         /* return (values, rest) tuple; give up references to them */
 259         PyTuple_SET_ITEM(ret_tuple, 0, val_list);
 260         val_list = NULL;
 261         PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
 262         val_list = NULL;
 263         return ret_tuple;
 264
 265   failed:
 266         /* handle failure: deallocate anything */
 267         Py_XDECREF(val_list);
 268         Py_XDECREF(ret_tuple);
 269         Py_XDECREF(rest_string);
 270         return NULL;
 271 }
 272
 273
 274 /*
 275   Internal routine that calculates how many bytes will be required to
 276   encode the values in the format.
 277
 278   Also checks that the value list is the right size for the format list.
 279
 280   Returns number of bytes (may be 0), or -1 if there's something wrong, in
 281   which case a Python exception has been raised.
 282
 283   Arguments:
 284
 285     val_seq: a Fast Sequence (list or tuple), being all the values
 286 */
 287 static int
 288 pytdbpack_calc_reqd_len(char *format_str,
 289                         PyObject *val_seq)
 290 {
 291         int len = 0;
 292         char *p;
 293         int val_i;
 294         int val_len;
 295
 296         val_len = PySequence_Length(val_seq);
 297         if (val_len == -1)
 298                 return -1;
 299
 300         for (p = format_str, val_i = 0; *p; p++, val_i++) {
 301                 char ch = *p;
 302                 PyObject *val_obj;
 303                 int item_len;
 304
 305                 if (val_i >= val_len) {
 306                         PyErr_Format(PyExc_IndexError,
 307                                      "samba.tdbpack.pack: value list is too short for format string");
 308                         return -1;
 309                 }
 310
 311                 /* borrow a reference to the item */
 312                 val_obj = PySequence_GetItem(val_seq, val_i);
 313                 if (!val_obj)
 314                         return -1;
 315
 316                 item_len = pytdbpack_calc_item_len(ch, val_obj);
 317                 if (item_len == -1)
 318                         return -1;
 319                 else
 320                         len += item_len;
 321         }
 322
 323         if (val_i != val_len) {
 324                 PyErr_Format(PyExc_IndexError,
 325                              "%s: value list is wrong length for format string",
 326                              __FUNCTION__);
 327                 return -1;
 328         }
 329
 330         return len;
 331 }
 332
 333
 334 static PyObject *pytdbpack_bad_type(char ch,
 335                                     const char *expected,
 336                                     PyObject *val_obj)
 337 {
 338         PyObject *r = PyObject_Repr(val_obj);
 339         if (!r)
 340                 return NULL;
 341         PyErr_Format(PyExc_TypeError,
 342                      "tdbpack: format '%c' requires %s, not %s",
 343                      ch, expected, PyString_AS_STRING(r));
 344         Py_DECREF(r);
 345         return val_obj;
 346 }
 347
 348
 349 /*
 350  * Calculate the number of bytes required to pack a single value.  While doing
 351  * this, also conduct some initial checks that the argument types are
 352  * reasonable.
 353  *
 354  * Returns -1 on exception.
 355  */
 356 static int
 357 pytdbpack_calc_item_len(char ch,
 358                         PyObject *val_obj)
 359 {
 360         if (ch == 'd' || ch == 'w') {
 361                 if (!PyInt_Check(val_obj)) {
 362                         pytdbpack_bad_type(ch, "Int", val_obj);
 363                         return -1;
 364                 }
 365                 if (ch == 'w')
 366                         return 2;
 367                 else
 368                         return 4;
 369         } else if (ch == 'p') {
 370                 return 4;
 371         }
 372         else if (ch == 'f' || ch == 'P' || ch == 'B') {
 373                 /* nul-terminated 8-bit string */
 374                 if (!PyString_Check(val_obj)) {
 375                         pytdbpack_bad_type(ch, "String", val_obj);
 376                         return -1;
 377                 }
 378
 379                 if (ch == 'B') {
 380                         /* byte buffer; just use Python string's length, plus
 381                            a preceding word */
 382                         return 4 + PyString_GET_SIZE(val_obj);
 383                 }
 384                 else {
 385                         /* one nul character */
 386                         return 1 + PyString_GET_SIZE(val_obj);
 387                 }
 388         }
 389         else {
 390                 PyErr_Format(PyExc_ValueError,
 391                              "tdbpack: format character '%c' is not supported",
 392                              ch);
 393
 394                 return -1;
 395         }
 396 }
 397
 398
 399 /*
 400   XXX: glib and Samba have quicker macro for doing the endianness conversions,
 401   but I don't know of one in plain libc, and it's probably not a big deal.  I
 402   realize this is kind of dumb because we'll almost always be on x86, but
 403   being safe is important.
 404 */
 405 static void pack_int32(unsigned long val_long, unsigned char **pbuf)
 406 {
 407         (*pbuf)[0] =         val_long & 0xff;
 408         (*pbuf)[1] = (val_long >> 8)  & 0xff;
 409         (*pbuf)[2] = (val_long >> 16) & 0xff;
 410         (*pbuf)[3] = (val_long >> 24) & 0xff;
 411         (*pbuf) += 4;
 412 }
 413
 414
 415 static void pack_bytes(long len, const char *from,
 416                        unsigned char **pbuf)
 417 {
 418         memcpy(*pbuf, from, len);
 419         (*pbuf) += len;
 420 }
 421
 422
 423 static void
 424 unpack_err_too_short(void)
 425 {
 426         PyErr_Format(PyExc_IndexError,
 427                      __FUNCTION__ ": data too short for unpack format");
 428 }
 429
 430
 431 static PyObject *
 432 unpack_int32(char **pbuf, int *plen)
 433 {
 434         long v;
 435         unsigned char *b;
 436
 437         if (*plen < 4) {
 438                 unpack_err_too_short();
 439                 return NULL;
 440         }
 441
 442         b = *pbuf;
 443         v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 444
 445         (*pbuf) += 4;
 446         (*plen) -= 4;
 447
 448         return PyInt_FromLong(v);
 449 }
 450
 451
 452 static PyObject *unpack_int16(char **pbuf, int *plen)
 453 {
 454         long v;
 455         unsigned char *b;
 456
 457         if (*plen < 2) {
 458                 unpack_err_too_short();
 459                 return NULL;
 460         }
 461
 462         b = *pbuf;
 463         v = b[0] | b[1]<<8;
 464
 465         (*pbuf) += 2;
 466         (*plen) -= 2;
 467
 468         return PyInt_FromLong(v);
 469 }
 470
 471
 472 static PyObject *
 473 unpack_string(char **pbuf, int *plen)
 474 {
 475         int len;
 476         char *nul_ptr, *start;
 477
 478         start = *pbuf;
 479
 480         nul_ptr = memchr(start, '\0', *plen);
 481         if (!nul_ptr) {
 482                 unpack_err_too_short();
 483                 return NULL;
 484         }
 485
 486         len = nul_ptr - start;
 487
 488         *pbuf += len + 1;       /* skip \0 */
 489         *plen -= len + 1;
 490
 491         return PyString_FromStringAndSize(start, len);
 492 }
 493
 494
 495 static PyObject *
 496 unpack_buffer(char **pbuf, int *plen)
 497 {
 498         /* first get 32-bit len */
 499         long slen;
 500         unsigned char *b;
 501         unsigned char *start;
 502
 503         if (*plen < 4) {
 504                 unpack_err_too_short();
 505                 return NULL;
 506         }
 507
 508         b = *pbuf;
 509         slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
 510
 511         if (slen < 0) { /* surely you jest */
 512                 PyErr_Format(PyExc_ValueError,
 513                              __FUNCTION__ ": buffer seems to have negative length");
 514                 return NULL;
 515         }
 516
 517         (*pbuf) += 4;
 518         (*plen) -= 4;
 519         start = *pbuf;
 520
 521         if (*plen < slen) {
 522                 PyErr_Format(PyExc_IndexError,
 523                              __FUNCTION__ ": not enough data to unpack buffer: "
 524                              "need %d bytes, have %d",
 525                              (int) slen, *plen);
 526                 return NULL;
 527         }
 528
 529         (*pbuf) += slen;
 530         (*plen) -= slen;
 531
 532         return PyString_FromStringAndSize(start, slen);
 533 }
 534
 535
 536 /* Unpack a single field from packed data, according to format character CH.
 537    Remaining data is at *PBUF, of *PLEN.
 538
 539    *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
 540    been consumed.
 541
 542    Returns a reference to the unpacked Python object, or NULL for failure.
 543 */
 544 static PyObject *pytdbpack_unpack_item(char ch,
 545                                        char **pbuf,
 546                                        int *plen)
 547 {
 548         if (ch == 'w') {        /* 16-bit int */
 549                 return unpack_int16(pbuf, plen);
 550         }
 551         else if (ch == 'd' || ch == 'p') { /* 32-bit int */
 552                 /* pointers can just come through as integers */
 553                 return unpack_int32(pbuf, plen);
 554         }
 555         else if (ch == 'f' || ch == 'P') { /* nul-term string  */
 556                 return unpack_string(pbuf, plen);
 557         }
 558         else if (ch == 'B') { /* length, buffer */
 559                 return unpack_buffer(pbuf, plen);
 560         }
 561         else {
 562                 PyErr_Format(PyExc_ValueError,
 563                              __FUNCTION__ ": format character '%c' is not supported",
 564                              ch);
 565
 566                 return NULL;
 567         }
 568 }
 569
 570
 571
 572 /*
 573   Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
 574   and advance the pointer.  Buffer length has been pre-calculated so we are
 575   sure that there is enough space.
 576
 577 */
 578 static PyObject *
 579 pytdbpack_pack_item(char ch,
 580                     PyObject *val_obj,
 581                     unsigned char **pbuf)
 582 {
 583         if (ch == 'w') {
 584                 unsigned long val_long = PyInt_AsLong(val_obj);
 585                 (*pbuf)[0] = val_long & 0xff;
 586                 (*pbuf)[1] = (val_long >> 8) & 0xff;
 587                 (*pbuf) += 2;
 588         }
 589         else if (ch == 'd') {
 590                 /* 4-byte LE number */
 591                 pack_int32(PyInt_AsLong(val_obj), pbuf);
 592         }
 593         else if (ch == 'p') {
 594                 /* "Pointer" value -- in the subset of DCERPC used by Samba,
 595                    this is really just an "exists" or "does not exist"
 596                    flag. */
 597                 pack_int32(PyObject_IsTrue(val_obj), pbuf);
 598         }
 599         else if (ch == 'f' || ch == 'P') {
 600                 int size;
 601                 char *sval;
 602
 603                 size = PyString_GET_SIZE(val_obj);
 604                 sval = PyString_AS_STRING(val_obj);
 605                 pack_bytes(size+1, sval, pbuf); /* include nul */
 606         }
 607         else if (ch == 'B') {
 608                 int size;
 609                 char *sval;
 610
 611                 size = PyString_GET_SIZE(val_obj);
 612                 pack_int32(size, pbuf);
 613                 sval = PyString_AS_STRING(val_obj);
 614                 pack_bytes(size, sval, pbuf); /* do not include nul */
 615         }
 616         else {
 617                 /* this ought to be caught while calculating the length, but
 618                    just in case. */
 619                 PyErr_Format(PyExc_ValueError,
 620                              "%s: format character '%c' is not supported",
 621                              __FUNCTION__, ch);
 622
 623                 return NULL;
 624         }
 625
 626         return Py_None;
 627 }
 628
 629
 630 /*
 631   Pack data according to FORMAT_STR from the elements of VAL_SEQ into
 632   PACKED_BUF.
 633
 634   The string has already been checked out, so we know that VAL_SEQ is large
 635   enough to hold the packed data, and that there are enough value items.
 636   (However, their types may not have been thoroughly checked yet.)
 637
 638   In addition, val_seq is a Python Fast sequence.
 639
 640   Returns NULL for error (with exception set), or None.
 641 */
 642 PyObject *
 643 pytdbpack_pack_data(const char *format_str,
 644                     PyObject *val_seq,
 645                     unsigned char *packed_buf)
 646 {
 647         int i;
 648
 649         for (i = 0; format_str[i]; i++) {
 650                 char ch = format_str[i];
 651                 PyObject *val_obj;
 652
 653                 /* borrow a reference to the item */
 654                 val_obj = PySequence_Fast_GET_ITEM(val_seq, i);
 655                 if (!val_obj)
 656                         return NULL;
 657
 658                 if (!pytdbpack_pack_item(ch, val_obj, &packed_buf))
 659                         return NULL;
 660         }
 661
 662         return Py_None;
 663 }
 664
 665
 666
 667
 668
 669 static PyMethodDef pytdbpack_methods[] = {
 670         { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
 671         { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 672 };
 673
 674 DL_EXPORT(void)
 675 inittdbpack(void)
 676 {
 677         Py_InitModule3("tdbpack", pytdbpack_methods,
 678                        (char *) pytdbpack_docstring);
 679 }