epan/tvbuff.c

   1 /* tvbuff.c
   2  *
   3  * Testy, Virtual(-izable) Buffer of guint8*'s
   4  *
 * "Testy" -- the buffer gets mad when an attempt is made to access data
 *              beyond the bounds of the buffer. An exception is thrown.
   7  *
   8  * "Virtual" -- the buffer can have its own data, can use a subset of
   9  *              the data of a backing tvbuff, or can be a composite of
  10  *              other tvbuffs.
  11  *
  12  * $Id$
  13  *
  14  * Copyright (c) 2000 by Gilbert Ramirez <gram@alumni.rice.edu>
  15  *
  16  * Code to convert IEEE floating point formats to native floating point
  17  * derived from code Copyright (c) Ashok Narayanan, 2000
  18  *
  19  * Wireshark - Network traffic analyzer
  20  * By Gerald Combs <gerald@wireshark.org>
  21  * Copyright 1998 Gerald Combs
  22  *
  23  * This program is free software; you can redistribute it and/or
  24  * modify it under the terms of the GNU General Public License
  25  * as published by the Free Software Foundation; either version 2
  26  * of the License, or (at your option) any later version.
  27  *
  28  * This program is distributed in the hope that it will be useful,
  29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  31  * GNU General Public License for more details.
  32  *
  33  * You should have received a copy of the GNU General Public License
  34  * along with this program; if not, write to the Free Software
  35  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  36  */
  37
  38 #include "config.h"
  39
  40 #include <string.h>
  41
  42 #include "wsutil/pint.h"
  43 #include "wsutil/sign_ext.h"
  44 #include "wsutil/unicode-utils.h"
  45 #include "tvbuff.h"
  46 #include "tvbuff-int.h"
  47 #include "strutil.h"
  48 #include "to_str.h"
  49 #include "charsets.h"
  50 #include "proto.h"      /* XXX - only used for DISSECTOR_ASSERT, probably a new header file? */
  51 #include "exceptions.h"
  52
  53 static guint64
  54 _tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits);
  55
  56 tvbuff_t *
  57 tvb_new(const struct tvb_ops *ops)
  58 {
  59         tvbuff_t *tvb;
  60         gsize     size = ops->tvb_size;
  61
  62         g_assert(size >= sizeof(*tvb));
  63
  64         tvb = (tvbuff_t *) g_slice_alloc(size);
  65
  66         tvb->next            = NULL;
  67         tvb->ops             = ops;
  68         tvb->initialized     = FALSE;
  69         tvb->flags           = 0;
  70         tvb->length          = 0;
  71         tvb->reported_length = 0;
  72         tvb->real_data       = NULL;
  73         tvb->raw_offset      = -1;
  74         tvb->ds_tvb          = NULL;
  75
  76         return tvb;
  77 }
  78
  79 static void
  80 tvb_free_internal(tvbuff_t *tvb)
  81 {
  82         gsize     size;
  83
  84         DISSECTOR_ASSERT(tvb);
  85
  86         if (tvb->ops->tvb_free)
  87                 tvb->ops->tvb_free(tvb);
  88
  89         size = tvb->ops->tvb_size;
  90
  91         g_slice_free1(size, tvb);
  92 }
  93
  94 /* XXX: just call tvb_free_chain();
  95  *      Not removed so that existing dissectors using tvb_free() need not be changed.
  96  *      I'd argue that existing calls to tvb_free() should have actually beeen
  97  *      calls to tvb_free_chain() although the calls were OK as long as no
  98  *      subsets, etc had been created on the tvb. */
  99 void
 100 tvb_free(tvbuff_t *tvb)
 101 {
 102         tvb_free_chain(tvb);
 103 }
 104
 105 void
 106 tvb_free_chain(tvbuff_t  *tvb)
 107 {
 108         tvbuff_t *next_tvb;
 109         DISSECTOR_ASSERT(tvb);
 110         while (tvb) {
 111                 next_tvb = tvb->next;
 112                 tvb_free_internal(tvb);
 113                 tvb  = next_tvb;
 114         }
 115 }
 116
 117 tvbuff_t *
 118 tvb_new_chain(tvbuff_t *parent, tvbuff_t *backing)
 119 {
 120         tvbuff_t *tvb = tvb_new_proxy(backing);
 121
 122         tvb_add_to_chain(parent, tvb);
 123         return tvb;
 124 }
 125
 126 void
 127 tvb_add_to_chain(tvbuff_t *parent, tvbuff_t *child)
 128 {
 129         tvbuff_t *tmp = child;
 130
 131         DISSECTOR_ASSERT(parent);
 132         DISSECTOR_ASSERT(child);
 133
 134         while (child) {
 135                 tmp   = child;
 136                 child = child->next;
 137
 138                 tmp->next    = parent->next;
 139                 parent->next = tmp;
 140         }
 141 }
 142
 143 /*
 144  * Check whether that offset goes more than one byte past the
 145  * end of the buffer.
 146  *
 147  * If not, return 0; otherwise, return exception
 148  */
 149 static inline int
 150 validate_offset(const tvbuff_t *tvb, const guint abs_offset)
 151 {
 152         if (G_LIKELY(abs_offset <= tvb->length))
 153                 return 0;
 154         else if (abs_offset <= tvb->reported_length)
 155                 return BoundsError;
 156         else if (tvb->flags & TVBUFF_FRAGMENT)
 157                 return FragmentBoundsError;
 158         else
 159                 return ReportedBoundsError;
 160 }
 161
 162 static int
 163 compute_offset(const tvbuff_t *tvb, const gint offset, guint *offset_ptr)
 164 {
 165         if (offset >= 0) {
 166                 /* Positive offset - relative to the beginning of the packet. */
 167                 if ((guint) offset <= tvb->length) {
 168                         *offset_ptr = offset;
 169                 } else if ((guint) offset <= tvb->reported_length) {
 170                         return BoundsError;
 171                 } else if (tvb->flags & TVBUFF_FRAGMENT) {
 172                         return FragmentBoundsError;
 173                 } else {
 174                         return ReportedBoundsError;
 175                 }
 176         }
 177         else {
 178                 /* Negative offset - relative to the end of the packet. */
 179                 if ((guint) -offset <= tvb->length) {
 180                         *offset_ptr = tvb->length + offset;
 181                 } else if ((guint) -offset <= tvb->reported_length) {
 182                         return BoundsError;
 183                 } else if (tvb->flags & TVBUFF_FRAGMENT) {
 184                         return FragmentBoundsError;
 185                 } else {
 186                         return ReportedBoundsError;
 187                 }
 188         }
 189
 190         return 0;
 191 }
 192
 193 static int
 194 compute_offset_and_remaining(const tvbuff_t *tvb, const gint offset, guint *offset_ptr, guint *rem_len)
 195 {
 196         int exception;
 197
 198         exception = compute_offset(tvb, offset, offset_ptr);
 199         if (!exception)
 200                 *rem_len = tvb->length - *offset_ptr;
 201
 202         return exception;
 203 }
 204
 205 /* Computes the absolute offset and length based on a possibly-negative offset
 206  * and a length that is possible -1 (which means "to the end of the data").
 207  * Returns integer indicating whether the offset is in bounds (0) or
 208  * not (exception number). The integer ptrs are modified with the new offset and length.
 209  * No exception is thrown.
 210  *
 211  * XXX - we return success (0), if the offset is positive and right
 212  * after the end of the tvbuff (i.e., equal to the length).  We do this
 213  * so that a dissector constructing a subset tvbuff for the next protocol
 214  * will get a zero-length tvbuff, not an exception, if there's no data
 215  * left for the next protocol - we want the next protocol to be the one
 216  * that gets an exception, so the error is reported as an error in that
 217  * protocol rather than the containing protocol.  */
 218 static int
 219 check_offset_length_no_exception(const tvbuff_t *tvb,
 220                                  const gint offset, gint const length_val,
 221                                  guint *offset_ptr, guint *length_ptr)
 222 {
 223         guint end_offset;
 224         int exception;
 225
 226         DISSECTOR_ASSERT(offset_ptr);
 227         DISSECTOR_ASSERT(length_ptr);
 228
 229         /* Compute the offset */
 230         exception = compute_offset(tvb, offset, offset_ptr);
 231         if (exception)
 232                 return exception;
 233
 234         if (length_val < -1) {
 235                 /* XXX - ReportedBoundsError? */
 236                 return BoundsError;
 237         }
 238
 239         /* Compute the length */
 240         if (length_val == -1)
 241                 *length_ptr = tvb->length - *offset_ptr;
 242         else
 243                 *length_ptr = length_val;
 244
 245         /*
 246          * Compute the offset of the first byte past the length.
 247          */
 248         end_offset = *offset_ptr + *length_ptr;
 249
 250         /*
 251          * Check for an overflow
 252          */
 253         if (end_offset < *offset_ptr)
 254                 return BoundsError;
 255
 256         return validate_offset(tvb, end_offset);
 257 }
 258
 259 /* Checks (+/-) offset and length and throws an exception if
 260  * either is out of bounds. Sets integer ptrs to the new offset
 261  * and length. */
 262 static void
 263 check_offset_length(const tvbuff_t *tvb,
 264                     const gint offset, gint const length_val,
 265                     guint *offset_ptr, guint *length_ptr)
 266 {
 267         int exception;
 268
 269         exception = check_offset_length_no_exception(tvb, offset, length_val, offset_ptr, length_ptr);
 270         if (exception)
 271                 THROW(exception);
 272 }
 273
 274 void
 275 tvb_check_offset_length(const tvbuff_t *tvb,
 276                         const gint offset, gint const length_val,
 277                         guint *offset_ptr, guint *length_ptr)
 278 {
 279         check_offset_length(tvb, offset, length_val, offset_ptr, length_ptr);
 280 }
 281
 282 static const unsigned char left_aligned_bitmask[] = {
 283         0xff,
 284         0x80,
 285         0xc0,
 286         0xe0,
 287         0xf0,
 288         0xf8,
 289         0xfc,
 290         0xfe
 291 };
 292
 293 tvbuff_t *
 294 tvb_new_octet_aligned(tvbuff_t *tvb, guint32 bit_offset, gint32 no_of_bits)
 295 {
 296         tvbuff_t     *sub_tvb = NULL;
 297         guint32       byte_offset;
 298         gint32        datalen, i;
 299         guint8        left, right, remaining_bits, *buf;
 300         const guint8 *data;
 301
 302         byte_offset = bit_offset >> 3;
 303         left = bit_offset % 8; /* for left-shifting */
 304         right = 8 - left; /* for right-shifting */
 305
 306         if (no_of_bits == -1) {
 307                 datalen = tvb_length_remaining(tvb, byte_offset);
 308                 remaining_bits = 0;
 309         } else {
 310                 datalen = no_of_bits >> 3;
 311                 remaining_bits = no_of_bits % 8;
 312                 if (remaining_bits) {
 313                         datalen++;
 314                 }
 315         }
 316
 317         /* already aligned -> shortcut */
 318         if ((left == 0) && (remaining_bits == 0)) {
 319                 return tvb_new_subset(tvb, byte_offset, datalen, -1);
 320         }
 321
 322         DISSECTOR_ASSERT(datalen>0);
 323
 324         /* if at least one trailing byte is available, we must use the content
 325         * of that byte for the last shift (i.e. tvb_get_ptr() must use datalen + 1
 326         * if non extra byte is available, the last shifted byte requires
 327         * special treatment
 328         */
 329         if (tvb_length_remaining(tvb, byte_offset) > datalen) {
 330                 data = tvb_get_ptr(tvb, byte_offset, datalen + 1);
 331
 332                 /* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
 333                 buf = (guint8 *)g_malloc(datalen);
 334
 335                 /* shift tvb data bit_offset bits to the left */
 336                 for (i = 0; i < datalen; i++)
 337                         buf[i] = (data[i] << left) | (data[i+1] >> right);
 338         } else {
 339                 data = tvb_get_ptr(tvb, byte_offset, datalen);
 340
 341                 /* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
 342                 buf = (guint8 *)g_malloc(datalen);
 343
 344                 /* shift tvb data bit_offset bits to the left */
 345                 for (i = 0; i < (datalen-1); i++)
 346                         buf[i] = (data[i] << left) | (data[i+1] >> right);
 347                 buf[datalen-1] = data[datalen-1] << left; /* set last octet */
 348         }
 349         buf[datalen-1] &= left_aligned_bitmask[remaining_bits];
 350
 351         sub_tvb = tvb_new_child_real_data(tvb, buf, datalen, datalen);
 352         tvb_set_free_cb(sub_tvb, g_free);
 353
 354         return sub_tvb;
 355 }
 356
 357 static tvbuff_t *
 358 tvb_generic_clone_offset_len(tvbuff_t *tvb, guint offset, guint len)
 359 {
 360         tvbuff_t *cloned_tvb;
 361
 362         guint8 *data = (guint8 *) g_malloc(len);
 363
 364         tvb_memcpy(tvb, data, offset, len);
 365
 366         cloned_tvb = tvb_new_real_data(data, len, len);
 367         tvb_set_free_cb(cloned_tvb, g_free);
 368
 369         return cloned_tvb;
 370 }
 371
 372 tvbuff_t *
 373 tvb_clone_offset_len(tvbuff_t *tvb, guint offset, guint len)
 374 {
 375         if (tvb->ops->tvb_clone) {
 376                 tvbuff_t *cloned_tvb;
 377
 378                 cloned_tvb = tvb->ops->tvb_clone(tvb, offset, len);
 379                 if (cloned_tvb)
 380                         return cloned_tvb;
 381         }
 382
 383         return tvb_generic_clone_offset_len(tvb, offset, len);
 384 }
 385
 386 tvbuff_t *
 387 tvb_clone(tvbuff_t *tvb)
 388 {
 389         return tvb_clone_offset_len(tvb, 0, tvb->length);
 390 }
 391
 392 guint
 393 tvb_length(const tvbuff_t *tvb)
 394 {
 395         DISSECTOR_ASSERT(tvb && tvb->initialized);
 396
 397         return tvb->length;
 398 }
 399
 400 gint
 401 tvb_length_remaining(const tvbuff_t *tvb, const gint offset)
 402 {
 403         guint abs_offset, rem_length;
 404         int exception;
 405
 406         DISSECTOR_ASSERT(tvb && tvb->initialized);
 407
 408         exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
 409         if (exception)
 410                 return 0;
 411
 412         return rem_length;
 413 }
 414
 415 guint
 416 tvb_ensure_length_remaining(const tvbuff_t *tvb, const gint offset)
 417 {
 418         guint abs_offset, rem_length;
 419         int   exception;
 420
 421         DISSECTOR_ASSERT(tvb && tvb->initialized);
 422
 423         exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
 424         if (exception)
 425                 THROW(exception);
 426
 427         if (rem_length == 0) {
 428                 /*
 429                  * This routine ensures there's at least one byte available.
 430                  * There aren't any bytes available, so throw the appropriate
 431                  * exception.
 432                  */
 433                 if (abs_offset >= tvb->reported_length) {
 434                         if (tvb->flags & TVBUFF_FRAGMENT) {
 435                                 THROW(FragmentBoundsError);
 436                         } else {
 437                                 THROW(ReportedBoundsError);
 438                         }
 439                 } else
 440                         THROW(BoundsError);
 441         }
 442         return rem_length;
 443 }
 444
 445
 446
 447
 448 /* Validates that 'length' bytes are available starting from
 449  * offset (pos/neg). Does not throw an exception. */
 450 gboolean
 451 tvb_bytes_exist(const tvbuff_t *tvb, const gint offset, const gint length)
 452 {
 453         guint abs_offset, abs_length;
 454         int exception;
 455
 456         DISSECTOR_ASSERT(tvb && tvb->initialized);
 457
 458         exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
 459         if (exception)
 460                 return FALSE;
 461
 462         return TRUE;
 463 }
 464
 465 /* Validates that 'length' bytes are available starting from
 466  * offset (pos/neg). Throws an exception if they aren't. */
 467 void
 468 tvb_ensure_bytes_exist(const tvbuff_t *tvb, const gint offset, const gint length)
 469 {
 470         guint real_offset, end_offset;
 471
 472         DISSECTOR_ASSERT(tvb && tvb->initialized);
 473
 474         /*
 475          * -1 doesn't mean "until end of buffer", as that's pointless
 476          * for this routine.  We must treat it as a Really Large Positive
 477          * Number, so that we throw an exception; we throw
 478          * ReportedBoundsError, as if it were past even the end of a
 479          * reassembled packet, and past the end of even the data we
 480          * didn't capture.
 481          *
 482          * We do the same with other negative lengths.
 483          */
 484         if (length < 0) {
 485                 THROW(ReportedBoundsError);
 486         }
 487
 488         /* XXX: Below this point could be replaced with a call to
 489          * check_offset_length with no functional change, however this is a
 490          * *very* hot path and check_offset_length is not well-optimized for
 491          * this case, so we eat some code duplication for a lot of speedup. */
 492
 493         if (offset >= 0) {
 494                 /* Positive offset - relative to the beginning of the packet. */
 495                 if ((guint) offset <= tvb->length) {
 496                         real_offset = offset;
 497                 } else if ((guint) offset <= tvb->reported_length) {
 498                         THROW(BoundsError);
 499                 } else if (tvb->flags & TVBUFF_FRAGMENT) {
 500                         THROW(FragmentBoundsError);
 501                 } else {
 502                         THROW(ReportedBoundsError);
 503                 }
 504         }
 505         else {
 506                 /* Negative offset - relative to the end of the packet. */
 507                 if ((guint) -offset <= tvb->length) {
 508                         real_offset = tvb->length + offset;
 509                 } else if ((guint) -offset <= tvb->reported_length) {
 510                         THROW(BoundsError);
 511                 } else if (tvb->flags & TVBUFF_FRAGMENT) {
 512                         THROW(FragmentBoundsError);
 513                 } else {
 514                         THROW(ReportedBoundsError);
 515                 }
 516         }
 517
 518         /*
 519          * Compute the offset of the first byte past the length.
 520          */
 521         end_offset = real_offset + length;
 522
 523         /*
 524          * Check for an overflow
 525          */
 526         if (end_offset < real_offset)
 527                 THROW(BoundsError);
 528
 529         if (G_LIKELY(end_offset <= tvb->length))
 530                 return;
 531         else if (end_offset <= tvb->reported_length)
 532                 THROW(BoundsError);
 533         else if (tvb->flags & TVBUFF_FRAGMENT)
 534                 THROW(FragmentBoundsError);
 535         else
 536                 THROW(ReportedBoundsError);
 537 }
 538
 539 gboolean
 540 tvb_offset_exists(const tvbuff_t *tvb, const gint offset)
 541 {
 542         guint abs_offset;
 543         int exception;
 544
 545         DISSECTOR_ASSERT(tvb && tvb->initialized);
 546
 547         exception = compute_offset(tvb, offset, &abs_offset);
 548         if (exception)
 549                 return FALSE;
 550
 551         /* compute_offset only throws an exception on >, not >= because of the
 552          * comment above check_offset_length_no_exception, but here we want the
 553          * opposite behaviour so we check ourselves... */
 554         if (abs_offset < tvb->length) {
 555                 return TRUE;
 556         }
 557         else {
 558                 return FALSE;
 559         }
 560 }
 561
 562 guint
 563 tvb_reported_length(const tvbuff_t *tvb)
 564 {
 565         DISSECTOR_ASSERT(tvb && tvb->initialized);
 566
 567         return tvb->reported_length;
 568 }
 569
 570 gint
 571 tvb_reported_length_remaining(const tvbuff_t *tvb, const gint offset)
 572 {
 573         guint abs_offset;
 574         int exception;
 575
 576         DISSECTOR_ASSERT(tvb && tvb->initialized);
 577
 578         exception = compute_offset(tvb, offset, &abs_offset);
 579         if (exception)
 580                 return 0;
 581
 582         if (tvb->reported_length >= abs_offset)
 583                 return tvb->reported_length - abs_offset;
 584         else
 585                 return 0;
 586 }
 587
 588 /* Set the reported length of a tvbuff to a given value; used for protocols
 589  * whose headers contain an explicit length and where the calling
 590  * dissector's payload may include padding as well as the packet for
 591  * this protocol.
 592  * Also adjusts the data length. */
 593 void
 594 tvb_set_reported_length(tvbuff_t *tvb, const guint reported_length)
 595 {
 596         DISSECTOR_ASSERT(tvb && tvb->initialized);
 597
 598         if (reported_length > tvb->reported_length)
 599                 THROW(ReportedBoundsError);
 600
 601         tvb->reported_length = reported_length;
 602         if (reported_length < tvb->length)
 603                 tvb->length = reported_length;
 604 }
 605
 606 guint
 607 tvb_offset_from_real_beginning_counter(const tvbuff_t *tvb, const guint counter)
 608 {
 609         if (tvb->ops->tvb_offset)
 610                 return tvb->ops->tvb_offset(tvb, counter);
 611
 612         DISSECTOR_ASSERT_NOT_REACHED();
 613         return 0;
 614 }
 615
 616 guint
 617 tvb_offset_from_real_beginning(const tvbuff_t *tvb)
 618 {
 619         return tvb_offset_from_real_beginning_counter(tvb, 0);
 620 }
 621
 622 static const guint8*
 623 ensure_contiguous_no_exception(tvbuff_t *tvb, const gint offset, const gint length, int *pexception)
 624 {
 625         guint abs_offset, abs_length;
 626         int exception;
 627
 628         exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
 629         if (exception) {
 630                 if (pexception)
 631                         *pexception = exception;
 632                 return NULL;
 633         }
 634
 635         /*
 636          * We know that all the data is present in the tvbuff, so
 637          * no exceptions should be thrown.
 638          */
 639         if (tvb->real_data)
 640                 return tvb->real_data + abs_offset;
 641
 642         if (tvb->ops->tvb_get_ptr)
 643                 return tvb->ops->tvb_get_ptr(tvb, abs_offset, abs_length);
 644
 645         DISSECTOR_ASSERT_NOT_REACHED();
 646         return NULL;
 647 }
 648
 649 static const guint8*
 650 ensure_contiguous(tvbuff_t *tvb, const gint offset, const gint length)
 651 {
 652         int           exception = 0;
 653         const guint8 *p;
 654
 655         p = ensure_contiguous_no_exception(tvb, offset, length, &exception);
 656         if (p == NULL) {
 657                 DISSECTOR_ASSERT(exception > 0);
 658                 THROW(exception);
 659         }
 660         return p;
 661 }
 662
 663 static const guint8*
 664 fast_ensure_contiguous(tvbuff_t *tvb, const gint offset, const guint length)
 665 {
 666         guint end_offset;
 667         guint u_offset;
 668
 669         DISSECTOR_ASSERT(tvb && tvb->initialized);
 670         /* We don't check for overflow in this fast path so we only handle simple types */
 671         DISSECTOR_ASSERT(length <= 8);
 672
 673         if (offset < 0 || !tvb->real_data) {
 674                 return ensure_contiguous(tvb, offset, length);
 675         }
 676
 677         u_offset = offset;
 678         end_offset = u_offset + length;
 679
 680         if (end_offset <= tvb->length) {
 681                 return tvb->real_data + u_offset;
 682         }
 683
 684         if (end_offset > tvb->reported_length) {
 685                 if (tvb->flags & TVBUFF_FRAGMENT) {
 686                         THROW(FragmentBoundsError);
 687                 } else {
 688                         THROW(ReportedBoundsError);
 689                 }
 690                 /* not reached */
 691         }
 692         THROW(BoundsError);
 693         /* not reached */
 694         return NULL;
 695 }
 696
 697 static const guint8*
 698 guint8_pbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles, guchar *found_needle)
 699 {
 700         gchar         tmp[256] = { 0 };
 701         const guint8 *haystack_end;
 702
 703         while (*needles)
 704                 tmp[*needles++] = 1;
 705
 706         haystack_end = haystack + haystacklen;
 707         while (haystack < haystack_end) {
 708                 if (tmp[*haystack]) {
 709                         if (found_needle)
 710                                 *found_needle = *haystack;
 711                         return haystack;
 712                 }
 713                 haystack++;
 714         }
 715
 716         return NULL;
 717 }
 718
 719
 720
 721 /************** ACCESSORS **************/
 722
 723 void *
 724 tvb_memcpy(tvbuff_t *tvb, void *target, const gint offset, size_t length)
 725 {
 726         guint   abs_offset, abs_length;
 727
 728         DISSECTOR_ASSERT(tvb && tvb->initialized);
 729
 730         /*
 731          * XXX - we should eliminate the "length = -1 means 'to the end
 732          * of the tvbuff'" convention, and use other means to achieve
 733          * that; this would let us eliminate a bunch of checks for
 734          * negative lengths in cases where the protocol has a 32-bit
 735          * length field.
 736          *
 737          * Allowing -1 but throwing an assertion on other negative
 738          * lengths is a bit more work with the length being a size_t;
 739          * instead, we check for a length <= 2^31-1.
 740          */
 741         DISSECTOR_ASSERT(length <= 0x7FFFFFFF);
 742         check_offset_length(tvb, offset, (gint) length, &abs_offset, &abs_length);
 743
 744         if (tvb->real_data) {
 745                 return memcpy(target, tvb->real_data + abs_offset, abs_length);
 746         }
 747
 748         if (tvb->ops->tvb_memcpy)
 749                 return tvb->ops->tvb_memcpy(tvb, target, abs_offset, abs_length);
 750
 751         /* XXX, fallback to slower method */
 752
 753         DISSECTOR_ASSERT_NOT_REACHED();
 754         return NULL;
 755 }
 756
 757
 758 /*
 759  * XXX - this doesn't treat a length of -1 as an error.
 760  * If it did, this could replace some code that calls
 761  * "tvb_ensure_bytes_exist()" and then allocates a buffer and copies
 762  * data to it.
 763  *
 764  * "composite_get_ptr()" depends on -1 not being
 765  * an error; does anything else depend on this routine treating -1 as
 766  * meaning "to the end of the buffer"?
 767  *
 768  * If scope is NULL, memory is allocated with g_malloc() and user must
 769  * explicitly free it with g_free().
 770  * If scope is not NULL, memory is allocated with the corresponding pool
 771  * lifetime.
 772  */
 773 void *
 774 tvb_memdup(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, size_t length)
 775 {
 776         guint   abs_offset, abs_length;
 777         void    *duped;
 778
 779         DISSECTOR_ASSERT(tvb && tvb->initialized);
 780
 781         check_offset_length(tvb, offset, (gint) length, &abs_offset, &abs_length);
 782
 783         duped = wmem_alloc(scope, abs_length);
 784         return tvb_memcpy(tvb, duped, abs_offset, abs_length);
 785 }
 786
 787
 788
 789 const guint8*
 790 tvb_get_ptr(tvbuff_t *tvb, const gint offset, const gint length)
 791 {
 792         return ensure_contiguous(tvb, offset, length);
 793 }
 794
 795 /* ---------------- */
 796 guint8
 797 tvb_get_guint8(tvbuff_t *tvb, const gint offset)
 798 {
 799         const guint8 *ptr;
 800
 801         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint8));
 802         return *ptr;
 803 }
 804
 805 guint16
 806 tvb_get_ntohs(tvbuff_t *tvb, const gint offset)
 807 {
 808         const guint8 *ptr;
 809
 810         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint16));
 811         return pntoh16(ptr);
 812 }
 813
 814 guint32
 815 tvb_get_ntoh24(tvbuff_t *tvb, const gint offset)
 816 {
 817         const guint8 *ptr;
 818
 819         ptr = fast_ensure_contiguous(tvb, offset, 3);
 820         return pntoh24(ptr);
 821 }
 822
 823 guint32
 824 tvb_get_ntohl(tvbuff_t *tvb, const gint offset)
 825 {
 826         const guint8 *ptr;
 827
 828         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint32));
 829         return pntoh32(ptr);
 830 }
 831
 832 guint64
 833 tvb_get_ntoh40(tvbuff_t *tvb, const gint offset)
 834 {
 835         const guint8 *ptr;
 836
 837         ptr = fast_ensure_contiguous(tvb, offset, 5);
 838         return pntoh40(ptr);
 839 }
 840
 841 gint64
 842 tvb_get_ntohi40(tvbuff_t *tvb, const gint offset)
 843 {
 844         guint64 ret;
 845
 846         ret = ws_sign_ext64(tvb_get_ntoh40(tvb, offset), 40);
 847
 848         return (gint64)ret;
 849 }
 850
 851 guint64
 852 tvb_get_ntoh48(tvbuff_t *tvb, const gint offset)
 853 {
 854         const guint8 *ptr;
 855
 856         ptr = fast_ensure_contiguous(tvb, offset, 6);
 857         return pntoh48(ptr);
 858 }
 859
 860 gint64
 861 tvb_get_ntohi48(tvbuff_t *tvb, const gint offset)
 862 {
 863         guint64 ret;
 864
 865         ret = ws_sign_ext64(tvb_get_ntoh48(tvb, offset), 48);
 866
 867         return (gint64)ret;
 868 }
 869
 870 guint64
 871 tvb_get_ntoh56(tvbuff_t *tvb, const gint offset)
 872 {
 873         const guint8 *ptr;
 874
 875         ptr = fast_ensure_contiguous(tvb, offset, 7);
 876         return pntoh56(ptr);
 877 }
 878
 879 gint64
 880 tvb_get_ntohi56(tvbuff_t *tvb, const gint offset)
 881 {
 882         guint64 ret;
 883
 884         ret = ws_sign_ext64(tvb_get_ntoh56(tvb, offset), 56);
 885
 886         return (gint64)ret;
 887 }
 888
 889 guint64
 890 tvb_get_ntoh64(tvbuff_t *tvb, const gint offset)
 891 {
 892         const guint8 *ptr;
 893
 894         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint64));
 895         return pntoh64(ptr);
 896 }
 897
 898 /*
 899  * Stuff for IEEE float handling on platforms that don't have IEEE
 900  * format as the native floating-point format.
 901  *
 902  * For now, we treat only the VAX as such a platform.
 903  *
 904  * XXX - other non-IEEE boxes that can run UNIX include some Crays,
 905  * and possibly other machines.
 906  *
 907  * It appears that the official Linux port to System/390 and
 908  * zArchitecture uses IEEE format floating point (not a
 909  * huge surprise).
 910  *
 911  * I don't know whether there are any other machines that
 912  * could run Wireshark and that don't use IEEE format.
 913  * As far as I know, all of the main commercial microprocessor
 914  * families on which OSes that support Wireshark can run
 915  * use IEEE format (x86, 68k, SPARC, MIPS, PA-RISC, Alpha,
 916  * IA-64, and so on).
 917  */
 918
 919 #if defined(vax)
 920
 921 #include <math.h>
 922
 923 /*
 924  * Single-precision.
 925  */
 926 #define IEEE_SP_NUMBER_WIDTH    32      /* bits in number */
 927 #define IEEE_SP_EXP_WIDTH       8       /* bits in exponent */
 928 #define IEEE_SP_MANTISSA_WIDTH  23      /* IEEE_SP_NUMBER_WIDTH - 1 - IEEE_SP_EXP_WIDTH */
 929
 930 #define IEEE_SP_SIGN_MASK       0x80000000
 931 #define IEEE_SP_EXPONENT_MASK   0x7F800000
 932 #define IEEE_SP_MANTISSA_MASK   0x007FFFFF
 933 #define IEEE_SP_INFINITY        IEEE_SP_EXPONENT_MASK
 934
 935 #define IEEE_SP_IMPLIED_BIT (1 << IEEE_SP_MANTISSA_WIDTH)
 936 #define IEEE_SP_INFINITE ((1 << IEEE_SP_EXP_WIDTH) - 1)
 937 #define IEEE_SP_BIAS ((1 << (IEEE_SP_EXP_WIDTH - 1)) - 1)
 938
 939 static int
 940 ieee_float_is_zero(const guint32 w)
 941 {
 942         return ((w & ~IEEE_SP_SIGN_MASK) == 0);
 943 }
 944
 945 static gfloat
 946 get_ieee_float(const guint32 w)
 947 {
 948         long sign;
 949         long exponent;
 950         long mantissa;
 951
 952         sign = w & IEEE_SP_SIGN_MASK;
 953         exponent = w & IEEE_SP_EXPONENT_MASK;
 954         mantissa = w & IEEE_SP_MANTISSA_MASK;
 955
 956         if (ieee_float_is_zero(w)) {
 957                 /* number is zero, unnormalized, or not-a-number */
 958                 return 0.0;
 959         }
 960 #if 0
 961         /*
 962          * XXX - how to handle this?
 963          */
 964         if (IEEE_SP_INFINITY == exponent) {
 965                 /*
 966                  * number is positive or negative infinity, or a special value
 967                  */
 968                 return (sign? MINUS_INFINITY: PLUS_INFINITY);
 969         }
 970 #endif
 971
 972         exponent = ((exponent >> IEEE_SP_MANTISSA_WIDTH) - IEEE_SP_BIAS) -
 973                 IEEE_SP_MANTISSA_WIDTH;
 974         mantissa |= IEEE_SP_IMPLIED_BIT;
 975
 976         if (sign)
 977                 return -mantissa * pow(2, exponent);
 978         else
 979                 return mantissa * pow(2, exponent);
 980 }
 981
 982 /*
 983  * Double-precision.
 984  * We assume that if you don't have IEEE floating-point, you have a
 985  * compiler that understands 64-bit integral quantities.
 986  */
 987 #define IEEE_DP_NUMBER_WIDTH    64      /* bits in number */
 988 #define IEEE_DP_EXP_WIDTH       11      /* bits in exponent */
 989 #define IEEE_DP_MANTISSA_WIDTH  52      /* IEEE_DP_NUMBER_WIDTH - 1 - IEEE_DP_EXP_WIDTH */
 990
 991 #define IEEE_DP_SIGN_MASK       G_GINT64_CONSTANT(0x8000000000000000)
 992 #define IEEE_DP_EXPONENT_MASK   G_GINT64_CONSTANT(0x7FF0000000000000)
 993 #define IEEE_DP_MANTISSA_MASK   G_GINT64_CONSTANT(0x000FFFFFFFFFFFFF)
 994 #define IEEE_DP_INFINITY        IEEE_DP_EXPONENT_MASK
 995
 996 #define IEEE_DP_IMPLIED_BIT (G_GINT64_CONSTANT(1) << IEEE_DP_MANTISSA_WIDTH)
 997 #define IEEE_DP_INFINITE ((1 << IEEE_DP_EXP_WIDTH) - 1)
 998 #define IEEE_DP_BIAS ((1 << (IEEE_DP_EXP_WIDTH - 1)) - 1)
 999
1000 static int
1001 ieee_double_is_zero(const guint64 w)
1002 {
1003         return ((w & ~IEEE_SP_SIGN_MASK) == 0);
1004 }
1005
1006 static gdouble
1007 get_ieee_double(const guint64 w)
1008 {
1009         gint64 sign;
1010         gint64 exponent;
1011         gint64 mantissa;
1012
1013         sign = w & IEEE_DP_SIGN_MASK;
1014         exponent = w & IEEE_DP_EXPONENT_MASK;
1015         mantissa = w & IEEE_DP_MANTISSA_MASK;
1016
1017         if (ieee_double_is_zero(w)) {
1018                 /* number is zero, unnormalized, or not-a-number */
1019                 return 0.0;
1020         }
1021 #if 0
1022         /*
1023          * XXX - how to handle this?
1024          */
1025         if (IEEE_DP_INFINITY == exponent) {
1026                 /*
1027                  * number is positive or negative infinity, or a special value
1028                  */
1029                 return (sign? MINUS_INFINITY: PLUS_INFINITY);
1030         }
1031 #endif
1032
1033         exponent = ((exponent >> IEEE_DP_MANTISSA_WIDTH) - IEEE_DP_BIAS) -
1034                 IEEE_DP_MANTISSA_WIDTH;
1035         mantissa |= IEEE_DP_IMPLIED_BIT;
1036
1037         if (sign)
1038                 return -mantissa * pow(2, exponent);
1039         else
1040                 return mantissa * pow(2, exponent);
1041 }
1042 #endif
1043
1044 /*
1045  * Fetches an IEEE single-precision floating-point number, in
1046  * big-endian form, and returns a "float".
1047  *
1048  * XXX - should this be "double", in case there are IEEE single-
1049  * precision numbers that won't fit in some platform's native
1050  * "float" format?
1051  */
1052 gfloat
1053 tvb_get_ntohieee_float(tvbuff_t *tvb, const int offset)
1054 {
1055 #if defined(vax)
1056         return get_ieee_float(tvb_get_ntohl(tvb, offset));
1057 #else
1058         union {
1059                 gfloat f;
1060                 guint32 w;
1061         } ieee_fp_union;
1062
1063         ieee_fp_union.w = tvb_get_ntohl(tvb, offset);
1064         return ieee_fp_union.f;
1065 #endif
1066 }
1067
1068 /*
1069  * Fetches an IEEE double-precision floating-point number, in
1070  * big-endian form, and returns a "double".
1071  */
1072 gdouble
1073 tvb_get_ntohieee_double(tvbuff_t *tvb, const int offset)
1074 {
1075 #if defined(vax)
1076         union {
1077                 guint32 w[2];
1078                 guint64 dw;
1079         } ieee_fp_union;
1080 #else
1081         union {
1082                 gdouble d;
1083                 guint32 w[2];
1084         } ieee_fp_union;
1085 #endif
1086
1087 #ifdef WORDS_BIGENDIAN
1088         ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset);
1089         ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset+4);
1090 #else
1091         ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset+4);
1092         ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset);
1093 #endif
1094 #if defined(vax)
1095         return get_ieee_double(ieee_fp_union.dw);
1096 #else
1097         return ieee_fp_union.d;
1098 #endif
1099 }
1100
1101 guint16
1102 tvb_get_letohs(tvbuff_t *tvb, const gint offset)
1103 {
1104         const guint8 *ptr;
1105
1106         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint16));
1107         return pletoh16(ptr);
1108 }
1109
1110 guint32
1111 tvb_get_letoh24(tvbuff_t *tvb, const gint offset)
1112 {
1113         const guint8 *ptr;
1114
1115         ptr = fast_ensure_contiguous(tvb, offset, 3);
1116         return pletoh24(ptr);
1117 }
1118
1119 guint32
1120 tvb_get_letohl(tvbuff_t *tvb, const gint offset)
1121 {
1122         const guint8 *ptr;
1123
1124         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint32));
1125         return pletoh32(ptr);
1126 }
1127
1128 guint64
1129 tvb_get_letoh40(tvbuff_t *tvb, const gint offset)
1130 {
1131         const guint8 *ptr;
1132
1133         ptr = fast_ensure_contiguous(tvb, offset, 5);
1134         return pletoh40(ptr);
1135 }
1136
1137 gint64
1138 tvb_get_letohi40(tvbuff_t *tvb, const gint offset)
1139 {
1140         guint64 ret;
1141
1142         ret = ws_sign_ext64(tvb_get_letoh40(tvb, offset), 40);
1143
1144         return (gint64)ret;
1145 }
1146
1147 guint64
1148 tvb_get_letoh48(tvbuff_t *tvb, const gint offset)
1149 {
1150         const guint8 *ptr;
1151
1152         ptr = fast_ensure_contiguous(tvb, offset, 6);
1153         return pletoh48(ptr);
1154 }
1155
1156 gint64
1157 tvb_get_letohi48(tvbuff_t *tvb, const gint offset)
1158 {
1159         guint64 ret;
1160
1161         ret = ws_sign_ext64(tvb_get_letoh48(tvb, offset), 48);
1162
1163         return (gint64)ret;
1164 }
1165
1166 guint64
1167 tvb_get_letoh56(tvbuff_t *tvb, const gint offset)
1168 {
1169         const guint8 *ptr;
1170
1171         ptr = fast_ensure_contiguous(tvb, offset, 7);
1172         return pletoh56(ptr);
1173 }
1174
1175 gint64
1176 tvb_get_letohi56(tvbuff_t *tvb, const gint offset)
1177 {
1178         guint64 ret;
1179
1180         ret = ws_sign_ext64(tvb_get_letoh56(tvb, offset), 56);
1181
1182         return (gint64)ret;
1183 }
1184
1185 guint64
1186 tvb_get_letoh64(tvbuff_t *tvb, const gint offset)
1187 {
1188         const guint8 *ptr;
1189
1190         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint64));
1191         return pletoh64(ptr);
1192 }
1193
1194 /*
1195  * Fetches an IEEE single-precision floating-point number, in
1196  * little-endian form, and returns a "float".
1197  *
1198  * XXX - should this be "double", in case there are IEEE single-
1199  * precision numbers that won't fit in some platform's native
1200  * "float" format?
1201  */
1202 gfloat
1203 tvb_get_letohieee_float(tvbuff_t *tvb, const int offset)
1204 {
1205 #if defined(vax)
1206         return get_ieee_float(tvb_get_letohl(tvb, offset));
1207 #else
1208         union {
1209                 gfloat f;
1210                 guint32 w;
1211         } ieee_fp_union;
1212
1213         ieee_fp_union.w = tvb_get_letohl(tvb, offset);
1214         return ieee_fp_union.f;
1215 #endif
1216 }
1217
1218 /*
1219  * Fetches an IEEE double-precision floating-point number, in
1220  * little-endian form, and returns a "double".
1221  */
1222 gdouble
1223 tvb_get_letohieee_double(tvbuff_t *tvb, const int offset)
1224 {
1225 #if defined(vax)
1226         union {
1227                 guint32 w[2];
1228                 guint64 dw;
1229         } ieee_fp_union;
1230 #else
1231         union {
1232                 gdouble d;
1233                 guint32 w[2];
1234         } ieee_fp_union;
1235 #endif
1236
1237 #ifdef WORDS_BIGENDIAN
1238         ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset+4);
1239         ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset);
1240 #else
1241         ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset);
1242         ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset+4);
1243 #endif
1244 #if defined(vax)
1245         return get_ieee_double(ieee_fp_union.dw);
1246 #else
1247         return ieee_fp_union.d;
1248 #endif
1249 }
1250
1251 /* Fetch an IPv4 address, in network byte order.
1252  * We do *not* convert them to host byte order; we leave them in
1253  * network byte order. */
1254 guint32
1255 tvb_get_ipv4(tvbuff_t *tvb, const gint offset)
1256 {
1257         const guint8 *ptr;
1258         guint32       addr;
1259
1260         ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint32));
1261         memcpy(&addr, ptr, sizeof addr);
1262         return addr;
1263 }
1264
1265 /* Fetch an IPv6 address. */
1266 void
1267 tvb_get_ipv6(tvbuff_t *tvb, const gint offset, struct e_in6_addr *addr)
1268 {
1269         const guint8 *ptr;
1270
1271         ptr = ensure_contiguous(tvb, offset, sizeof(*addr));
1272         memcpy(addr, ptr, sizeof *addr);
1273 }
1274
1275 /* Fetch a GUID. */
1276 void
1277 tvb_get_ntohguid(tvbuff_t *tvb, const gint offset, e_guid_t *guid)
1278 {
1279         const guint8 *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
1280
1281         guid->data1 = pntoh32(ptr + 0);
1282         guid->data2 = pntoh16(ptr + 4);
1283         guid->data3 = pntoh16(ptr + 6);
1284         memcpy(guid->data4, ptr + 8, sizeof guid->data4);
1285 }
1286
1287 void
1288 tvb_get_letohguid(tvbuff_t *tvb, const gint offset, e_guid_t *guid)
1289 {
1290         const guint8 *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
1291
1292         guid->data1 = pletoh32(ptr + 0);
1293         guid->data2 = pletoh16(ptr + 4);
1294         guid->data3 = pletoh16(ptr + 6);
1295         memcpy(guid->data4, ptr + 8, sizeof guid->data4);
1296 }
1297
1298 /*
1299  * NOTE: to support code written when proto_tree_add_item() took a
1300  * gboolean as its last argument, with FALSE meaning "big-endian"
1301  * and TRUE meaning "little-endian", we treat any non-zero value of
1302  * "representation" as meaning "little-endian".
1303  */
1304 void
1305 tvb_get_guid(tvbuff_t *tvb, const gint offset, e_guid_t *guid, const guint representation)
1306 {
1307         if (representation) {
1308                 tvb_get_letohguid(tvb, offset, guid);
1309         } else {
1310                 tvb_get_ntohguid(tvb, offset, guid);
1311         }
1312 }
1313
1314 static const guint8 bit_mask8[] = {
1315         0x00,
1316         0x01,
1317         0x03,
1318         0x07,
1319         0x0f,
1320         0x1f,
1321         0x3f,
1322         0x7f,
1323         0xff
1324 };
1325
1326 /* Get 1 - 8 bits */
1327 guint8
1328 tvb_get_bits8(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits)
1329 {
1330         return (guint8)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
1331 }
1332
1333 /* Get 9 - 16 bits */
1334 guint16
1335 tvb_get_bits16(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits,const guint encoding _U_)
1336 {
1337         /* note that encoding has no meaning here, as the tvb is considered to contain an octet array */
1338         return (guint16)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
1339 }
1340
1341 /* Get 1 - 32 bits */
1342 guint32
1343 tvb_get_bits32(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits, const guint encoding _U_)
1344 {
1345         /* note that encoding has no meaning here, as the tvb is considered to contain an octet array */
1346         return (guint32)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
1347 }
1348
1349 /* Get 1 - 64 bits */
1350 guint64
1351 tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits, const guint encoding _U_)
1352 {
1353         /* note that encoding has no meaning here, as the tvb is considered to contain an octet array */
1354         return _tvb_get_bits64(tvb, bit_offset, no_of_bits);
1355 }
1356 /*
1357  * This function will dissect a sequence of bits that does not need to be byte aligned; the bits
1358  * set will be shown in the tree as ..10 10.. and the integer value returned if return_value is set.
1359  * Offset should be given in bits from the start of the tvb.
1360  * The function tolerates requests for more than 64 bits, but will only return the least significant 64 bits.
1361  */
1362 static guint64
1363 _tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits)
1364 {
1365         guint64 value;
1366         guint octet_offset = bit_offset >> 3;
1367         guint8 required_bits_in_first_octet = 8 - (bit_offset % 8);
1368
1369         if(required_bits_in_first_octet > total_no_of_bits)
1370         {
1371                 /* the required bits don't extend to the end of the first octet */
1372                 guint8 right_shift = required_bits_in_first_octet - total_no_of_bits;
1373                 value = (tvb_get_guint8(tvb, octet_offset) >> right_shift) & bit_mask8[total_no_of_bits % 8];
1374         }
1375         else
1376         {
1377                 guint8 remaining_bit_length = total_no_of_bits;
1378
1379                 /* get the bits up to the first octet boundary */
1380                 value = 0;
1381                 required_bits_in_first_octet %= 8;
1382                 if(required_bits_in_first_octet != 0)
1383                 {
1384                         value = tvb_get_guint8(tvb, octet_offset) & bit_mask8[required_bits_in_first_octet];
1385                         remaining_bit_length -= required_bits_in_first_octet;
1386                         octet_offset ++;
1387                 }
1388                 /* take the biggest words, shorts or octets that we can */
1389                 while (remaining_bit_length > 7)
1390                 {
1391                         switch (remaining_bit_length >> 4)
1392                         {
1393                         case 0:
1394                                 /* 8 - 15 bits. (note that 0 - 7 would have dropped out of the while() loop) */
1395                                 value <<= 8;
1396                                 value += tvb_get_guint8(tvb, octet_offset);
1397                                 remaining_bit_length -= 8;
1398                                 octet_offset ++;
1399                                 break;
1400
1401                         case 1:
1402                                 /* 16 - 31 bits */
1403                                 value <<= 16;
1404                                 value += tvb_get_ntohs(tvb, octet_offset);
1405                                 remaining_bit_length -= 16;
1406                                 octet_offset += 2;
1407                                 break;
1408
1409                         case 2:
1410                         case 3:
1411                                 /* 32 - 63 bits */
1412                                 value <<= 32;
1413                                 value += tvb_get_ntohl(tvb, octet_offset);
1414                                 remaining_bit_length -= 32;
1415                                 octet_offset += 4;
1416                                 break;
1417
1418                         default:
1419                                 /* 64 bits (or more???) */
1420                                 value = tvb_get_ntoh64(tvb, octet_offset);
1421                                 remaining_bit_length -= 64;
1422                                 octet_offset += 8;
1423                                 break;
1424                         }
1425                 }
1426                 /* get bits from any partial octet at the tail */
1427                 if(remaining_bit_length)
1428                 {
1429                         value <<= remaining_bit_length;
1430                         value += (tvb_get_guint8(tvb, octet_offset) >> (8 - remaining_bit_length));
1431                 }
1432         }
1433         return value;
1434 }
1435 /* Get 1 - 32 bits (should be deprecated as same as tvb_get_bits32??) */
1436 guint32
1437 tvb_get_bits(tvbuff_t *tvb, const guint bit_offset, const gint no_of_bits, const guint encoding _U_)
1438 {
1439         /* note that encoding has no meaning here, as the tvb is considered to contain an octet array */
1440         return (guint32)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
1441 }
1442
1443 static gint
1444 tvb_find_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, guint8 needle)
1445 {
1446         const guint8 *ptr;
1447         const guint8 *result;
1448
1449         ptr = tvb_get_ptr(tvb, abs_offset, limit);
1450
1451         result = (const guint8 *) memchr(ptr, needle, limit);
1452         if (!result)
1453                 return -1;
1454
1455         return (gint) ((result - ptr) + abs_offset);
1456 }
1457
1458 /* Find first occurrence of needle in tvbuff, starting at offset. Searches
1459  * at most maxlength number of bytes; if maxlength is -1, searches to
1460  * end of tvbuff.
1461  * Returns the offset of the found needle, or -1 if not found.
1462  * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
1463  * in that case, -1 will be returned if the boundary is reached before
1464  * finding needle. */
1465 gint
1466 tvb_find_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const guint8 needle)
1467 {
1468         const guint8 *result;
1469         guint         abs_offset;
1470         guint         tvbufflen;
1471         guint         limit;
1472
1473         DISSECTOR_ASSERT(tvb && tvb->initialized);
1474
1475         check_offset_length(tvb, offset, -1, &abs_offset, &tvbufflen);
1476
1477         /* Only search to end of tvbuff, w/o throwing exception. */
1478         if (maxlength == -1) {
1479                 /* No maximum length specified; search to end of tvbuff. */
1480                 limit = tvbufflen;
1481         }
1482         else if (tvbufflen < (guint) maxlength) {
1483                 /* Maximum length goes past end of tvbuff; search to end
1484                    of tvbuff. */
1485                 limit = tvbufflen;
1486         }
1487         else {
1488                 /* Maximum length doesn't go past end of tvbuff; search
1489                    to that value. */
1490                 limit = maxlength;
1491         }
1492
1493         /* If we have real data, perform our search now. */
1494         if (tvb->real_data) {
1495                 result = (const guint8 *)memchr(tvb->real_data + abs_offset, needle, limit);
1496                 if (result == NULL) {
1497                         return -1;
1498                 }
1499                 else {
1500                         return (gint) (result - tvb->real_data);
1501                 }
1502         }
1503
1504         if (tvb->ops->tvb_find_guint8)
1505                 return tvb->ops->tvb_find_guint8(tvb, abs_offset, limit, needle);
1506
1507         return tvb_find_guint8_generic(tvb, offset, limit, needle);
1508 }
1509
1510 static gint
1511 tvb_pbrk_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, const guint8 *needles, guchar *found_needle)
1512 {
1513         const guint8 *ptr;
1514         const guint8 *result;
1515
1516         ptr = tvb_get_ptr(tvb, abs_offset, limit);
1517
1518         result = guint8_pbrk(ptr, limit, needles, found_needle);
1519         if (!result)
1520                 return -1;
1521
1522         return (gint) ((result - ptr) + abs_offset);
1523 }
1524
1525 /* Find first occurrence of any of the needles in tvbuff, starting at offset.
1526  * Searches at most maxlength number of bytes; if maxlength is -1, searches
1527  * to end of tvbuff.
1528  * Returns the offset of the found needle, or -1 if not found.
1529  * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
1530  * in that case, -1 will be returned if the boundary is reached before
1531  * finding needle. */
1532 gint
1533 tvb_pbrk_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const guint8 *needles, guchar *found_needle)
1534 {
1535         const guint8 *result;
1536         guint         abs_offset;
1537         guint         tvbufflen;
1538         guint         limit;
1539
1540         DISSECTOR_ASSERT(tvb && tvb->initialized);
1541
1542         check_offset_length(tvb, offset, -1, &abs_offset, &tvbufflen);
1543
1544         /* Only search to end of tvbuff, w/o throwing exception. */
1545         if (maxlength == -1) {
1546                 /* No maximum length specified; search to end of tvbuff. */
1547                 limit = tvbufflen;
1548         }
1549         else if (tvbufflen < (guint) maxlength) {
1550                 /* Maximum length goes past end of tvbuff; search to end
1551                    of tvbuff. */
1552                 limit = tvbufflen;
1553         }
1554         else {
1555                 /* Maximum length doesn't go past end of tvbuff; search
1556                    to that value. */
1557                 limit = maxlength;
1558         }
1559
1560         /* If we have real data, perform our search now. */
1561         if (tvb->real_data) {
1562                 result = guint8_pbrk(tvb->real_data + abs_offset, limit, needles, found_needle);
1563                 if (result == NULL) {
1564                         return -1;
1565                 }
1566                 else {
1567                         return (gint) (result - tvb->real_data);
1568                 }
1569         }
1570
1571         if (tvb->ops->tvb_pbrk_guint8)
1572                 return tvb->ops->tvb_pbrk_guint8(tvb, abs_offset, limit, needles, found_needle);
1573
1574         return tvb_pbrk_guint8_generic(tvb, abs_offset, limit, needles, found_needle);
1575 }
1576
1577 /* Find size of stringz (NUL-terminated string) by looking for terminating
1578  * NUL.  The size of the string includes the terminating NUL.
1579  *
1580  * If the NUL isn't found, it throws the appropriate exception.
1581  */
1582 guint
1583 tvb_strsize(tvbuff_t *tvb, const gint offset)
1584 {
1585         guint abs_offset, junk_length;
1586         gint  nul_offset;
1587
1588         DISSECTOR_ASSERT(tvb && tvb->initialized);
1589
1590         check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
1591         nul_offset = tvb_find_guint8(tvb, abs_offset, -1, 0);
1592         if (nul_offset == -1) {
1593                 /*
1594                  * OK, we hit the end of the tvbuff, so we should throw
1595                  * an exception.
1596                  *
1597                  * Did we hit the end of the captured data, or the end
1598                  * of the actual data?  If there's less captured data
1599                  * than actual data, we presumably hit the end of the
1600                  * captured data, otherwise we hit the end of the actual
1601                  * data.
1602                  */
1603                 if (tvb->length < tvb->reported_length) {
1604                         THROW(BoundsError);
1605                 } else {
1606                         if (tvb->flags & TVBUFF_FRAGMENT) {
1607                                 THROW(FragmentBoundsError);
1608                         } else {
1609                                 THROW(ReportedBoundsError);
1610                         }
1611                 }
1612         }
1613         return (nul_offset - abs_offset) + 1;
1614 }
1615
1616 /* UTF-16/UCS-2 version of tvb_strsize */
1617 /* Returns number of bytes including the (two-bytes) null terminator */
1618 guint
1619 tvb_unicode_strsize(tvbuff_t *tvb, const gint offset)
1620 {
1621         guint     i = 0;
1622         gunichar2 uchar;
1623
1624         DISSECTOR_ASSERT(tvb && tvb->initialized);
1625
1626         do {
1627                 /* Endianness doesn't matter when looking for null */
1628                 uchar = tvb_get_ntohs(tvb, offset + i);
1629                 i += 2;
1630         } while(uchar != 0);
1631
1632         return i;
1633 }
1634
1635 /* Find length of string by looking for end of string ('\0'), up to
1636  * 'maxlength' characters'; if 'maxlength' is -1, searches to end
1637  * of tvbuff.
1638  * Returns -1 if 'maxlength' reached before finding EOS. */
1639 gint
1640 tvb_strnlen(tvbuff_t *tvb, const gint offset, const guint maxlength)
1641 {
1642         gint  result_offset;
1643         guint abs_offset, junk_length;
1644
1645         DISSECTOR_ASSERT(tvb && tvb->initialized);
1646
1647         check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
1648
1649         result_offset = tvb_find_guint8(tvb, abs_offset, maxlength, 0);
1650
1651         if (result_offset == -1) {
1652                 return -1;
1653         }
1654         else {
1655                 return result_offset - abs_offset;
1656         }
1657 }
1658
1659 /*
1660  * Implement strneql etc
1661  */
1662
1663 /*
1664  * Call strncmp after checking if enough chars left, returning 0 if
1665  * it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
1666  */
1667 gint
1668 tvb_strneql(tvbuff_t *tvb, const gint offset, const gchar *str, const size_t size)
1669 {
1670         const guint8 *ptr;
1671
1672         ptr = ensure_contiguous_no_exception(tvb, offset, (gint)size, NULL);
1673
1674         if (ptr) {
1675                 int cmp = strncmp((const char *)ptr, str, size);
1676
1677                 /*
1678                  * Return 0 if equal, -1 otherwise.
1679                  */
1680                 return (cmp == 0 ? 0 : -1);
1681         } else {
1682                 /*
1683                  * Not enough characters in the tvbuff to match the
1684                  * string.
1685                  */
1686                 return -1;
1687         }
1688 }
1689
1690 /*
1691  * Call g_ascii_strncasecmp after checking if enough chars left, returning
1692  * 0 if it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
1693  */
1694 gint
1695 tvb_strncaseeql(tvbuff_t *tvb, const gint offset, const gchar *str, const size_t size)
1696 {
1697         const guint8 *ptr;
1698
1699         ptr = ensure_contiguous_no_exception(tvb, offset, (gint)size, NULL);
1700
1701         if (ptr) {
1702                 int cmp = g_ascii_strncasecmp((const char *)ptr, str, size);
1703
1704                 /*
1705                  * Return 0 if equal, -1 otherwise.
1706                  */
1707                 return (cmp == 0 ? 0 : -1);
1708         } else {
1709                 /*
1710                  * Not enough characters in the tvbuff to match the
1711                  * string.
1712                  */
1713                 return -1;
1714         }
1715 }
1716
1717 /*
1718  * Call memcmp after checking if enough chars left, returning 0 if
1719  * it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
1720  */
1721 gint
1722 tvb_memeql(tvbuff_t *tvb, const gint offset, const guint8 *str, size_t size)
1723 {
1724         const guint8 *ptr;
1725
1726         ptr = ensure_contiguous_no_exception(tvb, offset, (gint) size, NULL);
1727
1728         if (ptr) {
1729                 int cmp = memcmp(ptr, str, size);
1730
1731                 /*
1732                  * Return 0 if equal, -1 otherwise.
1733                  */
1734                 return (cmp == 0 ? 0 : -1);
1735         } else {
1736                 /*
1737                  * Not enough characters in the tvbuff to match the
1738                  * string.
1739                  */
1740                 return -1;
1741         }
1742 }
1743
1744 /*
1745  * Format the data in the tvb from offset for length ...
1746  */
1747 gchar *
1748 tvb_format_text(tvbuff_t *tvb, const gint offset, const gint size)
1749 {
1750         const guint8 *ptr;
1751         gint          len;
1752
1753         len = (size > 0) ? size : 0;
1754
1755         ptr = ensure_contiguous(tvb, offset, size);
1756         return format_text(ptr, len);
1757 }
1758
1759 /*
1760  * Format the data in the tvb from offset for length ...
1761  */
1762 gchar *
1763 tvb_format_text_wsp(tvbuff_t *tvb, const gint offset, const gint size)
1764 {
1765         const guint8 *ptr;
1766         gint          len;
1767
1768         len = (size > 0) ? size : 0;
1769
1770         ptr = ensure_contiguous(tvb, offset, size);
1771         return format_text_wsp(ptr, len);
1772 }
1773
1774 /*
1775  * Like "tvb_format_text()", but for null-padded strings; don't show
1776  * the null padding characters as "\000".
1777  */
1778 gchar *
1779 tvb_format_stringzpad(tvbuff_t *tvb, const gint offset, const gint size)
1780 {
1781         const guint8 *ptr, *p;
1782         gint          len;
1783         gint          stringlen;
1784
1785         len = (size > 0) ? size : 0;
1786
1787         ptr = ensure_contiguous(tvb, offset, size);
1788         for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
1789                 ;
1790         return format_text(ptr, stringlen);
1791 }
1792
1793 /*
1794  * Like "tvb_format_text_wsp()", but for null-padded strings; don't show
1795  * the null padding characters as "\000".
1796  */
1797 gchar *
1798 tvb_format_stringzpad_wsp(tvbuff_t *tvb, const gint offset, const gint size)
1799 {
1800         const guint8 *ptr, *p;
1801         gint          len;
1802         gint          stringlen;
1803
1804         len = (size > 0) ? size : 0;
1805
1806         ptr = ensure_contiguous(tvb, offset, size);
1807         for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
1808                 ;
1809         return format_text_wsp(ptr, stringlen);
1810 }
1811
/* Unicode REPLACEMENT CHARACTER (U+FFFD) */
#define UNREPL 0xFFFD
1814
1815 /*
1816  * All string functions below take a scope as an argument.
1817  *
1818  *
1819  * If scope is NULL, memory is allocated with g_malloc() and user must
1820  * explicitly free it with g_free().
1821  * If scope is not NULL, memory is allocated with the corresponding pool
1822  * lifetime.
1823  *
1824  * All functions throw an exception if the tvbuff ends before the string
1825  * does.
1826  */
1827
1828 /*
1829  * Given a tvbuff, an offset, and a length, treat the string of bytes
1830  * referred to by them as an ASCII string, with all bytes with the
1831  * high-order bit set being invalid, and return a pointer to a
1832  * UTF-8 string.
1833  *
1834  * Octets with the highest bit set will be converted to the Unicode
1835  * REPLACEMENT CHARACTER.
1836  */
1837 static guint8 *
1838 tvb_get_ascii_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
1839 {
1840         wmem_strbuf_t *str;
1841
1842         tvb_ensure_bytes_exist(tvb, offset, length); /* make sure length = -1 fails */
1843
1844         str = wmem_strbuf_new(scope, "");
1845
1846         while (length > 0) {
1847                 guint8 ch = tvb_get_guint8(tvb, offset);
1848
1849                 if (ch < 0x80)
1850                         wmem_strbuf_append_c(str, ch);
1851                 else
1852                         wmem_strbuf_append_unichar(str, UNREPL);
1853                 offset++;
1854                 length--;
1855         }
1856         wmem_strbuf_append_c(str, '\0');
1857
1858         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str
1859            (like when strbuf is no longer needed) */
1860         return (guint8 *) wmem_strbuf_get_str(str);
1861 }
1862
1863 /*
1864  * Given a tvbuff, an offset, and a length, treat the string of bytes
1865  * referred to by them as a UTF-8 string, and return a pointer to that
1866  * string.
1867  *
1868  * XXX - should map invalid UTF-8 sequences to UNREPL.
1869  */
1870 static guint8 *
1871 tvb_get_utf_8_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint length)
1872 {
1873         guint8       *strbuf;
1874
1875         tvb_ensure_bytes_exist(tvb, offset, length); /* make sure length = -1 fails */
1876         strbuf = (guint8 *)wmem_alloc(scope, length + 1);
1877         tvb_memcpy(tvb, strbuf, offset, length);
1878         strbuf[length] = '\0';
1879         return strbuf;
1880 }
1881
1882 /*
1883  * Given a tvbuff, an offset, and a length, treat the string of bytes
1884  * referred to by them as an ISO 8859/1 string, with all bytes with the
1885  * high-order bit set being invalid, and return a pointer to a UTF-8
1886  * string.
1887  */
1888 static guint8 *
1889 tvb_get_string_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
1890 {
1891         wmem_strbuf_t *str;
1892
1893         str = wmem_strbuf_new(scope, "");
1894
1895         while (length > 0) {
1896                 guint8 ch = tvb_get_guint8(tvb, offset);
1897
1898                 if (ch < 0x80)
1899                         wmem_strbuf_append_c(str, ch);
1900                 else {
1901                         /*
1902                          * Note: we assume here that the code points
1903                          * 0x80-0x9F are used for C1 control characters,
1904                          * and thus have the same value as the corresponding
1905                          * Unicode code points.
1906                          */
1907                         wmem_strbuf_append_unichar(str, ch);
1908                 }
1909                 offset++;
1910                 length--;
1911         }
1912
1913         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
1914         return (guint8 *) wmem_strbuf_get_str(str);
1915 }
1916
1917 /*
1918  * Given a tvbuff, an offset, and a length, and a translation table,
1919  * treat the string of bytes referred to by them as a string encoded
1920  * using one octet per character, with octets with the high-order bit
1921  * clear being ASCII and octets with the high-order bit set being
1922  * mapped by the translation table to 2-byte Unicode Basic Multilingual
1923  * Plane characters (including REPLACEMENT CHARACTER), and return a
1924  * pointer to a UTF-8 string.
1925  */
1926 static guint8 *
1927 tvb_get_string_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80])
1928 {
1929         wmem_strbuf_t *str;
1930
1931         str = wmem_strbuf_new(scope, "");
1932
1933         while (length > 0) {
1934                 guint8 ch = tvb_get_guint8(tvb, offset);
1935
1936                 if (ch < 0x80)
1937                         wmem_strbuf_append_c(str, ch);
1938                 else
1939                         wmem_strbuf_append_unichar(str, table[ch-0x80]);
1940                 offset++;
1941                 length--;
1942         }
1943
1944         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
1945         return (guint8 *) wmem_strbuf_get_str(str);
1946 }
1947
1948 /*
1949  * Given a tvbuff, and offset, and a length, treat the string of bytes
1950  * referred to by them as a UCS-2 encoded string containing characters
1951  * from the Basic Multilingual Plane (plane 0) of Unicode, return a
1952  * pointer to a UTF-8 string.
1953  *
1954  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
1955  *
1956  * Specify length in bytes.
1957  *
1958  * XXX - should map lead and trail surrogate values to REPLACEMENT
1959  * CHARACTERs (0xFFFD)?
1960  * XXX - if there are an odd number of bytes, should put a
1961  * REPLACEMENT CHARACTER at the end.
1962  */
1963 static wmem_strbuf_t *
1964 tvb_extract_ucs_2_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
1965 {
1966         gunichar2      uchar;
1967         gint           i;       /* Byte counter for tvbuff */
1968         wmem_strbuf_t *strbuf;
1969
1970         strbuf = wmem_strbuf_new(scope, NULL);
1971
1972         for(i = 0; i + 1 < length; i += 2) {
1973                 if (encoding == ENC_BIG_ENDIAN)
1974                         uchar = tvb_get_ntohs(tvb, offset + i);
1975                 else
1976                         uchar = tvb_get_letohs(tvb, offset + i);
1977
1978                 wmem_strbuf_append_unichar(strbuf, uchar);
1979         }
1980
1981         /*
1982          * XXX - if i < length, this means we were handed an odd
1983          * number of bytes, so we're not a valid UCS-2 string.
1984          */
1985         return strbuf;
1986 }
1987
1988 static gchar *
1989 tvb_get_ucs_2_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
1990 {
1991         wmem_strbuf_t *strbuf;
1992
1993         tvb_ensure_bytes_exist(tvb, offset, length);
1994         strbuf = tvb_extract_ucs_2_string(scope, tvb, offset, length, encoding);
1995         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
1996         return (gchar*)wmem_strbuf_get_str(strbuf);
1997 }
1998
1999 /*
2000  * Given a tvbuff, and offset, and a length, treat the string of bytes
2001  * referred to by them as a UTF-16 encoded string, return a pointer to
2002  * a UTF-8 string.
2003  *
2004  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
2005  *
2006  * Specify length in bytes.
2007  *
2008  * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
2009  * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
2010  * XXX - if there are an odd number of bytes, should put a
2011  * REPLACEMENT CHARACTER at the end.
2012  */
2013 static wmem_strbuf_t *
2014 tvb_extract_utf_16_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint size, const guint encoding)
2015 {
2016         wmem_strbuf_t *strbuf;
2017         gunichar2      uchar2, lead_surrogate;
2018         gunichar       uchar;
2019         gint           i;       /* Byte counter for tvbuff */
2020
2021         strbuf = wmem_strbuf_new(scope, NULL);
2022
2023         for(i = 0; i + 1 < size; i += 2) {
2024                 if (encoding == ENC_BIG_ENDIAN)
2025                         uchar2 = tvb_get_ntohs(tvb, offset + i);
2026                 else
2027                         uchar2 = tvb_get_letohs(tvb, offset + i);
2028
2029                 if (IS_LEAD_SURROGATE(uchar2)) {
2030                         /*
2031                          * Lead surrogate.  Must be followed by
2032                          * a trail surrogate.
2033                          */
2034                         i += 2;
2035                         if (i + 1 >= size) {
2036                                 /*
2037                                  * Oops, string ends with a lead surrogate.
2038                                  * Ignore this for now.
2039                                  * XXX - insert "substitute" character?
2040                                  * Report the error in some other
2041                                  * fashion?
2042                                  */
2043                                 break;
2044                         }
2045                         lead_surrogate = uchar2;
2046                         if (encoding == ENC_BIG_ENDIAN)
2047                                 uchar2 = tvb_get_ntohs(tvb, offset + i);
2048                         else
2049                                 uchar2 = tvb_get_letohs(tvb, offset + i);
2050                         if (IS_TRAIL_SURROGATE(uchar2)) {
2051                                 /* Trail surrogate. */
2052                                 uchar = SURROGATE_VALUE(lead_surrogate, uchar2);
2053                                 wmem_strbuf_append_unichar(strbuf, uchar);
2054                         } else {
2055                                 /*
2056                                  * Not a trail surrogate.
2057                                  * Ignore the entire pair.
2058                                  * XXX - insert "substitute" character?
2059                                  * Report the error in some other
2060                                  * fashion?
2061                                  */
2062                                  ;
2063                         }
2064                 } else {
2065                         if (IS_TRAIL_SURROGATE(uchar2)) {
2066                                 /*
2067                                  * Trail surrogate without a preceding
2068                                  * lead surrogate.  Ignore it.
2069                                  * XXX - insert "substitute" character?
2070                                  * Report the error in some other
2071                                  * fashion?
2072                                  */
2073                                 ;
2074                         } else {
2075                                 /*
2076                                  * Non-surrogate; just append it.
2077                                  */
2078                                 wmem_strbuf_append_unichar(strbuf, uchar2);
2079                         }
2080                 }
2081         }
2082
2083         /*
2084          * XXX - if i < length, this means we were handed an odd
2085          * number of bytes, so we're not a valid UTF-16 string.
2086          */
2087         return strbuf;
2088 }
2089
2090 static gchar *
2091 tvb_get_utf_16_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2092 {
2093         wmem_strbuf_t *strbuf;
2094
2095         tvb_ensure_bytes_exist(tvb, offset, length);
2096         strbuf = tvb_extract_utf_16_string(scope, tvb, offset, length, encoding);
2097         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
2098         return (gchar*)wmem_strbuf_get_str(strbuf);
2099 }
2100
2101 /*
2102  * Given a tvbuff, and offset, and a length, treat the string of bytes
2103  * referred to by them as a UCS-4 encoded string, return a pointer to
2104  * a UTF-8 string.
2105  *
2106  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN
2107  *
2108  * Specify length in bytes
2109  *
2110  * XXX - should map lead and trail surrogate values to a "substitute"
2111  * UTF-8 character?
2112  * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
2113  * XXX - if the number of bytes isn't a multiple of 4, should put a
2114  * REPLACEMENT CHARACTER at the end.
2115  */
2116 static wmem_strbuf_t *
2117 tvb_extract_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2118 {
2119         gunichar       uchar;
2120         gint           i;       /* Byte counter for tvbuff */
2121         wmem_strbuf_t *strbuf;
2122
2123         strbuf = wmem_strbuf_new(scope, NULL);
2124
2125         for(i = 0; i + 3 < length; i += 4) {
2126                 if (encoding == ENC_BIG_ENDIAN)
2127                         uchar = tvb_get_ntohl(tvb, offset + i);
2128                 else
2129                         uchar = tvb_get_letohl(tvb, offset + i);
2130
2131                 wmem_strbuf_append_unichar(strbuf, uchar);
2132         }
2133
2134         /*
2135          * XXX - if i < length, this means we were handed a number
2136          * of bytes that's not a multiple of 4, so we're not a valid
2137          * UCS-4 string.
2138          */
2139         return strbuf;
2140 }
2141
2142 static gchar *
2143 tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2144 {
2145         wmem_strbuf_t *strbuf;
2146
2147         tvb_ensure_bytes_exist(tvb, offset, length);
2148         strbuf = tvb_extract_ucs_4_string(scope, tvb, offset, length, encoding);
2149         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
2150         return (gchar*)wmem_strbuf_get_str(strbuf);
2151 }
2152
/*
 * FROM GNOKII
 * gsm-encoding.c
 * gsm-sms.c
 */

/*
 * Mask selecting the low `bits` bits of a value.
 *
 * NOTE: the expansion refers to a variable named `bits` that is not a
 * macro argument, so this can only be used where such a variable is in
 * scope (as in tvb_get_ts_23_038_7bits_string()).
 */
#define GN_BYTE_MASK ((1 << bits) - 1)

/* Number of entries in the default alphabet table (7-bit code points). */
#define GN_CHAR_ALPHABET_SIZE 128

/* Code point that introduces a character from the extension table. */
#define GN_CHAR_ESCAPE 0x1b

/*
 * Default alphabet: maps each 7-bit code point (the array index) to a
 * Unicode code point.
 *
 * The slot at GN_CHAR_ESCAPE (0x1b) holds 0xa0; on the path through
 * handle_ts_23_038_char() the escape is recognized before this table is
 * consulted, so that slot is not used there.
 */
static const gunichar gsm_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {

    /* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet */

    '@',   0xa3,  '$',   0xa5,  0xe8,  0xe9,  0xf9,  0xec,
    0xf2,  0xc7,  '\n',  0xd8,  0xf8,  '\r',  0xc5,  0xe5,
    0x394, '_',   0x3a6, 0x393, 0x39b, 0x3a9, 0x3a0, 0x3a8,
    0x3a3, 0x398, 0x39e, 0xa0,  0xc6,  0xe6,  0xdf,  0xc9,
    ' ',   '!',   '\"',  '#',   0xa4,  '%',   '&',   '\'',
    '(',   ')',   '*',   '+',   ',',   '-',   '.',   '/',
    '0',   '1',   '2',   '3',   '4',   '5',   '6',   '7',
    '8',   '9',   ':',   ';',   '<',   '=',   '>',   '?',
    0xa1,  'A',   'B',   'C',   'D',   'E',   'F',   'G',
    'H',   'I',   'J',   'K',   'L',   'M',   'N',   'O',
    'P',   'Q',   'R',   'S',   'T',   'U',   'V',   'W',
    'X',   'Y',   'Z',   0xc4,  0xd6,  0xd1,  0xdc,  0xa7,
    0xbf,  'a',   'b',   'c',   'd',   'e',   'f',   'g',
    'h',   'i',   'j',   'k',   'l',   'm',   'n',   'o',
    'p',   'q',   'r',   's',   't',   'u',   'v',   'w',
    'x',   'y',   'z',   0xe4,  0xf6,  0xf1,  0xfc,  0xe0
};
2185
2186 static gboolean
2187 char_is_escape(unsigned char value)
2188 {
2189     return (value == GN_CHAR_ESCAPE);
2190 }
2191
2192 static gunichar
2193 char_def_alphabet_ext_decode(unsigned char value)
2194 {
2195     switch (value)
2196     {
2197     case 0x0a: return 0x0c; /* form feed */
2198     case 0x14: return '^';
2199     case 0x28: return '{';
2200     case 0x29: return '}';
2201     case 0x2f: return '\\';
2202     case 0x3c: return '[';
2203     case 0x3d: return '~';
2204     case 0x3e: return ']';
2205     case 0x40: return '|';
2206     case 0x65: return 0x20ac; /* euro */
2207     default: return UNREPL; /* invalid character */
2208     }
2209 }
2210
2211 static gunichar
2212 char_def_alphabet_decode(unsigned char value)
2213 {
2214     if (value < GN_CHAR_ALPHABET_SIZE)
2215     {
2216         return gsm_default_alphabet[value];
2217     }
2218     else
2219     {
2220         return UNREPL;
2221     }
2222 }
2223
2224 static gboolean
2225 handle_ts_23_038_char(wmem_strbuf_t *strbuf, guint8 code_point,
2226     gboolean saw_escape)
2227 {
2228         gunichar       uchar;
2229
2230         if (char_is_escape(code_point)) {
2231                 /*
2232                  * XXX - if saw_escape is TRUE here, then this is
2233                  * the case where we escape to "another extension table",
2234                  * but TS 128 038 V11.0 doesn't specify such an extension
2235                  * table.
2236                  */
2237                 saw_escape = TRUE;
2238         } else {
2239                 /*
2240                  * Have we seen an escape?
2241                  */
2242                 if (saw_escape) {
2243                         saw_escape = FALSE;
2244                         uchar = char_def_alphabet_ext_decode(code_point);
2245                 } else {
2246                         uchar = char_def_alphabet_decode(code_point);
2247                 }
2248                 wmem_strbuf_append_unichar(strbuf, uchar);
2249         }
2250         return saw_escape;
2251 }
2252
/*
 * Unpack a run of packed 7-bit code points from a tvbuff and return
 * them as a UTF-8 string.
 *
 *   scope       - allocator handed to wmem_strbuf_new() for the result
 *   tvb         - buffer to read from
 *   bit_offset  - position of the packed data, in bits; the byte
 *                 offset is bit_offset >> 3
 *   no_of_chars - number of code points to unpack
 *
 * Each code point is passed to handle_ts_23_038_char(), which deals
 * with the escape mechanism and the alphabet lookup.  Note that an
 * escape counts toward no_of_chars even though it appends nothing by
 * itself.
 */
gchar *
tvb_get_ts_23_038_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
        const gint bit_offset, gint no_of_chars)
{
        wmem_strbuf_t *strbuf;
        gint           char_count;                  /* number of code points handled so far */
        gint           in_offset = bit_offset >> 3; /* byte offset of the next input byte */
        guint8         in_byte, out_byte, rest = 0x00;
        gboolean       saw_escape = FALSE;
        int            bits;

        /*
         * `bits` is how many low-order bits of the next input byte go
         * into the code point being assembled; it is also the variable
         * that GN_BYTE_MASK picks up.  A byte-aligned start takes all
         * 7 bits of the first code point from the first byte.
         */
        bits = bit_offset & 0x07;
        if (!bits) {
                bits = 7;
        }

        /* Make sure the packed data is all there before allocating anything. */
        tvb_ensure_bytes_exist(tvb, in_offset, ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3);
        strbuf = wmem_strbuf_new(scope, NULL);
        for(char_count = 0; char_count < no_of_chars;) {
                /* Get the next byte from the string. */
                in_byte = tvb_get_guint8(tvb, in_offset);
                in_offset++;

                /*
                 * Combine the bits we've accumulated with bits from
                 * that byte to make a 7-bit code point: the low `bits`
                 * bits of this byte are placed above the leftover bits
                 * carried in `rest`.
                 */
                out_byte = ((in_byte & GN_BYTE_MASK) << (7 - bits)) | rest;

                /*
                 * The high-order bits of this byte that were not used
                 * above are carried over as the low-order bits of the
                 * next code point.
                 */
                rest = in_byte >> bits;

                /*
                 * If we didn't start on a byte boundary, the first
                 * byte only supplies leftover bits (now in `rest`) and
                 * out_byte is not yet a complete code point, so nothing
                 * is emitted for it.
                 */
                if (char_count || (bits == 7)) {
                        saw_escape = handle_ts_23_038_char(strbuf, out_byte,
                            saw_escape);
                        char_count++;
                }

                /*
                 * After reading 7 octets we have read 7 full characters
                 * but we have 7 bits as well. This is the next character.
                 * Emit it (if still wanted) and start a new group of 7
                 * octets; otherwise take one bit fewer from the next
                 * byte.
                 */
                if ((bits == 1) && (char_count < no_of_chars)) {
                        saw_escape = handle_ts_23_038_char(strbuf, rest,
                            saw_escape);
                        char_count++;
                        bits = 7;
                        rest = 0x00;
                } else
                        bits--;
        }

        if (saw_escape) {
                /*
                 * Escape not followed by anything.
                 *
                 * XXX - for now, show the escape as a REPLACEMENT
                 * CHARACTER.
                 */
                wmem_strbuf_append_unichar(strbuf, UNREPL);
        }

        /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
        return (gchar*)wmem_strbuf_get_str(strbuf);
}
2325
2326 /*
2327  * Given a tvbuff, an offset, a length, and an encoding, allocate a
2328  * buffer big enough to hold a non-null-terminated string of that length
2329  * at that offset, plus a trailing '\0', copy into the buffer the
2330  * string as converted from the appropriate encoding to UTF-8, and
2331  * return a pointer to the string.
2332  */
2333 guint8 *
2334 tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
2335                              const gint length, const guint encoding)
2336 {
2337         const guint8 *ptr;
2338         guint8       *strbuf;
2339
2340         switch (encoding & ENC_CHARENCODING_MASK) {
2341
2342         case ENC_ASCII:
2343         default:
2344                 /*
2345                  * For now, we treat bogus values as meaning
2346                  * "ASCII" rather than reporting an error,
2347                  * for the benefit of old dissectors written
2348                  * when the last argument to proto_tree_add_item()
2349                  * was a gboolean for the byte order, not an
2350                  * encoding value, and passed non-zero values
2351                  * other than TRUE to mean "little-endian".
2352                  */
2353                 strbuf = tvb_get_ascii_string(scope, tvb, offset, length);
2354                 break;
2355
2356         case ENC_UTF_8:
2357                 /*
2358                  * XXX - should map lead and trail surrogate value code
2359                  * points to a "substitute" UTF-8 character?
2360                  * XXX - should map code points > 10FFFF to REPLACEMENT
2361                  * CHARACTERs.
2362                  */
2363                 strbuf = tvb_get_utf_8_string(scope, tvb, offset, length);
2364                 break;
2365
2366         case ENC_UTF_16:
2367                 strbuf = tvb_get_utf_16_string(scope, tvb, offset, length,
2368                     encoding & ENC_LITTLE_ENDIAN);
2369                 break;
2370
2371         case ENC_UCS_2:
2372                 strbuf = tvb_get_ucs_2_string(scope, tvb, offset, length,
2373                     encoding & ENC_LITTLE_ENDIAN);
2374                 break;
2375
2376         case ENC_UCS_4:
2377                 strbuf = tvb_get_ucs_4_string(scope, tvb, offset, length,
2378                     encoding & ENC_LITTLE_ENDIAN);
2379                 break;
2380
2381         case ENC_ISO_8859_1:
2382                 /*
2383                  * ISO 8859-1 printable code point values are equal
2384                  * to the equivalent Unicode code point value, so
2385                  * no translation table is needed.
2386                  */
2387                 strbuf = tvb_get_string_8859_1(scope, tvb, offset, length);
2388                 break;
2389
2390         case ENC_ISO_8859_2:
2391                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_2);
2392                 break;
2393
2394         case ENC_ISO_8859_3:
2395                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_3);
2396                 break;
2397
2398         case ENC_ISO_8859_4:
2399                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_4);
2400                 break;
2401
2402         case ENC_ISO_8859_5:
2403                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_5);
2404                 break;
2405
2406         case ENC_ISO_8859_6:
2407                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_6);
2408                 break;
2409
2410         case ENC_ISO_8859_7:
2411                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_7);
2412                 break;
2413
2414         case ENC_ISO_8859_8:
2415                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_8);
2416                 break;
2417
2418         case ENC_ISO_8859_9:
2419                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_9);
2420                 break;
2421
2422         case ENC_ISO_8859_10:
2423                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_10);
2424                 break;
2425
2426         case ENC_ISO_8859_11:
2427                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_11);
2428                 break;
2429
2430         case ENC_ISO_8859_13:
2431                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_13);
2432                 break;
2433
2434         case ENC_ISO_8859_14:
2435                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_14);
2436                 break;
2437
2438         case ENC_ISO_8859_15:
2439                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_15);
2440                 break;
2441
2442         case ENC_ISO_8859_16:
2443                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_16);
2444                 break;
2445
2446         case ENC_WINDOWS_1250:
2447                 strbuf = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250);
2448                 break;
2449
2450         case ENC_3GPP_TS_23_038_7BITS:
2451                 {
2452                         gint bit_offset = offset << 3;
2453                         gint no_of_chars = (length << 3) / 7;
2454                         strbuf = tvb_get_ts_23_038_7bits_string(scope, tvb, bit_offset, no_of_chars);
2455                 }
2456                 break;
2457
2458         case ENC_EBCDIC:
2459                 /*
2460                  * XXX - do the copy and conversion in one pass.
2461                  *
2462                  * XXX - multiple "dialects" of EBCDIC?
2463                  */
2464                 tvb_ensure_bytes_exist(tvb, offset, length); /* make sure length = -1 fails */
2465                 strbuf = (guint8 *)wmem_alloc(scope, length + 1);
2466                 if (length != 0) {
2467                         ptr = ensure_contiguous(tvb, offset, length);
2468                         memcpy(strbuf, ptr, length);
2469                         EBCDIC_to_ASCII(strbuf, length);
2470                 }
2471                 strbuf[length] = '\0';
2472                 break;
2473         }
2474         return strbuf;
2475 }
2476
/*
 * Get an ASCII string; this should not be used in new code.
 *
 * Forwards all of its arguments unchanged to tvb_get_ascii_string()
 * and returns that function's result.
 */
guint8 *
tvb_get_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
                             const gint length)
{
        return tvb_get_ascii_string(scope, tvb, offset, length);
}
2486
2487 /*
2488  * These routines are like the above routines, except that they handle
2489  * null-terminated strings.  They find the length of that string (and
2490  * throw an exception if the tvbuff ends before we find the null), and
2491  * also return through a pointer the length of the string, in bytes,
2492  * including the terminating null (the terminating null being 2 bytes
2493  * for UCS-2 and UTF-16, 4 bytes for UCS-4, and 1 byte for other
2494  * encodings).
2495  */
2496 static guint8 *
2497 tvb_get_ascii_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
2498 {
2499         guint   size, i;
2500         wmem_strbuf_t *str;
2501
2502         str = wmem_strbuf_new(scope, "");
2503
2504         size   = tvb_strsize(tvb, offset);
2505         for (i = 0; i < size; i++) {
2506                 guint8 ch = tvb_get_guint8(tvb, offset);
2507
2508                 if (ch < 0x80)
2509                         wmem_strbuf_append_c(str, ch);
2510                 else
2511                         wmem_strbuf_append_unichar(str, UNREPL);
2512                 offset++;
2513         }
2514         /* No need to append '\0' - we processed the NUL in the loop above. */
2515
2516         if (lengthp)
2517                 *lengthp = size;
2518
2519         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str
2520            (like when strbuf is no longer needed) */
2521         return (guint8 *) wmem_strbuf_get_str(str);
2522 }
2523
2524 static guint8 *
2525 tvb_get_utf_8_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp)
2526 {
2527         guint   size;
2528         guint8 *strptr;
2529
2530         size   = tvb_strsize(tvb, offset);
2531         strptr = (guint8 *)wmem_alloc(scope, size);
2532         tvb_memcpy(tvb, strptr, offset, size);
2533         if (lengthp)
2534                 *lengthp = size;
2535         return strptr;
2536 }
2537
2538 static guint8 *
2539 tvb_get_stringz_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
2540 {
2541         guint size;
2542
2543         /* XXX, convertion between signed/unsigned integer */
2544         *lengthp = size = tvb_strsize(tvb, offset);
2545
2546         return tvb_get_string_8859_1(scope, tvb, offset, size);
2547 }
2548
2549 static guint8 *
2550 tvb_get_stringz_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80])
2551 {
2552         guint size;
2553
2554         /* XXX, convertion between signed/unsigned integer */
2555         *lengthp = size = tvb_strsize(tvb, offset);
2556
2557         return tvb_get_string_unichar2(scope, tvb, offset, size, table);
2558 }
2559
2560 /*
2561  * Given a tvbuff and an offset, with the offset assumed to refer to
2562  * a null-terminated string, find the length of that string (and throw
2563  * an exception if the tvbuff ends before we find the null), ensure that
2564  * the TVB is flat, and return a pointer to the string (in the TVB).
2565  * Also return the length of the string (including the terminating null)
2566  * through a pointer.
2567  *
2568  * As long as we aren't using composite TVBs, this saves the cycles used
2569  * (often unnecessariliy) in allocating a buffer and copying the string into
2570  * it.  (If we do start using composite TVBs, we may want to replace this
2571  * function with the _ephemeral versoin.)
2572  */
2573 const guint8 *
2574 tvb_get_const_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp)
2575 {
2576         guint         size;
2577         const guint8 *strptr;
2578
2579         size   = tvb_strsize(tvb, offset);
2580         strptr = ensure_contiguous(tvb, offset, size);
2581         if (lengthp)
2582                 *lengthp = size;
2583         return strptr;
2584 }
2585
2586 static gchar *
2587 tvb_get_ucs_2_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
2588 {
2589         gint           size;    /* Number of bytes in string */
2590         wmem_strbuf_t *strbuf;
2591
2592         size = tvb_unicode_strsize(tvb, offset);
2593
2594         strbuf = tvb_extract_ucs_2_string(scope, tvb, offset, size, encoding);
2595
2596         if (lengthp)
2597                 *lengthp = size;
2598
2599         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
2600         return (gchar*)wmem_strbuf_get_str(strbuf);
2601 }
2602
2603 static gchar *
2604 tvb_get_utf_16_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
2605 {
2606         gint           size;
2607         wmem_strbuf_t *strbuf;
2608
2609         size = tvb_unicode_strsize(tvb, offset);
2610
2611         strbuf = tvb_extract_utf_16_string(scope, tvb, offset, size, encoding);
2612
2613         if (lengthp)
2614                 *lengthp = size;
2615
2616         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
2617         return (gchar*)wmem_strbuf_get_str(strbuf);
2618 }
2619
2620 static gchar *
2621 tvb_get_ucs_4_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
2622 {
2623         gunichar       uchar;
2624         gint           size;    /* Number of bytes in string */
2625         wmem_strbuf_t *strbuf;
2626
2627         DISSECTOR_ASSERT(tvb && tvb->initialized);
2628
2629         size = 0;
2630         do {
2631                 /* Endianness doesn't matter when looking for null */
2632                 uchar = tvb_get_ntohl(tvb, offset + size);
2633                 size += 4;
2634         } while(uchar != 0);
2635
2636         strbuf = tvb_extract_ucs_4_string(scope, tvb, offset, size, encoding);
2637
2638         if (lengthp)
2639                 *lengthp = size; /* Number of *bytes* processed */
2640
2641         /* XXX, discarding constiness, should we have some function which "take-over" strbuf->str (like when strbuf is no longer needed) */
2642         return (gchar*)wmem_strbuf_get_str(strbuf);
2643 }
2644
2645 guint8 *
2646 tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
2647 {
2648         guint   size;
2649         guint8 *strptr;
2650
2651         switch (encoding & ENC_CHARENCODING_MASK) {
2652
2653         case ENC_ASCII:
2654         default:
2655                 /*
2656                  * For now, we treat bogus values as meaning
2657                  * "ASCII" rather than reporting an error,
2658                  * for the benefit of old dissectors written
2659                  * when the last argument to proto_tree_add_item()
2660                  * was a gboolean for the byte order, not an
2661                  * encoding value, and passed non-zero values
2662                  * other than TRUE to mean "little-endian".
2663                  */
2664                 strptr = tvb_get_ascii_stringz(scope, tvb, offset, lengthp);
2665                 break;
2666
2667         case ENC_UTF_8:
2668                 /*
2669                  * XXX - should map all invalid UTF-8 sequences
2670                  * to a "substitute" UTF-8 character.
2671                  * XXX - should map code points > 10FFFF to REPLACEMENT
2672                  * CHARACTERs.
2673                  */
2674                 strptr = tvb_get_utf_8_stringz(scope, tvb, offset, lengthp);
2675                 break;
2676
2677         case ENC_UTF_16:
2678                 strptr = tvb_get_utf_16_stringz(scope, tvb, offset, lengthp,
2679                     encoding & ENC_LITTLE_ENDIAN);
2680                 break;
2681
2682         case ENC_UCS_2:
2683                 strptr = tvb_get_ucs_2_stringz(scope, tvb, offset, lengthp,
2684                     encoding & ENC_LITTLE_ENDIAN);
2685                 break;
2686
2687         case ENC_UCS_4:
2688                 strptr = tvb_get_ucs_4_stringz(scope, tvb, offset, lengthp,
2689                     encoding & ENC_LITTLE_ENDIAN);
2690                 break;
2691
2692         case ENC_ISO_8859_1:
2693                 /*
2694                  * ISO 8859-1 printable code point values are equal
2695                  * to the equivalent Unicode code point value, so
2696                  * no translation table is needed.
2697                  */
2698                 strptr = tvb_get_stringz_8859_1(scope, tvb, offset, lengthp);
2699                 break;
2700
2701         case ENC_ISO_8859_2:
2702                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_2);
2703                 break;
2704
2705         case ENC_ISO_8859_3:
2706                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_3);
2707                 break;
2708
2709         case ENC_ISO_8859_4:
2710                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_4);
2711                 break;
2712
2713         case ENC_ISO_8859_5:
2714                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_5);
2715                 break;
2716
2717         case ENC_ISO_8859_6:
2718                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_6);
2719                 break;
2720
2721         case ENC_ISO_8859_7:
2722                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_7);
2723                 break;
2724
2725         case ENC_ISO_8859_8:
2726                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_8);
2727                 break;
2728
2729         case ENC_ISO_8859_9:
2730                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_9);
2731                 break;
2732
2733         case ENC_ISO_8859_10:
2734                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_10);
2735                 break;
2736
2737         case ENC_ISO_8859_11:
2738                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_11);
2739                 break;
2740
2741         case ENC_ISO_8859_13:
2742                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_13);
2743                 break;
2744
2745         case ENC_ISO_8859_14:
2746                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_14);
2747                 break;
2748
2749         case ENC_ISO_8859_15:
2750                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_15);
2751                 break;
2752
2753         case ENC_ISO_8859_16:
2754                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_16);
2755                 break;
2756
2757         case ENC_WINDOWS_1250:
2758                 strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250);
2759                 break;
2760
2761         case ENC_3GPP_TS_23_038_7BITS:
2762                 REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
2763                 break;
2764
2765         case ENC_EBCDIC:
2766                 /*
2767                  * XXX - do the copy and conversion in one pass.
2768                  *
2769                  * XXX - multiple "dialects" of EBCDIC?
2770                  */
2771                 size = tvb_strsize(tvb, offset);
2772                 strptr = (guint8 *)wmem_alloc(scope, size);
2773                 tvb_memcpy(tvb, strptr, offset, size);
2774                 EBCDIC_to_ASCII(strptr, size);
2775                 if (lengthp)
2776                         *lengthp = size;
2777                 break;
2778         }
2779
2780         return strptr;
2781 }
2782
2783 /*
2784  * Get an ASCII string; this should not be used in new code.
2785  */
2786 guint8 *
2787 tvb_get_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
2788                              gint *lengthp)
2789 {
2790         return tvb_get_ascii_stringz(scope, tvb, offset, lengthp);
2791 }
2792
2793 /* Looks for a stringz (NUL-terminated string) in tvbuff and copies
2794  * no more than bufsize number of bytes, including terminating NUL, to buffer.
2795  * Returns length of string (not including terminating NUL), or -1 if the string was
2796  * truncated in the buffer due to not having reached the terminating NUL.
2797  * In this way, it acts like g_snprintf().
2798  *
2799  * bufsize MUST be greater than 0.
2800  *
2801  * When processing a packet where the remaining number of bytes is less
2802  * than bufsize, an exception is not thrown if the end of the packet
2803  * is reached before the NUL is found. If no NUL is found before reaching
2804  * the end of the short packet, -1 is still returned, and the string
2805  * is truncated with a NUL, albeit not at buffer[bufsize - 1], but
2806  * at the correct spot, terminating the string.
2807  *
2808  * *bytes_copied will contain the number of bytes actually copied,
2809  * including the terminating-NUL.
2810  */
2811 static gint
2812 _tvb_get_nstringz(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8* buffer, gint *bytes_copied)
2813 {
2814         gint     stringlen;
2815         guint    abs_offset;
2816         gint     limit, len;
2817         gboolean decreased_max = FALSE;
2818
2819         /* Only read to end of tvbuff, w/o throwing exception. */
2820         check_offset_length(tvb, offset, -1, &abs_offset, &len);
2821
2822         /* There must at least be room for the terminating NUL. */
2823         DISSECTOR_ASSERT(bufsize != 0);
2824
2825         /* If there's no room for anything else, just return the NUL. */
2826         if (bufsize == 1) {
2827                 buffer[0] = 0;
2828                 *bytes_copied = 1;
2829                 return 0;
2830         }
2831
2832         /* check_offset_length() won't throw an exception if we're
2833          * looking at the byte immediately after the end of the tvbuff. */
2834         if (len == 0) {
2835                 THROW(ReportedBoundsError);
2836         }
2837
2838         /* This should not happen because check_offset_length() would
2839          * have already thrown an exception if 'offset' were out-of-bounds.
2840          */
2841         DISSECTOR_ASSERT(len != -1);
2842
2843         /*
2844          * If we've been passed a negative number, bufsize will
2845          * be huge.
2846          */
2847         DISSECTOR_ASSERT(bufsize <= G_MAXINT);
2848
2849         if ((guint)len < bufsize) {
2850                 limit = len;
2851                 decreased_max = TRUE;
2852         }
2853         else {
2854                 limit = bufsize;
2855         }
2856
2857         stringlen = tvb_strnlen(tvb, abs_offset, limit - 1);
2858         /* If NUL wasn't found, copy the data and return -1 */
2859         if (stringlen == -1) {
2860                 tvb_memcpy(tvb, buffer, abs_offset, limit);
2861                 if (decreased_max) {
2862                         buffer[limit] = 0;
2863                         /* Add 1 for the extra NUL that we set at buffer[limit],
2864                          * pretending that it was copied as part of the string. */
2865                         *bytes_copied = limit + 1;
2866                 }
2867                 else {
2868                         *bytes_copied = limit;
2869                 }
2870                 return -1;
2871         }
2872
2873         /* Copy the string to buffer */
2874         tvb_memcpy(tvb, buffer, abs_offset, stringlen + 1);
2875         *bytes_copied = stringlen + 1;
2876         return stringlen;
2877 }
2878
2879 /* Looks for a stringz (NUL-terminated string) in tvbuff and copies
2880  * no more than bufsize number of bytes, including terminating NUL, to buffer.
2881  * Returns length of string (not including terminating NUL), or -1 if the string was
2882  * truncated in the buffer due to not having reached the terminating NUL.
2883  * In this way, it acts like g_snprintf().
2884  *
2885  * When processing a packet where the remaining number of bytes is less
2886  * than bufsize, an exception is not thrown if the end of the packet
2887  * is reached before the NUL is found. If no NUL is found before reaching
2888  * the end of the short packet, -1 is still returned, and the string
2889  * is truncated with a NUL, albeit not at buffer[bufsize - 1], but
2890  * at the correct spot, terminating the string.
2891  */
2892 gint
2893 tvb_get_nstringz(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8* buffer)
2894 {
2895         gint bytes_copied;
2896
2897         DISSECTOR_ASSERT(tvb && tvb->initialized);
2898
2899         return _tvb_get_nstringz(tvb, offset, bufsize, buffer, &bytes_copied);
2900 }
2901
2902 /* Like tvb_get_nstringz(), but never returns -1. The string is guaranteed to
2903  * have a terminating NUL. If the string was truncated when copied into buffer,
2904  * a NUL is placed at the end of buffer to terminate it.
2905  */
2906 gint
2907 tvb_get_nstringz0(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8* buffer)
2908 {
2909         gint    len, bytes_copied;
2910
2911         DISSECTOR_ASSERT(tvb && tvb->initialized);
2912
2913         len = _tvb_get_nstringz(tvb, offset, bufsize, buffer, &bytes_copied);
2914
2915         if (len == -1) {
2916                 buffer[bufsize - 1] = 0;
2917                 return bytes_copied - 1;
2918         }
2919         else {
2920                 return len;
2921         }
2922 }
2923
2924 /*
2925  * Given a tvbuff, an offset into the tvbuff, and a length that starts
2926  * at that offset (which may be -1 for "all the way to the end of the
2927  * tvbuff"), find the end of the (putative) line that starts at the
2928  * specified offset in the tvbuff, going no further than the specified
2929  * length.
2930  *
2931  * Return the length of the line (not counting the line terminator at
2932  * the end), or, if we don't find a line terminator:
2933  *
2934  *      if "deseg" is true, return -1;
2935  *
2936  *      if "deseg" is false, return the amount of data remaining in
2937  *      the buffer.
2938  *
2939  * Set "*next_offset" to the offset of the character past the line
2940  * terminator, or past the end of the buffer if we don't find a line
2941  * terminator.  (It's not set if we return -1.)
2942  */
2943 gint
2944 tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset, const gboolean desegment)
2945 {
2946         gint   eob_offset;
2947         gint   eol_offset;
2948         int    linelen;
2949         guchar found_needle = 0;
2950
2951         if (len == -1)
2952                 len = tvb_length_remaining(tvb, offset);
2953         /*
2954          * XXX - what if "len" is still -1, meaning "offset is past the
2955          * end of the tvbuff"?
2956          */
2957         eob_offset = offset + len;
2958
2959         /*
2960          * Look either for a CR or an LF.
2961          */
2962         eol_offset = tvb_pbrk_guint8(tvb, offset, len, "\r\n", &found_needle);
2963         if (eol_offset == -1) {
2964                 /*
2965                  * No CR or LF - line is presumably continued in next packet.
2966                  */
2967                 if (desegment) {
2968                         /*
2969                          * Tell our caller we saw no EOL, so they can
2970                          * try to desegment and get the entire line
2971                          * into one tvbuff.
2972                          */
2973                         return -1;
2974                 } else {
2975                         /*
2976                          * Pretend the line runs to the end of the tvbuff.
2977                          */
2978                         linelen = eob_offset - offset;
2979                         if (next_offset)
2980                                 *next_offset = eob_offset;
2981                 }
2982         } else {
2983                 /*
2984                  * Find the number of bytes between the starting offset
2985                  * and the CR or LF.
2986                  */
2987                 linelen = eol_offset - offset;
2988
2989                 /*
2990                  * Is it a CR?
2991                  */
2992                 if (found_needle == '\r') {
2993                         /*
2994                          * Yes - is it followed by an LF?
2995                          */
2996                         if (eol_offset + 1 >= eob_offset) {
2997                                 /*
2998                                  * Dunno - the next byte isn't in this
2999                                  * tvbuff.
3000                                  */
3001                                 if (desegment) {
3002                                         /*
3003                                          * We'll return -1, although that
3004                                          * runs the risk that if the line
3005                                          * really *is* terminated with a CR,
3006                                          * we won't properly dissect this
3007                                          * tvbuff.
3008                                          *
3009                                          * It's probably more likely that
3010                                          * the line ends with CR-LF than
3011                                          * that it ends with CR by itself.
3012                                          */
3013                                         return -1;
3014                                 }
3015                         } else {
3016                                 /*
3017                                  * Well, we can at least look at the next
3018                                  * byte.
3019                                  */
3020                                 if (tvb_get_guint8(tvb, eol_offset + 1) == '\n') {
3021                                         /*
3022                                          * It's an LF; skip over the CR.
3023                                          */
3024                                         eol_offset++;
3025                                 }
3026                         }
3027                 }
3028
3029                 /*
3030                  * Return the offset of the character after the last
3031                  * character in the line, skipping over the last character
3032                  * in the line terminator.
3033                  */
3034                 if (next_offset)
3035                         *next_offset = eol_offset + 1;
3036         }
3037         return linelen;
3038 }
3039
3040 /*
3041  * Given a tvbuff, an offset into the tvbuff, and a length that starts
3042  * at that offset (which may be -1 for "all the way to the end of the
3043  * tvbuff"), find the end of the (putative) line that starts at the
3044  * specified offset in the tvbuff, going no further than the specified
3045  * length.
3046  *
3047  * However, treat quoted strings inside the buffer specially - don't
3048  * treat newlines in quoted strings as line terminators.
3049  *
3050  * Return the length of the line (not counting the line terminator at
3051  * the end), or the amount of data remaining in the buffer if we don't
3052  * find a line terminator.
3053  *
3054  * Set "*next_offset" to the offset of the character past the line
3055  * terminator, or past the end of the buffer if we don't find a line
3056  * terminator.
3057  */
3058 gint
3059 tvb_find_line_end_unquoted(tvbuff_t *tvb, const gint offset, int len, gint *next_offset)
3060 {
3061         gint     cur_offset, char_offset;
3062         gboolean is_quoted;
3063         guchar   c = 0;
3064         gint     eob_offset;
3065         int      linelen;
3066
3067         if (len == -1)
3068                 len = tvb_length_remaining(tvb, offset);
3069         /*
3070          * XXX - what if "len" is still -1, meaning "offset is past the
3071          * end of the tvbuff"?
3072          */
3073         eob_offset = offset + len;
3074
3075         cur_offset = offset;
3076         is_quoted  = FALSE;
3077         for (;;) {
3078                         /*
3079                  * Is this part of the string quoted?
3080                  */
3081                 if (is_quoted) {
3082                         /*
3083                          * Yes - look only for the terminating quote.
3084                          */
3085                         char_offset = tvb_find_guint8(tvb, cur_offset, len,
3086                                 '"');
3087                 } else {
3088                         /*
3089                          * Look either for a CR, an LF, or a '"'.
3090                          */
3091                         char_offset = tvb_pbrk_guint8(tvb, cur_offset, len, "\r\n\"", &c);
3092                 }
3093                 if (char_offset == -1) {
3094                         /*
3095                          * Not found - line is presumably continued in
3096                          * next packet.
3097                          * We pretend the line runs to the end of the tvbuff.
3098                          */
3099                         linelen = eob_offset - offset;
3100                         if (next_offset)
3101                                 *next_offset = eob_offset;
3102                         break;
3103                 }
3104
3105                 if (is_quoted) {
3106                         /*
3107                          * We're processing a quoted string.
3108                          * We only looked for ", so we know it's a ";
3109                          * as we're processing a quoted string, it's a
3110                          * closing quote.
3111                          */
3112                         is_quoted = FALSE;
3113                 } else {
3114                         /*
3115                          * OK, what is it?
3116                          */
3117                         if (c == '"') {
3118                                 /*
3119                                  * Un-quoted "; it begins a quoted
3120                                  * string.
3121                                  */
3122                                 is_quoted = TRUE;
3123                         } else {
3124                                 /*
3125                                  * It's a CR or LF; we've found a line
3126                                  * terminator.
3127                                  *
3128                                  * Find the number of bytes between the
3129                                  * starting offset and the CR or LF.
3130                                  */
3131                                 linelen = char_offset - offset;
3132
3133                                 /*
3134                                  * Is it a CR?
3135                                  */
3136                                 if (c == '\r') {
3137                                         /*
3138                                          * Yes; is it followed by an LF?
3139                                          */
3140                                         if (char_offset + 1 < eob_offset &&
3141                                                 tvb_get_guint8(tvb, char_offset + 1)
3142                                                   == '\n') {
3143                                                 /*
3144                                                  * Yes; skip over the CR.
3145                                                  */
3146                                                 char_offset++;
3147                                         }
3148                                 }
3149
3150                                 /*
3151                                  * Return the offset of the character after
3152                                  * the last character in the line, skipping
3153                                  * over the last character in the line
3154                                  * terminator, and quit.
3155                                  */
3156                                 if (next_offset)
3157                                         *next_offset = char_offset + 1;
3158                                 break;
3159                         }
3160                 }
3161
3162                 /*
3163                  * Step past the character we found.
3164                  */
3165                 cur_offset = char_offset + 1;
3166                 if (cur_offset >= eob_offset) {
3167                         /*
3168                          * The character we found was the last character
3169                          * in the tvbuff - line is presumably continued in
3170                          * next packet.
3171                          * We pretend the line runs to the end of the tvbuff.
3172                          */
3173                         linelen = eob_offset - offset;
3174                         if (next_offset)
3175                                 *next_offset = eob_offset;
3176                         break;
3177                 }
3178         }
3179         return linelen;
3180 }
3181
3182 /*
3183  * Copied from the mgcp dissector. (This function should be moved to /epan )
3184  * tvb_skip_wsp - Returns the position in tvb of the first non-whitespace
3185  *                                character following offset or offset + maxlength -1 whichever
3186  *                                is smaller.
3187  *
3188  * Parameters:
3189  * tvb - The tvbuff in which we are skipping whitespace.
3190  * offset - The offset in tvb from which we begin trying to skip whitespace.
3191  * maxlength - The maximum distance from offset that we may try to skip
3192  * whitespace.
3193  *
3194  * Returns: The position in tvb of the first non-whitespace
3195  *                      character following offset or offset + maxlength -1 whichever
3196  *                      is smaller.
3197  */
3198 gint
3199 tvb_skip_wsp(tvbuff_t *tvb, const gint offset, const gint maxlength)
3200 {
3201         gint   counter = offset;
3202         gint   end, tvb_len;
3203         guint8 tempchar;
3204
3205         /* Get the length remaining */
3206         tvb_len = tvb_length(tvb);
3207         end     = offset + maxlength;
3208         if (end >= tvb_len)
3209         {
3210                 end = tvb_len;
3211         }
3212
3213         /* Skip past spaces, tabs, CRs and LFs until run out or meet something else */
3214         for (counter = offset;
3215                  counter < end &&
3216                   ((tempchar = tvb_get_guint8(tvb,counter)) == ' ' ||
3217                   tempchar == '\t' || tempchar == '\r' || tempchar == '\n');
3218                  counter++);
3219
3220         return (counter);
3221 }
3222
3223 gint
3224 tvb_skip_wsp_return(tvbuff_t *tvb, const gint offset) {
3225         gint   counter = offset;
3226         guint8 tempchar;
3227
3228         for(counter = offset; counter > 0 &&
3229                 ((tempchar = tvb_get_guint8(tvb,counter)) == ' ' ||
3230                 tempchar == '\t' || tempchar == '\n' || tempchar == '\r'); counter--);
3231         counter++;
3232         return (counter);
3233 }
3234
3235 int
3236 tvb_skip_guint8(tvbuff_t *tvb, int offset, const int maxlength, const guint8 ch)
3237 {
3238         int end, tvb_len;
3239
3240         /* Get the length remaining */
3241         tvb_len = tvb_length(tvb);
3242         end     = offset + maxlength;
3243         if (end >= tvb_len)
3244                 end = tvb_len;
3245
3246         while (offset < end) {
3247                 guint8 tempch = tvb_get_guint8(tvb, offset);
3248
3249                 if (tempch != ch)
3250                         break;
3251                 offset++;
3252         }
3253
3254         return offset;
3255 }
3256
3257 /*
3258  * Format a bunch of data from a tvbuff as bytes, returning a pointer
3259  * to the string with the formatted data, with "punct" as a byte
3260  * separator.
3261  */
3262 gchar *
3263 tvb_bytes_to_ep_str_punct(tvbuff_t *tvb, const gint offset, const gint len, const gchar punct)
3264 {
3265         return bytes_to_ep_str_punct(ensure_contiguous(tvb, offset, len), len, punct);
3266 }
3267
3268
3269 /*
3270  * Given a tvbuff, an offset into the tvbuff, and a length that starts
3271  * at that offset (which may be -1 for "all the way to the end of the
3272  * tvbuff"), fetch BCD encoded digits from a tvbuff starting from either
3273  * the low or high half byte, formating the digits according to an input digit set,
3274  * if NUll a default digit set of 0-9 returning "?" for overdecadic digits will be used.
3275  * A pointer to the packet scope allocated string will be returned.
3276  * Note a tvbuff content of 0xf is considered a 'filler' and will end the conversion.
3277  */
3278 static dgt_set_t Dgt1_9_bcd = {
3279         {
3280                 /*  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e  f*/
3281                 '0','1','2','3','4','5','6','7','8','9','?','?','?','?','?','?'
3282         }
3283 };
3284 const gchar *
3285 tvb_bcd_dig_to_wmem_packet_str(tvbuff_t *tvb, const gint offset, const gint len, dgt_set_t *dgt, gboolean skip_first)
3286 {
3287         int     length;
3288         guint8  octet;
3289         int     i        = 0;
3290         char   *digit_str;
3291         gint    t_offset = offset;
3292
3293         if (!dgt)
3294                 dgt = &Dgt1_9_bcd;
3295
3296         if (len == -1) {
3297                 length = tvb_length(tvb);
3298                 if (length < offset) {
3299                         return "";
3300                 }
3301         } else {
3302                 length = offset + len;
3303         }
3304         digit_str = (char *)wmem_alloc(wmem_packet_scope(), (length - offset)*2+1);
3305
3306         while (t_offset < length) {
3307
3308                 octet = tvb_get_guint8(tvb,t_offset);
3309                 if (!skip_first) {
3310                         digit_str[i] = dgt->out[octet & 0x0f];
3311                         i++;
3312                 }
3313                 skip_first = FALSE;
3314
3315                 /*
3316                  * unpack second value in byte
3317                  */
3318                 octet = octet >> 4;
3319
3320                 if (octet == 0x0f)      /* odd number bytes - hit filler */
3321                         break;
3322
3323                 digit_str[i] = dgt->out[octet & 0x0f];
3324                 i++;
3325                 t_offset++;
3326
3327         }
3328         digit_str[i]= '\0';
3329         return digit_str;
3330
3331 }
3332
3333 /*
3334  * Format a bunch of data from a tvbuff as bytes, returning a pointer
3335  * to the string with the formatted data.
3336  */
3337 gchar *
3338 tvb_bytes_to_ep_str(tvbuff_t *tvb, const gint offset, const gint len)
3339 {
3340         return bytes_to_ep_str(ensure_contiguous(tvb, offset, len), len);
3341 }
3342
3343 /* Find a needle tvbuff within a haystack tvbuff. */
3344 gint
3345 tvb_find_tvb(tvbuff_t *haystack_tvb, tvbuff_t *needle_tvb, const gint haystack_offset)
3346 {
3347         guint         haystack_abs_offset, haystack_abs_length;
3348         const guint8 *haystack_data;
3349         const guint8 *needle_data;
3350         const guint   needle_len = needle_tvb->length;
3351         const guint8 *location;
3352
3353         DISSECTOR_ASSERT(haystack_tvb && haystack_tvb->initialized);
3354
3355         if (haystack_tvb->length < 1 || needle_tvb->length < 1) {
3356                 return -1;
3357         }
3358
3359         /* Get pointers to the tvbuffs' data. */
3360         haystack_data = ensure_contiguous(haystack_tvb, 0, -1);
3361         needle_data   = ensure_contiguous(needle_tvb, 0, -1);
3362
3363         check_offset_length(haystack_tvb, haystack_offset, -1,
3364                         &haystack_abs_offset, &haystack_abs_length);
3365
3366         location = epan_memmem(haystack_data + haystack_abs_offset, haystack_abs_length,
3367                         needle_data, needle_len);
3368
3369         if (location) {
3370                 return (gint) (location - haystack_data);
3371         }
3372
3373         return -1;
3374 }
3375
3376 gint
3377 tvb_raw_offset(tvbuff_t *tvb)
3378 {
3379         return ((tvb->raw_offset==-1) ? (tvb->raw_offset = tvb_offset_from_real_beginning(tvb)) : tvb->raw_offset);
3380 }
3381
3382 void
3383 tvb_set_fragment(tvbuff_t *tvb)
3384 {
3385         tvb->flags |= TVBUFF_FRAGMENT;
3386 }
3387
3388 struct tvbuff *
3389 tvb_get_ds_tvb(tvbuff_t *tvb)
3390 {
3391         return(tvb->ds_tvb);
3392 }
3393
3394 /*
3395  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
3396  *
3397  * Local variables:
3398  * c-basic-offset: 8
3399  * tab-width: 8
3400  * indent-tabs-mode: t
3401  * End:
3402  *
3403  * vi: set shiftwidth=8 tabstop=8 noexpandtab:
3404  * :indentSize=8:tabSize=8:noTabs=false:
3405  */