ldb:attrib_handlers: reduce non-transitive behaviour in ldb_comparison_fold
authorDouglas Bagnall <douglas.bagnall@catalyst.net.nz>
Fri, 26 Apr 2024 03:58:44 +0000 (15:58 +1200)
committerAndrew Bartlett <abartlet@samba.org>
Tue, 7 May 2024 23:25:35 +0000 (23:25 +0000)
If two strings are invalid UTF-8, the string is first compared with
memcmp(), which compares as unsigned char.

If the strings are of different lengths and one is a substring of the
other, the memcmp() returns 0 and a second comparison is made which
assumes the next character in the shorter string is '\0' -- but this
comparison was done using SIGNED chars (on most systems). That leads
to non-transitive comparisons.

Consider the strings {"a\xff", "a", "ab\xff"} under that system.

   "a\xff"  < "a",      because (char)0xff == -1.

   "ab\xff" > "a",     because  'b' == 98.

   "ab\xff" < "a\xff", because memcmp("ab\xff", "a\xff", 2) avoiding the
                       signed char tiebreaker.

(Before c49c48afe09a1a78989628bbffd49dd3efc154dd, the final character
might br arbitrarily cast into another character -- in latin-1, for
example, the 0xff here would have been seen as 'ÿ', which would be
uppercased to 'Ÿ', which is U+0178, which would be truncated to
'\x78', a positive char.

On the other hand e.g. 0xfe, 'þ', would have mapped to 0xde, 'Þ',
remaining negative).

BUG: https://bugzilla.samba.org/show_bug.cgi?id=15625

Signed-off-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
lib/ldb/common/attrib_handlers.c

index 1e3c550e40403564f2d0806e617872f14cd0d1d4..2344dc0c8afc8f890e126bafcd8b977b61229c40 100644 (file)
@@ -394,17 +394,27 @@ utf8str:
        b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
 
        if (!b1 || !b2) {
-               /* One of the strings was not UTF8, so we have no
-                * options but to do a binary compare */
+               /*
+                * One of the strings was not UTF8, so we have no
+                * options but to do a binary compare.
+                */
                talloc_free(b1);
                talloc_free(b2);
                ret = memcmp(s1, s2, MIN(n1, n2));
                if (ret == 0) {
-                       if (n1 == n2) return 0;
+                       if (n1 == n2) {
+                               return 0;
+                       }
                        if (n1 > n2) {
-                               return (int)ldb_ascii_toupper(s1[n2]);
+                               if (s1[n2] == '\0') {
+                                       return 0;
+                               }
+                               return 1;
                        } else {
-                               return -(int)ldb_ascii_toupper(s2[n1]);
+                               if (s2[n1] == '\0') {
+                                       return 0;
+                               }
+                               return -1;
                        }
                }
                return ret;