script/attr_count_read

   1 #!/usr/bin/env python3
   2 #
   3 # Copyright (C) Catalyst IT Ltd. 2019
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 #
  18
  19 from __future__ import print_function
  20 import sys
  21 import argparse
  22 import struct
  23 import os
  24 from collections import OrderedDict, Counter
  25 from pprint import pprint
  26
  27 sys.path.insert(0, "bin/python")
  28 import tdb
  29
  30
  31 def unpack_uint(filename, casefold=True):
  32     db = tdb.Tdb(filename)
  33     d = {}
  34     for k in db:
  35         v = struct.unpack("I", db[k])[0]
  36         k2 = k.decode('utf-8')
  37         if casefold:
  38             k2 = k2.lower()
  39         if k2 in d: # because casefold
  40             d[k2] += v
  41         else:
  42             d[k2] = v
  43     return d
  44
  45
  46 def unpack_ssize_t_pair(filename, casefold):
  47     db = tdb.Tdb(filename)
  48     pairs = []
  49     for k in db:
  50         key = struct.unpack("nn", k)
  51         v = struct.unpack("I", db[k])[0]
  52         pairs.append((v, key))
  53
  54     pairs.sort(reverse=True)
  55     #print(pairs)
  56     return [(k, v) for (v, k) in pairs]
  57
  58
# Table of the counter databases this script knows how to read.
# Each entry is a 4-tuple:
#   (column/plot name,
#    tdb filename, joined onto the LDB_PRIVATE_DIR argument in main(),
#    function used to unpack the file (called as f(filename, casefold=...)),
#    human readable description of what is being counted)
DATABASES = [
    ('requested', "debug/attr_counts_requested.tdb", unpack_uint,
     "The attribute was specifically requested."),
    ('duplicates', "debug/attr_counts_duplicates.tdb", unpack_uint,
     "Requested more than once in the same request."),
    ('empty request', "debug/attr_counts_empty_req.tdb", unpack_uint,
     "No attributes were requested, but these were returned"),
    ('null request', "debug/attr_counts_null_req.tdb", unpack_uint,
     "The attribute list was NULL and these were returned."),
    ('found', "debug/attr_counts_found.tdb", unpack_uint,
     "The attribute was specifically requested and it was found."),
    ('not found', "debug/attr_counts_not_found.tdb", unpack_uint,
     "The attribute was specifically requested but was not found."),
    ('unwanted', "debug/attr_counts_unwanted.tdb", unpack_uint,
     "The attribute was not requested and it was found."),
    ('star match', "debug/attr_counts_star_match.tdb", unpack_uint,
     'The attribute was not specifically requested but "*" was.'),
    ('req vs found', "debug/attr_counts_req_vs_found.tdb", unpack_ssize_t_pair,
     "How many attributes were requested versus how many were returned."),
]
  79
  80
  81 def plot_pair_data(name, data, doc, lim=90):
  82     # Note we keep the matplotlib import internal to this function for
  83     # two reasons:
  84     # 1. Some people won't have matplotlib, but might want to run the
  85     #    script.
  86     # 2. The import takes hundreds of milliseconds, which is a
  87     #    nuisance if you don't wat graphs.
  88     #
  89     # This plot could be improved!
  90     import matplotlib.pylab as plt
  91     fig, ax = plt.subplots()
  92     if lim:
  93         data2 = []
  94         for p, c in data:
  95             if p[0] > lim or p[1] > lim:
  96                 print("not plotting %s: %s" % (p, c))
  97                 continue
  98             data2.append((p, c))
  99         skipped = len(data) - len(data2)
 100         if skipped:
 101             name += " (excluding %d out of range values)" % skipped
 102             data = data2
 103     xy, counts = zip(*data)
 104     x, y = zip(*xy)
 105     bins_x = max(x) + 4
 106     bins_y = max(y)
 107     ax.set_title(name)
 108     ax.scatter(x, y, c=counts)
 109     plt.show()
 110
 111
 112 def print_pair_data(name, data, doc):
 113     print(name)
 114     print(doc)
 115     t = "%14s | %14s | %14s"
 116     print(t % ("requested", "returned", "count"))
 117     print(t % (('-' * 14,) * 3))
 118
 119     for xy, count in data:
 120         x, y = xy
 121         if x == -2:
 122             x = 'NULL'
 123         elif x == -4:
 124             x = '*'
 125         print(t % (x, y, count))
 126
 127
 128 def print_counts(count_data):
 129     all_attrs = Counter()
 130     for c in count_data:
 131         all_attrs.update(c[1])
 132
 133     print("found %d attrs" % len(all_attrs))
 134     longest = max(len(x) for x in all_attrs)
 135
 136     #pprint(all_attrs)
 137     rows = OrderedDict()
 138     for a, _ in all_attrs.most_common():
 139         rows[a] = [a]
 140
 141     for col_name, counts, doc in count_data:
 142         for attr, row in rows.items():
 143             d = counts.get(attr, '')
 144             row.append(d)
 145
 146         print("%15s: %s" % (col_name, doc))
 147     print()
 148
 149     t = "%{}s".format(longest)
 150     for c in count_data:
 151         t += " | %{}s".format(max(len(c[0]), 7))
 152
 153     h = t % (("attribute",) + tuple(c[0] for c in count_data))
 154     print(h)
 155     print("-" * len(h))
 156
 157     for attr, row in rows.items():
 158         print(t % tuple(row))
 159         pass
 160
 161
 162 def main():
 163     parser = argparse.ArgumentParser()
 164     parser.add_argument('LDB_PRIVATE_DIR',
 165                         help="read attr counts in this directory")
 166     parser.add_argument('--plot', action="store_true",
 167                         help='attempt to draw graphs')
 168     parser.add_argument('--no-casefold', action="store_false",
 169                         default=True, dest="casefold",
 170                         help='See all the encountered case varients')
 171     args = parser.parse_args()
 172
 173     if not os.path.isdir(args.LDB_PRIVATE_DIR):
 174         parser.print_usage()
 175         sys.exit(1)
 176
 177     count_data = []
 178     pair_data = []
 179     for k, filename, unpacker, doc in DATABASES:
 180         filename = os.path.join(args.LDB_PRIVATE_DIR, filename)
 181         try:
 182             d = unpacker(filename, casefold=args.casefold)
 183         except (RuntimeError, IOError) as e:
 184             print("could not parse %s: %s" % (filename, e))
 185             continue
 186         if unpacker is unpack_ssize_t_pair:
 187             pair_data.append((k, d, doc))
 188         else:
 189             count_data.append((k, d, doc))
 190
 191     for k, v, doc in pair_data:
 192         if args.plot:
 193             plot_pair_data(k, v, doc)
 194         print_pair_data(k, v, doc)
 195
 196     print()
 197     print_counts(count_data)
 198
 199 main()