1 /*--------------------------------------------------------------------*/
2 /*--- Cachegrind: cache configuration. cg-arch.c ---*/
3 /*--------------------------------------------------------------------*/
/*
   This file is part of Cachegrind, a Valgrind tool for cache
   profiling programs.

   Copyright (C) 2011-2012 Nicholas Nethercote

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
30 #include "pub_tool_basics.h"
31 #include "pub_tool_libcassert.h"
32 #include "pub_tool_libcbase.h"
33 #include "pub_tool_libcprint.h"
34 #include "pub_tool_options.h"
35 #include "pub_tool_machine.h"
39 static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
40 Bool all_caches_clo_defined);
42 // Checks cache config is ok. Returns NULL if ok, or a pointer to an error
44 static const HChar* check_cache(cache_t* cache)
46 // Simulator requires set count to be a power of two.
47 if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
48 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
50 return "Cache set count is not a power of two.\n";
53 // Simulator requires line size to be a power of two.
54 if (-1 == VG_(log2)(cache->line_size)) {
55 return "Cache line size is not a power of two.\n";
58 // Then check line size >= 16 -- any smaller and a single instruction could
59 // straddle three cache lines, which breaks a simulation assertion and is
61 if (cache->line_size < MIN_LINE_SIZE) {
62 return "Cache line size is too small.\n";
65 /* Then check cache size > line size (causes seg faults if not). */
66 if (cache->size <= cache->line_size) {
67 return "Cache size <= line size.\n";
70 /* Then check assoc <= (size / line size) (seg faults otherwise). */
71 if (cache->assoc > (cache->size / cache->line_size)) {
72 return "Cache associativity > (size / line size).\n";
79 static void parse_cache_opt ( cache_t* cache, Char* opt, Char* optval )
83 const HChar* checkRes;
85 // Option argument looks like "65536,2,64". Extract them.
86 i1 = VG_(strtoll10)(optval, &endptr); if (*endptr != ',') goto bad;
87 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
88 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
90 // Check for overflow.
91 cache->size = (Int)i1;
92 cache->assoc = (Int)i2;
93 cache->line_size = (Int)i3;
94 if (cache->size != i1) goto overflow;
95 if (cache->assoc != i2) goto overflow;
96 if (cache->line_size != i3) goto overflow;
98 checkRes = check_cache(cache);
100 VG_(fmsg)("%s", checkRes);
107 VG_(fmsg_bad_option)(opt, "");
110 VG_(fmsg_bad_option)(opt,
111 "One of the cache parameters was too large and overflowed.\n");
115 Bool VG_(str_clo_cache_opt)(Char *arg,
122 if VG_STR_CLO(arg, "--I1", tmp_str) {
123 parse_cache_opt(clo_I1c, arg, tmp_str);
125 } else if VG_STR_CLO(arg, "--D1", tmp_str) {
126 parse_cache_opt(clo_D1c, arg, tmp_str);
128 } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
129 VG_STR_CLO(arg, "--LL", tmp_str)) {
130 parse_cache_opt(clo_LLc, arg, tmp_str);
136 static void umsg_cache_img(const HChar* desc, cache_t* c)
138 VG_(umsg)(" %s: %'d B, %d-way, %d B lines\n", desc,
139 c->size, c->assoc, c->line_size);
142 // Verifies if c is a valid cache.
143 // An invalid value causes an assert, unless clo_redefined is True.
144 static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
146 const HChar* checkRes;
148 checkRes = check_cache(c);
150 VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
152 umsg_cache_img(desc, c);
153 if (!clo_redefined) {
154 VG_(umsg)("As it probably should be supported, please report a bug!\n");
155 VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
162 /* If the LL cache config isn't something the simulation functions
163 can handle, try to adjust it so it is. Caches are characterised
164 by (total size T, line size L, associativity A), and then we
167 number of sets S = T / (L * A)
169 The required constraints are:
171 * L must be a power of 2, but it always is in practice, so
174 * A can be any value >= 1
176 * T can be any value, but ..
178 * S must be a power of 2.
180 That sometimes gives a problem. For example, some Core iX based
181 Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
182 sets. The "fix" in this case is to increase the associativity
183 by 50% to 24, which reduces the number of sets to 8192, making
184 it a power of 2. That's what the following code does (handing
185 the "3/2 rescaling case".) We might need to deal with other
186 ratios later (5/4 ?).
188 The "fix" is "justified" (cough, cough) by alleging that
189 increases of associativity above about 4 have very little effect
190 on the actual miss rate. It would be far more inaccurate to
191 fudge this by changing the size of the simulated cache --
192 changing the associativity is a much better option.
196 maybe_tweak_LLc(cache_t *LLc)
198 if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
199 Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
202 /* nSets is not a power of 2 */
203 && VG_(log2_64)( (ULong)nSets ) == -1
204 /* nSets is 50% above a power of 2 */
205 && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
206 /* associativity can be increased by exactly 50% */
207 && (LLc->assoc % 2) == 0
209 /* # sets is 1.5 * a power of two, but the associativity is
210 even, so we can increase that up by 50% and implicitly
211 scale the # sets down accordingly. */
212 Int new_assoc = LLc->assoc + (LLc->assoc / 2);
213 VG_(dmsg)("warning: pretending that LL cache has associativity"
214 " %d instead of actual %d\n", new_assoc, LLc->assoc);
215 LLc->assoc = new_assoc;
220 void VG_(post_clo_init_configure_caches)(cache_t* I1c,
227 #define DEFINED(L) (-1 != L->size || -1 != L->assoc || -1 != L->line_size)
229 // Count how many were defined on the command line.
230 Bool all_caches_clo_defined =
235 // Set the cache config (using auto-detection, if supported by the
237 configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
239 maybe_tweak_LLc( LLc );
241 // Check the default/auto-detected values.
242 // Allow the user to override invalid auto-detected caches
243 // with command line.
244 check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
245 check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
246 check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
248 // Then replace with any defined on the command line. (Already checked in
249 // VG(parse_clo_cache_opt)().)
250 if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
251 if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
252 if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
254 if (VG_(clo_verbosity) >= 2) {
255 VG_(umsg)("Cache configuration used:\n");
256 umsg_cache_img ("I1", I1c);
257 umsg_cache_img ("D1", D1c);
258 umsg_cache_img ("LL", LLc);
263 void VG_(print_cache_clo_opts)()
266 " --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
267 " --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
268 " --LL=<size>,<assoc>,<line_size> set LL cache manually\n"
273 // Traverse the cache info and return a cache of the given kind and level.
274 // Return NULL if no such cache exists.
275 static const VexCache *
276 locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
280 for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
281 if (c->level == level && c->kind == kind) {
285 return NULL; // not found
289 // Gives the auto-detected configuration of I1, D1 and LL caches. They get
290 // overridden by any cache configurations specified on the command line.
292 configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
293 Bool all_caches_clo_defined)
296 const VexCacheInfo *ci;
297 const VexCache *i1, *d1, *ll;
299 VG_(machine_get_VexArchInfo)(NULL, &vai);
300 ci = &vai.hwcache_info;
302 // Extract what we need
303 i1 = locate_cache(ci, INSN_CACHE, 1);
304 d1 = locate_cache(ci, DATA_CACHE, 1);
305 ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
307 if (ci->num_caches > 0 && ll == NULL) {
308 VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
311 if (ll && ci->num_levels > 2) {
312 VG_(dmsg)("warning: L%u cache found, using its data for the "
313 "LL simulation.\n", ci->num_levels);
316 if (i1 && d1 && ll) {
317 if (i1->is_trace_cache) {
318 /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
319 * conversion to byte size is a total guess; treat the 12K and 16K
320 * cases the same since the cache byte size must be a power of two for
321 * everything to work!. Also guessing 32 bytes for the line size...
323 UInt adjusted_size, guessed_line_size = 32;
325 if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
326 adjusted_size = 16 * 1024;
328 adjusted_size = 32 * 1024;
330 VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
332 VG_(dmsg)(" Simulating a %d KB I-cache with %d B lines\n",
333 adjusted_size / 1024, guessed_line_size);
335 *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
337 *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
339 *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
340 *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
345 // Cache information could not be queried; choose some default
346 // architecture specific default setting.
348 #if defined(VGA_ppc32)
350 // Default cache configuration
351 *I1c = (cache_t) { 65536, 2, 64 };
352 *D1c = (cache_t) { 65536, 2, 64 };
353 *LLc = (cache_t) { 262144, 8, 64 };
355 #elif defined(VGA_ppc64)
357 // Default cache configuration
358 *I1c = (cache_t) { 65536, 2, 64 };
359 *D1c = (cache_t) { 65536, 2, 64 };
360 *LLc = (cache_t) { 262144, 8, 64 };
362 #elif defined(VGA_arm)
364 // Set caches to default (for Cortex-A8 ?)
365 *I1c = (cache_t) { 16384, 4, 64 };
366 *D1c = (cache_t) { 16384, 4, 64 };
367 *LLc = (cache_t) { 262144, 8, 64 };
369 #elif defined(VGA_s390x)
371 // Here is the cache data from older machine models:
374 // z900 256k/256/4 256k/256/4 16MB
375 // z800 256k/256/4 256k/256/4 8MB
376 // z990 256k/256/4 256k/256/4 32MB
377 // z890 256k/256/4 256k/256/4 32MB
378 // z9 256k/256/4 256k/256/4 40MB
381 // (1) IBM System z9 109 Technical Introduction
382 // www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
383 // (2) The microarchitecture of the IBM eServer z900 processor
384 // IBM Journal of Research and Development
385 // Volume 46, Number 4/5, pp 381-395, July/September 2002
386 // (3) The IBM eServer z990 microprocessor
387 // IBM Journal of Research and Development
388 // Volume 48, Number 3/4, pp 295-309, May/July 2004
389 // (4) Charles Webb, IBM
391 // L2 data is unfortunately incomplete. Otherwise, we could support
392 // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).
394 // Default cache configuration is z10-EC (Source: ECAG insn)
395 *I1c = (cache_t) { 65536, 4, 256 };
396 *D1c = (cache_t) { 131072, 8, 256 };
397 *LLc = (cache_t) { 50331648, 24, 256 };
399 #elif defined(VGA_mips32)
401 // Set caches to default (for MIPS32-r2(mips 74kc))
402 *I1c = (cache_t) { 32768, 4, 32 };
403 *D1c = (cache_t) { 32768, 4, 32 };
404 *LLc = (cache_t) { 524288, 8, 32 };
406 #elif defined(VGA_x86) || defined(VGA_amd64)
408 *I1c = (cache_t) { 65536, 2, 64 };
409 *D1c = (cache_t) { 65536, 2, 64 };
410 *LLc = (cache_t) { 262144, 8, 64 };
414 #error "Unknown arch"
418 if (!all_caches_clo_defined) {
419 const HChar warning[] =
420 "Warning: Cannot auto-detect cache config, using defaults.\n"
421 " Run with -v to see.\n";
422 VG_(dmsg)("%s", warning);
426 /*--------------------------------------------------------------------*/
428 /*--------------------------------------------------------------------*/