iommu/vt-d: Fix wrong use of pasid config
drivers/iommu/intel/perfmon.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support Intel IOMMU PerfMon
 * Copyright(c) 2023 Intel Corporation.
 */
#define pr_fmt(fmt)     "DMAR: " fmt
#define dev_fmt(fmt)    pr_fmt(fmt)

#include <linux/dmar.h>
#include "iommu.h"
#include "perfmon.h"

PMU_FORMAT_ATTR(event,          "config:0-27");         /* ES: Events Select */
PMU_FORMAT_ATTR(event_group,    "config:28-31");        /* EGI: Event Group Index */

static struct attribute *iommu_pmu_format_attrs[] = {
        &format_attr_event_group.attr,
        &format_attr_event.attr,
        NULL
};

static struct attribute_group iommu_pmu_format_attr_group = {
        .name = "format",
        .attrs = iommu_pmu_format_attrs,
};

/* The available events are added in attr_update later */
static struct attribute *attrs_empty[] = {
        NULL
};

static struct attribute_group iommu_pmu_events_attr_group = {
        .name = "events",
        .attrs = attrs_empty,
};

static cpumask_t iommu_pmu_cpu_mask;

static ssize_t
cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *iommu_pmu_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL
};

static struct attribute_group iommu_pmu_cpumask_attr_group = {
        .attrs = iommu_pmu_cpumask_attrs,
};

static const struct attribute_group *iommu_pmu_attr_groups[] = {
        &iommu_pmu_format_attr_group,
        &iommu_pmu_events_attr_group,
        &iommu_pmu_cpumask_attr_group,
        NULL
};

static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
{
        /*
         * The perf_event creates its own dev for each PMU.
         * See pmu_dev_alloc()
         */
        return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
}

#define IOMMU_PMU_ATTR(_name, _format, _filter)                         \
        PMU_FORMAT_ATTR(_name, _format);                                \
                                                                        \
static struct attribute *_name##_attr[] = {                             \
        &format_attr_##_name.attr,                                      \
        NULL                                                            \
};                                                                      \
                                                                        \
static umode_t                                                          \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \
{                                                                       \
        struct device *dev = kobj_to_dev(kobj);                         \
        struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);            \
                                                                        \
        if (!iommu_pmu)                                                 \
                return 0;                                               \
        return (iommu_pmu->filter & _filter) ? attr->mode : 0;          \
}                                                                       \
                                                                        \
static struct attribute_group _name = {                                 \
        .name           = "format",                                     \
        .attrs          = _name##_attr,                                 \
        .is_visible     = _name##_is_visible,                           \
};

IOMMU_PMU_ATTR(filter_requester_id_en,  "config1:0",            IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en,        "config1:1",            IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en,         "config1:2",            IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en,           "config1:3",            IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en,    "config1:4",            IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id,     "config1:16-31",        IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain,           "config1:32-47",        IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid,            "config2:0-21",         IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats,              "config2:24-28",        IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table,       "config2:32-36",        IOMMU_PMU_FILTER_PAGE_TABLE);

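/*
 * The filter enable bits live in config1[4:0]; the filter values themselves
 * are split between config1 (requester id, domain) and config2 (pasid, ats,
 * page table), matching the format attributes above. The helpers below pull
 * the individual fields out of the user-supplied config words.
 *
 * A filtered count from user space might look like (instance name follows
 * the DMAR unit, e.g. dmar0; exact syntax depends on the perf tool):
 *
 *   perf stat -e dmar0/iommu_requests,filter_pasid_en=0x1,filter_pasid=0x42/
 */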
#define iommu_pmu_en_requester_id(e)            ((e) & 0x1)
#define iommu_pmu_en_domain(e)                  (((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)                   (((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)                     (((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)              (((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)      (((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)            (((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)             ((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)               (((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)        (((filter) >> 32) & 0x1f)

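/*
 * Program one filter for counter @_idx: if the unit supports the filter and
 * the corresponding enable bit is set in the event config, write the filter
 * value together with IOMMU_PMU_FILTER_EN into the counter's filter register.
 * The register slot is derived from the filter's bit position,
 * ffs(_filter) - 1, within the per-counter configuration space.
 */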
#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)           \
{                                                                               \
        if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) {  \
                dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +  \
                            IOMMU_PMU_CFG_SIZE +                                \
                            (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,  \
                            iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\
        }                                                                       \
}

#define iommu_pmu_clear_filter(_filter, _idx)                                   \
{                                                                               \
        if (iommu_pmu->filter & _filter) {                                      \
                dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +  \
                            IOMMU_PMU_CFG_SIZE +                                \
                            (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,  \
                            0);                                                 \
        }                                                                       \
}

/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)                    \
        PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)               \
                                                                                \
static struct attribute *_name##_attr[] = {                                     \
        &event_attr_##_name.attr.attr,                                          \
        NULL                                                                    \
};                                                                              \
                                                                                \
static umode_t                                                                  \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)         \
{                                                                               \
        struct device *dev = kobj_to_dev(kobj);                                 \
        struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);                    \
                                                                                \
        if (!iommu_pmu)                                                         \
                return 0;                                                       \
        return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;            \
}                                                                               \
                                                                                \
static struct attribute_group _name = {                                         \
        .name           = "events",                                             \
        .attrs          = _name##_attr,                                         \
        .is_visible     = _name##_is_visible,                                   \
};

IOMMU_PMU_EVENT_ATTR(iommu_clocks,              "event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests,            "event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy,              "event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked,               "event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds,                "event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked,         "event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted,             "event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup,         "event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit,            "event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup,        "event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit,           "event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup,         "event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit,            "event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup,         "event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit,            "event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup,        "event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit,           "event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup,              "event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit,                 "event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup,           "event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit,              "event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup,          "event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted,   "event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted,      "event_group=0x4,event=0x004", 0x4, 0x004)

static const struct attribute_group *iommu_pmu_attr_update[] = {
        &filter_requester_id_en,
        &filter_domain_en,
        &filter_pasid_en,
        &filter_ats_en,
        &filter_page_table_en,
        &filter_requester_id,
        &filter_domain,
        &filter_pasid,
        &filter_ats,
        &filter_page_table,
        &iommu_clocks,
        &iommu_requests,
        &pw_occupancy,
        &ats_blocked,
        &iommu_mrds,
        &iommu_mem_blocked,
        &pg_req_posted,
        &ctxt_cache_lookup,
        &ctxt_cache_hit,
        &pasid_cache_lookup,
        &pasid_cache_hit,
        &ss_nonleaf_lookup,
        &ss_nonleaf_hit,
        &fs_nonleaf_lookup,
        &fs_nonleaf_hit,
        &hpt_nonleaf_lookup,
        &hpt_nonleaf_hit,
        &iotlb_lookup,
        &iotlb_hit,
        &hpt_leaf_lookup,
        &hpt_leaf_hit,
        &int_cache_lookup,
        &int_cache_hit_nonposted,
        &int_cache_hit_posted,
        NULL
};

static inline void __iomem *
iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
{
        return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
}

static inline void __iomem *
iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
{
        return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
}

static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct iommu_pmu, pmu);
}

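/*
 * Convert the user-visible event encoding (event select + event group index)
 * into the value written to the counter configuration register, with
 * interrupt on overflow enabled.
 */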
static inline u64 iommu_event_config(struct perf_event *event)
{
        u64 config = event->attr.config;

        return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
               (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
               IOMMU_EVENT_CFG_INT;
}

static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
                                      struct perf_event *event)
{
        return event->pmu == &iommu_pmu->pmu;
}

static int iommu_pmu_validate_event(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        u32 event_group = iommu_event_group(event->attr.config);

        if (event_group >= iommu_pmu->num_eg)
                return -EINVAL;

        return 0;
}

static int iommu_pmu_validate_group(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct perf_event *sibling;
        int nr = 0;

        /*
         * All events in a group must be scheduled simultaneously.
         * Check whether there are enough counters for all the events.
         */
        for_each_sibling_event(sibling, event->group_leader) {
                if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
                    sibling->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (++nr > iommu_pmu->num_cntr)
                        return -EINVAL;
        }

        return 0;
}

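/*
 * The IOMMU PMU only supports system-wide counting: sampling events and
 * per-task (cpu < 0) events are rejected, and the requested event group
 * must be one the unit actually enumerates.
 */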
static int iommu_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* sampling not supported */
        if (event->attr.sample_period)
                return -EINVAL;

        if (event->cpu < 0)
                return -EINVAL;

        if (iommu_pmu_validate_event(event))
                return -EINVAL;

        hwc->config = iommu_event_config(event);

        return iommu_pmu_validate_group(event);
}

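/*
 * Read the free-running counter and fold the difference since the last read
 * into event->count. The prev_count/xchg retry loop keeps the update safe
 * against a concurrent update, e.g. from the overflow handler.
 */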
static void iommu_pmu_event_update(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct hw_perf_event *hwc = &event->hw;
        u64 prev_count, new_count, delta;
        int shift = 64 - iommu_pmu->cntr_width;

again:
        prev_count = local64_read(&hwc->prev_count);
        new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
        if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
                goto again;

        /*
         * The counter width is enumerated and may be less than 64 bits.
         * Shift both values up to the top of a u64 so the subtraction
         * wraps correctly, then shift the delta back down.
         */
        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}

static void iommu_pmu_start(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct intel_iommu *iommu = iommu_pmu->iommu;
        struct hw_perf_event *hwc = &event->hw;
        u64 count;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* Always reprogram the period */
        count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
        local64_set((&hwc->prev_count), count);

        /*
         * Any error from the enhanced command is ignored here.
         * - The perf_event core has no way to report a runtime HW error
         *   from this path; only the IOMMU PMU can hit one, and it isn't
         *   worth changing the generic interfaces for this special case.
         * - It is a HW corner case which is very unlikely to happen, and
         *   there is nothing SW can do about it anyway.
         * - In the worst case the user sees <not counted> in the perf
         *   output, which is a hint by itself.
         */
        ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);

        perf_event_update_userpage(event);
}

static void iommu_pmu_stop(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct intel_iommu *iommu = iommu_pmu->iommu;
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);

                iommu_pmu_event_update(event);

                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static inline int
iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
                                  int idx, struct perf_event *event)
{
        u32 event_group = iommu_event_group(event->attr.config);
        u32 select = iommu_event_select(event->attr.config);

        if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
                return -EINVAL;

        return 0;
}

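/*
 * Claim a free counter that is capable of counting the event, remember the
 * event in the counter's slot, then program the event configuration and any
 * requested filters.
 */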
static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
                                  struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;

        /*
         * Counters that only support a limited set of events are usually
         * at the end. Try them first so the fully capable counters stay
         * available for other events.
         */
        for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
                if (test_and_set_bit(idx, iommu_pmu->used_mask))
                        continue;
                /* Check per-counter event capabilities */
                if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
                        break;
                clear_bit(idx, iommu_pmu->used_mask);
        }
        if (idx < 0)
                return -EINVAL;

        iommu_pmu->event_list[idx] = event;
        hwc->idx = idx;

        /* config events */
        dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);

        iommu_pmu_set_filter(requester_id, event->attr.config1,
                             IOMMU_PMU_FILTER_REQUESTER_ID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(domain, event->attr.config1,
                             IOMMU_PMU_FILTER_DOMAIN, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(pasid, event->attr.config2,
                             IOMMU_PMU_FILTER_PASID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(ats, event->attr.config2,
                             IOMMU_PMU_FILTER_ATS, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(page_table, event->attr.config2,
                             IOMMU_PMU_FILTER_PAGE_TABLE, idx,
                             event->attr.config1);

        return 0;
}

static int iommu_pmu_add(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        ret = iommu_pmu_assign_event(iommu_pmu, event);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                iommu_pmu_start(event, 0);

        return 0;
}

static void iommu_pmu_del(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        int idx = event->hw.idx;

        iommu_pmu_stop(event, PERF_EF_UPDATE);

        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);

        iommu_pmu->event_list[idx] = NULL;
        event->hw.idx = -1;
        clear_bit(idx, iommu_pmu->used_mask);

        perf_event_update_userpage(event);
}

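/*
 * The perf core brackets event scheduling with pmu_disable()/pmu_enable().
 * For the IOMMU PMU these map to the enhanced commands FREEZE and UNFREEZE,
 * so the counters are not running while they are being reprogrammed.
 */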
static void iommu_pmu_enable(struct pmu *pmu)
{
        struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
        struct intel_iommu *iommu = iommu_pmu->iommu;

        ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
}

static void iommu_pmu_disable(struct pmu *pmu)
{
        struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
        struct intel_iommu *iommu = iommu_pmu->iommu;

        ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
}

static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
{
        struct perf_event *event;
        u64 status;
        int i;

        /*
         * Two counters may overflow almost at the same time. Keep
         * re-reading the overflow register until everything pending has
         * been handled.
         */
        while ((status = dmar_readq(iommu_pmu->overflow))) {
                for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
                        /*
                         * Find the assigned event of the counter.
                         * Accumulate the value into the event->count.
                         */
                        event = iommu_pmu->event_list[i];
                        if (!event) {
                                pr_warn_once("Cannot find the assigned event for counter %d\n", i);
                                continue;
                        }
                        iommu_pmu_event_update(event);
                }

                dmar_writeq(iommu_pmu->overflow, status);
        }
}

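/*
 * Overflow interrupt: check the perfmon interrupt status, process all
 * overflowed counters, then write 1 back to clear the status bit.
 */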
static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;

        if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
                return IRQ_NONE;

        iommu_pmu_counter_overflow(iommu->pmu);

        /* Clear the status bit */
        dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);

        return IRQ_HANDLED;
}

static int __iommu_pmu_register(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        iommu_pmu->pmu.name             = iommu->name;
        iommu_pmu->pmu.task_ctx_nr      = perf_invalid_context;
        iommu_pmu->pmu.event_init       = iommu_pmu_event_init;
        iommu_pmu->pmu.pmu_enable       = iommu_pmu_enable;
        iommu_pmu->pmu.pmu_disable      = iommu_pmu_disable;
        iommu_pmu->pmu.add              = iommu_pmu_add;
        iommu_pmu->pmu.del              = iommu_pmu_del;
        iommu_pmu->pmu.start            = iommu_pmu_start;
        iommu_pmu->pmu.stop             = iommu_pmu_stop;
        iommu_pmu->pmu.read             = iommu_pmu_event_update;
        iommu_pmu->pmu.attr_groups      = iommu_pmu_attr_groups;
        iommu_pmu->pmu.attr_update      = iommu_pmu_attr_update;
        iommu_pmu->pmu.capabilities     = PERF_PMU_CAP_NO_EXCLUDE;
        iommu_pmu->pmu.module           = THIS_MODULE;

        return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
}

static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
        u32 off = dmar_readl(iommu->reg + offset);

        return iommu->reg + off;
}

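/*
 * Enumerate the perfmon capability of an IOMMU: number/width/stride of the
 * counters, the supported filters, and the per-group and per-counter event
 * capabilities. On success the iommu_pmu structure is allocated and hooked
 * up to the IOMMU; registration with the perf core happens later in
 * iommu_pmu_register().
 */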
int alloc_iommu_pmu(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu;
        int i, j, ret;
        u64 perfcap;
        u32 cap;

        if (!ecap_pms(iommu->ecap))
                return 0;

        /* The IOMMU PMU requires the ECMD support as well */
        if (!cap_ecmds(iommu->cap))
                return -ENODEV;

        perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
        /* The performance monitoring is not supported. */
        if (!perfcap)
                return -ENODEV;

        /* Sanity check for the number of the counters and event groups */
        if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
                return -ENODEV;

        /* The interrupt on overflow is required */
        if (!pcap_interrupt(perfcap))
                return -ENODEV;

        /* Check required Enhanced Command Capability */
        if (!ecmd_has_pmu_essential(iommu))
                return -ENODEV;

        iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
        if (!iommu_pmu)
                return -ENOMEM;

        iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
        if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
                pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
                             iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
                iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
        }

        iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
        iommu_pmu->filter = pcap_filters_mask(perfcap);
        iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
        iommu_pmu->num_eg = pcap_num_event_group(perfcap);

        iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
        if (!iommu_pmu->evcap) {
                ret = -ENOMEM;
                goto free_pmu;
        }

        /* Parse event group capabilities */
        for (i = 0; i < iommu_pmu->num_eg; i++) {
                u64 pcap;

                pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
                                  i * IOMMU_PMU_CAP_REGS_STEP);
                iommu_pmu->evcap[i] = pecap_es(pcap);
        }

        iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
        if (!iommu_pmu->cntr_evcap) {
                ret = -ENOMEM;
                goto free_pmu_evcap;
        }
        for (i = 0; i < iommu_pmu->num_cntr; i++) {
                iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
                if (!iommu_pmu->cntr_evcap[i]) {
                        ret = -ENOMEM;
                        goto free_pmu_cntr_evcap;
                }
                /*
                 * Default to the global capabilities; they are adjusted
                 * according to the per-counter capabilities below.
                 */
                for (j = 0; j < iommu_pmu->num_eg; j++)
                        iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
        }

        iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
        iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
        iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);

        /*
         * Check per-counter capabilities. All counters should have the
         * same capabilities on Interrupt on Overflow Support and Counter
         * Width.
         */
        for (i = 0; i < iommu_pmu->num_cntr; i++) {
                cap = dmar_readl(iommu_pmu->cfg_reg +
                                 i * IOMMU_PMU_CFG_OFFSET +
                                 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
                if (!iommu_cntrcap_pcc(cap))
                        continue;

                /*
                 * Some counters may report different capabilities because
                 * of, e.g., a HW bug. Handle that corner case here by
                 * simply dropping those counters.
                 */
                if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
                    !iommu_cntrcap_ios(cap)) {
                        iommu_pmu->num_cntr = i;
                        pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
                                iommu_pmu->num_cntr);
                }

                /* Clear the pre-defined events group */
                for (j = 0; j < iommu_pmu->num_eg; j++)
                        iommu_pmu->cntr_evcap[i][j] = 0;

                /* Override with per-counter event capabilities */
                for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
                        cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
                                         IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
                                         (j * IOMMU_PMU_OFF_REGS_STEP));
                        iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
                        /*
                         * Some events may only be supported by a specific counter.
                         * Track them in the evcap as well.
                         */
                        iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
                }
        }

        iommu_pmu->iommu = iommu;
        iommu->pmu = iommu_pmu;

        return 0;

free_pmu_cntr_evcap:
        for (i = 0; i < iommu_pmu->num_cntr; i++)
                kfree(iommu_pmu->cntr_evcap[i]);
        kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
        kfree(iommu_pmu->evcap);
free_pmu:
        kfree(iommu_pmu);

        return ret;
}

void free_iommu_pmu(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        if (iommu_pmu->evcap) {
                int i;

                for (i = 0; i < iommu_pmu->num_cntr; i++)
                        kfree(iommu_pmu->cntr_evcap[i]);
                kfree(iommu_pmu->cntr_evcap);
        }
        kfree(iommu_pmu->evcap);
        kfree(iommu_pmu);
        iommu->pmu = NULL;
}

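/*
 * Allocate the dedicated perfmon interrupt for this IOMMU and install the
 * overflow handler as a threaded IRQ.
 */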
static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;
        int irq, ret;

        irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0)
                return -EINVAL;

        snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);

        iommu->perf_irq = irq;
        ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
                                   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
        if (ret) {
                dmar_free_hwirq(irq);
                iommu->perf_irq = 0;
                return ret;
        }
        return 0;
}

static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
{
        if (!iommu->perf_irq)
                return;

        free_irq(iommu->perf_irq, iommu);
        dmar_free_hwirq(iommu->perf_irq);
        iommu->perf_irq = 0;
}

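/*
 * CPU hotplug callbacks. All IOMMU PMUs are bound to a single active CPU
 * (exposed via the cpumask attribute): the first CPU to come online claims
 * the role, and events are migrated to another online CPU when the active
 * CPU goes offline.
 */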
static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);

        if (cpumask_empty(&iommu_pmu_cpu_mask))
                cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);

        if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
                iommu_pmu->cpu = cpu;

        return 0;
}

static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
        int target = cpumask_first(&iommu_pmu_cpu_mask);

        /*
         * iommu_pmu_cpu_mask has already been updated when this CPU was
         * taken offline for the first iommu_pmu. Just migrate the
         * remaining iommu_pmus to the new target.
         */
        if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
                perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
                iommu_pmu->cpu = target;
                return 0;
        }

        if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);

        if (target < nr_cpu_ids)
                cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
        else
                return 0;

        perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
        iommu_pmu->cpu = target;

        return 0;
}

static int nr_iommu_pmu;
static enum cpuhp_state iommu_cpuhp_slot;

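/*
 * The dynamic cpuhp state is shared by all IOMMU PMUs; nr_iommu_pmu
 * refcounts it so the state is set up with the first PMU and removed
 * with the last one.
 */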
static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
{
        int ret;

        if (!nr_iommu_pmu) {
                ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                              "driver/iommu/intel/perfmon:online",
                                              iommu_pmu_cpu_online,
                                              iommu_pmu_cpu_offline);
                if (ret < 0)
                        return ret;
                iommu_cpuhp_slot = ret;
        }

        ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
        if (ret) {
                if (!nr_iommu_pmu)
                        cpuhp_remove_multi_state(iommu_cpuhp_slot);
                return ret;
        }
        nr_iommu_pmu++;

        return 0;
}

static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
{
        cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);

        if (--nr_iommu_pmu)
                return;

        cpuhp_remove_multi_state(iommu_cpuhp_slot);
}

void iommu_pmu_register(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        if (__iommu_pmu_register(iommu))
                goto err;

        if (iommu_pmu_cpuhp_setup(iommu_pmu))
                goto unregister;

        /* Set interrupt for overflow */
        if (iommu_pmu_set_interrupt(iommu))
                goto cpuhp_free;

        return;

cpuhp_free:
        iommu_pmu_cpuhp_free(iommu_pmu);
unregister:
        perf_pmu_unregister(&iommu_pmu->pmu);
err:
        pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
        free_iommu_pmu(iommu);
}

void iommu_pmu_unregister(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        iommu_pmu_unset_interrupt(iommu);
        iommu_pmu_cpuhp_free(iommu_pmu);
        perf_pmu_unregister(&iommu_pmu->pmu);
}