// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2024 Rivos Inc.
 */
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>

#include "copy-unaligned.h"
17 #define MISALIGNED_ACCESS_JIFFIES_LG2 1
18 #define MISALIGNED_BUFFER_SIZE 0x4000
19 #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
20 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
22 DEFINE_PER_CPU(long, misaligned_access_speed);
24 #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
25 static cpumask_t fast_misaligned_access;
26 static int check_unaligned_access(void *param)
28 int cpu = smp_processor_id();
29 u64 start_cycles, end_cycles;
33 unsigned long start_jiffies, now;
34 struct page *page = param;
37 long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
39 if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
42 /* Make an unaligned destination buffer. */
43 dst = (void *)((unsigned long)page_address(page) | 0x1);
44 /* Unalign src as well, but differently (off by 1 + 2 = 3). */
45 src = dst + (MISALIGNED_BUFFER_SIZE / 2);
49 __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
51 start_jiffies = jiffies;
52 while ((now = jiffies) == start_jiffies)
56 * For a fixed amount of time, repeatedly try the function, and take
57 * the best time in cycles as the measurement.
59 while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
60 start_cycles = get_cycles64();
61 /* Ensure the CSR read can't reorder WRT to the copy. */
63 __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
64 /* Ensure the copy ends before the end time is snapped. */
66 end_cycles = get_cycles64();
67 if ((end_cycles - start_cycles) < word_cycles)
68 word_cycles = end_cycles - start_cycles;
72 __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
73 start_jiffies = jiffies;
74 while ((now = jiffies) == start_jiffies)
77 while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
78 start_cycles = get_cycles64();
80 __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
82 end_cycles = get_cycles64();
83 if ((end_cycles - start_cycles) < byte_cycles)
84 byte_cycles = end_cycles - start_cycles;
89 /* Don't divide by zero. */
90 if (!word_cycles || !byte_cycles) {
91 pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
97 if (word_cycles < byte_cycles)
98 speed = RISCV_HWPROBE_MISALIGNED_FAST;
100 ratio = div_u64((byte_cycles * 100), word_cycles);
101 pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
105 (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
107 per_cpu(misaligned_access_speed, cpu) = speed;
110 * Set the value of fast_misaligned_access of a CPU. These operations
111 * are atomic to avoid race conditions.
113 if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
114 cpumask_set_cpu(cpu, &fast_misaligned_access);
116 cpumask_clear_cpu(cpu, &fast_misaligned_access);
/*
 * on_each_cpu() callback: run the speed probe on every CPU except the boot
 * CPU (CPU 0 stays behind to keep jiffies advancing). @param is the array
 * of per-CPU scratch pages, indexed by CPU number.
 */
static void check_unaligned_access_nonboot_cpu(void *param)
{
	unsigned int cpu = smp_processor_id();
	struct page **pages = param;

	if (smp_processor_id() != 0)
		check_unaligned_access(pages[cpu]);
}
/* Enabled when every online CPU has fast misaligned accesses. */
DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
132 static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
134 if (cpumask_weight(mask) == weight)
135 static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
137 static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
140 static void set_unaligned_access_static_branches_except_cpu(int cpu)
143 * Same as set_unaligned_access_static_branches, except excludes the
144 * given CPU from the result. When a CPU is hotplugged into an offline
145 * state, this function is called before the CPU is set to offline in
146 * the cpumask, and thus the CPU needs to be explicitly excluded.
149 cpumask_t fast_except_me;
151 cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
152 cpumask_clear_cpu(cpu, &fast_except_me);
154 modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
157 static void set_unaligned_access_static_branches(void)
160 * This will be called after check_unaligned_access_all_cpus so the
161 * result of unaligned access speed for all CPUs will be available.
163 * To avoid the number of online cpus changing between reading
164 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
165 * held before calling this function.
168 cpumask_t fast_and_online;
170 cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
172 modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
/*
 * Initcall wrapper: take cpus_read_lock() (required by the _cpuslocked
 * static-branch helpers) around the initial branch computation.
 */
static int lock_and_set_unaligned_access_static_branch(void)
{
	cpus_read_lock();
	set_unaligned_access_static_branches();
	cpus_read_unlock();

	return 0;
}

arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
186 static int riscv_online_cpu(unsigned int cpu)
188 static struct page *buf;
190 /* We are already set since the last check */
191 if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
194 buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
196 pr_warn("Allocation failure, not measuring misaligned performance\n");
200 check_unaligned_access(buf);
201 __free_pages(buf, MISALIGNED_BUFFER_ORDER);
204 set_unaligned_access_static_branches();
/*
 * CPU hotplug offline callback: recompute the static branch, explicitly
 * excluding @cpu which is not yet cleared from cpu_online_mask.
 */
static int riscv_offline_cpu(unsigned int cpu)
{
	set_unaligned_access_static_branches_except_cpu(cpu);

	return 0;
}
216 /* Measure unaligned access speed on all CPUs present at boot in parallel. */
217 static int check_unaligned_access_speed_all_cpus(void)
220 unsigned int cpu_count = num_possible_cpus();
221 struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
224 pr_warn("Allocation failure, not measuring misaligned performance\n");
229 * Allocate separate buffers for each CPU so there's no fighting over
232 for_each_cpu(cpu, cpu_online_mask) {
233 bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
235 pr_warn("Allocation failure, not measuring misaligned performance\n");
240 /* Check everybody except 0, who stays behind to tend jiffies. */
241 on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
244 smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
247 * Setup hotplug callbacks for any new CPUs that come online or go
250 cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
251 riscv_online_cpu, riscv_offline_cpu);
254 for_each_cpu(cpu, cpu_online_mask) {
256 __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
263 static int check_unaligned_access_all_cpus(void)
265 bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
267 if (!all_cpus_emulated)
268 return check_unaligned_access_speed_all_cpus();
272 #else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
273 static int check_unaligned_access_all_cpus(void)
275 check_unaligned_access_emulated_all_cpus();
281 arch_initcall(check_unaligned_access_all_cpus);